From 4553e1e673f7da53c418790d7e6455aae507db3c Mon Sep 17 00:00:00 2001 From: celogeek <65178+celogeek@users.noreply.github.com> Date: Thu, 27 Apr 2023 12:02:47 +0200 Subject: [PATCH] decode image into loader --- go.mod | 2 +- go.sum | 4 +- .../imageprocessing/epub_image_processing.go | 19 +- .../epub_image_processing_loader.go | 309 ++++++++++++------ 4 files changed, 204 insertions(+), 130 deletions(-) diff --git a/go.mod b/go.mod index 7e6174f..eb3aa15 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/disintegration/gift v1.2.1 github.com/gofrs/uuid v4.4.0+incompatible github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 - github.com/nwaples/rardecode v1.1.3 + github.com/nwaples/rardecode/v2 v2.0.0-beta.2 github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0 github.com/schollz/progressbar/v3 v3.13.1 github.com/tcnksm/go-latest v0.0.0-20170313132115-e3007ae9052e diff --git a/go.sum b/go.sum index 54216f9..46eb668 100644 --- a/go.sum +++ b/go.sum @@ -23,8 +23,8 @@ github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWV github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= -github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= -github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= +github.com/nwaples/rardecode/v2 v2.0.0-beta.2 h1:e3mzJFJs4k83GXBEiTaQ5HgSc/kOK8q0rDaRO0MPaOk= +github.com/nwaples/rardecode/v2 v2.0.0-beta.2/go.mod h1:yntwv/HfMc/Hbvtq9I19D1n58te3h6KsqCf3GxyfBGY= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0 h1:fuFvfwIc+cpySYurvDNTs5LIHXP9Cj3reVRplj9Whv4= diff --git a/internal/epub/imageprocessing/epub_image_processing.go b/internal/epub/imageprocessing/epub_image_processing.go index 3dc3610..bedc149 100644 --- a/internal/epub/imageprocessing/epub_image_processing.go +++ b/internal/epub/imageprocessing/epub_image_processing.go @@ -6,9 +6,6 @@ package epubimageprocessing import ( "fmt" "image" - _ "image/jpeg" - _ "image/png" - "io" "os" "path/filepath" "strings" @@ -19,16 +16,8 @@ import ( epubimagefilters "github.com/celogeek/go-comic-converter/v2/internal/epub/imagefilters" epubprogress "github.com/celogeek/go-comic-converter/v2/internal/epub/progress" "github.com/disintegration/gift" - _ "golang.org/x/image/webp" ) -type tasks struct { - Id int - Reader io.Reader - Path string - Name string -} - // only accept jpg, png and webp as source file func isSupportedImage(path string) bool { switch strings.ToLower(filepath.Ext(path)) { @@ -104,13 +93,7 @@ func LoadImages(o *Options) ([]*epubimage.Image, error) { defer wg.Done() for img := range imageInput { - // Decode image - src, _, err := image.Decode(img.Reader) - if err != nil { - bar.Clear() - fmt.Fprintf(os.Stderr, "error processing image %s%s: %s\n", img.Path, img.Name, err) - os.Exit(1) - } + src := img.Image g := epubimagefilters.NewGift(src, o.Image) // Convert image diff --git a/internal/epub/imageprocessing/epub_image_processing_loader.go b/internal/epub/imageprocessing/epub_image_processing_loader.go index 6c9def8..24db36f 100644 --- a/internal/epub/imageprocessing/epub_image_processing_loader.go +++ b/internal/epub/imageprocessing/epub_image_processing_loader.go @@ -5,20 +5,32 @@ import ( "bytes" "errors" "fmt" + "image" + _ "image/jpeg" + _ "image/png" "io" "io/fs" "os" "path/filepath" "sort" + "sync" + + _ "golang.org/x/image/webp" epubimage "github.com/celogeek/go-comic-converter/v2/internal/epub/image" "github.com/celogeek/go-comic-converter/v2/internal/sortpath" - "github.com/nwaples/rardecode" + "github.com/nwaples/rardecode/v2" pdfimage "github.com/raff/pdfreader/image" "github.com/raff/pdfreader/pdfread" - "golang.org/x/image/tiff" ) +type tasks struct { + Id int + Image image.Image + Path string + Name string +} + type Options struct { Input string SortPathMode int @@ -30,28 +42,6 @@ type Options struct { var errNoImagesFound = errors.New("no images found") -// ensure copy image into a buffer -func (o *Options) mustExtractImage(imageOpener func() (io.ReadCloser, error)) *bytes.Buffer { - var b bytes.Buffer - if o.Dry { - return &b - } - - f, err := imageOpener() - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - defer f.Close() - - _, err = io.Copy(&b, f) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - return &b -} - // load a directory of images func (o *Options) loadDir() (totalImages int, output chan *tasks, err error) { images := make([]string, 0) @@ -81,25 +71,64 @@ func (o *Options) loadDir() (totalImages int, output chan *tasks, err error) { sort.Sort(sortpath.By(images, o.SortPathMode)) - output = make(chan *tasks, o.Workers*2) + // Queue all file with id + type job struct { + Id int + Path string + } + jobs := make(chan *job) go func() { - defer close(output) - for i, img := range images { - p, fn := filepath.Split(img) - if p == input { - p = "" - } else { - p = p[len(input)+1:] - } - output <- &tasks{ - Id: i, - Reader: o.mustExtractImage(func() (io.ReadCloser, error) { return os.Open(img) }), - Path: p, - Name: fn, - } + defer close(jobs) + for i, path := range images { + jobs <- &job{i, path} } }() + // read in parallel and get an image + output = make(chan *tasks, o.Workers) + wg := &sync.WaitGroup{} + wg.Add(o.Workers) + for j := 0; j < o.Workers; j++ { + go func() { + defer wg.Done() + for job := range jobs { + var img image.Image + if !o.Dry { + f, err := os.Open(job.Path) + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.Path, err) + os.Exit(1) + } + img, _, err = image.Decode(f) + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.Path, err) + os.Exit(1) + } + f.Close() + } + + p, fn := filepath.Split(job.Path) + if p == input { + p = "" + } else { + p = p[len(input)+1:] + } + output <- &tasks{ + Id: job.Id, + Image: img, + Path: p, + Name: fn, + } + } + }() + } + + // wait all done and close + go func() { + wg.Wait() + close(output) + }() + return } @@ -136,50 +165,76 @@ func (o *Options) loadCbz() (totalImages int, output chan *tasks, err error) { indexedNames[name] = i } - output = make(chan *tasks, o.Workers*2) + type job struct { + Id int + F *zip.File + } + jobs := make(chan *job) go func() { - defer close(output) - defer r.Close() + defer close(jobs) for _, img := range images { - p, fn := filepath.Split(filepath.Clean(img.Name)) - output <- &tasks{ - Id: indexedNames[img.Name], - Reader: o.mustExtractImage(img.Open), - Path: p, - Name: fn, - } + jobs <- &job{indexedNames[img.Name], img} } }() + + output = make(chan *tasks, o.Workers) + wg := &sync.WaitGroup{} + wg.Add(o.Workers) + for j := 0; j < o.Workers; j++ { + go func() { + defer wg.Done() + for job := range jobs { + var img image.Image + if !o.Dry { + f, err := job.F.Open() + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.F.Name, err) + os.Exit(1) + } + img, _, err = image.Decode(f) + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.F.Name, err) + os.Exit(1) + } + f.Close() + } + + p, fn := filepath.Split(filepath.Clean(job.F.Name)) + output <- &tasks{ + Id: job.Id, + Image: img, + Path: p, + Name: fn, + } + } + }() + } + + go func() { + wg.Wait() + close(output) + r.Close() + }() return } // load a rar file that include images func (o *Options) loadCbr() (totalImages int, output chan *tasks, err error) { - // listing and indexing - rl, err := rardecode.OpenReader(o.Input, "") + var isSolid bool + files, err := rardecode.List(o.Input) if err != nil { return } names := make([]string, 0) - for { - f, ferr := rl.Next() - - if ferr != nil && ferr != io.EOF { - rl.Close() - err = ferr - return - } - - if f == nil { - break - } - + for _, f := range files { if !f.IsDir && isSupportedImage(f.Name) { + if f.Solid { + isSolid = true + } names = append(names, f.Name) } } - rl.Close() totalImages = len(names) if totalImages == 0 { @@ -194,46 +249,89 @@ func (o *Options) loadCbr() (totalImages int, output chan *tasks, err error) { indexedNames[name] = i } - // send file to the queue - output = make(chan *tasks, o.Workers*2) + type job struct { + Id int + Name string + Open func() (io.ReadCloser, error) + } + + jobs := make(chan *job) go func() { - defer close(output) - r, err := rardecode.OpenReader(o.Input, "") - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - - } - defer r.Close() - - for { - f, err := r.Next() - if err != nil && err != io.EOF { - fmt.Fprintln(os.Stderr, err) + defer close(jobs) + if isSolid && !o.Dry { + r, rerr := rardecode.OpenReader(o.Input) + if rerr != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", o.Input, rerr) os.Exit(1) - } - if f == nil { - break - } - if idx, ok := indexedNames[f.Name]; ok { - var b bytes.Buffer - if !o.Dry { - io.Copy(&b, r) + defer r.Close() + for { + f, rerr := r.Next() + if rerr != nil { + if rerr == io.EOF { + break + } + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", f.Name, rerr) + os.Exit(1) } - - p, fn := filepath.Split(filepath.Clean(f.Name)) - - output <- &tasks{ - Id: idx, - Reader: &b, - Path: p, - Name: fn, + if i, ok := indexedNames[f.Name]; ok { + var b bytes.Buffer + _, rerr = io.Copy(&b, r) + if rerr != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", f.Name, rerr) + os.Exit(1) + } + jobs <- &job{i, f.Name, func() (io.ReadCloser, error) { + return io.NopCloser(bytes.NewReader(b.Bytes())), nil + }} + } + } + } else { + for _, img := range files { + if i, ok := indexedNames[img.Name]; ok { + jobs <- &job{i, img.Name, img.Open} } } } }() + // send file to the queue + output = make(chan *tasks, o.Workers) + wg := &sync.WaitGroup{} + wg.Add(o.Workers) + for j := 0; j < o.Workers; j++ { + go func() { + defer wg.Done() + for job := range jobs { + var img image.Image + if !o.Dry { + f, err := job.Open() + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.Name, err) + os.Exit(1) + } + img, _, err = image.Decode(f) + if err != nil { + fmt.Fprintf(os.Stderr, "\nerror processing image %s: %s\n", job.Name, err) + os.Exit(1) + } + f.Close() + } + + p, fn := filepath.Split(filepath.Clean(job.Name)) + output <- &tasks{ + Id: job.Id, + Image: img, + Path: p, + Name: fn, + } + } + }() + } + go func() { + wg.Wait() + close(output) + }() return } @@ -252,16 +350,9 @@ func (o *Options) loadPdf() (totalImages int, output chan *tasks, err error) { defer close(output) defer pdf.Close() for i := 0; i < totalImages; i++ { - var b bytes.Buffer - + var img image.Image if !o.Dry { - img, err := pdfimage.Extract(pdf, i+1) - if err != nil { - fmt.Fprintln(os.Stderr, err) - os.Exit(1) - } - - err = tiff.Encode(&b, img, nil) + img, err = pdfimage.Extract(pdf, i+1) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) @@ -269,10 +360,10 @@ func (o *Options) loadPdf() (totalImages int, output chan *tasks, err error) { } output <- &tasks{ - Id: i, - Reader: &b, - Path: "", - Name: fmt.Sprintf(pageFmt, i+1), + Id: i, + Image: img, + Path: "", + Name: fmt.Sprintf(pageFmt, i+1), } } }()