diff --git a/go.mod b/go.mod index 7390d7b..bf1a2f6 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.19 require ( github.com/gofrs/uuid v3.1.0+incompatible + github.com/nwaples/rardecode v1.1.3 github.com/schollz/progressbar/v3 v3.12.2 golang.org/x/image v0.2.0 ) diff --git a/go.sum b/go.sum index 25233bd..2fc520b 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/mattn/go-runewidth v0.0.14 h1:+xnbZSEeDbOIg5/mE6JF0w6n9duR1l3/WmbinWV github.com/mattn/go-runewidth v0.0.14/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= +github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9lEc= +github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= diff --git a/internal/epub/core.go b/internal/epub/core.go index fd43c7a..a323207 100644 --- a/internal/epub/core.go +++ b/internal/epub/core.go @@ -1,34 +1,31 @@ package epub import ( - "archive/zip" "fmt" - "io/fs" - "os" "path/filepath" - "runtime" - "sort" "strings" - "sync" "text/template" "time" "github.com/gofrs/uuid" "github.com/schollz/progressbar/v3" - - imageconverter "go-comic-converter/internal/image-converter" ) -type EpubOptions struct { - Input string - Output string - Title string - Author string +type ImageOptions struct { + Crop bool ViewWidth int ViewHeight int Quality int - Crop bool - LimitMb int +} + +type EpubOptions struct { + Input string + Output string + Title string + Author string + LimitMb int + + *ImageOptions } type ePub struct { @@ -38,20 +35,13 @@ type ePub struct { UpdatedAt string imagesCount int - processingImages func() chan *image + processingImages func() chan *Image templateProcessor *template.Template } -type image struct { - Id int - Data *imageData - Width int - Height int -} - type epubPart struct { - Cover *image - Images []*image + Cover *Image + Images []*Image } func NewEpub(options *EpubOptions) *ePub { @@ -88,208 +78,12 @@ func (e *ePub) render(templateString string, data any) string { return result.String() } -func (e *ePub) load() error { - fi, err := os.Stat(e.Input) +func (e *ePub) getParts() ([]*epubPart, error) { + images, err := LoadImages(e.Input, e.ImageOptions) if err != nil { - return err + return nil, err } - if fi.IsDir() { - return e.loadDir() - } - - switch ext := strings.ToLower(filepath.Ext(e.Input)); ext { - case ".cbz": - return e.loadCBZ() - case ".cbr": - return e.loadCBR() - case ".pdf": - return e.loadPDF() - default: - return fmt.Errorf("unknown file format (%s): support .cbz, .cbr, .pdf", ext) - } -} - -func (e *ePub) loadCBZ() error { - r, err := zip.OpenReader(e.Input) - if err != nil { - return err - } - - images := make([]*zip.File, 0) - for _, f := range r.File { - if f.FileInfo().IsDir() { - continue - } - if strings.ToLower(filepath.Ext(f.Name)) != ".jpg" { - continue - } - images = append(images, f) - } - if len(images) == 0 { - r.Close() - return fmt.Errorf("no images found") - } - - sort.SliceStable(images, func(i, j int) bool { - return strings.Compare(images[i].Name, images[j].Name) < 0 - }) - - e.imagesCount = len(images) - - type task struct { - Id int - FZ *zip.File - } - - tasks := make(chan *task) - - e.processingImages = func() chan *image { - // defer r.Close() - wg := &sync.WaitGroup{} - results := make(chan *image) - for i := 0; i < runtime.NumCPU(); i++ { - wg.Add(1) - go func() { - defer wg.Done() - for imgTask := range tasks { - reader, err := imgTask.FZ.Open() - if err != nil { - panic(err) - } - data, w, h := imageconverter.Convert( - reader, - e.Crop, - e.ViewWidth, - e.ViewHeight, - e.Quality, - ) - name := fmt.Sprintf("OEBPS/Images/%d.jpg", imgTask.Id) - if imgTask.Id == 0 { - name = "OEBPS/Images/cover.jpg" - } - results <- &image{ - imgTask.Id, - newImageData(name, data), - w, - h, - } - } - }() - } - go func() { - for i, fz := range images { - tasks <- &task{i, fz} - } - close(tasks) - wg.Wait() - r.Close() - close(results) - }() - - return results - } - - return nil -} - -func (e *ePub) loadCBR() error { - return fmt.Errorf("no implemented") -} - -func (e *ePub) loadPDF() error { - return fmt.Errorf("no implemented") -} - -func (e *ePub) loadDir() error { - images := make([]string, 0) - err := filepath.WalkDir(e.Input, func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - if d.IsDir() { - return nil - } - ext := filepath.Ext(path) - if strings.ToLower(ext) != ".jpg" { - return nil - } - - images = append(images, path) - return nil - }) - if err != nil { - return err - } - if len(images) == 0 { - return fmt.Errorf("no images found") - } - sort.Strings(images) - - e.imagesCount = len(images) - - type task struct { - Id int - Path string - } - - tasks := make(chan *task) - - e.processingImages = func() chan *image { - wg := &sync.WaitGroup{} - results := make(chan *image) - for i := 0; i < runtime.NumCPU(); i++ { - wg.Add(1) - go func() { - defer wg.Done() - for imgTask := range tasks { - reader, err := os.Open(imgTask.Path) - if err != nil { - panic(err) - } - data, w, h := imageconverter.Convert( - reader, - e.Crop, - e.ViewWidth, - e.ViewHeight, - e.Quality, - ) - name := fmt.Sprintf("OEBPS/Images/%d.jpg", imgTask.Id) - if imgTask.Id == 0 { - name = "OEBPS/Images/cover.jpg" - } - results <- &image{ - imgTask.Id, - newImageData(name, data), - w, - h, - } - } - }() - } - go func() { - for i, path := range images { - tasks <- &task{i, path} - } - close(tasks) - wg.Wait() - close(results) - }() - return results - } - - return nil -} - -func (e *ePub) getParts() []*epubPart { - images := make([]*image, e.imagesCount) - bar := progressbar.Default(int64(e.imagesCount), "Processing") - for img := range e.processingImages() { - images[img.Id] = img - bar.Add(1) - } - bar.Close() - parts := make([]*epubPart, 0) cover := images[0] images = images[1:] @@ -298,7 +92,7 @@ func (e *ePub) getParts() []*epubPart { Cover: cover, Images: images, }) - return parts + return parts, nil } maxSize := uint64(e.LimitMb * 1024 * 1024) @@ -308,7 +102,7 @@ func (e *ePub) getParts() []*epubPart { baseSize := uint64(16*1024) + cover.Data.CompressedSize() currentSize := baseSize - currentImages := make([]*image, 0) + currentImages := make([]*Image, 0) part := 1 for _, img := range images { @@ -320,7 +114,7 @@ func (e *ePub) getParts() []*epubPart { }) part += 1 currentSize = baseSize - currentImages = make([]*image, 0) + currentImages = make([]*Image, 0) } currentSize += imgSize currentImages = append(currentImages, img) @@ -332,20 +126,19 @@ func (e *ePub) getParts() []*epubPart { }) } - return parts + return parts, nil } func (e *ePub) Write() error { - if err := e.load(); err != nil { - return err - } - type zipContent struct { Name string Content any } - epubParts := e.getParts() + epubParts, err := e.getParts() + if err != nil { + return err + } totalParts := len(epubParts) bar := progressbar.Default(int64(totalParts), "Writing Part") diff --git a/internal/epub/image_data.go b/internal/epub/image_data.go index fc872b3..a07a707 100644 --- a/internal/epub/image_data.go +++ b/internal/epub/image_data.go @@ -8,16 +8,16 @@ import ( "time" ) -type imageData struct { +type ImageData struct { Header *zip.FileHeader Data []byte } -func (img *imageData) CompressedSize() uint64 { +func (img *ImageData) CompressedSize() uint64 { return img.Header.CompressedSize64 + 30 + uint64(len(img.Header.Name)) } -func newImageData(name string, data []byte) *imageData { +func newImageData(name string, data []byte) *ImageData { cdata := bytes.NewBuffer([]byte{}) wcdata, err := flate.NewWriter(cdata, flate.BestCompression) if err != nil { @@ -29,7 +29,7 @@ func newImageData(name string, data []byte) *imageData { panic(err) } t := time.Now() - return &imageData{ + return &ImageData{ &zip.FileHeader{ Name: name, CompressedSize64: uint64(cdata.Len()), diff --git a/internal/epub/image_processing.go b/internal/epub/image_processing.go new file mode 100644 index 0000000..606187b --- /dev/null +++ b/internal/epub/image_processing.go @@ -0,0 +1,285 @@ +package epub + +import ( + "archive/zip" + "bytes" + "fmt" + imageconverter "go-comic-converter/internal/image-converter" + "io" + "io/fs" + "os" + "path/filepath" + "runtime" + "sort" + "strings" + "sync" + + "github.com/nwaples/rardecode" + "github.com/schollz/progressbar/v3" +) + +type Image struct { + Id int + Data *ImageData + Width int + Height int +} + +type imageTask struct { + Id int + Reader io.ReadCloser +} + +type readFakeCloser struct { + io.Reader +} + +func (rfc readFakeCloser) Close() error { return nil } + +func LoadImages(path string, options *ImageOptions) ([]*Image, error) { + images := make([]*Image, 0) + + fi, err := os.Stat(path) + if err != nil { + return nil, err + } + + var ( + imageCount int + imageInput chan *imageTask + ) + + if fi.IsDir() { + imageCount, imageInput, err = loadDir(path) + } else { + switch ext := strings.ToLower(filepath.Ext(path)); ext { + case ".cbz": + imageCount, imageInput, err = loadCbz(path) + case ".cbr": + imageCount, imageInput, err = loadCbr(path) + case ".pdf": + err = fmt.Errorf("not implemented") + default: + err = fmt.Errorf("unknown file format (%s): support .cbz, .cbr, .pdf", ext) + } + } + if err != nil { + return nil, err + } + + imageOutput := make(chan *Image) + + // processing + wg := &sync.WaitGroup{} + bar := progressbar.Default(int64(imageCount), "Processing") + for i := 0; i < runtime.NumCPU(); i++ { + wg.Add(1) + go func() { + defer wg.Done() + for img := range imageInput { + data, w, h := imageconverter.Convert( + img.Reader, + options.Crop, + options.ViewWidth, + options.ViewHeight, + options.Quality, + ) + name := fmt.Sprintf("OEBPS/Images/%d.jpg", img.Id) + if img.Id == 0 { + name = "OEBPS/Images/cover.jpg" + } + imageOutput <- &Image{ + img.Id, + newImageData(name, data), + w, + h, + } + } + }() + } + + go func() { + wg.Wait() + bar.Close() + close(imageOutput) + }() + + for image := range imageOutput { + images = append(images, image) + bar.Add(1) + } + + if len(images) == 0 { + return nil, fmt.Errorf("image not found") + } + + sort.Slice(images, func(i, j int) bool { + return images[i].Id < images[j].Id + }) + + return images, nil +} + +func loadDir(input string) (int, chan *imageTask, error) { + + images := make([]string, 0) + err := filepath.WalkDir(input, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + if d.IsDir() { + return nil + } + ext := filepath.Ext(path) + if strings.ToLower(ext) != ".jpg" { + return nil + } + + images = append(images, path) + return nil + }) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + if len(images) == 0 { + return 0, nil, fmt.Errorf("image not found") + } + + sort.Strings(images) + + output := make(chan *imageTask) + go func() { + defer close(output) + for i, img := range images { + f, err := os.Open(img) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + output <- &imageTask{ + Id: i, + Reader: f, + } + } + }() + return len(images), output, nil +} + +func loadCbz(input string) (int, chan *imageTask, error) { + r, err := zip.OpenReader(input) + if err != nil { + return 0, nil, err + } + + images := make([]*zip.File, 0) + for _, f := range r.File { + if f.FileInfo().IsDir() { + continue + } + if strings.ToLower(filepath.Ext(f.Name)) != ".jpg" { + continue + } + images = append(images, f) + } + if len(images) == 0 { + r.Close() + return 0, nil, fmt.Errorf("no images found") + } + + sort.SliceStable(images, func(i, j int) bool { + return strings.Compare(images[i].Name, images[j].Name) < 0 + }) + + output := make(chan *imageTask) + go func() { + defer close(output) + for i, img := range images { + f, err := img.Open() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + output <- &imageTask{ + Id: i, + Reader: f, + } + } + }() + return len(images), output, nil +} + +func loadCbr(input string) (int, chan *imageTask, error) { + rr, err := os.Open(input) + if err != nil { + return 0, nil, err + } + defer rr.Close() + rs, err := rr.Stat() + if err != nil { + return 0, nil, err + } + + bar := progressbar.DefaultBytes(rs.Size(), "Uncompressing") + defer bar.Close() + + r, err := rardecode.NewReader(io.TeeReader(rr, bar), "") + if err != nil { + return 0, nil, err + } + + type imageContent struct { + Name string + Data io.ReadCloser + } + + images := make([]*imageContent, 0) + + for { + f, err := r.Next() + + if f == nil { + break + } + + if err != nil { + return 0, nil, err + } + + if f.IsDir { + continue + } + + if strings.ToLower(filepath.Ext(f.Name)) != ".jpg" { + continue + } + + b := bytes.NewBuffer([]byte{}) + io.Copy(b, r) + + images = append(images, &imageContent{ + Name: f.Name, + Data: readFakeCloser{b}, + }) + } + + if len(images) == 0 { + return 0, nil, fmt.Errorf("no images found") + } + + sort.SliceStable(images, func(i, j int) bool { + return strings.Compare(images[i].Name, images[j].Name) < 0 + }) + + output := make(chan *imageTask) + go func() { + defer close(output) + for i, img := range images { + output <- &imageTask{ + Id: i, + Reader: img.Data, + } + } + }() + return len(images), output, nil +} diff --git a/internal/epub/zip.go b/internal/epub/zip.go index a026041..50732ea 100644 --- a/internal/epub/zip.go +++ b/internal/epub/zip.go @@ -50,7 +50,7 @@ func (e *epubZip) WriteMagic() error { return err } -func (e *epubZip) WriteImage(image *imageData) error { +func (e *epubZip) WriteImage(image *ImageData) error { m, err := e.wz.CreateRaw(image.Header) if err != nil { return err diff --git a/main.go b/main.go index f30bacc..d3168d4 100644 --- a/main.go +++ b/main.go @@ -127,15 +127,17 @@ func main() { fmt.Println(opt) if err := epub.NewEpub(&epub.EpubOptions{ - Input: opt.Input, - Output: opt.Output, - ViewWidth: profile.Width, - ViewHeight: profile.Height, - Quality: opt.Quality, - Crop: !opt.NoCrop, - LimitMb: opt.LimitMb, - Title: opt.Title, - Author: opt.Author, + Input: opt.Input, + Output: opt.Output, + LimitMb: opt.LimitMb, + Title: opt.Title, + Author: opt.Author, + ImageOptions: &epub.ImageOptions{ + ViewWidth: profile.Width, + ViewHeight: profile.Height, + Quality: opt.Quality, + Crop: !opt.NoCrop, + }, }).Write(); err != nil { fmt.Printf("Error: %v\n", err) os.Exit(1)