From 34993e36eeeec02e55132d42fed608394e7405cb Mon Sep 17 00:00:00 2001 From: celogeek <65178+celogeek@users.noreply.github.com> Date: Fri, 30 Dec 2022 14:10:19 +0100 Subject: [PATCH] convert pdf --- go.mod | 1 + go.sum | 2 ++ internal/epub/image_processing.go | 38 ++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index bf1a2f6..8322649 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.19 require ( github.com/gofrs/uuid v3.1.0+incompatible github.com/nwaples/rardecode v1.1.3 + github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0 github.com/schollz/progressbar/v3 v3.12.2 golang.org/x/image v0.2.0 ) diff --git a/go.sum b/go.sum index 2fc520b..4e605f3 100644 --- a/go.sum +++ b/go.sum @@ -13,6 +13,8 @@ github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9l github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0 h1:fuFvfwIc+cpySYurvDNTs5LIHXP9Cj3reVRplj9Whv4= +github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0/go.mod h1:Ql3QqeGiYGlPOtYz+F/L7J27spqDcdH9LhDHOrrdsD4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw= github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= diff --git a/internal/epub/image_processing.go b/internal/epub/image_processing.go index 5f83246..12948e7 100644 --- a/internal/epub/image_processing.go +++ b/internal/epub/image_processing.go @@ -15,6 +15,9 @@ import ( "sync" "github.com/nwaples/rardecode" + pdfimage "github.com/raff/pdfreader/image" + "github.com/raff/pdfreader/pdfread" + "golang.org/x/image/tiff" ) type Image struct { @@ 
-51,7 +54,7 @@ func LoadImages(path string, options *ImageOptions) ([]*Image, error) {
 	case ".cbr", "rar":
 		imageCount, imageInput, err = loadCbr(path)
 	case ".pdf":
-		err = fmt.Errorf("not implemented")
+		imageCount, imageInput, err = loadPdf(path)
 	default:
 		err = fmt.Errorf("unknown file format (%s): support .cbz, .cbr, .pdf", ext)
 	}
@@ -276,3 +279,36 @@ func loadCbr(input string) (int, chan *imageTask, error) {
 
 	return len(names), output, nil
 }
+
+// loadPdf streams every page of the PDF at input as a TIFF-encoded imageTask.
+// It returns the page count and an unbuffered channel fed by a goroutine that
+// closes the channel and the PDF once the last page is sent. An error is
+// returned only when pdfread.Load yields nil. A page that fails to extract or
+// encode panics in that goroutine, since no error path to the caller exists
+// here; the panic value names the file and the 1-based page number.
+func loadPdf(input string) (int, chan *imageTask, error) {
+	pdf := pdfread.Load(input)
+	if pdf == nil {
+		return 0, nil, fmt.Errorf("can't read pdf %q", input)
+	}
+
+	nbPages := len(pdf.Pages())
+	output := make(chan *imageTask)
+	go func() {
+		defer close(output)
+		defer pdf.Close()
+		for i := 0; i < nbPages; i++ {
+			img, err := pdfimage.Extract(pdf, i+1) // Extract is given 1-based page numbers.
+			if err != nil {
+				panic(fmt.Errorf("pdf %q: extract page %d: %w", input, i+1, err))
+			}
+			buf := new(bytes.Buffer)
+			if err := tiff.Encode(buf, img, nil); err != nil {
+				panic(fmt.Errorf("pdf %q: encode page %d as tiff: %w", input, i+1, err))
+			}
+			output <- &imageTask{Id: i, Reader: io.NopCloser(buf)}
+		}
+	}()
+
+	return nbPages, output, nil
+}