convert pdf

This commit is contained in:
Celogeek 2022-12-30 14:10:19 +01:00
parent eb615c377a
commit 34993e36ee
Signed by: celogeek
GPG Key ID: E6B7BDCFC446233A
3 changed files with 40 additions and 1 deletion

1
go.mod
View File

@ -5,6 +5,7 @@ go 1.19
require ( require (
github.com/gofrs/uuid v3.1.0+incompatible github.com/gofrs/uuid v3.1.0+incompatible
github.com/nwaples/rardecode v1.1.3 github.com/nwaples/rardecode v1.1.3
github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0
github.com/schollz/progressbar/v3 v3.12.2 github.com/schollz/progressbar/v3 v3.12.2
golang.org/x/image v0.2.0 golang.org/x/image v0.2.0
) )

2
go.sum
View File

@ -13,6 +13,8 @@ github.com/nwaples/rardecode v1.1.3 h1:cWCaZwfM5H7nAD6PyEdcVnczzV8i/JtotnyW/dD9l
github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0= github.com/nwaples/rardecode v1.1.3/go.mod h1:5DzqNKiOdpKKBH87u8VlvAnPZMXcGRhxWkRpHbbfGS0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0 h1:fuFvfwIc+cpySYurvDNTs5LIHXP9Cj3reVRplj9Whv4=
github.com/raff/pdfreader v0.0.0-20220308062436-033e8ac577f0/go.mod h1:Ql3QqeGiYGlPOtYz+F/L7J27spqDcdH9LhDHOrrdsD4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw= github.com/rivo/uniseg v0.4.3 h1:utMvzDsuh3suAEnhH0RdHmoPbU648o6CvXxTx4SBMOw=
github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.3/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=

View File

@ -15,6 +15,9 @@ import (
"sync" "sync"
"github.com/nwaples/rardecode" "github.com/nwaples/rardecode"
pdfimage "github.com/raff/pdfreader/image"
"github.com/raff/pdfreader/pdfread"
"golang.org/x/image/tiff"
) )
type Image struct { type Image struct {
@ -51,7 +54,7 @@ func LoadImages(path string, options *ImageOptions) ([]*Image, error) {
case ".cbr", "rar": case ".cbr", "rar":
imageCount, imageInput, err = loadCbr(path) imageCount, imageInput, err = loadCbr(path)
case ".pdf": case ".pdf":
err = fmt.Errorf("not implemented") imageCount, imageInput, err = loadPdf(path)
default: default:
err = fmt.Errorf("unknown file format (%s): support .cbz, .cbr, .pdf", ext) err = fmt.Errorf("unknown file format (%s): support .cbz, .cbr, .pdf", ext)
} }
@ -276,3 +279,36 @@ func loadCbr(input string) (int, chan *imageTask, error) {
return len(names), output, nil return len(names), output, nil
} }
// loadPdf opens the PDF at input and streams one image task per page.
//
// It returns the page count reported by the document, a channel delivering
// each page image re-encoded as TIFF, and an error when the file cannot be
// loaded. The channel is unbuffered and is closed after the last page has
// been sent, so the producer goroutine only progresses while the caller
// keeps receiving.
//
// NOTE(review): a failure while extracting or encoding a page panics inside
// the producer goroutine; nothing in this function recovers from it.
func loadPdf(input string) (int, chan *imageTask, error) {
	doc := pdfread.Load(input)
	if doc == nil {
		return 0, nil, fmt.Errorf("can't read pdf")
	}

	pageCount := len(doc.Pages())
	tasks := make(chan *imageTask)

	go func() {
		// Deferred calls run in reverse order: the document is closed
		// first, then the channel.
		defer close(tasks)
		defer doc.Close()

		// Extract is called with page numbers starting at 1, while task
		// ids start at 0.
		for page := 1; page <= pageCount; page++ {
			picture, err := pdfimage.Extract(doc, page)
			if err != nil {
				panic(err)
			}

			encoded := new(bytes.Buffer)
			if err := tiff.Encode(encoded, picture, nil); err != nil {
				panic(err)
			}

			tasks <- &imageTask{
				Id:     page - 1,
				Reader: io.NopCloser(encoded),
			}
		}
	}()

	return pageCount, tasks, nil
}