use tmp storage to limit memory usage

This commit is contained in:
Celogeek 2023-04-30 00:37:26 +02:00
parent 05ac50a453
commit 3858583728
Signed by: celogeek
SSH Key Fingerprint: SHA256:njNJLzoLQdbV9PC6ehcruRb0QnEgxABoCYZ+0+aUIYc
8 changed files with 225 additions and 103 deletions

View File

@ -4,6 +4,7 @@ Tools to create EPUB from images.
package epub
import (
"archive/zip"
"fmt"
"os"
"path/filepath"
@ -34,8 +35,9 @@ type ePub struct {
}
type epubPart struct {
Cover *epubimageprocessor.LoadedImage
LoadedImages epubimageprocessor.LoadedImages
Cover *epubimage.Image
Images []*epubimage.Image
Reader *zip.ReadCloser
}
// initialize EPUB
@ -68,19 +70,18 @@ func (e *ePub) render(templateString string, data map[string]any) string {
}
// write image to the zip
func (e *ePub) writeImage(wz *epubzip.EPUBZip, img *epubimageprocessor.LoadedImage) error {
func (e *ePub) writeImage(wz *epubzip.EPUBZip, img *epubimage.Image, zipImg *zip.File) error {
err := wz.WriteContent(
fmt.Sprintf("OEBPS/%s", img.Image.PagePath()),
img.EPUBPagePath(),
[]byte(e.render(epubtemplates.Text, map[string]any{
"Title": fmt.Sprintf("Image %d Part %d", img.Image.Id, img.Image.Part),
"Title": fmt.Sprintf("Image %d Part %d", img.Id, img.Part),
"ViewPort": fmt.Sprintf("width=%d,height=%d", e.Image.View.Width, e.Image.View.Height),
"ImagePath": img.Image.ImgPath(),
"ImageStyle": img.Image.ImgStyle(e.Image.View.Width, e.Image.View.Height, e.Image.Manga),
"ImagePath": img.ImgPath(),
"ImageStyle": img.ImgStyle(e.Image.View.Width, e.Image.View.Height, e.Image.Manga),
})),
)
if err == nil {
err = wz.WriteRaw(img.ZipImage)
err = wz.Copy(zipImg)
}
return err
@ -89,7 +90,7 @@ func (e *ePub) writeImage(wz *epubzip.EPUBZip, img *epubimageprocessor.LoadedIma
// write blank page
func (e *ePub) writeBlank(wz *epubzip.EPUBZip, img *epubimage.Image) error {
return wz.WriteContent(
fmt.Sprintf("OEBPS/%s", img.SpacePath()),
img.EPUBSpacePath(),
[]byte(e.render(epubtemplates.Blank, map[string]any{
"Title": fmt.Sprintf("Blank Page %d", img.Id),
"ViewPort": fmt.Sprintf("width=%d,height=%d", e.Image.View.Width, e.Image.View.Height),
@ -98,73 +99,78 @@ func (e *ePub) writeBlank(wz *epubzip.EPUBZip, img *epubimage.Image) error {
}
// extract image and split it into part
func (e *ePub) getParts() ([]*epubPart, error) {
loadedImages, err := e.imageProcessor.Load()
func (e *ePub) getParts() (parts []*epubPart, imgStorage *epubzip.EPUBZipStorageImageReader, err error) {
images, err := e.imageProcessor.Load()
if err != nil {
return nil, err
return nil, nil, err
}
// sort result by id and part
sort.Slice(loadedImages, func(i, j int) bool {
if loadedImages[i].Image.Id == loadedImages[j].Image.Id {
return loadedImages[i].Image.Part < loadedImages[j].Image.Part
sort.Slice(images, func(i, j int) bool {
if images[i].Id == images[j].Id {
return images[i].Part < images[j].Part
}
return loadedImages[i].Image.Id < loadedImages[j].Image.Id
return images[i].Id < images[j].Id
})
parts := make([]*epubPart, 0)
cover := loadedImages[0]
parts = make([]*epubPart, 0)
cover := images[0]
if e.Image.HasCover {
loadedImages = loadedImages[1:]
images = images[1:]
}
if e.Dry {
parts = append(parts, &epubPart{
Cover: cover,
LoadedImages: loadedImages,
Cover: cover,
Images: images,
})
return parts, nil
return parts, nil, nil
}
imgStorage, err = epubzip.NewEPUBZipStorageImageReader(e.ImgStorage())
if err != nil {
return nil, nil, err
}
// compute the size of each EPUB part and try to stay as close as possible to the target
maxSize := uint64(e.LimitMb * 1024 * 1024)
xhtmlSize := uint64(1024)
// descriptor files + title
baseSize := uint64(16*1024) + cover.ZipImage.CompressedSize()
baseSize := uint64(16*1024) + imgStorage.Size(cover.EPUBImgPath())
if e.Image.HasCover {
baseSize += cover.ZipImage.CompressedSize()
baseSize += imgStorage.Size(cover.EPUBImgPath())
}
currentSize := baseSize
currentImages := make([]*epubimageprocessor.LoadedImage, 0)
currentImages := make([]*epubimage.Image, 0)
part := 1
for _, img := range loadedImages {
imgSize := img.ZipImage.CompressedSize() + xhtmlSize
for _, img := range images {
imgSize := imgStorage.Size(img.EPUBImgPath()) + xhtmlSize
if maxSize > 0 && len(currentImages) > 0 && currentSize+imgSize > maxSize {
parts = append(parts, &epubPart{
Cover: cover,
LoadedImages: currentImages,
Cover: cover,
Images: currentImages,
})
part += 1
currentSize = baseSize
if !e.Image.HasCover {
currentSize += cover.ZipImage.CompressedSize()
currentSize += imgStorage.Size(cover.EPUBImgPath())
}
currentImages = make([]*epubimageprocessor.LoadedImage, 0)
currentImages = make([]*epubimage.Image, 0)
}
currentSize += imgSize
currentImages = append(currentImages, img)
}
if len(currentImages) > 0 {
parts = append(parts, &epubPart{
Cover: cover,
LoadedImages: currentImages,
Cover: cover,
Images: currentImages,
})
}
return parts, nil
return parts, imgStorage, nil
}
// create a tree from the directories.
@ -194,22 +200,26 @@ func (e *ePub) Write() error {
Content string
}
epubParts, err := e.getParts()
epubParts, imgStorage, err := e.getParts()
if err != nil {
return err
}
if e.Dry {
p := epubParts[0]
fmt.Fprintf(os.Stderr, "TOC:\n - %s\n%s\n", e.Title, e.getTree(p.LoadedImages.Images(), true))
fmt.Fprintf(os.Stderr, "TOC:\n - %s\n%s\n", e.Title, e.getTree(p.Images, true))
if e.DryVerbose {
if e.Image.HasCover {
fmt.Fprintf(os.Stderr, "Cover:\n%s\n", e.getTree([]*epubimage.Image{p.Cover.Image}, false))
fmt.Fprintf(os.Stderr, "Cover:\n%s\n", e.getTree([]*epubimage.Image{p.Cover}, false))
}
fmt.Fprintf(os.Stderr, "Files:\n%s\n", e.getTree(p.LoadedImages.Images(), false))
fmt.Fprintf(os.Stderr, "Files:\n%s\n", e.getTree(p.Images, false))
}
return nil
}
defer func() {
imgStorage.Close()
imgStorage.Remove()
}()
totalParts := len(epubParts)
@ -220,6 +230,7 @@ func (e *ePub) Write() error {
TotalJob: 2,
Quiet: e.Quiet,
})
for i, part := range epubParts {
ext := filepath.Ext(e.Output)
suffix := ""
@ -251,12 +262,12 @@ func (e *ePub) Write() error {
Publisher: e.Publisher,
UpdatedAt: e.UpdatedAt,
ImageOptions: e.Image,
Cover: part.Cover.Image,
Images: part.LoadedImages.Images(),
Cover: part.Cover,
Images: part.Images,
Current: i + 1,
Total: totalParts,
})},
{"OEBPS/toc.xhtml", epubtemplates.Toc(title, e.StripFirstDirectoryFromToc, part.LoadedImages.Images())},
{"OEBPS/toc.xhtml", epubtemplates.Toc(title, e.StripFirstDirectoryFromToc, part.Images)},
{"OEBPS/Text/style.css", e.render(epubtemplates.Style, map[string]any{
"PageWidth": e.Image.View.Width,
"PageHeight": e.Image.View.Height,
@ -269,7 +280,7 @@ func (e *ePub) Write() error {
"Title": title,
"ViewPort": fmt.Sprintf("width=%d,height=%d", e.Image.View.Width, e.Image.View.Height),
"ImagePath": "Images/title.jpg",
"ImageStyle": part.Cover.Image.ImgStyle(e.Image.View.Width, e.Image.View.Height, e.Image.Manga),
"ImageStyle": part.Cover.ImgStyle(e.Image.View.Width, e.Image.View.Height, e.Image.Manga),
})},
}
@ -281,27 +292,32 @@ func (e *ePub) Write() error {
return err
}
}
if err := wz.WriteRaw(e.imageProcessor.CoverTitleData(part.Cover.Image.Raw, title)); err != nil {
coverTitle, err := e.imageProcessor.CoverTitleData(part.Cover.Raw, title)
if err != nil {
return err
}
if err := wz.WriteRaw(coverTitle); err != nil {
return err
}
// Cover exist or part > 1
// If no cover, part 2 and more will include the image as a cover
if e.Image.HasCover || i > 0 {
if err := e.writeImage(wz, part.Cover); err != nil {
if err := e.writeImage(wz, part.Cover, imgStorage.Get(part.Cover.EPUBImgPath())); err != nil {
return err
}
}
lastImage := part.LoadedImages[len(part.LoadedImages)-1]
for _, img := range part.LoadedImages {
if err := e.writeImage(wz, img); err != nil {
lastImage := part.Images[len(part.Images)-1]
for _, img := range part.Images {
if err := e.writeImage(wz, img, imgStorage.Get(img.EPUBImgPath())); err != nil {
return err
}
// Double Page or Last Image that is not a double page
if img.Image.DoublePage || (img.Image.Part == 0 && img == lastImage) {
if err := e.writeBlank(wz, img.Image); err != nil {
if img.DoublePage || (img.Part == 0 && img == lastImage) {
if err := e.writeBlank(wz, img); err != nil {
return err
}
}
@ -309,6 +325,7 @@ func (e *ePub) Write() error {
bar.Add(1)
}
bar.Close()
fmt.Fprintln(os.Stderr)
return nil
}

View File

@ -31,6 +31,11 @@ func (i *Image) SpacePath() string {
return fmt.Sprintf("Text/%s.xhtml", i.SpaceKey())
}
// EPUBSpacePath returns the path of the blank page inside the EPUB:
// the value of SpacePath prefixed with "OEBPS/".
func (i *Image) EPUBSpacePath() string {
	relative := i.SpacePath()
	return fmt.Sprintf("OEBPS/%s", relative)
}
// key for page
func (i *Image) PageKey() string {
return fmt.Sprintf("page_%d_p%d", i.Id, i.Part)
@ -41,6 +46,11 @@ func (i *Image) PagePath() string {
return fmt.Sprintf("Text/%s.xhtml", i.PageKey())
}
// EPUBPagePath returns the path of the page inside the EPUB:
// the value of PagePath prefixed with "OEBPS/".
func (i *Image) EPUBPagePath() string {
	relative := i.PagePath()
	return fmt.Sprintf("OEBPS/%s", relative)
}
// key for image
func (i *Image) ImgKey() string {
return fmt.Sprintf("img_%d_p%d", i.Id, i.Part)
@ -51,6 +61,11 @@ func (i *Image) ImgPath() string {
return fmt.Sprintf("Images/%s.jpg", i.ImgKey())
}
// EPUBImgPath returns the path of the image inside the EPUB:
// the value of ImgPath prefixed with "OEBPS/".
func (i *Image) EPUBImgPath() string {
	relative := i.ImgPath()
	return fmt.Sprintf("OEBPS/%s", relative)
}
// style to apply to the image.
//
// center by default.

View File

@ -6,6 +6,7 @@ package epubimageprocessor
import (
"fmt"
"image"
"os"
"sync"
epubimage "github.com/celogeek/go-comic-converter/v2/internal/epub/image"
@ -16,21 +17,6 @@ import (
"github.com/disintegration/gift"
)
// LoadedImage pairs an image description with its ZipImage.
type LoadedImage struct {
	Image    *epubimage.Image
	ZipImage *epubzip.ZipImage
}

// LoadedImages is a list of LoadedImage pointers.
type LoadedImages []*LoadedImage

// Images returns the Image field of every element, in the same order.
// The result is always a non-nil slice with len(l) entries.
func (l LoadedImages) Images() []*epubimage.Image {
	out := make([]*epubimage.Image, 0, len(l))
	for _, loaded := range l {
		out = append(out, loaded.Image)
	}
	return out
}
type EPUBImageProcessor struct {
*epuboptions.Options
}
@ -40,9 +26,8 @@ func New(o *epuboptions.Options) *EPUBImageProcessor {
}
// extract and convert images
func (e *EPUBImageProcessor) Load() (LoadedImages, error) {
images := make(LoadedImages, 0)
func (e *EPUBImageProcessor) Load() (images []*epubimage.Image, err error) {
images = make([]*epubimage.Image, 0)
imageCount, imageInput, err := e.load()
if err != nil {
return nil, err
@ -51,19 +36,17 @@ func (e *EPUBImageProcessor) Load() (LoadedImages, error) {
// dry run, skip conversion
if e.Dry {
for img := range imageInput {
images = append(images, &LoadedImage{
Image: &epubimage.Image{
Id: img.Id,
Path: img.Path,
Name: img.Name,
},
images = append(images, &epubimage.Image{
Id: img.Id,
Path: img.Path,
Name: img.Name,
})
}
return images, nil
}
imageOutput := make(chan *LoadedImage)
imageOutput := make(chan *epubimage.Image)
// processing
bar := epubprogress.New(epubprogress.Options{
@ -75,6 +58,12 @@ func (e *EPUBImageProcessor) Load() (LoadedImages, error) {
})
wg := &sync.WaitGroup{}
imgStorage, err := epubzip.NewEPUBZipStorageImageWriter(e.ImgStorage())
if err != nil {
bar.Close()
return nil, err
}
for i := 0; i < e.WorkersRatio(50); i++ {
wg.Add(1)
go func() {
@ -101,10 +90,13 @@ func (e *EPUBImageProcessor) Load() (LoadedImages, error) {
Path: input.Path,
Name: input.Name,
}
imageOutput <- &LoadedImage{
Image: img,
ZipImage: epubzip.CompressImage(fmt.Sprintf("OEBPS/%s", img.ImgPath()), dst, e.Image.Quality),
if err = imgStorage.Add(img.EPUBImgPath(), dst, e.Image.Quality); err != nil {
bar.Close()
fmt.Fprintf(os.Stderr, "error with %s: %s", input.Name, err)
os.Exit(1)
}
imageOutput <- img
}
}
}()
@ -112,17 +104,18 @@ func (e *EPUBImageProcessor) Load() (LoadedImages, error) {
go func() {
wg.Wait()
imgStorage.Close()
close(imageOutput)
}()
for output := range imageOutput {
if output.Image.Part == 0 {
for img := range imageOutput {
if img.Part == 0 {
bar.Add(1)
}
if e.Image.NoBlankImage && output.Image.IsBlank {
if e.Image.NoBlankImage && img.IsBlank {
continue
}
images = append(images, output)
images = append(images, img)
}
bar.Close()
@ -220,7 +213,7 @@ func (e *EPUBImageProcessor) transformImage(src image.Image, srcId int) []image.
}
// create a title page with the cover
func (e *EPUBImageProcessor) CoverTitleData(img image.Image, title string) *epubzip.ZipImage {
func (e *EPUBImageProcessor) CoverTitleData(img image.Image, title string) (*epubzip.ZipImage, error) {
// Create a blur version of the cover
g := gift.New(epubimagefilters.CoverTitle(title))
dst := image.NewGray(g.Bounds(img.Bounds()))

View File

@ -85,8 +85,7 @@ func (e *EPUBImageProcessor) loadDir() (totalImages int, output chan *tasks, err
})
if err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
return
}
totalImages = len(images)

View File

@ -3,6 +3,8 @@ Options for EPUB creation.
*/
package epuboptions
import "fmt"
type Crop struct {
Enabled bool
Left, Up, Right, Bottom int
@ -47,3 +49,7 @@ func (o *Options) WorkersRatio(pct int) (nbWorkers int) {
}
return
}
// ImgStorage returns the path used for the temporary image storage:
// the Output path with ".tmp" appended.
func (o *Options) ImgStorage() string {
	storagePath := fmt.Sprintf("%s.tmp", o.Output)
	return storagePath
}

View File

@ -58,6 +58,10 @@ func (e *EPUBZip) WriteMagic() error {
return err
}
// Copy adds the zip entry fz to the EPUB by delegating to the Copy method
// of the wrapped writer, and returns that call's error unchanged.
func (e *EPUBZip) Copy(fz *zip.File) error {
	err := e.wz.Copy(fz)
	return err
}
// Write image. They are already compressed, so we write them down directly.
func (e *EPUBZip) WriteRaw(raw *ZipImage) error {
m, err := e.wz.CreateRaw(raw.Header)

View File

@ -4,11 +4,9 @@ import (
"archive/zip"
"bytes"
"compress/flate"
"fmt"
"hash/crc32"
"image"
"image/jpeg"
"os"
"time"
)
@ -17,18 +15,8 @@ type ZipImage struct {
Data []byte
}
// compressed size of the image with the header
func (img *ZipImage) CompressedSize() uint64 {
return img.Header.CompressedSize64 + 30 + uint64(len(img.Header.Name))
}
func exitWithError(err error) {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
// create a deflate-compressed jpeg
func CompressImage(filename string, img image.Image, quality int) *ZipImage {
func CompressImage(filename string, img image.Image, quality int) (*ZipImage, error) {
var (
data, cdata bytes.Buffer
err error
@ -36,22 +24,22 @@ func CompressImage(filename string, img image.Image, quality int) *ZipImage {
err = jpeg.Encode(&data, img, &jpeg.Options{Quality: quality})
if err != nil {
exitWithError(err)
return nil, err
}
wcdata, err := flate.NewWriter(&cdata, flate.BestCompression)
if err != nil {
exitWithError(err)
return nil, err
}
_, err = wcdata.Write(data.Bytes())
if err != nil {
exitWithError(err)
return nil, err
}
err = wcdata.Close()
if err != nil {
exitWithError(err)
return nil, err
}
t := time.Now()
@ -66,5 +54,5 @@ func CompressImage(filename string, img image.Image, quality int) *ZipImage {
ModifiedDate: uint16(t.Day() + int(t.Month())<<5 + (t.Year()-1980)<<9),
},
cdata.Bytes(),
}
}, nil
}

View File

@ -0,0 +1,100 @@
package epubzip
import (
"archive/zip"
"image"
"os"
"sync"
)
// EPUBZipStorageImageWriter writes entries into a zip archive stored in a
// file on disk.
type EPUBZipStorageImageWriter struct {
	fh  *os.File    // file receiving the archive bytes
	fz  *zip.Writer // zip writer layered on top of fh
	mut *sync.Mutex // serializes writes to fz
}

// NewEPUBZipStorageImageWriter creates (or truncates) filename and returns a
// writer that stores zip entries in it. It returns the os.Create error when
// the file cannot be created.
func NewEPUBZipStorageImageWriter(filename string) (*EPUBZipStorageImageWriter, error) {
	file, err := os.Create(filename)
	if err != nil {
		return nil, err
	}

	storage := &EPUBZipStorageImageWriter{
		fh:  file,
		fz:  zip.NewWriter(file),
		mut: &sync.Mutex{},
	}
	return storage, nil
}

// Close finishes the zip archive and then closes the underlying file.
// The file is closed even when finishing the archive fails; in that case the
// archive error is the one returned.
func (e *EPUBZipStorageImageWriter) Close() error {
	zipErr := e.fz.Close()
	fileErr := e.fh.Close()
	if zipErr != nil {
		return zipErr
	}
	return fileErr
}
// Add compresses img with CompressImage under the entry name filename and
// appends the result to the archive as a raw entry.
//
// The compression runs before the mutex is taken, so only the write to the
// archive itself happens while the lock is held.
func (e *EPUBZipStorageImageWriter) Add(filename string, img image.Image, quality int) error {
	compressed, err := CompressImage(filename, img, quality)
	if err != nil {
		return err
	}

	e.mut.Lock()
	defer e.mut.Unlock()

	entry, err := e.fz.CreateRaw(compressed.Header)
	if err != nil {
		return err
	}

	_, err = entry.Write(compressed.Data)
	return err
}
// EPUBZipStorageImageReader gives read access to the entries of a zip archive
// stored in a file on disk.
type EPUBZipStorageImageReader struct {
	filename string               // path of the archive, used by Remove
	fh       *os.File             // open handle backing fz
	fz       *zip.Reader          // zip reader layered on top of fh
	files    map[string]*zip.File // archive entries indexed by name
}

// NewEPUBZipStorageImageReader opens the zip archive at filename and indexes
// its entries by name.
//
// On any failure it returns a nil reader and the error; the file handle is
// closed before returning so that a failed call does not leak a descriptor.
func NewEPUBZipStorageImageReader(filename string) (*EPUBZipStorageImageReader, error) {
	fh, err := os.Open(filename)
	if err != nil {
		return nil, err
	}

	s, err := fh.Stat()
	if err != nil {
		// The Stat error is the one worth reporting; a Close error here
		// would only hide it.
		fh.Close()
		return nil, err
	}

	fz, err := zip.NewReader(fh, s.Size())
	if err != nil {
		// Same as above: report the archive error, release the handle.
		fh.Close()
		return nil, err
	}

	files := make(map[string]*zip.File, len(fz.File))
	for _, z := range fz.File {
		files[z.Name] = z
	}

	return &EPUBZipStorageImageReader{
		filename: filename,
		fh:       fh,
		fz:       fz,
		files:    files,
	}, nil
}

// Get returns the archive entry named filename, or nil when there is none.
func (e *EPUBZipStorageImageReader) Get(filename string) *zip.File {
	return e.files[filename]
}

// Size returns the space the entry named filename takes in an archive:
// its compressed size plus its local file header (fixed part and name).
// It returns 0 when the entry does not exist.
func (e *EPUBZipStorageImageReader) Size(filename string) uint64 {
	// fixed-size portion of a zip local file header, in bytes
	const localHeaderSize = 30

	img := e.Get(filename)
	if img == nil {
		return 0
	}
	return img.CompressedSize64 + localHeaderSize + uint64(len(img.Name))
}

// Close closes the file backing the archive.
func (e *EPUBZipStorageImageReader) Close() error {
	return e.fh.Close()
}

// Remove deletes the archive file from disk.
func (e *EPUBZipStorageImageReader) Remove() error {
	return os.Remove(e.filename)
}