2023-10-01 23:17:22 +00:00
|
|
|
package metadata
|
|
|
|
|
|
|
|
import (
	"fmt"
	"regexp"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"github.com/taylorskalyo/goreader/epub"
)
|
|
|
|
|
2023-10-25 23:52:01 +00:00
|
|
|
func getEPUBMetadata(filepath string) (*MetadataInfo, error) {
|
|
|
|
rc, err := epub.OpenReader(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
rf := rc.Rootfiles[0]
|
|
|
|
|
2023-12-30 15:18:43 +00:00
|
|
|
parsedMetadata := &MetadataInfo{
|
2023-10-25 23:52:01 +00:00
|
|
|
Title: &rf.Title,
|
|
|
|
Author: &rf.Creator,
|
|
|
|
Description: &rf.Description,
|
2023-12-30 15:18:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Parse Possible ISBN
|
|
|
|
if rf.Source != "" {
|
|
|
|
replaceRE := regexp.MustCompile(`[-\s]`)
|
|
|
|
possibleISBN := replaceRE.ReplaceAllString(rf.Source, "")
|
|
|
|
|
|
|
|
// ISBN Matches
|
|
|
|
isbn13RE := regexp.MustCompile(`(?P<ISBN>\d{13})`)
|
|
|
|
isbn10RE := regexp.MustCompile(`(?P<ISBN>\d{10})`)
|
|
|
|
isbn13Matches := isbn13RE.FindStringSubmatch(possibleISBN)
|
|
|
|
isbn10Matches := isbn10RE.FindStringSubmatch(possibleISBN)
|
|
|
|
|
|
|
|
if len(isbn13Matches) > 0 {
|
|
|
|
isbnIndex := isbn13RE.SubexpIndex("ISBN")
|
|
|
|
parsedMetadata.ISBN13 = &isbn13Matches[isbnIndex]
|
|
|
|
} else if len(isbn10Matches) > 0 {
|
|
|
|
isbnIndex := isbn10RE.SubexpIndex("ISBN")
|
|
|
|
parsedMetadata.ISBN10 = &isbn10Matches[isbnIndex]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return parsedMetadata, nil
|
2023-10-25 23:52:01 +00:00
|
|
|
}
|
|
|
|
|
2023-10-24 00:18:16 +00:00
|
|
|
func countEPUBWords(filepath string) (int64, error) {
|
|
|
|
rc, err := epub.OpenReader(filepath)
|
2023-10-01 23:17:22 +00:00
|
|
|
if err != nil {
|
2023-10-24 00:18:16 +00:00
|
|
|
return 0, err
|
2023-10-01 23:17:22 +00:00
|
|
|
}
|
2023-10-24 00:18:16 +00:00
|
|
|
rf := rc.Rootfiles[0]
|
2023-10-01 23:17:22 +00:00
|
|
|
|
|
|
|
var completeCount int64
|
|
|
|
for _, item := range rf.Spine.Itemrefs {
|
|
|
|
f, _ := item.Open()
|
2023-10-27 00:20:58 +00:00
|
|
|
doc, _ := goquery.NewDocumentFromReader(f)
|
|
|
|
completeCount = completeCount + int64(len(strings.Fields(doc.Text())))
|
2023-10-01 23:17:22 +00:00
|
|
|
}
|
|
|
|
|
2023-10-24 00:18:16 +00:00
|
|
|
return completeCount, nil
|
2023-10-01 23:17:22 +00:00
|
|
|
}
|