[new] count words & stats, [new] refactor metadata, [new] human readable time
This commit is contained in:
330
metadata/epub.go
Normal file
330
metadata/epub.go
Normal file
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
Package epub provides basic support for reading EPUB archives.
|
||||
Adapted from: https://github.com/taylorskalyo/goreader
|
||||
*/
|
||||
package metadata
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// containerPath is the name of the zip entry that setContainer opens and
// unmarshals to discover the epub's rootfiles.
const containerPath = "META-INF/container.xml"
|
||||
|
||||
// ErrNoRootfile is returned when container.xml lists no rootfile entries.
var ErrNoRootfile = errors.New("epub: no rootfile found in container")

// ErrBadRootfile is returned when container.xml names a rootfile that is not
// present in the zip archive.
var ErrBadRootfile = errors.New("epub: container references non-existent rootfile")

// ErrNoItemref is returned when no spine in any content.opf contains an
// itemref entry.
var ErrNoItemref = errors.New("epub: no itemrefs found in spine")

// ErrBadItemref is returned when a spine itemref in content.opf points at an
// id that the manifest does not declare.
var ErrBadItemref = errors.New("epub: itemref references non-existent item")

// ErrBadManifest is returned when a manifest item in content.opf refers to a
// file that is not present in the zip archive.
var ErrBadManifest = errors.New("epub: manifest references non-existent item")
|
||||
|
||||
// Reader represents a readable epub file.
|
||||
type Reader struct {
|
||||
Container
|
||||
files map[string]*zip.File
|
||||
}
|
||||
|
||||
// ReadCloser represents a readable epub file that can be closed.
|
||||
type ReadCloser struct {
|
||||
Reader
|
||||
f *os.File
|
||||
}
|
||||
|
||||
// Rootfile contains the location of a content.opf package file.
|
||||
type Rootfile struct {
|
||||
FullPath string `xml:"full-path,attr"`
|
||||
Package
|
||||
}
|
||||
|
||||
// Container serves as a directory of Rootfiles.
|
||||
type Container struct {
|
||||
Rootfiles []*Rootfile `xml:"rootfiles>rootfile"`
|
||||
}
|
||||
|
||||
// Package represents an epub content.opf file.
|
||||
type Package struct {
|
||||
Metadata
|
||||
Manifest
|
||||
Spine
|
||||
}
|
||||
|
||||
// Metadata contains publishing information about the epub, decoded from the
// child elements of the package's metadata element. Each field maps to the
// child element named in its struct tag.
type Metadata struct {
	Title    string `xml:"metadata>title"`
	Language string `xml:"metadata>language"`
	// Identifier maps to the <identifier> element. The tag was previously
	// misspelled, which left this field permanently empty.
	Identifier  string `xml:"metadata>identifier"`
	Creator     string `xml:"metadata>creator"`
	Contributor string `xml:"metadata>contributor"`
	Publisher   string `xml:"metadata>publisher"`
	Subject     string `xml:"metadata>subject"`
	Description string `xml:"metadata>description"`
	// Event collects every <date> element: Name is its event attribute and
	// Date is the element's raw inner XML.
	Event []struct {
		Name string `xml:"event,attr"`
		Date string `xml:",innerxml"`
	} `xml:"metadata>date"`
	Type     string `xml:"metadata>type"`
	Format   string `xml:"metadata>format"`
	Source   string `xml:"metadata>source"`
	Relation string `xml:"metadata>relation"`
	Coverage string `xml:"metadata>coverage"`
	Rights   string `xml:"metadata>rights"`
}
|
||||
|
||||
// Manifest lists every file that is part of the epub.
|
||||
type Manifest struct {
|
||||
Items []Item `xml:"manifest>item"`
|
||||
}
|
||||
|
||||
// Item is a single manifest entry. ID, HREF and MediaType come from the
// entry's id, href and media-type attributes. f is the matching zip entry and
// stays nil when the archive has no file at the item's resolved path.
type Item struct {
	ID        string `xml:"id,attr"`
	HREF      string `xml:"href,attr"`
	MediaType string `xml:"media-type,attr"`
	f         *zip.File
}
|
||||
|
||||
// Spine defines the reading order of the epub documents.
|
||||
type Spine struct {
|
||||
Itemrefs []Itemref `xml:"spine>itemref"`
|
||||
}
|
||||
|
||||
// Itemref points to an Item.
|
||||
type Itemref struct {
|
||||
IDREF string `xml:"idref,attr"`
|
||||
*Item
|
||||
}
|
||||
|
||||
// OpenEPUBReader will open the epub file specified by name and return a
|
||||
// ReadCloser.
|
||||
func OpenEPUBReader(name string) (*ReadCloser, error) {
|
||||
f, err := os.Open(name)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rc := new(ReadCloser)
|
||||
rc.f = f
|
||||
|
||||
fi, err := f.Stat()
|
||||
if err != nil {
|
||||
f.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
z, err := zip.NewReader(f, fi.Size())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err = rc.init(z); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return rc, nil
|
||||
}
|
||||
|
||||
// NewReader returns a new Reader reading from ra, which is assumed to have the
|
||||
// given size in bytes.
|
||||
func NewReader(ra io.ReaderAt, size int64) (*Reader, error) {
|
||||
z, err := zip.NewReader(ra, size)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
r := new(Reader)
|
||||
if err = r.init(z); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (r *Reader) init(z *zip.Reader) error {
|
||||
// Create a file lookup table
|
||||
r.files = make(map[string]*zip.File)
|
||||
for _, f := range z.File {
|
||||
r.files[f.Name] = f
|
||||
}
|
||||
|
||||
err := r.setContainer()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = r.setPackages()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
err = r.setItems()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setContainer unmarshals the epub's container.xml file.
|
||||
func (r *Reader) setContainer() error {
|
||||
f, err := r.files[containerPath].Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
_, err = io.Copy(&b, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = xml.Unmarshal(b.Bytes(), &r.Container)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(r.Container.Rootfiles) < 1 {
|
||||
return ErrNoRootfile
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setPackages unmarshal's each of the epub's content.opf files.
|
||||
func (r *Reader) setPackages() error {
|
||||
for _, rf := range r.Container.Rootfiles {
|
||||
if r.files[rf.FullPath] == nil {
|
||||
return ErrBadRootfile
|
||||
}
|
||||
|
||||
f, err := r.files[rf.FullPath].Open()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var b bytes.Buffer
|
||||
_, err = io.Copy(&b, f)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
err = xml.Unmarshal(b.Bytes(), &rf.Package)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setItems associates Itemrefs with their respective Item and Items with
|
||||
// their zip.File.
|
||||
func (r *Reader) setItems() error {
|
||||
itemrefCount := 0
|
||||
for _, rf := range r.Container.Rootfiles {
|
||||
itemMap := make(map[string]*Item)
|
||||
for i := range rf.Manifest.Items {
|
||||
item := &rf.Manifest.Items[i]
|
||||
itemMap[item.ID] = item
|
||||
|
||||
abs := path.Join(path.Dir(rf.FullPath), item.HREF)
|
||||
item.f = r.files[abs]
|
||||
}
|
||||
|
||||
for i := range rf.Spine.Itemrefs {
|
||||
itemref := &rf.Spine.Itemrefs[i]
|
||||
itemref.Item = itemMap[itemref.IDREF]
|
||||
if itemref.Item == nil {
|
||||
return ErrBadItemref
|
||||
}
|
||||
}
|
||||
itemrefCount += len(rf.Spine.Itemrefs)
|
||||
}
|
||||
|
||||
if itemrefCount < 1 {
|
||||
return ErrNoItemref
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Open returns a ReadCloser that provides access to the Items's contents.
|
||||
// Multiple items may be read concurrently.
|
||||
func (item *Item) Open() (r io.ReadCloser, err error) {
|
||||
if item.f == nil {
|
||||
return nil, ErrBadManifest
|
||||
}
|
||||
|
||||
return item.f.Open()
|
||||
}
|
||||
|
||||
// Close closes the epub file, rendering it unusable for I/O.
|
||||
func (rc *ReadCloser) Close() {
|
||||
rc.f.Close()
|
||||
}
|
||||
|
||||
// Hehe
|
||||
func (rf *Rootfile) CountWords() int64 {
|
||||
var completeCount int64
|
||||
for _, item := range rf.Spine.Itemrefs {
|
||||
f, _ := item.Open()
|
||||
tokenizer := html.NewTokenizer(f)
|
||||
completeCount = completeCount + countWords(*tokenizer)
|
||||
}
|
||||
|
||||
return completeCount
|
||||
}
|
||||
|
||||
func countWords(tokenizer html.Tokenizer) int64 {
|
||||
var err error
|
||||
var totalWords int64
|
||||
for {
|
||||
tokenType := tokenizer.Next()
|
||||
token := tokenizer.Token()
|
||||
if tokenType == html.TextToken {
|
||||
currStr := string(token.Data)
|
||||
totalWords = totalWords + int64(len(strings.Fields(currStr)))
|
||||
} else if tokenType == html.ErrorToken {
|
||||
err = tokenizer.Err()
|
||||
}
|
||||
if err == io.EOF {
|
||||
return totalWords
|
||||
} else if err != nil {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
func main() {
|
||||
rc, err := OpenEPUBReader("test.epub")
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
rf := rc.Rootfiles[0]
|
||||
|
||||
totalWords := rf.CountWords()
|
||||
log.Info("WOAH WORDS:", totalWords)
|
||||
}
|
||||
*/
|
||||
200
metadata/gbooks.go
Normal file
200
metadata/gbooks.go
Normal file
@@ -0,0 +1,200 @@
|
||||
package metadata
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// gBooksIdentifiers is one entry of a volume's industryIdentifiers list in a
// Google Books response. getGBooksMetadata looks for the Type values
// "ISBN_10" and "ISBN_13".
type gBooksIdentifiers struct {
	Type       string `json:"type"`
	Identifier string `json:"identifier"`
}
|
||||
|
||||
type gBooksInfo struct {
|
||||
Title string `json:"title"`
|
||||
Authors []string `json:"authors"`
|
||||
Description string `json:"description"`
|
||||
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
|
||||
}
|
||||
|
||||
type gBooksQueryItem struct {
|
||||
ID string `json:"id"`
|
||||
Info gBooksInfo `json:"volumeInfo"`
|
||||
}
|
||||
|
||||
type gBooksQueryResponse struct {
|
||||
TotalItems int `json:"totalItems"`
|
||||
Items []gBooksQueryItem `json:"items"`
|
||||
}
|
||||
|
||||
// Google Books endpoints. Each value is a fmt format string with a single %s
// verb that the request helpers fill in.
const (
	// GBOOKS_QUERY_URL is the volume search endpoint; %s is the search query.
	GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s"

	// GBOOKS_GBID_INFO_URL is the single-volume endpoint; %s is the volume ID.
	GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"

	// GBOOKS_GBID_COVER_URL is the front-cover image endpoint; %s is the
	// volume ID.
	GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
)
|
||||
|
||||
func getGBooksMetadata(metadataSearch MetadataInfo) ([]MetadataInfo, error) {
|
||||
var queryResults []gBooksQueryItem
|
||||
if metadataSearch.ID != nil {
|
||||
// Use GBID
|
||||
resp, err := performGBIDRequest(*metadataSearch.ID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = []gBooksQueryItem{*resp}
|
||||
} else if metadataSearch.ISBN13 != nil {
|
||||
searchQuery := "isbn:" + *metadataSearch.ISBN13
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else if metadataSearch.ISBN10 != nil {
|
||||
searchQuery := "isbn:" + *metadataSearch.ISBN10
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else if metadataSearch.Title != nil || metadataSearch.Author != nil {
|
||||
var searchQuery string
|
||||
if metadataSearch.Title != nil {
|
||||
searchQuery = searchQuery + *metadataSearch.Title
|
||||
}
|
||||
if metadataSearch.Author != nil {
|
||||
searchQuery = searchQuery + " " + *metadataSearch.Author
|
||||
}
|
||||
|
||||
// Escape & Trim
|
||||
searchQuery = url.QueryEscape(strings.TrimSpace(searchQuery))
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else {
|
||||
return nil, errors.New("Invalid Data")
|
||||
}
|
||||
|
||||
// Normalize Data
|
||||
allMetadata := []MetadataInfo{}
|
||||
for i := range queryResults {
|
||||
item := queryResults[i] // Range Value Pointer Issue
|
||||
itemResult := MetadataInfo{
|
||||
ID: &item.ID,
|
||||
Title: &item.Info.Title,
|
||||
Description: &item.Info.Description,
|
||||
}
|
||||
|
||||
if len(item.Info.Authors) > 0 {
|
||||
itemResult.Author = &item.Info.Authors[0]
|
||||
}
|
||||
|
||||
for i := range item.Info.Identifiers {
|
||||
item := item.Info.Identifiers[i] // Range Value Pointer Issue
|
||||
|
||||
if itemResult.ISBN10 != nil && itemResult.ISBN13 != nil {
|
||||
break
|
||||
} else if itemResult.ISBN10 == nil && item.Type == "ISBN_10" {
|
||||
itemResult.ISBN10 = &item.Identifier
|
||||
} else if itemResult.ISBN13 == nil && item.Type == "ISBN_13" {
|
||||
itemResult.ISBN13 = &item.Identifier
|
||||
}
|
||||
}
|
||||
|
||||
allMetadata = append(allMetadata, itemResult)
|
||||
}
|
||||
|
||||
return allMetadata, nil
|
||||
}
|
||||
|
||||
func saveGBooksCover(gbid string, coverFilePath string, overwrite bool) error {
|
||||
// Validate File Doesn't Exists
|
||||
_, err := os.Stat(coverFilePath)
|
||||
if err == nil && overwrite == false {
|
||||
log.Warn("[saveGBooksCover] File Alreads Exists")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create File
|
||||
out, err := os.Create(coverFilePath)
|
||||
if err != nil {
|
||||
log.Error("[saveGBooksCover] File Create Error")
|
||||
return errors.New("File Failure")
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Download File
|
||||
log.Info("[saveGBooksCover] Downloading Cover")
|
||||
coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, gbid)
|
||||
resp, err := http.Get(coverURL)
|
||||
if err != nil {
|
||||
log.Error("[saveGBooksCover] Cover URL API Failure")
|
||||
return errors.New("API Failure")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy File to Disk
|
||||
log.Info("[saveGBooksCover] Saving Cover")
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
log.Error("[saveGBooksCover] File Copy Error")
|
||||
return errors.New("File Failure")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)
|
||||
log.Info("[performSearchRequest] Acquiring Metadata: ", apiQuery)
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Google Books Query URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
parsedResp := gBooksQueryResponse{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Google Books Query API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
if len(parsedResp.Items) == 0 {
|
||||
log.Warn("[performSearchRequest] No Results")
|
||||
return nil, errors.New("No Results")
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
}
|
||||
|
||||
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)
|
||||
|
||||
log.Info("[performGBIDRequest] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
parsedResp := gBooksQueryItem{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
}
|
||||
@@ -1,217 +1,72 @@
|
||||
package metadata
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/gabriel-vasile/mimetype"
|
||||
)
|
||||
|
||||
type Source int
|
||||
|
||||
const (
|
||||
GBOOK Source = iota
|
||||
OLIB
|
||||
)
|
||||
|
||||
type MetadataInfo struct {
|
||||
ID *string
|
||||
Title *string
|
||||
Author *string
|
||||
Description *string
|
||||
GBID *string
|
||||
OLID *string
|
||||
ISBN10 *string
|
||||
ISBN13 *string
|
||||
}
|
||||
|
||||
type gBooksIdentifiers struct {
|
||||
Type string `json:"type"`
|
||||
Identifier string `json:"identifier"`
|
||||
}
|
||||
|
||||
type gBooksInfo struct {
|
||||
Title string `json:"title"`
|
||||
Authors []string `json:"authors"`
|
||||
Description string `json:"description"`
|
||||
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
|
||||
}
|
||||
|
||||
type gBooksQueryItem struct {
|
||||
ID string `json:"id"`
|
||||
Info gBooksInfo `json:"volumeInfo"`
|
||||
}
|
||||
|
||||
type gBooksQueryResponse struct {
|
||||
TotalItems int `json:"totalItems"`
|
||||
Items []gBooksQueryItem `json:"items"`
|
||||
}
|
||||
|
||||
const GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s"
|
||||
const GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"
|
||||
const GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
|
||||
|
||||
func GetMetadata(metadataSearch MetadataInfo) ([]MetadataInfo, error) {
|
||||
var queryResults []gBooksQueryItem
|
||||
if metadataSearch.GBID != nil {
|
||||
// Use GBID
|
||||
resp, err := performGBIDRequest(*metadataSearch.GBID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = []gBooksQueryItem{*resp}
|
||||
} else if metadataSearch.ISBN13 != nil {
|
||||
searchQuery := "isbn:" + *metadataSearch.ISBN13
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else if metadataSearch.ISBN10 != nil {
|
||||
searchQuery := "isbn:" + *metadataSearch.ISBN10
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else if metadataSearch.Title != nil || metadataSearch.Author != nil {
|
||||
var searchQuery string
|
||||
if metadataSearch.Title != nil {
|
||||
searchQuery = searchQuery + *metadataSearch.Title
|
||||
}
|
||||
if metadataSearch.Author != nil {
|
||||
searchQuery = searchQuery + " " + *metadataSearch.Author
|
||||
}
|
||||
|
||||
// Escape & Trim
|
||||
searchQuery = url.QueryEscape(strings.TrimSpace(searchQuery))
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
queryResults = resp.Items
|
||||
} else {
|
||||
return nil, errors.New("Invalid Data")
|
||||
}
|
||||
|
||||
// Normalize Data
|
||||
allMetadata := []MetadataInfo{}
|
||||
for i := range queryResults {
|
||||
item := queryResults[i] // Range Value Pointer Issue
|
||||
itemResult := MetadataInfo{
|
||||
GBID: &item.ID,
|
||||
Title: &item.Info.Title,
|
||||
Description: &item.Info.Description,
|
||||
}
|
||||
|
||||
if len(item.Info.Authors) > 0 {
|
||||
itemResult.Author = &item.Info.Authors[0]
|
||||
}
|
||||
|
||||
for i := range item.Info.Identifiers {
|
||||
item := item.Info.Identifiers[i] // Range Value Pointer Issue
|
||||
|
||||
if itemResult.ISBN10 != nil && itemResult.ISBN13 != nil {
|
||||
break
|
||||
} else if itemResult.ISBN10 == nil && item.Type == "ISBN_10" {
|
||||
itemResult.ISBN10 = &item.Identifier
|
||||
} else if itemResult.ISBN13 == nil && item.Type == "ISBN_13" {
|
||||
itemResult.ISBN13 = &item.Identifier
|
||||
}
|
||||
}
|
||||
|
||||
allMetadata = append(allMetadata, itemResult)
|
||||
}
|
||||
|
||||
return allMetadata, nil
|
||||
}
|
||||
|
||||
func SaveCover(gbid string, coverDir string, documentID string, overwrite bool) (*string, error) {
|
||||
|
||||
// Google Books -> JPG
|
||||
func CacheCover(gbid string, coverDir string, documentID string, overwrite bool) (*string, error) {
|
||||
// Get Filepath
|
||||
coverFile := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", documentID))
|
||||
coverFilePath := filepath.Join(coverDir, coverFile)
|
||||
|
||||
// Validate File Doesn't Exists
|
||||
_, err := os.Stat(coverFilePath)
|
||||
if err == nil && overwrite == false {
|
||||
log.Warn("[SaveCover] File Alreads Exists")
|
||||
return &coverFile, nil
|
||||
// Save Google Books
|
||||
if err := saveGBooksCover(gbid, coverFilePath, overwrite); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create File
|
||||
out, err := os.Create(coverFilePath)
|
||||
if err != nil {
|
||||
log.Error("[SaveCover] File Create Error")
|
||||
return nil, errors.New("File Failure")
|
||||
}
|
||||
defer out.Close()
|
||||
// TODO - Refactor & Allow Open Library / Alternative Sources
|
||||
|
||||
// Download File
|
||||
log.Info("[SaveCover] Downloading Cover")
|
||||
coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, gbid)
|
||||
resp, err := http.Get(coverURL)
|
||||
if err != nil {
|
||||
log.Error("[SaveCover] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy File to Disk
|
||||
log.Info("[SaveCover] Saving Cover")
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
log.Error("[SaveCover] File Copy Error")
|
||||
return nil, errors.New("File Failure")
|
||||
}
|
||||
|
||||
// Return FilePath
|
||||
return &coverFile, nil
|
||||
}
|
||||
|
||||
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)
|
||||
log.Info("[performSearchRequest] Acquiring Metadata: ", apiQuery)
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Google Books Query URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
func SearchMetadata(s Source, metadataSearch MetadataInfo) ([]MetadataInfo, error) {
|
||||
switch s {
|
||||
case GBOOK:
|
||||
return getGBooksMetadata(metadataSearch)
|
||||
case OLIB:
|
||||
return nil, errors.New("Not implemented")
|
||||
default:
|
||||
return nil, errors.New("Not implemented")
|
||||
|
||||
parsedResp := gBooksQueryResponse{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Google Books Query API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
if len(parsedResp.Items) == 0 {
|
||||
log.Warn("[performSearchRequest] No Results")
|
||||
return nil, errors.New("No Results")
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
}
|
||||
|
||||
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)
|
||||
|
||||
log.Info("[performGBIDRequest] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
func GetWordCount(filepath string) (int64, error) {
|
||||
fileMime, err := mimetype.DetectFile(filepath)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
return 0, err
|
||||
}
|
||||
|
||||
parsedResp := gBooksQueryItem{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
if fileExtension := fileMime.Extension(); fileExtension == ".epub" {
|
||||
rc, err := OpenEPUBReader(filepath)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
rf := rc.Rootfiles[0]
|
||||
totalWords := rf.CountWords()
|
||||
return totalWords, nil
|
||||
} else {
|
||||
return 0, errors.New("Invalid Extension")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user