[add] document view, [add] html sanitization, [add] google books metadata enrichment, [improve] db query performance

This commit is contained in:
2023-09-22 22:12:36 -04:00
parent c1f463f0b9
commit 3150c89303
24 changed files with 750 additions and 456 deletions

View File

@@ -8,97 +8,167 @@ import (
"net/http"
"net/url"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
)
type coverResult struct {
CoverEditionKey string `json:"cover_edition_key"`
// MetadataInfo is the record GetMetadata both reads its lookup keys from and
// writes its results back into. All scalar fields are pointers so that "not
// known" (nil) can be told apart from an empty value.
type MetadataInfo struct {
// Title is used (together with Author) as a search key when neither GBID
// nor an ISBN is available; GetMetadata overwrites it with the found title.
Title *string
// Author is the second half of the title/author search key; GetMetadata
// replaces it with the first author of the found volume, if any.
Author *string
// Description is set by GetMetadata from the found volume.
Description *string
// GBID is the Google Books volume ID; when set it takes priority over every
// other lookup key.
GBID *string
// ISBN holds known ISBNs. The first entry is used as a search key when GBID
// is nil, and GetMetadata appends the ISBN_10 / ISBN_13 identifiers it finds.
ISBN []*string
}
type queryResponse struct {
ResultCount int `json:"numFound"`
Start int `json:"start"`
ResultCountExact bool `json:"numFoundExact"`
Results []coverResult `json:"docs"`
// gBooksIdentifiers is one entry of a volume's "industryIdentifiers" array in
// a Google Books response.
type gBooksIdentifiers struct {
// Type names the identifier scheme; GetMetadata only keeps entries whose
// type is "ISBN_10" or "ISBN_13".
Type string `json:"type"`
// Identifier is the identifier value itself.
Identifier string `json:"identifier"`
}
var BASE_QUERY_URL string = "https://openlibrary.org/search.json?q=%s&fields=cover_edition_key"
var BASE_COVER_URL string = "https://covers.openlibrary.org/b/olid/%s-L.jpg"
// gBooksInfo is the subset of a Google Books "volumeInfo" object that this
// package decodes.
type gBooksInfo struct {
Title string `json:"title"`
// Authors lists the volume's authors; GetMetadata uses only the first one.
Authors []string `json:"authors"`
Description string `json:"description"`
// Identifiers is decoded from "industryIdentifiers".
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
}
func GetCoverIDs(title *string, author *string) ([]string, error) {
if title == nil || author == nil {
log.Error("[metadata] Invalid Search Query")
return nil, errors.New("Invalid Query")
}
// gBooksQueryItem is a single Google Books volume: either one element of a
// search response's "items" array or the whole body of a lookup by volume ID.
type gBooksQueryItem struct {
// ID is the Google Books volume ID.
ID string `json:"id"`
// Info is decoded from the volume's "volumeInfo" object.
Info gBooksInfo `json:"volumeInfo"`
}
searchQuery := url.QueryEscape(fmt.Sprintf("%s %s", *title, *author))
apiQuery := fmt.Sprintf(BASE_QUERY_URL, searchQuery)
// gBooksQueryResponse is the decoded body of a Google Books volume search.
type gBooksQueryResponse struct {
// TotalItems is decoded from "totalItems".
TotalItems int `json:"totalItems"`
// Items holds the returned volumes; performSearchRequest treats an empty
// list as a "No Results" error.
Items []gBooksQueryItem `json:"items"`
}
log.Info("[metadata] Acquiring CoverID")
resp, err := http.Get(apiQuery)
if err != nil {
log.Error("[metadata] Cover URL API Failure")
return nil, errors.New("API Failure")
}
// Google Books URL templates. Each one takes exactly one %s argument and is
// meant to be filled in with fmt.Sprintf.
const (
	// GBOOKS_QUERY_URL is the volume search endpoint; %s is the (already
	// escaped) query. The fixed parameters restrict results to ebooks with an
	// EPUB download.
	GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s&filter=ebooks&download=epub"

	// GBOOKS_GBID_INFO_URL fetches a single volume; %s is the volume ID.
	GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"

	// GBOOKS_GBID_COVER_URL is the front-cover image of a volume; %s is the
	// volume ID.
	GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
)
target := queryResponse{}
err = json.NewDecoder(resp.Body).Decode(&target)
if err != nil {
log.Error("[metadata] Cover URL API Decode Failure")
return nil, errors.New("API Failure")
}
var coverIDs []string
for _, result := range target.Results {
if result.CoverEditionKey != "" {
coverIDs = append(coverIDs, result.CoverEditionKey)
// GetMetadata looks up a volume on Google Books and merges the result into
// data.
//
// The lookup key is chosen in priority order: data.GBID, then the first entry
// of data.ISBN, then data.Title together with data.Author. If none of these is
// available (or data itself is nil) an "Invalid Data" error is returned.
// Errors from the request helpers are returned unchanged.
//
// On success data.GBID, data.Description and data.Title are overwritten,
// data.Author is set to the first returned author (when there is one), and
// every ISBN_10 / ISBN_13 identifier of the volume is appended to data.ISBN.
func GetMetadata(data *MetadataInfo) error {
	if data == nil {
		return errors.New("Invalid Data")
	}

	// search runs a query and returns its first hit. The length is re-checked
	// here so that indexing the result can never panic.
	search := func(query string) (*gBooksQueryItem, error) {
		resp, err := performSearchRequest(query)
		if err != nil {
			return nil, err
		}
		if resp == nil || len(resp.Items) == 0 {
			return nil, errors.New("No Results")
		}
		return &resp.Items[0], nil
	}

	var (
		queryResult *gBooksQueryItem
		err         error
	)
	switch {
	case data.GBID != nil:
		// Use GBID
		queryResult, err = performGBIDRequest(*data.GBID)
	case len(data.ISBN) > 0 && data.ISBN[0] != nil:
		// The query is interpolated verbatim into the URL, so escape it just
		// like the title/author query below.
		queryResult, err = search(url.QueryEscape("isbn:" + *data.ISBN[0]))
	case data.Title != nil && data.Author != nil:
		queryResult, err = search(url.QueryEscape(fmt.Sprintf("%s %s", *data.Title, *data.Author)))
	default:
		return errors.New("Invalid Data")
	}
	if err != nil {
		return err
	}
	if queryResult == nil {
		return errors.New("No Results")
	}

	// Merge Data
	data.GBID = &queryResult.ID
	data.Description = &queryResult.Info.Description
	data.Title = &queryResult.Info.Title
	if len(queryResult.Info.Authors) > 0 {
		data.Author = &queryResult.Info.Authors[0]
	}

	// Index into the slice instead of ranging by value: taking the address of
	// a range variable makes every stored pointer alias the same variable on
	// Go versions before 1.22, so all ISBNs would read as the last one.
	identifiers := queryResult.Info.Identifiers
	for i := range identifiers {
		if identifiers[i].Type == "ISBN_10" || identifiers[i].Type == "ISBN_13" {
			data.ISBN = append(data.ISBN, &identifiers[i].Identifier)
		}
	}

	return nil
}
func DownloadAndSaveCover(coverID string, dirPath string) (*string, error) {
// Derive & Sanitize File Name
fileName := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", coverID))
// Generate Storage Path
safePath := filepath.Join(dirPath, "covers", fileName)
// SaveCover downloads the Google Books front cover for the volume id and
// writes it to safePath.
//
// If a file already exists at safePath nothing is downloaded and nil is
// returned. The caller is responsible for safePath being a sanitized location;
// it is used as given.
//
// Errors: "API Failure" when the request fails or does not answer 200 OK,
// "File Failure" when the file cannot be created or written.
func SaveCover(id string, safePath string) error {
	// Validate File Doesn't Exists
	if _, err := os.Stat(safePath); err == nil {
		log.Warn("[SaveCover] File Alreads Exists")
		return nil
	}

	// Download File. This happens before the file is created so that a failed
	// request cannot leave an empty file behind, which the existence check
	// above would then treat as a valid cover forever.
	log.Info("[SaveCover] Downloading Cover")
	coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, id)
	resp, err := http.Get(coverURL)
	if err != nil {
		log.Error("[SaveCover] Cover URL API Failure")
		return errors.New("API Failure")
	}
	defer resp.Body.Close()

	// An error page must not be stored as if it were the cover image.
	if resp.StatusCode != http.StatusOK {
		log.Error("[SaveCover] Cover URL API Failure")
		return errors.New("API Failure")
	}

	// Create File
	out, err := os.Create(safePath)
	if err != nil {
		log.Error("[SaveCover] File Create Error")
		return errors.New("File Failure")
	}

	// Copy File to Disk
	log.Info("[SaveCover] Saving Cover")
	if _, err := io.Copy(out, resp.Body); err != nil {
		log.Error("[SaveCover] File Copy Error")
		// Best-effort cleanup of the partial file; the copy error is the one
		// that matters to the caller.
		_ = out.Close()
		_ = os.Remove(safePath)
		return errors.New("File Failure")
	}

	// Close explicitly: a failed close can mean the data never hit the disk.
	if err := out.Close(); err != nil {
		log.Error("[SaveCover] File Copy Error")
		_ = os.Remove(safePath)
		return errors.New("File Failure")
	}

	return nil
}
// performSearchRequest runs a Google Books volume search and returns the
// decoded response.
//
// searchQuery is inserted verbatim into GBOOKS_QUERY_URL, so the caller must
// pass it already URL-escaped.
//
// Errors: "API Failure" when the request fails, does not answer 200 OK, or
// the body cannot be decoded; "No Results" when the search returned no items.
// A nil error therefore guarantees at least one entry in Items.
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
	apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)

	log.Info("[performSearchRequest] Acquiring CoverID")
	resp, err := http.Get(apiQuery)
	if err != nil {
		log.Error("[performSearchRequest] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}
	// The body must always be closed or the underlying connection leaks.
	defer resp.Body.Close()

	// API error payloads are valid JSON and would otherwise decode into an
	// empty response and be misreported as "No Results".
	if resp.StatusCode != http.StatusOK {
		log.Error("[performSearchRequest] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}

	parsedResp := gBooksQueryResponse{}
	if err := json.NewDecoder(resp.Body).Decode(&parsedResp); err != nil {
		log.Error("[performSearchRequest] Google Books Query API Decode Failure")
		return nil, errors.New("API Failure")
	}

	if len(parsedResp.Items) == 0 {
		log.Warn("[performSearchRequest] No Results")
		return nil, errors.New("No Results")
	}

	return &parsedResp, nil
}
// performGBIDRequest fetches a single Google Books volume by its volume ID and
// returns the decoded item.
//
// id is inserted verbatim into GBOOKS_GBID_INFO_URL.
//
// Errors: "API Failure" when the request fails, does not answer 200 OK, or
// the body cannot be decoded.
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
	apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)

	log.Info("[performGBIDRequest] Acquiring CoverID")
	resp, err := http.Get(apiQuery)
	if err != nil {
		log.Error("[performGBIDRequest] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}
	// The body must always be closed or the underlying connection leaks.
	defer resp.Body.Close()

	// An unknown ID answers with a JSON error object, which would decode
	// without error into an item whose fields are all empty. Reject it here so
	// callers never merge blank metadata.
	if resp.StatusCode != http.StatusOK {
		log.Error("[performGBIDRequest] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}

	parsedResp := gBooksQueryItem{}
	if err := json.NewDecoder(resp.Body).Decode(&parsedResp); err != nil {
		log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
		return nil, errors.New("API Failure")
	}

	return &parsedResp, nil
}

107
metadata/olib.go Normal file
View File

@@ -0,0 +1,107 @@
package metadata
import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
log "github.com/sirupsen/logrus"
)
// oLibCoverResult is one entry of the "docs" array in an Open Library search
// response, reduced to the only field this package requests.
type oLibCoverResult struct {
// CoverEditionKey is decoded from "cover_edition_key"; GetCoverOLIDs skips
// entries where it is empty.
CoverEditionKey string `json:"cover_edition_key"`
}
// oLibQueryResponse is the decoded body of an Open Library search request.
type oLibQueryResponse struct {
// ResultCount is decoded from "numFound".
ResultCount int `json:"numFound"`
// Start is decoded from "start".
Start int `json:"start"`
// ResultCountExact is decoded from "numFoundExact".
ResultCountExact bool `json:"numFoundExact"`
// Results is decoded from "docs" and holds the individual search hits.
Results []oLibCoverResult `json:"docs"`
}
// Open Library URL templates. Each one takes exactly one %s argument and is
// meant to be filled in with fmt.Sprintf.
const (
	// OLIB_QUERY_URL is the search endpoint; %s is the (already escaped)
	// query. Only the cover_edition_key field is requested.
	OLIB_QUERY_URL string = "https://openlibrary.org/search.json?q=%s&fields=cover_edition_key"

	// OLIB_OLID_COVER_URL is the large cover image for an OLID (%s).
	OLIB_OLID_COVER_URL string = "https://covers.openlibrary.org/b/olid/%s-L.jpg"

	// OLIB_ISBN_COVER_URL is the large cover image for an ISBN (%s).
	OLIB_ISBN_COVER_URL string = "https://covers.openlibrary.org/b/isbn/%s-L.jpg"

	// OLIB_OLID_LINK_URL is the Open Library book page for an OLID (%s).
	OLIB_OLID_LINK_URL string = "https://openlibrary.org/books/%s"

	// OLIB_ISBN_LINK_URL is the Open Library book page for an ISBN (%s).
	OLIB_ISBN_LINK_URL string = "https://openlibrary.org/isbn/%s"
)
// GetCoverOLIDs searches Open Library for "<title> <author>" and returns the
// cover edition keys of all hits that have one, in response order.
//
// Both title and author must be non-nil. The returned slice is nil when no
// hit carries a cover edition key.
//
// Errors: "Invalid Query" when title or author is nil; "API Failure" when the
// request fails, does not answer 200 OK, or the body cannot be decoded.
func GetCoverOLIDs(title *string, author *string) ([]string, error) {
	if title == nil || author == nil {
		log.Error("[metadata] Invalid Search Query")
		return nil, errors.New("Invalid Query")
	}

	searchQuery := url.QueryEscape(fmt.Sprintf("%s %s", *title, *author))
	apiQuery := fmt.Sprintf(OLIB_QUERY_URL, searchQuery)

	log.Info("[metadata] Acquiring CoverID")
	resp, err := http.Get(apiQuery)
	if err != nil {
		log.Error("[metadata] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}
	// The body must always be closed or the underlying connection leaks.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Error("[metadata] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}

	target := oLibQueryResponse{}
	if err := json.NewDecoder(resp.Body).Decode(&target); err != nil {
		log.Error("[metadata] Cover URL API Decode Failure")
		return nil, errors.New("API Failure")
	}

	var coverIDs []string
	for _, result := range target.Results {
		if result.CoverEditionKey != "" {
			coverIDs = append(coverIDs, result.CoverEditionKey)
		}
	}

	return coverIDs, nil
}
// DownloadAndSaveCover downloads the Open Library cover for coverID (an OLID)
// and stores it as "<dirPath>/covers/<coverID>.jpg".
//
// It returns a pointer to the path of the stored file. If a file already
// exists at that path nothing is downloaded and the existing path is returned.
//
// Errors: "API Failure" when the request fails or does not answer 200 OK,
// "File Failure" when the file cannot be created or written.
func DownloadAndSaveCover(coverID string, dirPath string) (*string, error) {
	// Derive & Sanitize File Name. Cleaning a rooted path collapses any ".."
	// elements, so the result cannot climb out of the covers directory.
	fileName := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", coverID))

	// Generate Storage Path
	safePath := filepath.Join(dirPath, "covers", fileName)

	// Validate File Doesn't Exists
	if _, err := os.Stat(safePath); err == nil {
		log.Warn("[metadata] File Alreads Exists")
		return &safePath, nil
	}

	// Download File. This happens before the file is created so that a failed
	// request cannot leave an empty file behind, which the existence check
	// above would then treat as a valid cover forever.
	log.Info("[metadata] Downloading Cover")
	coverURL := fmt.Sprintf(OLIB_OLID_COVER_URL, coverID)
	resp, err := http.Get(coverURL)
	if err != nil {
		log.Error("[metadata] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}
	defer resp.Body.Close()

	// An error page must not be stored as if it were the cover image.
	if resp.StatusCode != http.StatusOK {
		log.Error("[metadata] Cover URL API Failure")
		return nil, errors.New("API Failure")
	}

	// Create File
	out, err := os.Create(safePath)
	if err != nil {
		log.Error("[metadata] File Create Error")
		return nil, errors.New("File Failure")
	}

	// Copy File to Disk
	if _, err := io.Copy(out, resp.Body); err != nil {
		log.Error("[metadata] File Copy Error")
		// Best-effort cleanup of the partial file; the copy error is the one
		// that matters to the caller.
		_ = out.Close()
		_ = os.Remove(safePath)
		return nil, errors.New("File Failure")
	}

	// Close explicitly: a failed close can mean the data never hit the disk.
	if err := out.Close(); err != nil {
		log.Error("[metadata] File Copy Error")
		_ = os.Remove(safePath)
		return nil, errors.New("File Failure")
	}

	// Return FilePath
	return &safePath, nil
}