[add] document view, [add] html sanitization, [add] google books metadata enrichment, [improve] db query performance
This commit is contained in:
@@ -8,97 +8,167 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type coverResult struct {
|
||||
CoverEditionKey string `json:"cover_edition_key"`
|
||||
// MetadataInfo is the record passed to GetMetadata. It serves both as the
// lookup key (GBID, ISBN, or Title plus Author) and as the destination that
// GetMetadata overwrites with the values found.
//
// The scalar fields are pointers so that "not provided" (nil) can be told
// apart from an empty string.
type MetadataInfo struct {
	Title       *string   // Book title; used together with Author for free-text search.
	Author      *string   // Author name; GetMetadata stores the first author of the match.
	Description *string   // Description text of the matched volume.
	GBID        *string   // Google Books volume ID; preferred lookup key when set.
	ISBN        []*string // ISBN values; the first entry is used for "isbn:" searches.
}
|
||||
|
||||
type queryResponse struct {
|
||||
ResultCount int `json:"numFound"`
|
||||
Start int `json:"start"`
|
||||
ResultCountExact bool `json:"numFoundExact"`
|
||||
Results []coverResult `json:"docs"`
|
||||
// gBooksIdentifiers is a single entry of the "industryIdentifiers" array in a
// Google Books volume. GetMetadata keeps the entries whose Type is "ISBN_10"
// or "ISBN_13".
type gBooksIdentifiers struct {
	Type       string `json:"type"`       // Identifier scheme, e.g. "ISBN_13".
	Identifier string `json:"identifier"` // The identifier value itself.
}
|
||||
|
||||
var BASE_QUERY_URL string = "https://openlibrary.org/search.json?q=%s&fields=cover_edition_key"
|
||||
var BASE_COVER_URL string = "https://covers.openlibrary.org/b/olid/%s-L.jpg"
|
||||
type gBooksInfo struct {
|
||||
Title string `json:"title"`
|
||||
Authors []string `json:"authors"`
|
||||
Description string `json:"description"`
|
||||
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
|
||||
}
|
||||
|
||||
func GetCoverIDs(title *string, author *string) ([]string, error) {
|
||||
if title == nil || author == nil {
|
||||
log.Error("[metadata] Invalid Search Query")
|
||||
return nil, errors.New("Invalid Query")
|
||||
}
|
||||
type gBooksQueryItem struct {
|
||||
ID string `json:"id"`
|
||||
Info gBooksInfo `json:"volumeInfo"`
|
||||
}
|
||||
|
||||
searchQuery := url.QueryEscape(fmt.Sprintf("%s %s", *title, *author))
|
||||
apiQuery := fmt.Sprintf(BASE_QUERY_URL, searchQuery)
|
||||
type gBooksQueryResponse struct {
|
||||
TotalItems int `json:"totalItems"`
|
||||
Items []gBooksQueryItem `json:"items"`
|
||||
}
|
||||
|
||||
log.Info("[metadata] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
// Google Books endpoint templates. Each contains a single %s verb that is
// filled in with fmt.Sprintf.
const (
	// GBOOKS_QUERY_URL searches volumes; %s is the already URL-escaped query.
	GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s&filter=ebooks&download=epub"
	// GBOOKS_GBID_INFO_URL fetches a single volume; %s is the volume ID.
	GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"
	// GBOOKS_GBID_COVER_URL fetches a front-cover image; %s is the volume ID.
	GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
)
|
||||
|
||||
target := queryResponse{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&target)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
var coverIDs []string
|
||||
for _, result := range target.Results {
|
||||
if result.CoverEditionKey != "" {
|
||||
coverIDs = append(coverIDs, result.CoverEditionKey)
|
||||
func GetMetadata(data *MetadataInfo) error {
|
||||
var queryResult *gBooksQueryItem
|
||||
if data.GBID != nil {
|
||||
// Use GBID
|
||||
resp, err := performGBIDRequest(*data.GBID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
queryResult = resp
|
||||
} else if len(data.ISBN) > 0 {
|
||||
searchQuery := "isbn:" + *data.ISBN[0]
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
queryResult = &resp.Items[0]
|
||||
} else if data.Title != nil && data.Author != nil {
|
||||
searchQuery := url.QueryEscape(fmt.Sprintf("%s %s", *data.Title, *data.Author))
|
||||
resp, err := performSearchRequest(searchQuery)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
queryResult = &resp.Items[0]
|
||||
} else {
|
||||
return errors.New("Invalid Data")
|
||||
}
|
||||
|
||||
return coverIDs, nil
|
||||
// Merge Data
|
||||
data.GBID = &queryResult.ID
|
||||
data.Description = &queryResult.Info.Description
|
||||
data.Title = &queryResult.Info.Title
|
||||
if len(queryResult.Info.Authors) > 0 {
|
||||
data.Author = &queryResult.Info.Authors[0]
|
||||
}
|
||||
for _, item := range queryResult.Info.Identifiers {
|
||||
if item.Type == "ISBN_10" || item.Type == "ISBN_13" {
|
||||
data.ISBN = append(data.ISBN, &item.Identifier)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func DownloadAndSaveCover(coverID string, dirPath string) (*string, error) {
|
||||
// Derive & Sanitize File Name
|
||||
fileName := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", coverID))
|
||||
|
||||
// Generate Storage Path
|
||||
safePath := filepath.Join(dirPath, "covers", fileName)
|
||||
|
||||
func SaveCover(id string, safePath string) error {
|
||||
// Validate File Doesn't Exists
|
||||
_, err := os.Stat(safePath)
|
||||
if err == nil {
|
||||
log.Warn("[metadata] File Alreads Exists")
|
||||
return &safePath, nil
|
||||
log.Warn("[SaveCover] File Alreads Exists")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create File
|
||||
out, err := os.Create(safePath)
|
||||
if err != nil {
|
||||
log.Error("[metadata] File Create Error")
|
||||
return nil, errors.New("File Failure")
|
||||
log.Error("[SaveCover] File Create Error")
|
||||
return errors.New("File Failure")
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Download File
|
||||
log.Info("[metadata] Downloading Cover")
|
||||
coverURL := fmt.Sprintf(BASE_COVER_URL, coverID)
|
||||
log.Info("[SaveCover] Downloading Cover")
|
||||
coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, id)
|
||||
resp, err := http.Get(coverURL)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
log.Error("[SaveCover] Cover URL API Failure")
|
||||
return errors.New("API Failure")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy File to Disk
|
||||
log.Info("[SaveCover] Saving Cover")
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
log.Error("[metadata] File Copy Error")
|
||||
return nil, errors.New("File Failure")
|
||||
log.Error("[SaveCover] File Copy Error")
|
||||
return errors.New("File Failure")
|
||||
}
|
||||
|
||||
// Return FilePath
|
||||
return &safePath, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)
|
||||
|
||||
log.Info("[performSearchRequest] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
parsedResp := gBooksQueryResponse{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performSearchRequest] Google Books Query API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
if len(parsedResp.Items) == 0 {
|
||||
log.Warn("[performSearchRequest] No Results")
|
||||
return nil, errors.New("No Results")
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
}
|
||||
|
||||
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
|
||||
apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)
|
||||
|
||||
log.Info("[performGBIDRequest] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
parsedResp := gBooksQueryItem{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
|
||||
if err != nil {
|
||||
log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
return &parsedResp, nil
|
||||
}
|
||||
|
||||
107
metadata/olib.go
Normal file
107
metadata/olib.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package metadata
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// oLibCoverResult is one entry of the "docs" array in an Open Library search
// response. Only the cover edition key is requested and decoded.
type oLibCoverResult struct {
	CoverEditionKey string `json:"cover_edition_key"` // Empty when the entry has no cover edition.
}
|
||||
|
||||
type oLibQueryResponse struct {
|
||||
ResultCount int `json:"numFound"`
|
||||
Start int `json:"start"`
|
||||
ResultCountExact bool `json:"numFoundExact"`
|
||||
Results []oLibCoverResult `json:"docs"`
|
||||
}
|
||||
|
||||
// Open Library endpoint templates. Each contains a single %s verb that is
// filled in with fmt.Sprintf.
const (
	// OLIB_QUERY_URL searches Open Library; %s is the already URL-escaped query.
	OLIB_QUERY_URL string = "https://openlibrary.org/search.json?q=%s&fields=cover_edition_key"
	// OLIB_OLID_COVER_URL is the large cover image addressed by OLID.
	OLIB_OLID_COVER_URL string = "https://covers.openlibrary.org/b/olid/%s-L.jpg"
	// OLIB_ISBN_COVER_URL is the large cover image addressed by ISBN.
	OLIB_ISBN_COVER_URL string = "https://covers.openlibrary.org/b/isbn/%s-L.jpg"
	// OLIB_OLID_LINK_URL is the book page addressed by OLID.
	OLIB_OLID_LINK_URL string = "https://openlibrary.org/books/%s"
	// OLIB_ISBN_LINK_URL is the book page addressed by ISBN.
	OLIB_ISBN_LINK_URL string = "https://openlibrary.org/isbn/%s"
)
|
||||
|
||||
func GetCoverOLIDs(title *string, author *string) ([]string, error) {
|
||||
if title == nil || author == nil {
|
||||
log.Error("[metadata] Invalid Search Query")
|
||||
return nil, errors.New("Invalid Query")
|
||||
}
|
||||
|
||||
searchQuery := url.QueryEscape(fmt.Sprintf("%s %s", *title, *author))
|
||||
apiQuery := fmt.Sprintf(OLIB_QUERY_URL, searchQuery)
|
||||
|
||||
log.Info("[metadata] Acquiring CoverID")
|
||||
resp, err := http.Get(apiQuery)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
target := oLibQueryResponse{}
|
||||
err = json.NewDecoder(resp.Body).Decode(&target)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Decode Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
|
||||
var coverIDs []string
|
||||
for _, result := range target.Results {
|
||||
if result.CoverEditionKey != "" {
|
||||
coverIDs = append(coverIDs, result.CoverEditionKey)
|
||||
}
|
||||
}
|
||||
|
||||
return coverIDs, nil
|
||||
}
|
||||
|
||||
func DownloadAndSaveCover(coverID string, dirPath string) (*string, error) {
|
||||
// Derive & Sanitize File Name
|
||||
fileName := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", coverID))
|
||||
|
||||
// Generate Storage Path
|
||||
safePath := filepath.Join(dirPath, "covers", fileName)
|
||||
|
||||
// Validate File Doesn't Exists
|
||||
_, err := os.Stat(safePath)
|
||||
if err == nil {
|
||||
log.Warn("[metadata] File Alreads Exists")
|
||||
return &safePath, nil
|
||||
}
|
||||
|
||||
// Create File
|
||||
out, err := os.Create(safePath)
|
||||
if err != nil {
|
||||
log.Error("[metadata] File Create Error")
|
||||
return nil, errors.New("File Failure")
|
||||
}
|
||||
defer out.Close()
|
||||
|
||||
// Download File
|
||||
log.Info("[metadata] Downloading Cover")
|
||||
coverURL := fmt.Sprintf(OLIB_OLID_COVER_URL, coverID)
|
||||
resp, err := http.Get(coverURL)
|
||||
if err != nil {
|
||||
log.Error("[metadata] Cover URL API Failure")
|
||||
return nil, errors.New("API Failure")
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// Copy File to Disk
|
||||
_, err = io.Copy(out, resp.Body)
|
||||
if err != nil {
|
||||
log.Error("[metadata] File Copy Error")
|
||||
return nil, errors.New("File Failure")
|
||||
}
|
||||
|
||||
// Return FilePath
|
||||
return &safePath, nil
|
||||
}
|
||||
Reference in New Issue
Block a user