2023-09-18 23:57:18 +00:00
|
|
|
package metadata
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
2024-02-25 01:45:26 +00:00
|
|
|
"io"
|
2023-09-23 18:14:57 +00:00
|
|
|
"path/filepath"
|
2023-09-18 23:57:18 +00:00
|
|
|
|
2023-10-01 23:17:22 +00:00
|
|
|
"github.com/gabriel-vasile/mimetype"
|
2024-02-25 01:45:26 +00:00
|
|
|
"reichard.io/antholume/utils"
|
2023-10-01 23:17:22 +00:00
|
|
|
)
|
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// MetadataHandler is the signature of a per-document-type metadata extractor.
// GetMetadata looks one up in extensionHandlerMap and calls it with the path
// of the file being inspected; it returns the extracted MetadataInfo or an
// error.
type MetadataHandler func(string) (*MetadataInfo, error)
|
|
|
|
|
|
|
|
// DocumentType identifies a supported document format by its file extension,
// including the leading dot (e.g. ".epub").
type DocumentType string
|
|
|
|
|
|
|
|
const (
|
|
|
|
TYPE_EPUB DocumentType = ".epub"
|
|
|
|
)
|
|
|
|
|
|
|
|
var extensionHandlerMap = map[DocumentType]MetadataHandler{
|
|
|
|
TYPE_EPUB: getEPUBMetadata,
|
|
|
|
}
|
|
|
|
|
2023-10-01 23:17:22 +00:00
|
|
|
// Source selects which external metadata provider SearchMetadata queries.
type Source int
|
|
|
|
|
|
|
|
const (
|
2024-02-25 01:45:26 +00:00
|
|
|
SOURCE_GBOOK Source = iota
|
|
|
|
SOURCE_OLIB
|
2023-09-18 23:57:18 +00:00
|
|
|
)
|
|
|
|
|
2023-09-23 02:12:36 +00:00
|
|
|
type MetadataInfo struct {
|
2024-02-25 01:45:26 +00:00
|
|
|
ID *string
|
|
|
|
MD5 *string
|
|
|
|
PartialMD5 *string
|
|
|
|
WordCount *int64
|
|
|
|
|
2023-09-23 02:12:36 +00:00
|
|
|
Title *string
|
|
|
|
Author *string
|
|
|
|
Description *string
|
2023-09-23 18:14:57 +00:00
|
|
|
ISBN10 *string
|
|
|
|
ISBN13 *string
|
2024-02-25 01:45:26 +00:00
|
|
|
Type DocumentType
|
2023-09-18 23:57:18 +00:00
|
|
|
}
|
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// Downloads the Google Books cover file and saves it to the provided directory.
|
2023-10-01 23:17:22 +00:00
|
|
|
func CacheCover(gbid string, coverDir string, documentID string, overwrite bool) (*string, error) {
|
|
|
|
// Get Filepath
|
2023-09-23 18:14:57 +00:00
|
|
|
coverFile := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", documentID))
|
|
|
|
coverFilePath := filepath.Join(coverDir, coverFile)
|
|
|
|
|
2023-10-01 23:17:22 +00:00
|
|
|
// Save Google Books
|
|
|
|
if err := saveGBooksCover(gbid, coverFilePath, overwrite); err != nil {
|
|
|
|
return nil, err
|
2023-09-18 23:57:18 +00:00
|
|
|
}
|
|
|
|
|
2023-10-01 23:17:22 +00:00
|
|
|
// TODO - Refactor & Allow Open Library / Alternative Sources
|
2023-09-18 23:57:18 +00:00
|
|
|
|
2023-09-23 18:14:57 +00:00
|
|
|
return &coverFile, nil
|
2023-09-23 02:12:36 +00:00
|
|
|
}
|
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// Searches source for metadata based on the provided information.
|
2023-10-01 23:17:22 +00:00
|
|
|
func SearchMetadata(s Source, metadataSearch MetadataInfo) ([]MetadataInfo, error) {
|
|
|
|
switch s {
|
2024-02-25 01:45:26 +00:00
|
|
|
case SOURCE_GBOOK:
|
2023-10-01 23:17:22 +00:00
|
|
|
return getGBooksMetadata(metadataSearch)
|
2024-02-25 01:45:26 +00:00
|
|
|
case SOURCE_OLIB:
|
2024-08-10 13:26:30 +00:00
|
|
|
return nil, errors.New("not implemented")
|
2023-10-01 23:17:22 +00:00
|
|
|
default:
|
2024-08-10 13:26:30 +00:00
|
|
|
return nil, errors.New("not implemented")
|
2023-09-23 02:12:36 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// Returns the word count of the provided filepath. An error will be returned
|
|
|
|
// if the file is not supported.
|
|
|
|
func GetWordCount(filepath string) (*int64, error) {
|
2023-10-01 23:17:22 +00:00
|
|
|
fileMime, err := mimetype.DetectFile(filepath)
|
2023-09-23 02:12:36 +00:00
|
|
|
if err != nil {
|
2024-02-25 01:45:26 +00:00
|
|
|
return nil, err
|
2023-09-23 02:12:36 +00:00
|
|
|
}
|
|
|
|
|
2023-10-01 23:17:22 +00:00
|
|
|
if fileExtension := fileMime.Extension(); fileExtension == ".epub" {
|
2023-10-24 00:18:16 +00:00
|
|
|
totalWords, err := countEPUBWords(filepath)
|
2023-10-01 23:17:22 +00:00
|
|
|
if err != nil {
|
2024-02-25 01:45:26 +00:00
|
|
|
return nil, err
|
2023-10-01 23:17:22 +00:00
|
|
|
}
|
2024-02-25 01:45:26 +00:00
|
|
|
return &totalWords, nil
|
2023-10-01 23:17:22 +00:00
|
|
|
} else {
|
2024-02-25 01:45:26 +00:00
|
|
|
return nil, fmt.Errorf("Invalid extension")
|
2023-10-01 23:17:22 +00:00
|
|
|
}
|
2023-09-18 23:57:18 +00:00
|
|
|
}
|
2023-10-25 23:52:01 +00:00
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// Returns embedded metadata of the provided file. An error will be returned if
|
|
|
|
// the file is not supported.
|
2023-10-25 23:52:01 +00:00
|
|
|
func GetMetadata(filepath string) (*MetadataInfo, error) {
|
2024-02-25 01:45:26 +00:00
|
|
|
// Detect Extension Type
|
2023-10-25 23:52:01 +00:00
|
|
|
fileMime, err := mimetype.DetectFile(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-02-25 01:45:26 +00:00
|
|
|
// Get Extension Type Metadata Handler
|
|
|
|
fileExtension := fileMime.Extension()
|
|
|
|
handler, ok := extensionHandlerMap[DocumentType(fileExtension)]
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("invalid extension %s", fileExtension)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Acquire Metadata
|
|
|
|
metadataInfo, err := handler(filepath)
|
2024-05-26 23:56:59 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to acquire metadata")
|
|
|
|
}
|
2024-02-25 01:45:26 +00:00
|
|
|
|
|
|
|
// Calculate MD5 & Partial MD5
|
|
|
|
partialMD5, err := utils.CalculatePartialMD5(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to calculate partial MD5")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate Actual MD5
|
|
|
|
MD5, err := utils.CalculateMD5(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to calculate MD5")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate Word Count
|
|
|
|
wordCount, err := GetWordCount(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("unable to calculate word count")
|
|
|
|
}
|
|
|
|
|
|
|
|
metadataInfo.WordCount = wordCount
|
|
|
|
metadataInfo.PartialMD5 = partialMD5
|
|
|
|
metadataInfo.MD5 = MD5
|
|
|
|
|
|
|
|
return metadataInfo, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the extension of the provided filepath (e.g. ".epub"). An error
|
|
|
|
// will be returned if the file is not supported.
|
|
|
|
func GetDocumentType(filepath string) (*DocumentType, error) {
|
|
|
|
// Detect Extension Type
|
|
|
|
fileMime, err := mimetype.DetectFile(filepath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Detect
|
|
|
|
fileExtension := fileMime.Extension()
|
|
|
|
docType, ok := ParseDocumentType(fileExtension)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("filetype not supported")
|
|
|
|
}
|
|
|
|
|
|
|
|
return &docType, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the extension of the provided file reader (e.g. ".epub"). An error
|
|
|
|
// will be returned if the file is not supported.
|
|
|
|
func GetDocumentTypeReader(r io.Reader) (*DocumentType, error) {
|
|
|
|
// Detect Extension Type
|
|
|
|
fileMime, err := mimetype.DetectReader(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Detect
|
|
|
|
fileExtension := fileMime.Extension()
|
|
|
|
docType, ok := ParseDocumentType(fileExtension)
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("filetype not supported")
|
|
|
|
}
|
|
|
|
|
|
|
|
return &docType, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given a filetype string, attempt to resolve a DocumentType
|
|
|
|
func ParseDocumentType(input string) (DocumentType, bool) {
|
|
|
|
validTypes := map[string]DocumentType{
|
|
|
|
string(TYPE_EPUB): TYPE_EPUB,
|
2023-10-25 23:52:01 +00:00
|
|
|
}
|
2024-02-25 01:45:26 +00:00
|
|
|
found, ok := validTypes[input]
|
|
|
|
return found, ok
|
2023-10-25 23:52:01 +00:00
|
|
|
}
|