[new] count words & stats, [new] refactor metadata, [new] human readable time
This commit is contained in:
@ -76,6 +76,7 @@ func (api *API) registerWebAppRoutes() {
helperFuncs := template.FuncMap{
"GetSVGGraphData": graph.GetSVGGraphData,
"GetUTCOffsets": utils.GetUTCOffsets,
"NiceSeconds": utils.NiceSeconds,
render.AddFromFilesFuncs("login", helperFuncs, "templates/login.html")
@ -75,21 +75,24 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
templateVarsBase["RouteName"] = routeName
return func(c *gin.Context) {
rUser, _ := c.Get("AuthorizedUser")
var userID string
if rUser, _ := c.Get("AuthorizedUser"); rUser != nil {
userID = rUser.(string)
// Copy Base & Update
templateVars := gin.H{}
for k, v := range templateVarsBase {
templateVars[k] = v
templateVars["User"] = rUser
templateVars["User"] = userID
// Potential URL Parameters
qParams := bindQueryParams(c)
if routeName == "documents" {
documents, err := api.DB.Queries.GetDocumentsWithStats(api.DB.Ctx, database.GetDocumentsWithStatsParams{
UserID: rUser.(string),
UserID: userID,
Offset: (*qParams.Page - 1) * *qParams.Limit,
Limit: *qParams.Limit,
@ -99,6 +102,10 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
if err = api.getDocumentsWordCount(documents); err != nil {
log.Error("[createAppResourcesRoute] Unable to Get Word Counts: ", err)
templateVars["Data"] = documents
} else if routeName == "document" {
var rDocID requestDocumentID
@ -109,7 +116,7 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
document, err := api.DB.Queries.GetDocumentWithStats(api.DB.Ctx, database.GetDocumentWithStatsParams{
UserID: rUser.(string),
UserID: userID,
DocumentID: rDocID.DocumentID,
if err != nil {
@ -118,11 +125,21 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
// Derived reading statistics handed to the template. WordsPerMinute stays
// "N/A" until both a word count and usable reading activity exist.
statistics := gin.H{
	"TotalTimeLeftSeconds": (document.TotalPages - document.CurrentPage) * document.SecondsPerPage,
	"WordsPerMinute":       "N/A",
}

// Every operand is an int64, so a zero page count or less than a minute of
// recorded activity used to trigger an integer divide-by-zero panic.
if document.Words != nil && *document.Words != 0 && document.TotalPages > 0 && document.TotalTimeSeconds > 0 {
	// Multiply before dividing so integer truncation does not skew the estimate.
	wordsRead := *document.Words * document.ReadPages / document.TotalPages
	statistics["WordsPerMinute"] = wordsRead * 60 / document.TotalTimeSeconds
}
templateVars["RelBase"] = "../"
templateVars["Data"] = document
templateVars["Statistics"] = statistics
} else if routeName == "activity" {
activityFilter := database.GetActivityParams{
UserID: rUser.(string),
UserID: userID,
Offset: (*qParams.Page - 1) * *qParams.Limit,
Limit: *qParams.Limit,
@ -143,7 +160,7 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
} else if routeName == "home" {
start_time := time.Now()
weekly_streak, err := api.DB.Queries.GetUserWindowStreaks(api.DB.Ctx, database.GetUserWindowStreaksParams{
UserID: rUser.(string),
UserID: userID,
Window: "WEEK",
if err != nil {
@ -153,7 +170,7 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
start_time = time.Now()
daily_streak, err := api.DB.Queries.GetUserWindowStreaks(api.DB.Ctx, database.GetUserWindowStreaksParams{
UserID: rUser.(string),
UserID: userID,
Window: "DAY",
if err != nil {
@ -162,11 +179,11 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
log.Debug("GetUserWindowStreaks - DAY - ", time.Since(start_time))
start_time = time.Now()
database_info, _ := api.DB.Queries.GetDatabaseInfo(api.DB.Ctx, rUser.(string))
database_info, _ := api.DB.Queries.GetDatabaseInfo(api.DB.Ctx, userID)
log.Debug("GetDatabaseInfo - ", time.Since(start_time))
start_time = time.Now()
read_graph_data, _ := api.DB.Queries.GetDailyReadStats(api.DB.Ctx, rUser.(string))
read_graph_data, _ := api.DB.Queries.GetDailyReadStats(api.DB.Ctx, userID)
log.Debug("GetDailyReadStats - ", time.Since(start_time))
templateVars["Data"] = gin.H{
@ -176,14 +193,14 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any
"GraphData": read_graph_data,
} else if routeName == "settings" {
user, err := api.DB.Queries.GetUser(api.DB.Ctx, rUser.(string))
user, err := api.DB.Queries.GetUser(api.DB.Ctx, userID)
if err != nil {
log.Error("[createAppResourcesRoute] GetUser DB Error:", err)
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "Invalid Request"})
devices, err := api.DB.Queries.GetDevices(api.DB.Ctx, rUser.(string))
devices, err := api.DB.Queries.GetDevices(api.DB.Ctx, userID)
if err != nil {
log.Error("[createAppResourcesRoute] GetDevices DB Error:", err)
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "Invalid Request"})
@ -248,16 +265,16 @@ func (api *API) getDocumentCover(c *gin.Context) {
var coverFile string = "UNKNOWN"
// Identify Documents & Save Covers
metadataResults, err := metadata.GetMetadata(metadata.MetadataInfo{
metadataResults, err := metadata.SearchMetadata(metadata.GBOOK, metadata.MetadataInfo{
Title: document.Title,
Author: document.Author,
if err == nil && len(metadataResults) > 0 && metadataResults[0].GBID != nil {
if err == nil && len(metadataResults) > 0 && metadataResults[0].ID != nil {
firstResult := metadataResults[0]
// Save Cover
fileName, err := metadata.SaveCover(*firstResult.GBID, coverDir, document.ID, false)
fileName, err := metadata.CacheCover(*firstResult.ID, coverDir, document.ID, false)
if err == nil {
coverFile = *fileName
@ -268,8 +285,8 @@ func (api *API) getDocumentCover(c *gin.Context) {
Title: firstResult.Title,
Author: firstResult.Author,
Description: firstResult.Description,
Gbid: firstResult.GBID,
Olid: firstResult.OLID,
Gbid: firstResult.ID,
Olid: nil,
Isbn10: firstResult.ISBN10,
Isbn13: firstResult.ISBN13,
}); err != nil {
@ -368,7 +385,7 @@ func (api *API) editDocument(c *gin.Context) {
coverFileName = &fileName
} else if rDocEdit.CoverGBID != nil {
var coverDir string = filepath.Join(api.Config.DataPath, "covers")
fileName, err := metadata.SaveCover(*rDocEdit.CoverGBID, coverDir, rDocID.DocumentID, true)
fileName, err := metadata.CacheCover(*rDocEdit.CoverGBID, coverDir, rDocID.DocumentID, true)
if err == nil {
coverFileName = fileName
@ -456,7 +473,7 @@ func (api *API) identifyDocument(c *gin.Context) {
// Get Metadata
metadataResults, err := metadata.GetMetadata(metadata.MetadataInfo{
metadataResults, err := metadata.SearchMetadata(metadata.GBOOK, metadata.MetadataInfo{
Title: rDocIdentify.Title,
Author: rDocIdentify.Author,
ISBN10: rDocIdentify.ISBN,
@ -471,8 +488,8 @@ func (api *API) identifyDocument(c *gin.Context) {
Title: firstResult.Title,
Author: firstResult.Author,
Description: firstResult.Description,
Gbid: firstResult.GBID,
Olid: firstResult.OLID,
Gbid: firstResult.ID,
Olid: nil,
Isbn10: firstResult.ISBN10,
Isbn13: firstResult.ISBN13,
}); err != nil {
@ -495,7 +512,17 @@ func (api *API) identifyDocument(c *gin.Context) {
// Derived reading statistics handed to the template. WordsPerMinute stays
// "N/A" until both a word count and usable reading activity exist.
statistics := gin.H{
	"TotalTimeLeftSeconds": (document.TotalPages - document.CurrentPage) * document.SecondsPerPage,
	"WordsPerMinute":       "N/A",
}

// Every operand is an int64, so a zero page count or less than a minute of
// recorded activity used to trigger an integer divide-by-zero panic.
if document.Words != nil && *document.Words != 0 && document.TotalPages > 0 && document.TotalTimeSeconds > 0 {
	// Multiply before dividing so integer truncation does not skew the estimate.
	wordsRead := *document.Words * document.ReadPages / document.TotalPages
	statistics["WordsPerMinute"] = wordsRead * 60 / document.TotalTimeSeconds
}
templateVars["Data"] = document
templateVars["Statistics"] = statistics
c.HTML(http.StatusOK, "document", templateVars)
@ -582,6 +609,45 @@ func (api *API) editSettings(c *gin.Context) {
c.HTML(http.StatusOK, "settings", templateVars)
func (api *API) getDocumentsWordCount(documents []database.GetDocumentsWithStatsRow) error {
// Do Transaction
tx, err := api.DB.DB.Begin()
if err != nil {
log.Error("[getDocumentsWordCount] Transaction Begin DB Error:", err)
return err
// Defer & Start Transaction
defer tx.Rollback()
qtx := api.DB.Queries.WithTx(tx)
for _, item := range documents {
if item.Words == nil && item.Filepath != nil {
filePath := filepath.Join(api.Config.DataPath, "documents", *item.Filepath)
wordCount, err := metadata.GetWordCount(filePath)
if err != nil {
log.Warn("[getDocumentsWordCount] Word Count Error - ", err)
} else {
if _, err := qtx.UpsertDocument(api.DB.Ctx, database.UpsertDocumentParams{
ID: item.ID,
Words: &wordCount,
}); err != nil {
log.Error("[getDocumentsWordCount] UpsertDocument DB Error - ", err)
return err
// Commit Transaction
if err := tx.Commit(); err != nil {
log.Error("[getDocumentsWordCount] Transaction Commit DB Error:", err)
return err
return nil
func bindQueryParams(c *gin.Context) queryParams {
var qParams queryParams
@ -39,6 +39,7 @@ type Document struct {
SeriesIndex *int64 `json:"series_index"`
Lang *string `json:"lang"`
Description *string `json:"description"`
Words *int64 `json:"words"`
Gbid *string `json:"gbid"`
Olid *string `json:"-"`
Isbn10 *string `json:"isbn10"`
@ -41,12 +41,13 @@ INSERT INTO documents (
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
md5 = COALESCE(excluded.md5, md5),
@ -58,6 +59,7 @@ SET
series_index = COALESCE(excluded.series_index, series_index),
lang = COALESCE(excluded.lang, lang),
description = COALESCE(excluded.description, description),
words = COALESCE(excluded.words, words),
olid = COALESCE(excluded.olid, olid),
gbid = COALESCE(excluded.gbid, gbid),
isbn10 = COALESCE(excluded.isbn10, isbn10),
@ -188,10 +190,15 @@ OFFSET $offset;
WITH true_progress AS (
start_time AS last_read,
SUM(duration) / 60 AS total_time_minutes,
SUM(duration) AS total_time_seconds,
-- Determine Read Pages
COUNT(DISTINCT current_page) AS read_pages,
-- Derive Percentage of Book
ROUND(CAST(current_page AS REAL) / CAST(total_pages AS REAL) * 100, 2) AS percentage
FROM activity
WHERE user_id = $user_id
@ -205,13 +212,23 @@ SELECT
CAST(IFNULL(current_page, 0) AS INTEGER) AS current_page,
CAST(IFNULL(total_pages, 0) AS INTEGER) AS total_pages,
CAST(IFNULL(total_time_minutes, 0) AS INTEGER) AS total_time_minutes,
CAST(IFNULL(total_time_seconds, 0) AS INTEGER) AS total_time_seconds,
CAST(DATETIME(IFNULL(last_read, "1970-01-01"), time_offset) AS TEXT) AS last_read,
CAST(IFNULL(read_pages, 0) AS INTEGER) AS read_pages,
-- Calculate Seconds / Page
-- 1. Calculate Total Time in Seconds (Sum Duration in Activity)
-- 2. Divide by Read Pages (Distinct Pages in Activity)
WHEN percentage > 97.0 THEN 100.0
WHEN percentage IS NULL THEN 0.0
ELSE percentage
WHEN total_time_seconds IS NULL THEN 0.0
ELSE ROUND(CAST(total_time_seconds AS REAL) / CAST(read_pages AS REAL))
END AS INTEGER) AS seconds_per_page,
-- Arbitrarily >97% is Complete
WHEN percentage > 97.0 THEN 100.0
WHEN percentage IS NULL THEN 0.0
ELSE percentage
END AS REAL) AS percentage
FROM documents
@ -225,7 +242,7 @@ LIMIT 1;
WITH true_progress AS (
start_time AS last_read,
SUM(duration) / 60 AS total_time_minutes,
SUM(duration) AS total_time_seconds,
@ -240,7 +257,7 @@ SELECT
CAST(IFNULL(current_page, 0) AS INTEGER) AS current_page,
CAST(IFNULL(total_pages, 0) AS INTEGER) AS total_pages,
CAST(IFNULL(total_time_minutes, 0) AS INTEGER) AS total_time_minutes,
CAST(IFNULL(total_time_seconds, 0) AS INTEGER) AS total_time_seconds,
CAST(DATETIME(IFNULL(last_read, "1970-01-01"), time_offset) AS TEXT) AS last_read,
@ -417,7 +417,7 @@ func (q *Queries) GetDevices(ctx context.Context, userID string) ([]GetDevicesRo
const getDocument = `-- name: GetDocument :one
SELECT id, md5, filepath, coverfile, title, author, series, series_index, lang, description, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at FROM documents
SELECT id, md5, filepath, coverfile, title, author, series, series_index, lang, description, words, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at FROM documents
WHERE id = ?1 LIMIT 1
@ -435,6 +435,7 @@ func (q *Queries) GetDocument(ctx context.Context, documentID string) (Document,
@ -543,10 +544,15 @@ const getDocumentWithStats = `-- name: GetDocumentWithStats :one
WITH true_progress AS (
start_time AS last_read,
SUM(duration) / 60 AS total_time_minutes,
SUM(duration) AS total_time_seconds,
-- Determine Read Pages
COUNT(DISTINCT current_page) AS read_pages,
-- Derive Percentage of Book
ROUND(CAST(current_page AS REAL) / CAST(total_pages AS REAL) * 100, 2) AS percentage
FROM activity
WHERE user_id = ?1
@ -556,17 +562,27 @@ WITH true_progress AS (
documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at,
documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.words, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at,
CAST(IFNULL(current_page, 0) AS INTEGER) AS current_page,
CAST(IFNULL(total_pages, 0) AS INTEGER) AS total_pages,
CAST(IFNULL(total_time_minutes, 0) AS INTEGER) AS total_time_minutes,
CAST(IFNULL(total_time_seconds, 0) AS INTEGER) AS total_time_seconds,
CAST(DATETIME(IFNULL(last_read, "1970-01-01"), time_offset) AS TEXT) AS last_read,
CAST(IFNULL(read_pages, 0) AS INTEGER) AS read_pages,
-- Calculate Seconds / Page
-- 1. Calculate Total Time in Seconds (Sum Duration in Activity)
-- 2. Divide by Read Pages (Distinct Pages in Activity)
WHEN percentage > 97.0 THEN 100.0
WHEN percentage IS NULL THEN 0.0
ELSE percentage
WHEN total_time_seconds IS NULL THEN 0.0
ELSE ROUND(CAST(total_time_seconds AS REAL) / CAST(read_pages AS REAL))
END AS INTEGER) AS seconds_per_page,
-- Arbitrarily >97% is Complete
WHEN percentage > 97.0 THEN 100.0
WHEN percentage IS NULL THEN 0.0
ELSE percentage
END AS REAL) AS percentage
FROM documents
@ -593,6 +609,7 @@ type GetDocumentWithStatsRow struct {
SeriesIndex *int64 `json:"series_index"`
Lang *string `json:"lang"`
Description *string `json:"description"`
Words *int64 `json:"words"`
Gbid *string `json:"gbid"`
Olid *string `json:"-"`
Isbn10 *string `json:"isbn10"`
@ -603,8 +620,10 @@ type GetDocumentWithStatsRow struct {
CreatedAt time.Time `json:"created_at"`
CurrentPage int64 `json:"current_page"`
TotalPages int64 `json:"total_pages"`
TotalTimeMinutes int64 `json:"total_time_minutes"`
TotalTimeSeconds int64 `json:"total_time_seconds"`
LastRead string `json:"last_read"`
ReadPages int64 `json:"read_pages"`
SecondsPerPage int64 `json:"seconds_per_page"`
Percentage float64 `json:"percentage"`
@ -622,6 +641,7 @@ func (q *Queries) GetDocumentWithStats(ctx context.Context, arg GetDocumentWithS
@ -632,15 +652,17 @@ func (q *Queries) GetDocumentWithStats(ctx context.Context, arg GetDocumentWithS
return i, err
const getDocuments = `-- name: GetDocuments :many
SELECT id, md5, filepath, coverfile, title, author, series, series_index, lang, description, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at FROM documents
SELECT id, md5, filepath, coverfile, title, author, series, series_index, lang, description, words, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at FROM documents
ORDER BY created_at DESC
@ -671,6 +693,7 @@ func (q *Queries) GetDocuments(ctx context.Context, arg GetDocumentsParams) ([]D
@ -697,7 +720,7 @@ const getDocumentsWithStats = `-- name: GetDocumentsWithStats :many
WITH true_progress AS (
start_time AS last_read,
SUM(duration) / 60 AS total_time_minutes,
SUM(duration) AS total_time_seconds,
@ -708,11 +731,11 @@ WITH true_progress AS (
HAVING MAX(start_time)
documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at,
documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.words, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at,
CAST(IFNULL(current_page, 0) AS INTEGER) AS current_page,
CAST(IFNULL(total_pages, 0) AS INTEGER) AS total_pages,
CAST(IFNULL(total_time_minutes, 0) AS INTEGER) AS total_time_minutes,
CAST(IFNULL(total_time_seconds, 0) AS INTEGER) AS total_time_seconds,
CAST(DATETIME(IFNULL(last_read, "1970-01-01"), time_offset) AS TEXT) AS last_read,
@ -747,6 +770,7 @@ type GetDocumentsWithStatsRow struct {
SeriesIndex *int64 `json:"series_index"`
Lang *string `json:"lang"`
Description *string `json:"description"`
Words *int64 `json:"words"`
Gbid *string `json:"gbid"`
Olid *string `json:"-"`
Isbn10 *string `json:"isbn10"`
@ -757,7 +781,7 @@ type GetDocumentsWithStatsRow struct {
CreatedAt time.Time `json:"created_at"`
CurrentPage int64 `json:"current_page"`
TotalPages int64 `json:"total_pages"`
TotalTimeMinutes int64 `json:"total_time_minutes"`
TotalTimeSeconds int64 `json:"total_time_seconds"`
LastRead string `json:"last_read"`
Percentage float64 `json:"percentage"`
@ -782,6 +806,7 @@ func (q *Queries) GetDocumentsWithStats(ctx context.Context, arg GetDocumentsWit
@ -792,7 +817,7 @@ func (q *Queries) GetDocumentsWithStats(ctx context.Context, arg GetDocumentsWit
); err != nil {
@ -830,7 +855,7 @@ func (q *Queries) GetLastActivity(ctx context.Context, arg GetLastActivityParams
const getMissingDocuments = `-- name: GetMissingDocuments :many
SELECT documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at FROM documents
SELECT documents.id, documents.md5, documents.filepath, documents.coverfile, documents.title, documents.author, documents.series, documents.series_index, documents.lang, documents.description, documents.words, documents.gbid, documents.olid, documents.isbn10, documents.isbn13, documents.synced, documents.deleted, documents.updated_at, documents.created_at FROM documents
documents.filepath IS NOT NULL
AND documents.deleted = false
@ -867,6 +892,7 @@ func (q *Queries) GetMissingDocuments(ctx context.Context, documentIds []string)
@ -1157,7 +1183,7 @@ UPDATE documents
deleted = ?1
WHERE id = ?2
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, words, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
type UpdateDocumentDeletedParams struct {
@ -1179,6 +1205,7 @@ func (q *Queries) UpdateDocumentDeleted(ctx context.Context, arg UpdateDocumentD
@ -1196,7 +1223,7 @@ UPDATE documents
synced = ?1
WHERE id = ?2
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, words, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
type UpdateDocumentSyncParams struct {
@ -1218,6 +1245,7 @@ func (q *Queries) UpdateDocumentSync(ctx context.Context, arg UpdateDocumentSync
@ -1338,12 +1366,13 @@ INSERT INTO documents (
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
md5 = COALESCE(excluded.md5, md5),
@ -1355,11 +1384,12 @@ SET
series_index = COALESCE(excluded.series_index, series_index),
lang = COALESCE(excluded.lang, lang),
description = COALESCE(excluded.description, description),
words = COALESCE(excluded.words, words),
olid = COALESCE(excluded.olid, olid),
gbid = COALESCE(excluded.gbid, gbid),
isbn10 = COALESCE(excluded.isbn10, isbn10),
isbn13 = COALESCE(excluded.isbn13, isbn13)
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
RETURNING id, md5, filepath, coverfile, title, author, series, series_index, lang, description, words, gbid, olid, isbn10, isbn13, synced, deleted, updated_at, created_at
type UpsertDocumentParams struct {
@ -1373,6 +1403,7 @@ type UpsertDocumentParams struct {
SeriesIndex *int64 `json:"series_index"`
Lang *string `json:"lang"`
Description *string `json:"description"`
Words *int64 `json:"words"`
Olid *string `json:"-"`
Gbid *string `json:"gbid"`
Isbn10 *string `json:"isbn10"`
@ -1391,6 +1422,7 @@ func (q *Queries) UpsertDocument(ctx context.Context, arg UpsertDocumentParams)
@ -1408,6 +1440,7 @@ func (q *Queries) UpsertDocument(ctx context.Context, arg UpsertDocumentParams)
@ -25,6 +25,7 @@ CREATE TABLE IF NOT EXISTS documents (
series_index INTEGER,
lang TEXT,
description TEXT,
words INTEGER,
gbid TEXT,
olid TEXT,
Normal file
Normal file
@ -0,0 +1,330 @@
Package epub provides basic support for reading EPUB archives.
Adapted from: https://github.com/taylorskalyo/goreader
package metadata
import (
// containerPath is the archive entry that setContainer opens and unmarshals to
// discover the epub's rootfile entries.
const containerPath = "META-INF/container.xml"
var (
// ErrNoRootfile occurs when there are no rootfile entries found in
// container.xml.
ErrNoRootfile = errors.New("epub: no rootfile found in container")
// ErrBadRootfile occurs when container.xml references a rootfile that does
// not exist in the zip.
ErrBadRootfile = errors.New("epub: container references non-existent rootfile")
// ErrNoItemref occurs when a content.opf contains a spine without any
// itemref entries.
ErrNoItemref = errors.New("epub: no itemrefs found in spine")
// ErrBadItemref occurs when an itemref entry in content.opf references an
// item that does not exist in the manifest.
ErrBadItemref = errors.New("epub: itemref references non-existent item")
// ErrBadManifest occurs when a manifest in content.opf references an item
// that does not exist in the zip.
ErrBadManifest = errors.New("epub: manifest references non-existent item")
// Reader represents a readable epub file.
type Reader struct {
files map[string]*zip.File
// ReadCloser represents a readable epub file that can be closed.
type ReadCloser struct {
f *os.File
// Rootfile contains the location of a content.opf package file.
type Rootfile struct {
FullPath string `xml:"full-path,attr"`
// Container serves as a directory of Rootfiles.
type Container struct {
Rootfiles []*Rootfile `xml:"rootfiles>rootfile"`
// Package represents an epub content.opf file.
type Package struct {
// Metadata contains publishing information about the epub. Each field is
// decoded from the child element of <metadata> named in its xml tag; Event
// collects every <date> element together with its "event" attribute.
//
// The Identifier tag used to be spelled "idenifier", which matches no element,
// so the field was always left empty.
type Metadata struct {
	Title       string `xml:"metadata>title"`
	Language    string `xml:"metadata>language"`
	Identifier  string `xml:"metadata>identifier"`
	Creator     string `xml:"metadata>creator"`
	Contributor string `xml:"metadata>contributor"`
	Publisher   string `xml:"metadata>publisher"`
	Subject     string `xml:"metadata>subject"`
	Description string `xml:"metadata>description"`
	Event       []struct {
		Name string `xml:"event,attr"`
		Date string `xml:",innerxml"`
	} `xml:"metadata>date"`
	Type     string `xml:"metadata>type"`
	Format   string `xml:"metadata>format"`
	Source   string `xml:"metadata>source"`
	Relation string `xml:"metadata>relation"`
	Coverage string `xml:"metadata>coverage"`
	Rights   string `xml:"metadata>rights"`
}
// Manifest lists every file that is part of the epub.
type Manifest struct {
Items []Item `xml:"manifest>item"`
// Item represents a file stored in the epub.
type Item struct {
ID string `xml:"id,attr"`
HREF string `xml:"href,attr"`
MediaType string `xml:"media-type,attr"`
f *zip.File
// Spine defines the reading order of the epub documents.
type Spine struct {
Itemrefs []Itemref `xml:"spine>itemref"`
// Itemref points to an Item.
type Itemref struct {
IDREF string `xml:"idref,attr"`
// OpenEPUBReader will open the epub file specified by name and return a
// ReadCloser.
func OpenEPUBReader(name string) (*ReadCloser, error) {
f, err := os.Open(name)
if err != nil {
return nil, err
rc := new(ReadCloser)
rc.f = f
fi, err := f.Stat()
if err != nil {
return nil, err
z, err := zip.NewReader(f, fi.Size())
if err != nil {
return nil, err
if err = rc.init(z); err != nil {
return nil, err
return rc, nil
// NewReader returns a new Reader reading from ra, which is assumed to have the
// given size in bytes.
func NewReader(ra io.ReaderAt, size int64) (*Reader, error) {
z, err := zip.NewReader(ra, size)
if err != nil {
return nil, err
r := new(Reader)
if err = r.init(z); err != nil {
return nil, err
return r, nil
func (r *Reader) init(z *zip.Reader) error {
// Create a file lookup table
r.files = make(map[string]*zip.File)
for _, f := range z.File {
r.files[f.Name] = f
err := r.setContainer()
if err != nil {
return err
err = r.setPackages()
if err != nil {
return err
err = r.setItems()
if err != nil {
return err
return nil
// setContainer unmarshals the epub's container.xml file.
func (r *Reader) setContainer() error {
f, err := r.files[containerPath].Open()
if err != nil {
return err
var b bytes.Buffer
_, err = io.Copy(&b, f)
if err != nil {
return err
err = xml.Unmarshal(b.Bytes(), &r.Container)
if err != nil {
return err
if len(r.Container.Rootfiles) < 1 {
return ErrNoRootfile
return nil
// setPackages unmarshal's each of the epub's content.opf files.
func (r *Reader) setPackages() error {
for _, rf := range r.Container.Rootfiles {
if r.files[rf.FullPath] == nil {
return ErrBadRootfile
f, err := r.files[rf.FullPath].Open()
if err != nil {
return err
var b bytes.Buffer
_, err = io.Copy(&b, f)
if err != nil {
return err
err = xml.Unmarshal(b.Bytes(), &rf.Package)
if err != nil {
return err
return nil
// setItems associates Itemrefs with their respective Item and Items with
// their zip.File.
func (r *Reader) setItems() error {
itemrefCount := 0
for _, rf := range r.Container.Rootfiles {
itemMap := make(map[string]*Item)
for i := range rf.Manifest.Items {
item := &rf.Manifest.Items[i]
itemMap[item.ID] = item
abs := path.Join(path.Dir(rf.FullPath), item.HREF)
item.f = r.files[abs]
for i := range rf.Spine.Itemrefs {
itemref := &rf.Spine.Itemrefs[i]
itemref.Item = itemMap[itemref.IDREF]
if itemref.Item == nil {
return ErrBadItemref
itemrefCount += len(rf.Spine.Itemrefs)
if itemrefCount < 1 {
return ErrNoItemref
return nil
// Open returns a ReadCloser that provides access to the Items's contents.
// Multiple items may be read concurrently.
func (item *Item) Open() (r io.ReadCloser, err error) {
if item.f == nil {
return nil, ErrBadManifest
return item.f.Open()
// Close closes the epub file, rendering it unusable for I/O.
func (rc *ReadCloser) Close() {
// Hehe
func (rf *Rootfile) CountWords() int64 {
var completeCount int64
for _, item := range rf.Spine.Itemrefs {
f, _ := item.Open()
tokenizer := html.NewTokenizer(f)
completeCount = completeCount + countWords(*tokenizer)
return completeCount
func countWords(tokenizer html.Tokenizer) int64 {
var err error
var totalWords int64
for {
tokenType := tokenizer.Next()
token := tokenizer.Token()
if tokenType == html.TextToken {
currStr := string(token.Data)
totalWords = totalWords + int64(len(strings.Fields(currStr)))
} else if tokenType == html.ErrorToken {
err = tokenizer.Err()
if err == io.EOF {
return totalWords
} else if err != nil {
return 0
// main opens "test.epub", counts the words of its first rootfile and logs the
// total.
// NOTE(review): this looks like a leftover manual test harness; the file
// declares package metadata, so confirm whether this function is still needed.
func main() {
rc, err := OpenEPUBReader("test.epub")
// NOTE(review): the handling inside this error branch is not visible here — confirm it stops before rc is used.
if err != nil {
// Only the first rootfile listed in the container is counted.
rf := rc.Rootfiles[0]
totalWords := rf.CountWords()
log.Info("WOAH WORDS:", totalWords)
Normal file
Normal file
@ -0,0 +1,200 @@
package metadata
import (
log "github.com/sirupsen/logrus"
// gBooksIdentifiers is one entry of a volume's "industryIdentifiers" list: an
// identifier value and its type. getGBooksMetadata picks out the entries whose
// type is "ISBN_10" or "ISBN_13".
type gBooksIdentifiers struct {
Type string `json:"type"`
Identifier string `json:"identifier"`
// gBooksInfo holds the fields decoded from a volume's "volumeInfo" object:
// title, author list, description and industry identifiers.
type gBooksInfo struct {
Title string `json:"title"`
Authors []string `json:"authors"`
Description string `json:"description"`
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
// gBooksQueryItem is a single volume: its "id" plus the decoded "volumeInfo".
// It is the element type of a search response and the whole payload decoded by
// performGBIDRequest.
type gBooksQueryItem struct {
ID string `json:"id"`
Info gBooksInfo `json:"volumeInfo"`
// gBooksQueryResponse is the payload decoded by performSearchRequest: the
// reported "totalItems" and the returned "items".
type gBooksQueryResponse struct {
TotalItems int `json:"totalItems"`
Items []gBooksQueryItem `json:"items"`
// GBOOKS_QUERY_URL is the volume search endpoint; %s is the search query,
// which performSearchRequest inserts as given.
const GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s"
// GBOOKS_GBID_INFO_URL is the single-volume endpoint; %s is the volume ID.
const GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"
// GBOOKS_GBID_COVER_URL is the front cover image URL; %s is the volume ID.
// The fife query parameter is fixed to w480-h690.
const GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
// getGBooksMetadata looks up volumes matching metadataSearch and returns them
// normalized as MetadataInfo values.
//
// The first populated criterion wins, in this order: ID (direct volume
// lookup), ISBN13, ISBN10, then Title and/or Author combined into one
// free-text query. If none is set an "Invalid Data" error is returned; errors
// from the request helpers are returned unchanged.
func getGBooksMetadata(metadataSearch MetadataInfo) ([]MetadataInfo, error) {
var queryResults []gBooksQueryItem
if metadataSearch.ID != nil {
// Use GBID
resp, err := performGBIDRequest(*metadataSearch.ID)
if err != nil {
return nil, err
queryResults = []gBooksQueryItem{*resp}
} else if metadataSearch.ISBN13 != nil {
// The ISBN is appended to the "isbn:" prefix without escaping.
searchQuery := "isbn:" + *metadataSearch.ISBN13
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else if metadataSearch.ISBN10 != nil {
searchQuery := "isbn:" + *metadataSearch.ISBN10
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else if metadataSearch.Title != nil || metadataSearch.Author != nil {
// Build "<title> <author>" from whichever parts are present.
var searchQuery string
if metadataSearch.Title != nil {
searchQuery = searchQuery + *metadataSearch.Title
if metadataSearch.Author != nil {
searchQuery = searchQuery + " " + *metadataSearch.Author
// Escape & Trim
searchQuery = url.QueryEscape(strings.TrimSpace(searchQuery))
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else {
return nil, errors.New("Invalid Data")
// Normalize Data
allMetadata := []MetadataInfo{}
for i := range queryResults {
// Copy the element so the pointers taken below refer to this iteration's value.
item := queryResults[i] // Range Value Pointer Issue
itemResult := MetadataInfo{
ID: &item.ID,
Title: &item.Info.Title,
Description: &item.Info.Description,
// Only the first listed author is kept.
if len(item.Info.Authors) > 0 {
itemResult.Author = &item.Info.Authors[0]
// Keep the first ISBN_10 and the first ISBN_13 identifier encountered.
for i := range item.Info.Identifiers {
item := item.Info.Identifiers[i] // Range Value Pointer Issue
// NOTE(review): this branch has no visible body; presumably it stops scanning once both ISBNs are set — confirm.
if itemResult.ISBN10 != nil && itemResult.ISBN13 != nil {
} else if itemResult.ISBN10 == nil && item.Type == "ISBN_10" {
itemResult.ISBN10 = &item.Identifier
} else if itemResult.ISBN13 == nil && item.Type == "ISBN_13" {
itemResult.ISBN13 = &item.Identifier
allMetadata = append(allMetadata, itemResult)
return allMetadata, nil
func saveGBooksCover(gbid string, coverFilePath string, overwrite bool) error {
// Validate File Doesn't Exists
_, err := os.Stat(coverFilePath)
if err == nil && overwrite == false {
log.Warn("[saveGBooksCover] File Alreads Exists")
return nil
// Create File
out, err := os.Create(coverFilePath)
if err != nil {
log.Error("[saveGBooksCover] File Create Error")
return errors.New("File Failure")
defer out.Close()
// Download File
log.Info("[saveGBooksCover] Downloading Cover")
coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, gbid)
resp, err := http.Get(coverURL)
if err != nil {
log.Error("[saveGBooksCover] Cover URL API Failure")
return errors.New("API Failure")
defer resp.Body.Close()
// Copy File to Disk
log.Info("[saveGBooksCover] Saving Cover")
_, err = io.Copy(out, resp.Body)
if err != nil {
log.Error("[saveGBooksCover] File Copy Error")
return errors.New("File Failure")
return nil
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)
log.Info("[performSearchRequest] Acquiring Metadata: ", apiQuery)
resp, err := http.Get(apiQuery)
if err != nil {
log.Error("[performSearchRequest] Google Books Query URL API Failure")
return nil, errors.New("API Failure")
parsedResp := gBooksQueryResponse{}
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
if err != nil {
log.Error("[performSearchRequest] Google Books Query API Decode Failure")
return nil, errors.New("API Failure")
if len(parsedResp.Items) == 0 {
log.Warn("[performSearchRequest] No Results")
return nil, errors.New("No Results")
return &parsedResp, nil
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)
log.Info("[performGBIDRequest] Acquiring CoverID")
resp, err := http.Get(apiQuery)
if err != nil {
log.Error("[performGBIDRequest] Cover URL API Failure")
return nil, errors.New("API Failure")
parsedResp := gBooksQueryItem{}
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
if err != nil {
log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
return nil, errors.New("API Failure")
return &parsedResp, nil
@ -1,217 +1,72 @@
package metadata
import (
log "github.com/sirupsen/logrus"
// Source identifies the metadata provider a lookup is dispatched to
// (see SearchMetadata).
type Source int
const (
// GBOOK selects the Google Books backed lookup.
GBOOK Source = iota
// MetadataInfo holds book metadata. It is used both as the search input and
// as the element type of the search results. All fields are pointers; the
// lookup code treats a nil field as "not provided".
type MetadataInfo struct {
// NOTE(review): ID is not read or written by the lookup code in this file section - confirm its meaning.
ID *string
// Title is a free-text search term on input; on results it is the volume title.
Title *string
// Author is a free-text search term on input; on results it is the first listed author, if any.
Author *string
// Description is filled on results from the volume description.
Description *string
// GBID is the Google Books volume ID; when set on input it takes priority over every other field.
GBID *string
// NOTE(review): OLID is presumably an Open Library ID; it is not used by the lookup code shown here - confirm.
OLID *string
// ISBN10 is searched as "isbn:<value>" on input (after ISBN13); on results it is the first ISBN_10 identifier.
ISBN10 *string
// ISBN13 is searched as "isbn:<value>" on input; on results it is the first ISBN_13 identifier.
ISBN13 *string
// gBooksIdentifiers is one entry of a volume's "industryIdentifiers" JSON
// array as decoded from a Google Books response.
type gBooksIdentifiers struct {
// Type names the identifier scheme; the lookup code matches "ISBN_10" and "ISBN_13".
Type string `json:"type"`
// Identifier is the identifier value for the given Type.
Identifier string `json:"identifier"`
// gBooksInfo mirrors the "volumeInfo" JSON object of a Google Books volume.
type gBooksInfo struct {
Title string `json:"title"`
// Authors may be empty; only the first entry is used when normalizing results.
Authors []string `json:"authors"`
Description string `json:"description"`
// Identifiers is scanned for the first ISBN_10 and ISBN_13 entries.
Identifiers []gBooksIdentifiers `json:"industryIdentifiers"`
// gBooksQueryItem is a single Google Books volume: either one element of a
// search response or the whole body of a lookup by volume ID.
type gBooksQueryItem struct {
// ID is the Google Books volume ID (exposed to callers as MetadataInfo.GBID).
ID string `json:"id"`
Info gBooksInfo `json:"volumeInfo"`
// gBooksQueryResponse is the decoded body of a Google Books volume search.
type gBooksQueryResponse struct {
TotalItems int `json:"totalItems"`
// Items holds the returned volumes; an empty list is reported as "No Results" by performSearchRequest.
Items []gBooksQueryItem `json:"items"`
// GBOOKS_QUERY_URL is the fmt format for a volume search; %s receives the
// search query, which is inserted verbatim.
const GBOOKS_QUERY_URL string = "https://www.googleapis.com/books/v1/volumes?q=%s"
// GBOOKS_GBID_INFO_URL is the fmt format for fetching one volume; %s receives
// the Google Books volume ID.
const GBOOKS_GBID_INFO_URL string = "https://www.googleapis.com/books/v1/volumes/%s"
// GBOOKS_GBID_COVER_URL is the fmt format for a volume's front cover image;
// %s receives the Google Books volume ID.
const GBOOKS_GBID_COVER_URL string = "https://books.google.com/books/content/images/frontcover/%s?fife=w480-h690"
func GetMetadata(metadataSearch MetadataInfo) ([]MetadataInfo, error) {
var queryResults []gBooksQueryItem
if metadataSearch.GBID != nil {
// Use GBID
resp, err := performGBIDRequest(*metadataSearch.GBID)
if err != nil {
return nil, err
queryResults = []gBooksQueryItem{*resp}
} else if metadataSearch.ISBN13 != nil {
searchQuery := "isbn:" + *metadataSearch.ISBN13
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else if metadataSearch.ISBN10 != nil {
searchQuery := "isbn:" + *metadataSearch.ISBN10
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else if metadataSearch.Title != nil || metadataSearch.Author != nil {
var searchQuery string
if metadataSearch.Title != nil {
searchQuery = searchQuery + *metadataSearch.Title
if metadataSearch.Author != nil {
searchQuery = searchQuery + " " + *metadataSearch.Author
// Escape & Trim
searchQuery = url.QueryEscape(strings.TrimSpace(searchQuery))
resp, err := performSearchRequest(searchQuery)
if err != nil {
return nil, err
queryResults = resp.Items
} else {
return nil, errors.New("Invalid Data")
// Normalize Data
allMetadata := []MetadataInfo{}
for i := range queryResults {
item := queryResults[i] // Range Value Pointer Issue
itemResult := MetadataInfo{
GBID: &item.ID,
Title: &item.Info.Title,
Description: &item.Info.Description,
if len(item.Info.Authors) > 0 {
itemResult.Author = &item.Info.Authors[0]
for i := range item.Info.Identifiers {
item := item.Info.Identifiers[i] // Range Value Pointer Issue
if itemResult.ISBN10 != nil && itemResult.ISBN13 != nil {
} else if itemResult.ISBN10 == nil && item.Type == "ISBN_10" {
itemResult.ISBN10 = &item.Identifier
} else if itemResult.ISBN13 == nil && item.Type == "ISBN_13" {
itemResult.ISBN13 = &item.Identifier
allMetadata = append(allMetadata, itemResult)
return allMetadata, nil
func SaveCover(gbid string, coverDir string, documentID string, overwrite bool) (*string, error) {
// Google Books -> JPG
func CacheCover(gbid string, coverDir string, documentID string, overwrite bool) (*string, error) {
// Get Filepath
coverFile := "." + filepath.Clean(fmt.Sprintf("/%s.jpg", documentID))
coverFilePath := filepath.Join(coverDir, coverFile)
// Validate File Doesn't Exists
_, err := os.Stat(coverFilePath)
if err == nil && overwrite == false {
log.Warn("[SaveCover] File Alreads Exists")
return &coverFile, nil
// Save Google Books
if err := saveGBooksCover(gbid, coverFilePath, overwrite); err != nil {
return nil, err
// Create File
out, err := os.Create(coverFilePath)
if err != nil {
log.Error("[SaveCover] File Create Error")
return nil, errors.New("File Failure")
defer out.Close()
// TODO - Refactor & Allow Open Library / Alternative Sources
// Download File
log.Info("[SaveCover] Downloading Cover")
coverURL := fmt.Sprintf(GBOOKS_GBID_COVER_URL, gbid)
resp, err := http.Get(coverURL)
if err != nil {
log.Error("[SaveCover] Cover URL API Failure")
return nil, errors.New("API Failure")
defer resp.Body.Close()
// Copy File to Disk
log.Info("[SaveCover] Saving Cover")
_, err = io.Copy(out, resp.Body)
if err != nil {
log.Error("[SaveCover] File Copy Error")
return nil, errors.New("File Failure")
// Return FilePath
return &coverFile, nil
func performSearchRequest(searchQuery string) (*gBooksQueryResponse, error) {
apiQuery := fmt.Sprintf(GBOOKS_QUERY_URL, searchQuery)
log.Info("[performSearchRequest] Acquiring Metadata: ", apiQuery)
resp, err := http.Get(apiQuery)
if err != nil {
log.Error("[performSearchRequest] Google Books Query URL API Failure")
return nil, errors.New("API Failure")
func SearchMetadata(s Source, metadataSearch MetadataInfo) ([]MetadataInfo, error) {
switch s {
case GBOOK:
return getGBooksMetadata(metadataSearch)
case OLIB:
return nil, errors.New("Not implemented")
return nil, errors.New("Not implemented")
parsedResp := gBooksQueryResponse{}
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
if err != nil {
log.Error("[performSearchRequest] Google Books Query API Decode Failure")
return nil, errors.New("API Failure")
if len(parsedResp.Items) == 0 {
log.Warn("[performSearchRequest] No Results")
return nil, errors.New("No Results")
return &parsedResp, nil
func performGBIDRequest(id string) (*gBooksQueryItem, error) {
apiQuery := fmt.Sprintf(GBOOKS_GBID_INFO_URL, id)
log.Info("[performGBIDRequest] Acquiring CoverID")
resp, err := http.Get(apiQuery)
func GetWordCount(filepath string) (int64, error) {
fileMime, err := mimetype.DetectFile(filepath)
if err != nil {
log.Error("[performGBIDRequest] Cover URL API Failure")
return nil, errors.New("API Failure")
return 0, err
parsedResp := gBooksQueryItem{}
err = json.NewDecoder(resp.Body).Decode(&parsedResp)
if err != nil {
log.Error("[performGBIDRequest] Google Books ID API Decode Failure")
return nil, errors.New("API Failure")
if fileExtension := fileMime.Extension(); fileExtension == ".epub" {
rc, err := OpenEPUBReader(filepath)
if err != nil {
return 0, err
return &parsedResp, nil
rf := rc.Rootfiles[0]
totalWords := rf.CountWords()
return totalWords, nil
} else {
return 0, errors.New("Invalid Extension")
@ -46,6 +46,10 @@ sql:
type: "string"
pointer: true
- column: "documents.words"
type: "int64"
pointer: true
- column: "documents.olid"
type: "string"
@ -295,16 +295,56 @@
{{ or .Data.Author "N/A" }}
<p class="text-gray-500">Time Read</p>
<p class="font-medium text-lg">
{{ .Data.TotalTimeMinutes }} Minutes
<div class="relative">
<div class="text-gray-500 inline-flex gap-2 relative">
<p>Time Read</p>
<label class="my-auto" for="progress-info-button">
class="cursor-pointer hover:text-gray-800 dark:hover:text-gray-100"
viewBox="0 0 24 24"
d="M12 22C7.28595 22 4.92893 22 3.46447 20.5355C2 19.0711 2 16.714 2 12C2 7.28595 2 4.92893 3.46447 3.46447C4.92893 2 7.28595 2 12 2C16.714 2 19.0711 2 20.5355 3.46447C22 4.92893 22 7.28595 22 12C22 16.714 22 19.0711 20.5355 20.5355C19.0711 22 16.714 22 12 22ZM12 17.75C12.4142 17.75 12.75 17.4142 12.75 17V11C12.75 10.5858 12.4142 10.25 12 10.25C11.5858 10.25 11.25 10.5858 11.25 11V17C11.25 17.4142 11.5858 17.75 12 17.75ZM12 7C12.5523 7 13 7.44772 13 8C13 8.55228 12.5523 9 12 9C11.4477 9 11 8.55228 11 8C11 7.44772 11.4477 7 12 7Z"
<input type="checkbox" id="progress-info-button" class="hidden css-button"/>
<div class="absolute z-30 top-7 right-0 p-3 transition-all duration-200 bg-gray-200 rounded shadow-lg shadow-gray-500 dark:shadow-gray-900 dark:bg-gray-600">
<div class="text-xs flex">
<p class="text-gray-400 w-32">Seconds / Page</p>
<p class="font-medium dark:text-white">
{{ .Data.SecondsPerPage }}
<div class="text-xs flex">
<p class="text-gray-400 w-32">Words / Minute</p>
<p class="font-medium dark:text-white">
{{ .Statistics.WordsPerMinute }}
<div class="text-xs flex">
<p class="text-gray-400 w-32">Est. Time Left</p>
<p class="font-medium dark:text-white whitespace-nowrap">
{{ NiceSeconds .Statistics.TotalTimeLeftSeconds }}
<p class="font-medium text-lg">
{{ NiceSeconds .Data.TotalTimeSeconds }}
<p class="text-gray-500">Progress</p>
<p class="font-medium text-lg">
{{ .Data.CurrentPage }} / {{ .Data.TotalPages }} ({{ .Data.Percentage }}%)
{{ .Data.CurrentPage }} / {{ .Data.TotalPages }} ({{ .Data.Percentage }}%)
@ -410,7 +450,7 @@
<dd class="mt-1 text-sm sm:mt-0 sm:col-span-2">
<img class="rounded object-fill h-32" src="https://books.google.com/books/content/images/frontcover/{{ .Metadata.GBID }}?fife=w480-h690"></img>
<img class="rounded object-fill h-32" src="https://books.google.com/books/content/images/frontcover/{{ .Metadata.ID }}?fife=w480-h690"></img>
<div class="p-3 bg-white dark:bg-gray-800 grid grid-cols-3 gap-4 sm:px-6">
@ -460,7 +500,7 @@
<input type="text" id="description" name="description" value="{{ .Metadata.Description }}">
<input type="text" id="isbn_10" name="isbn_10" value="{{ .Metadata.ISBN10 }}">
<input type="text" id="isbn_13" name="isbn_13" value="{{ .Metadata.ISBN13 }}">
<input type="text" id="cover_gbid" name="cover_gbid" value="{{ .Metadata.GBID }}">
<input type="text" id="cover_gbid" name="cover_gbid" value="{{ .Metadata.ID }}">
<div class="flex justify-end gap-4 m-4">
@ -45,7 +45,7 @@
<p class="text-gray-400">Time Read</p>
<p class="font-medium">
{{ $doc.TotalTimeMinutes }} Minutes
{{ NiceSeconds $doc.TotalTimeSeconds }}
@ -1,5 +1,10 @@
package utils
import (
// UTCOffset is the element type of UTC_OFFSETS, the list returned by
// GetUTCOffsets.
type UTCOffset struct {
// NOTE(review): Name and Value look like a display/value pair for a timezone picker - confirm against UTC_OFFSETS.
Name string
Value string
@ -49,3 +54,27 @@ var UTC_OFFSETS = []UTCOffset{
func GetUTCOffsets() []UTCOffset {
// NiceSeconds renders a duration given in seconds as a compact human readable
// string such as "1d 2h 3m 4s".
//
// Units whose value is zero are omitted and the remaining units are separated
// by single spaces without a trailing space. Zero and negative inputs render
// as "0s" rather than an empty string, so templates never show a blank value.
func NiceSeconds(input int64) (result string) {
	if input <= 0 {
		return "0s"
	}

	days := int64(math.Floor(float64(input) / (60 * 60 * 24)))
	hours := int64(math.Floor(float64(input%(60*60*24)) / (60 * 60)))
	minutes := int64(math.Floor(float64(input%(60*60)) / 60))
	seconds := input % 60

	units := []struct {
		value  int64
		suffix string
	}{
		{days, "d"},
		{hours, "h"},
		{minutes, "m"},
		{seconds, "s"},
	}

	for _, u := range units {
		if u.value == 0 {
			continue
		}
		// Separator goes between units only, so there is no trailing space.
		if result != "" {
			result += " "
		}
		result += fmt.Sprintf("%d%s", u.value, u.suffix)
	}

	return result
}
Reference in New Issue
Block a user