Compare commits

..

8 Commits

Author SHA1 Message Date
52661f4a88 templ wip 2
Some checks reported errors
continuous-integration/drone/push Build was killed
2024-12-02 19:25:33 -05:00
Evan Reichard
b6274c7021 templ wip 1 2024-12-02 19:25:33 -05:00
20912fef02 fix(streaks): incorrect calculation logic
All checks were successful
continuous-integration/drone/push Build is passing
2024-12-01 20:22:23 -05:00
a981d98ba5 feat(admin): basic log filter
All checks were successful
continuous-integration/drone/push Build is passing
2024-12-01 19:48:51 -05:00
a193f97d29 perf(db): incremental user streaks cache
All checks were successful
continuous-integration/drone/push Build is passing
2024-12-01 18:58:46 -05:00
841b29c425 improve(search): progress & retries
All checks were successful
continuous-integration/drone/push Build is passing
2024-12-01 17:04:41 -05:00
3d61d0f5ef perf(db): incremental document stats cache
All checks were successful
continuous-integration/drone/push Build is passing
2024-12-01 12:48:25 -05:00
5e388730a5 formatting: lua plugin 2024-12-01 11:28:33 -05:00
17 changed files with 1610 additions and 1381 deletions

View File

@ -174,7 +174,10 @@ func (api *API) appGetAdminLogs(c *gin.Context) {
rAdminLogs.Filter = strings.TrimSpace(rAdminLogs.Filter)
var jqFilter *gojq.Code
if rAdminLogs.Filter != "" {
var basicFilter string
if strings.HasPrefix(rAdminLogs.Filter, "\"") && strings.HasSuffix(rAdminLogs.Filter, "\"") {
basicFilter = rAdminLogs.Filter[1 : len(rAdminLogs.Filter)-1]
} else if rAdminLogs.Filter != "" {
parsed, err := gojq.Parse(rAdminLogs.Filter)
if err != nil {
log.Error("Unable to parse JQ filter")
@ -220,12 +223,17 @@ func (api *API) appGetAdminLogs(c *gin.Context) {
continue
}
// No Filter
if jqFilter == nil {
// Basic Filter
if basicFilter != "" && strings.Contains(string(rawData), basicFilter) {
logLines = append(logLines, string(rawData))
continue
}
// No JQ Filter
if jqFilter == nil {
continue
}
// Error or nil
result, _ := jqFilter.Run(jsonMap).Next()
if _, ok := result.(error); ok {

View File

@ -26,7 +26,6 @@ import (
"reichard.io/antholume/ngtemplates/common"
"reichard.io/antholume/ngtemplates/pages"
"reichard.io/antholume/search"
"reichard.io/antholume/utils"
)
type backupType string
@ -747,57 +746,50 @@ func (api *API) appSaveNewDocument(c *gin.Context) {
}
// Send Message
sendDownloadMessage("Downloading document...", gin.H{"Progress": 10})
sendDownloadMessage("Downloading document...", gin.H{"Progress": 1})
// Scaled Download Function
lastTime := time.Now()
downloadFunc := func(p float32) {
nowTime := time.Now()
if nowTime.Before(lastTime.Add(time.Millisecond * 500)) {
return
}
scaledProgress := int((p * 95 / 100) + 2)
sendDownloadMessage("Downloading document...", gin.H{"Progress": scaledProgress})
lastTime = nowTime
}
// Save Book
tempFilePath, err := search.SaveBook(rDocAdd.ID, rDocAdd.Source)
tempFilePath, metadata, err := search.SaveBook(rDocAdd.ID, rDocAdd.Source, downloadFunc)
if err != nil {
log.Warn("Temp File Error: ", err)
log.Warn("Save Book Error: ", err)
sendDownloadMessage("Unable to download file", gin.H{"Error": true})
return
}
// Send Message
sendDownloadMessage("Calculating partial MD5...", gin.H{"Progress": 60})
sendDownloadMessage("Saving document...", gin.H{"Progress": 98})
// Calculate Partial MD5 ID
partialMD5, err := utils.CalculatePartialMD5(tempFilePath)
if err != nil {
log.Warn("Partial MD5 Error: ", err)
sendDownloadMessage("Unable to calculate partial MD5", gin.H{"Error": true})
// Derive Author / Title
docAuthor := "Unknown"
if *metadata.Author != "" {
docAuthor = *metadata.Author
} else if *rDocAdd.Author != "" {
docAuthor = *rDocAdd.Author
}
// Send Message
sendDownloadMessage("Saving file...", gin.H{"Progress": 60})
// Derive Extension on MIME
fileMime, err := mimetype.DetectFile(tempFilePath)
if err != nil {
log.Warn("MIME Detect Error: ", err)
sendDownloadMessage("Unable to download file", gin.H{"Error": true})
return
}
fileExtension := fileMime.Extension()
// Derive Filename
var fileName string
if *rDocAdd.Author != "" {
fileName = fileName + *rDocAdd.Author
} else {
fileName = fileName + "Unknown"
docTitle := "Unknown"
if *metadata.Title != "" {
docTitle = *metadata.Title
} else if *rDocAdd.Title != "" {
docTitle = *rDocAdd.Title
}
if *rDocAdd.Title != "" {
fileName = fileName + " - " + *rDocAdd.Title
} else {
fileName = fileName + " - Unknown"
}
// Remove Slashes
// Remove Slashes & Sanitize File Name
fileName := fmt.Sprintf("%s - %s", docAuthor, docTitle)
fileName = strings.ReplaceAll(fileName, "/", "")
// Derive & Sanitize File Name
fileName = "." + filepath.Clean(fmt.Sprintf("/%s [%s]%s", fileName, *partialMD5, fileExtension))
fileName = "." + filepath.Clean(fmt.Sprintf("/%s [%s]%s", fileName, *metadata.PartialMD5, metadata.Type))
// Open Source File
sourceFile, err := os.Open(tempFilePath)
@ -829,37 +821,15 @@ func (api *API) appSaveNewDocument(c *gin.Context) {
}
// Send Message
sendDownloadMessage("Calculating MD5...", gin.H{"Progress": 70})
// Get MD5 Hash
fileHash, err := getFileMD5(safePath)
if err != nil {
log.Error("Hash Failure: ", err)
sendDownloadMessage("Unable to calculate MD5", gin.H{"Error": true})
return
}
// Send Message
sendDownloadMessage("Calculating word count...", gin.H{"Progress": 80})
// Get Word Count
wordCount, err := metadata.GetWordCount(safePath)
if err != nil {
log.Error("Word Count Failure: ", err)
sendDownloadMessage("Unable to calculate word count", gin.H{"Error": true})
return
}
// Send Message
sendDownloadMessage("Saving to database...", gin.H{"Progress": 90})
sendDownloadMessage("Saving to database...", gin.H{"Progress": 99})
// Upsert Document
if _, err = api.db.Queries.UpsertDocument(api.db.Ctx, database.UpsertDocumentParams{
ID: *partialMD5,
Title: rDocAdd.Title,
Author: rDocAdd.Author,
Md5: fileHash,
Words: wordCount,
ID: *metadata.PartialMD5,
Title: &docTitle,
Author: &docAuthor,
Md5: metadata.MD5,
Words: metadata.WordCount,
Filepath: &fileName,
Basepath: &basePath,
}); err != nil {
@ -872,7 +842,7 @@ func (api *API) appSaveNewDocument(c *gin.Context) {
sendDownloadMessage("Download Success", gin.H{
"Progress": 100,
"ButtonText": "Go to Book",
"ButtonHref": fmt.Sprintf("./documents/%s", *partialMD5),
"ButtonHref": fmt.Sprintf("./documents/%s", *metadata.PartialMD5),
})
}

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.26.0
// sqlc v1.27.0
package database

View File

@ -0,0 +1,228 @@
-- Incrementally refreshes the document_user_statistics cache table.
--
-- Only (user, document) pairs that have activity rows created after the newest
-- cached `last_seen` value are recalculated. For each such pair the statistics
-- are rebuilt from all of that pair's activity rows and inserted back into
-- document_user_statistics.
--
-- The INSERT has no column list, so the order of the final SELECT's columns
-- must match the column order of the document_user_statistics table.

-- (document_id, user_id) pairs with activity newer than the most recent cached
-- `last_seen`. When the cache is empty the epoch is used, selecting every pair.
WITH new_activity AS (
SELECT
document_id,
user_id
FROM activity
WHERE
created_at > COALESCE(
(SELECT MAX(last_seen) FROM document_user_statistics),
'1970-01-01T00:00:00Z'
)
GROUP BY user_id, document_id
),
-- Every activity row belonging to a pair in new_activity, tagged with a
-- `group_leader`: the smallest activity id for the same user and document
-- whose percentage range overlaps this row's range.
intermediate_ga AS (
SELECT
ga.id AS row_id,
ga.user_id,
ga.document_id,
ga.duration,
ga.start_time,
ga.start_percentage,
ga.end_percentage,
ga.created_at,
-- Find Overlapping Events (Assign Unique ID)
(
SELECT MIN(id)
FROM activity AS overlap
WHERE
ga.document_id = overlap.document_id
AND ga.user_id = overlap.user_id
AND ga.start_percentage <= overlap.end_percentage
AND ga.end_percentage >= overlap.start_percentage
) AS group_leader
FROM activity AS ga
-- The join condition is expressed in the WHERE clause rather than in ON.
INNER JOIN new_activity AS na
WHERE na.user_id = ga.user_id AND na.document_id = ga.document_id
),
-- Collapses each overlap group into a single row: the covered percentage span
-- (max end - min start), the summed duration and the latest timestamps.
-- user_id and document_id are bare (non-aggregated) columns here.
grouped_activity AS (
SELECT
user_id,
document_id,
MAX(created_at) AS created_at,
MAX(start_time) AS start_time,
MIN(start_percentage) AS start_percentage,
MAX(end_percentage) AS end_percentage,
MAX(end_percentage) - MIN(start_percentage) AS read_percentage,
SUM(duration) AS duration
FROM intermediate_ga
GROUP BY group_leader
),
-- One row per (user, document) holding the current progress: the percentage of
-- the newest document_progress row (by created_at), falling back to an
-- activity end_percentage when no progress row exists.
current_progress AS (
SELECT
user_id,
document_id,
COALESCE((
SELECT percentage
FROM document_progress AS dp
WHERE
dp.user_id = iga.user_id
AND dp.document_id = iga.document_id
ORDER BY created_at DESC
LIMIT 1
), end_percentage) AS percentage
FROM intermediate_ga AS iga
GROUP BY user_id, document_id
-- NOTE(review): presumably this relies on SQLite's bare-column behavior with
-- MAX() so that `end_percentage` above is taken from the row with the latest
-- start_time - confirm before changing.
HAVING MAX(start_time)
)
INSERT INTO document_user_statistics
SELECT
ga.document_id,
ga.user_id,
cp.percentage,
MAX(ga.start_time) AS last_read,
MAX(ga.created_at) AS last_seen,
SUM(ga.read_percentage) AS read_percentage,
-- All Time WPM
SUM(ga.duration) AS total_time_seconds,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
AS total_words_read,
-- NOTE(review): unlike the windowed WPM values below, this division is not
-- wrapped in COALESCE; SQLite yields NULL when the divisor is zero - confirm
-- the target column tolerates that.
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
/ (SUM(ga.duration) / 60.0) AS total_wpm,
-- Yearly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.duration
ELSE 0
END
)
AS yearly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN read_percentage
ELSE 0
END
)
)
AS yearly_words_read,
-- The CASE expressions below have no ELSE, so the sums are NULL when nothing
-- falls inside the window; the outer COALESCE turns that into 0.0.
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.duration
END
)
/ 60.0
), 0.0)
AS yearly_wpm,
-- Monthly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.duration
ELSE 0
END
)
AS monthly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN read_percentage
ELSE 0
END
)
)
AS monthly_words_read,
-- Same NULL-then-COALESCE pattern as yearly_wpm.
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.duration
END
)
/ 60.0
), 0.0)
AS monthly_wpm,
-- Weekly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.duration
ELSE 0
END
)
AS weekly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN read_percentage
ELSE 0
END
)
)
AS weekly_words_read,
-- Same NULL-then-COALESCE pattern as yearly_wpm.
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.duration
END
)
/ 60.0
), 0.0)
AS weekly_wpm
FROM grouped_activity AS ga
INNER JOIN
current_progress AS cp
ON ga.user_id = cp.user_id AND ga.document_id = cp.document_id
-- Inner join: activity for documents missing from `documents` is dropped.
INNER JOIN
documents AS d
ON ga.document_id = d.id
GROUP BY ga.document_id, ga.user_id
ORDER BY total_wpm DESC;

View File

@ -28,8 +28,11 @@ type DBManager struct {
//go:embed schema.sql
var ddl string
//go:embed views.sql
var views string
//go:embed user_streaks.sql
var user_streaks string
//go:embed document_user_statistics.sql
var document_user_statistics string
//go:embed migrations/*
var migrations embed.FS
@ -41,6 +44,11 @@ func init() {
Deterministic: true,
Scalar: localTime,
})
sqlite.MustRegisterFunction("LOCAL_DATE", &sqlite.FunctionImpl{
NArgs: 2,
Deterministic: true,
Scalar: localDate,
})
}
// NewMgr Returns an initialized manager
@ -104,12 +112,6 @@ func (dbm *DBManager) init() error {
return err
}
// Execute views
if _, err := dbm.DB.Exec(views, nil); err != nil {
log.Panicf("Error executing views: %v", err)
return err
}
// Update settings
err = dbm.updateSettings()
if err != nil {
@ -144,21 +146,13 @@ func (dbm *DBManager) Reload() error {
// CacheTempTables clears existing statistics and recalculates
func (dbm *DBManager) CacheTempTables() error {
start := time.Now()
user_streaks_sql := `
DELETE FROM user_streaks;
INSERT INTO user_streaks SELECT * FROM view_user_streaks;
`
if _, err := dbm.DB.ExecContext(dbm.Ctx, user_streaks_sql); err != nil {
if _, err := dbm.DB.ExecContext(dbm.Ctx, user_streaks); err != nil {
return err
}
log.Debug("Cached 'user_streaks' in: ", time.Since(start))
start = time.Now()
document_statistics_sql := `
DELETE FROM document_user_statistics;
INSERT INTO document_user_statistics SELECT * FROM view_document_user_statistics;
`
if _, err := dbm.DB.ExecContext(dbm.Ctx, document_statistics_sql); err != nil {
if _, err := dbm.DB.ExecContext(dbm.Ctx, document_user_statistics); err != nil {
return err
}
log.Debug("Cached 'document_user_statistics' in: ", time.Since(start))
@ -240,5 +234,30 @@ func localTime(ctx *sqlite.FunctionContext, args []driver.Value) (driver.Value,
return nil, errors.New("unable to parse time")
}
return formattedTime.In(timeZone).Format("2006-01-02 15:04:05.000"), nil
return formattedTime.In(timeZone).Format(time.RFC3339), nil
}
// localDate is a custom SQL function that is registered as LOCAL_DATE in the
// init function. It takes two string arguments: an RFC 3339 timestamp and a
// time zone name accepted by time.LoadLocation. The timestamp is converted to
// that zone and returned as a "2006-01-02" formatted date string.
//
// An error is returned when fewer than two arguments are supplied, when either
// argument is not a string (for example SQL NULL), or when the time zone or
// timestamp cannot be parsed.
func localDate(ctx *sqlite.FunctionContext, args []driver.Value) (driver.Value, error) {
	// Guard the index accesses below; the function is registered with two
	// arguments, so this only trips if it is ever invoked some other way.
	if len(args) < 2 {
		return nil, errors.New("LOCAL_DATE requires two arguments")
	}

	timeStr, ok := args[0].(string)
	if !ok {
		return nil, errors.New("both arguments to LOCAL_DATE must be strings")
	}

	timeZoneStr, ok := args[1].(string)
	if !ok {
		return nil, errors.New("both arguments to LOCAL_DATE must be strings")
	}

	timeZone, err := time.LoadLocation(timeZoneStr)
	if err != nil {
		return nil, errors.New("unable to parse timezone")
	}

	// time.UTC is only the fallback location used when the timestamp itself
	// carries no zone information.
	parsedTime, err := time.ParseInLocation(time.RFC3339, timeStr, time.UTC)
	if err != nil {
		return nil, errors.New("unable to parse time")
	}

	return parsedTime.In(timeZone).Format("2006-01-02"), nil
}

View File

@ -1,6 +1,6 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.26.0
// sqlc v1.27.0
package database
@ -62,6 +62,7 @@ type DocumentUserStatistic struct {
UserID string `json:"user_id"`
Percentage float64 `json:"percentage"`
LastRead string `json:"last_read"`
LastSeen string `json:"last_seen"`
ReadPercentage float64 `json:"read_percentage"`
TotalTimeSeconds int64 `json:"total_time_seconds"`
TotalWordsRead int64 `json:"total_words_read"`
@ -115,4 +116,8 @@ type UserStreak struct {
CurrentStreak int64 `json:"current_streak"`
CurrentStreakStartDate string `json:"current_streak_start_date"`
CurrentStreakEndDate string `json:"current_streak_end_date"`
LastTimezone string `json:"last_timezone"`
LastSeen string `json:"last_seen"`
LastRecord string `json:"last_record"`
LastCalculated string `json:"last_calculated"`
}

View File

@ -67,7 +67,7 @@ WITH filtered_activity AS (
SELECT
document_id,
device_id,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(activity.start_time, users.timezone)) AS TEXT) AS start_time,
LOCAL_TIME(activity.start_time, users.timezone) AS start_time,
title,
author,
duration,
@ -80,7 +80,7 @@ LEFT JOIN users ON users.id = activity.user_id;
-- name: GetDailyReadStats :many
WITH RECURSIVE last_30_days AS (
SELECT DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone)) AS date
SELECT LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone) AS date
FROM users WHERE users.id = $user_id
UNION ALL
SELECT DATE(date, '-1 days')
@ -99,7 +99,7 @@ filtered_activity AS (
activity_days AS (
SELECT
SUM(duration) AS seconds_read,
DATE(LOCAL_TIME(start_time, timezone)) AS day
LOCAL_DATE(start_time, timezone) AS day
FROM filtered_activity AS activity
LEFT JOIN users ON users.id = activity.user_id
GROUP BY day
@ -138,8 +138,8 @@ WHERE id = $device_id LIMIT 1;
SELECT
devices.id,
devices.device_name,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(devices.created_at, users.timezone)) AS TEXT) AS created_at,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(devices.last_synced, users.timezone)) AS TEXT) AS last_synced
LOCAL_TIME(devices.created_at, users.timezone) AS created_at,
LOCAL_TIME(devices.last_synced, users.timezone) AS last_synced
FROM devices
JOIN users ON users.id = devices.user_id
WHERE users.id = $user_id
@ -283,7 +283,7 @@ SELECT
ROUND(CAST(progress.percentage AS REAL) * 100, 2) AS percentage,
progress.document_id,
progress.user_id,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(progress.created_at, users.timezone)) AS TEXT) AS created_at
LOCAL_TIME(progress.created_at, users.timezone) AS created_at
FROM document_progress AS progress
LEFT JOIN users ON progress.user_id = users.id
LEFT JOIN devices ON progress.device_id = devices.id

View File

@ -1,6 +1,6 @@
// Code generated by sqlc. DO NOT EDIT.
// versions:
// sqlc v1.26.0
// sqlc v1.27.0
// source: query.sql
package database
@ -193,7 +193,7 @@ WITH filtered_activity AS (
SELECT
document_id,
device_id,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(activity.start_time, users.timezone)) AS TEXT) AS start_time,
LOCAL_TIME(activity.start_time, users.timezone) AS start_time,
title,
author,
duration,
@ -214,15 +214,15 @@ type GetActivityParams struct {
}
type GetActivityRow struct {
DocumentID string `json:"document_id"`
DeviceID string `json:"device_id"`
StartTime string `json:"start_time"`
Title *string `json:"title"`
Author *string `json:"author"`
Duration int64 `json:"duration"`
StartPercentage float64 `json:"start_percentage"`
EndPercentage float64 `json:"end_percentage"`
ReadPercentage float64 `json:"read_percentage"`
DocumentID string `json:"document_id"`
DeviceID string `json:"device_id"`
StartTime interface{} `json:"start_time"`
Title *string `json:"title"`
Author *string `json:"author"`
Duration int64 `json:"duration"`
StartPercentage float64 `json:"start_percentage"`
EndPercentage float64 `json:"end_percentage"`
ReadPercentage float64 `json:"read_percentage"`
}
func (q *Queries) GetActivity(ctx context.Context, arg GetActivityParams) ([]GetActivityRow, error) {
@ -266,7 +266,7 @@ func (q *Queries) GetActivity(ctx context.Context, arg GetActivityParams) ([]Get
const getDailyReadStats = `-- name: GetDailyReadStats :many
WITH RECURSIVE last_30_days AS (
SELECT DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone)) AS date
SELECT LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone) AS date
FROM users WHERE users.id = ?1
UNION ALL
SELECT DATE(date, '-1 days')
@ -285,7 +285,7 @@ filtered_activity AS (
activity_days AS (
SELECT
SUM(duration) AS seconds_read,
DATE(LOCAL_TIME(start_time, timezone)) AS day
LOCAL_DATE(start_time, timezone) AS day
FROM filtered_activity AS activity
LEFT JOIN users ON users.id = activity.user_id
GROUP BY day
@ -422,8 +422,8 @@ const getDevices = `-- name: GetDevices :many
SELECT
devices.id,
devices.device_name,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(devices.created_at, users.timezone)) AS TEXT) AS created_at,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(devices.last_synced, users.timezone)) AS TEXT) AS last_synced
LOCAL_TIME(devices.created_at, users.timezone) AS created_at,
LOCAL_TIME(devices.last_synced, users.timezone) AS last_synced
FROM devices
JOIN users ON users.id = devices.user_id
WHERE users.id = ?1
@ -431,10 +431,10 @@ ORDER BY devices.last_synced DESC
`
type GetDevicesRow struct {
ID string `json:"id"`
DeviceName string `json:"device_name"`
CreatedAt string `json:"created_at"`
LastSynced string `json:"last_synced"`
ID string `json:"id"`
DeviceName string `json:"device_name"`
CreatedAt interface{} `json:"created_at"`
LastSynced interface{} `json:"last_synced"`
}
func (q *Queries) GetDevices(ctx context.Context, userID string) ([]GetDevicesRow, error) {
@ -902,7 +902,7 @@ SELECT
ROUND(CAST(progress.percentage AS REAL) * 100, 2) AS percentage,
progress.document_id,
progress.user_id,
CAST(STRFTIME('%Y-%m-%d %H:%M:%S', LOCAL_TIME(progress.created_at, users.timezone)) AS TEXT) AS created_at
LOCAL_TIME(progress.created_at, users.timezone) AS created_at
FROM document_progress AS progress
LEFT JOIN users ON progress.user_id = users.id
LEFT JOIN devices ON progress.device_id = devices.id
@ -929,13 +929,13 @@ type GetProgressParams struct {
}
type GetProgressRow struct {
Title *string `json:"title"`
Author *string `json:"author"`
DeviceName string `json:"device_name"`
Percentage float64 `json:"percentage"`
DocumentID string `json:"document_id"`
UserID string `json:"user_id"`
CreatedAt string `json:"created_at"`
Title *string `json:"title"`
Author *string `json:"author"`
DeviceName string `json:"device_name"`
Percentage float64 `json:"percentage"`
DocumentID string `json:"document_id"`
UserID string `json:"user_id"`
CreatedAt interface{} `json:"created_at"`
}
func (q *Queries) GetProgress(ctx context.Context, arg GetProgressParams) ([]GetProgressRow, error) {
@ -1078,7 +1078,7 @@ func (q *Queries) GetUserStatistics(ctx context.Context) ([]GetUserStatisticsRow
}
const getUserStreaks = `-- name: GetUserStreaks :many
SELECT user_id, "window", max_streak, max_streak_start_date, max_streak_end_date, current_streak, current_streak_start_date, current_streak_end_date FROM user_streaks
SELECT user_id, "window", max_streak, max_streak_start_date, max_streak_end_date, current_streak, current_streak_start_date, current_streak_end_date, last_timezone, last_seen, last_record, last_calculated FROM user_streaks
WHERE user_id = ?1
`
@ -1100,6 +1100,10 @@ func (q *Queries) GetUserStreaks(ctx context.Context, userID string) ([]UserStre
&i.CurrentStreak,
&i.CurrentStreakStartDate,
&i.CurrentStreakEndDate,
&i.LastTimezone,
&i.LastSeen,
&i.LastRecord,
&i.LastCalculated,
); err != nil {
return nil, err
}

View File

@ -118,30 +118,13 @@ CREATE TABLE IF NOT EXISTS settings (
created_at DATETIME NOT NULL DEFAULT (STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'))
);
---------------------------------------------------------------
----------------------- Temporary Tables ----------------------
---------------------------------------------------------------
-- Temporary User Streaks Table (Cached from View)
CREATE TEMPORARY TABLE IF NOT EXISTS user_streaks (
user_id TEXT NOT NULL,
window TEXT NOT NULL,
max_streak INTEGER NOT NULL,
max_streak_start_date TEXT NOT NULL,
max_streak_end_date TEXT NOT NULL,
current_streak INTEGER NOT NULL,
current_streak_start_date TEXT NOT NULL,
current_streak_end_date TEXT NOT NULL
);
-- Temporary Document User Statistics Table (Cached from View)
CREATE TEMPORARY TABLE IF NOT EXISTS document_user_statistics (
-- Document User Statistics Table
CREATE TABLE IF NOT EXISTS document_user_statistics (
document_id TEXT NOT NULL,
user_id TEXT NOT NULL,
percentage REAL NOT NULL,
last_read TEXT NOT NULL,
last_read DATETIME NOT NULL,
last_seen DATETIME NOT NULL,
read_percentage REAL NOT NULL,
total_time_seconds INTEGER NOT NULL,
@ -163,21 +146,40 @@ CREATE TEMPORARY TABLE IF NOT EXISTS document_user_statistics (
UNIQUE(document_id, user_id) ON CONFLICT REPLACE
);
-- User Streaks Table
--
-- Cache of per-user reading streaks, one row per (user_id, window); the
-- UNIQUE ... ON CONFLICT REPLACE constraint makes a plain INSERT for an
-- existing pair overwrite the old row.
--
-- The table is dropped immediately before being created, so any existing rows
-- are discarded each time this script is executed.
DROP TABLE IF EXISTS user_streaks;
CREATE TABLE IF NOT EXISTS user_streaks (
user_id TEXT NOT NULL,
-- Streak granularity; user_streaks.sql writes 'DAY' and 'WEEK'.
window TEXT NOT NULL,
max_streak INTEGER NOT NULL,
max_streak_start_date TEXT NOT NULL,
max_streak_end_date TEXT NOT NULL,
current_streak INTEGER NOT NULL,
current_streak_start_date TEXT NOT NULL,
current_streak_end_date TEXT NOT NULL,
-- Bookkeeping written by user_streaks.sql and read back on the next run to
-- decide whether a user's streaks need recalculating: the user's timezone at
-- calculation time, the newest activity created_at and start_time seen, and
-- the UTC timestamp of the calculation itself.
last_timezone TEXT NOT NULL,
last_seen TEXT NOT NULL,
last_record TEXT NOT NULL,
last_calculated TEXT NOT NULL,
UNIQUE(user_id, window) ON CONFLICT REPLACE
);
---------------------------------------------------------------
--------------------------- Indexes ---------------------------
---------------------------------------------------------------
CREATE INDEX IF NOT EXISTS activity_start_time ON activity (start_time);
CREATE INDEX IF NOT EXISTS activity_created_at ON activity (created_at);
CREATE INDEX IF NOT EXISTS activity_user_id ON activity (user_id);
CREATE INDEX IF NOT EXISTS activity_user_id_document_id ON activity (
user_id,
document_id
);
DROP VIEW IF EXISTS view_user_streaks;
DROP VIEW IF EXISTS view_document_user_statistics;
---------------------------------------------------------------
--------------------------- Triggers --------------------------
---------------------------------------------------------------

154
database/user_streaks.sql Normal file
View File

@ -0,0 +1,154 @@
-- Incrementally refreshes the user_streaks cache table.
--
-- Streaks are recalculated only for users listed in `outdated_users`; the
-- resulting rows are inserted into user_streaks together with bookkeeping
-- columns (last_timezone, last_seen, last_record, last_calculated) that the
-- next run uses to decide who is outdated.
--
-- The INSERT has no column list, so the final SELECT's column order must match
-- the column order of the user_streaks table.

-- Users with activity created after their cached `last_seen` whose activity
-- falls on a different local date than their cached `last_record`.
WITH updated_users AS (
    SELECT a.user_id
    FROM activity AS a
    LEFT JOIN users AS u ON u.id = a.user_id
    LEFT JOIN user_streaks AS s ON a.user_id = s.user_id AND s.window = 'DAY'
    WHERE
        a.created_at > COALESCE(s.last_seen, '1970-01-01')
        -- s.last_record is NULL for users without a cached row. LOCAL_DATE
        -- rejects non-string arguments, so fall back to the epoch (the same
        -- guard outdated_users applies to last_calculated).
        AND LOCAL_DATE(COALESCE(s.last_record, '1970-01-01T00:00:00Z'), u.timezone)
        != LOCAL_DATE(a.start_time, u.timezone)
    GROUP BY a.user_id
),

-- Users whose cached streaks must be rebuilt, plus the bookkeeping values that
-- will be stored alongside the new rows.
outdated_users AS (
    SELECT
        a.user_id,
        u.timezone AS last_timezone,
        MAX(a.created_at) AS last_seen,
        MAX(a.start_time) AS last_record,
        STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now') AS last_calculated
    FROM activity AS a
    LEFT JOIN users AS u ON u.id = a.user_id
    LEFT JOIN user_streaks AS s ON a.user_id = s.user_id AND s.window = 'DAY'
    GROUP BY a.user_id
    HAVING
        -- User Changed Timezones
        s.last_timezone != u.timezone
        -- Users Date Changed
        OR LOCAL_DATE(COALESCE(s.last_calculated, '1970-01-01T00:00:00Z'), u.timezone)
        != LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), u.timezone)
        -- User Added New Data
        OR a.user_id IN updated_users
),

-- Distinct local reading days, and the week bucket each day belongs to, for
-- every outdated user.
document_windows AS (
    SELECT
        activity.user_id,
        users.timezone,
        DATE(
            LOCAL_DATE(activity.start_time, users.timezone),
            'weekday 0', '-7 day'
        ) AS weekly_read,
        LOCAL_DATE(activity.start_time, users.timezone) AS daily_read
    FROM activity
    INNER JOIN outdated_users ON outdated_users.user_id = activity.user_id
    LEFT JOIN users ON users.id = activity.user_id
    GROUP BY activity.user_id, weekly_read, daily_read
),

-- One row per user and week, numbered from the most recent week backwards.
weekly_partitions AS (
    SELECT
        user_id,
        timezone,
        'WEEK' AS "window",
        weekly_read AS read_window,
        ROW_NUMBER() OVER (
            PARTITION BY user_id ORDER BY weekly_read DESC
        ) AS seqnum
    FROM document_windows
    GROUP BY user_id, weekly_read
),

-- One row per user and day, numbered from the most recent day backwards.
daily_partitions AS (
    SELECT
        user_id,
        timezone,
        'DAY' AS "window",
        daily_read AS read_window,
        ROW_NUMBER() OVER (
            PARTITION BY user_id ORDER BY daily_read DESC
        ) AS seqnum
    FROM document_windows
    GROUP BY user_id, daily_read
),

-- Consecutive windows form one streak: because seqnum grows by one as the
-- date steps back by one window, `read_window + seqnum windows` is constant
-- within an unbroken run and changes whenever a window is skipped.
streaks AS (
    SELECT
        COUNT(*) AS streak,
        MIN(read_window) AS start_date,
        MAX(read_window) AS end_date,
        window,
        user_id,
        timezone
    FROM daily_partitions
    GROUP BY
        timezone,
        user_id,
        DATE(read_window, '+' || seqnum || ' day')

    UNION ALL

    SELECT
        COUNT(*) AS streak,
        MIN(read_window) AS start_date,
        MAX(read_window) AS end_date,
        window,
        user_id,
        timezone
    FROM weekly_partitions
    GROUP BY
        timezone,
        user_id,
        DATE(read_window, '+' || (seqnum * 7) || ' day')
),

-- Longest streak per user and window.
-- NOTE(review): start_date / end_date are bare columns; this presumably relies
-- on SQLite taking them from the row that holds MAX(streak) - confirm.
max_streak AS (
    SELECT
        MAX(streak) AS max_streak,
        start_date AS max_streak_start_date,
        end_date AS max_streak_end_date,
        window,
        user_id
    FROM streaks
    GROUP BY user_id, window
),

-- The streak that is still running: its end date must be the current or the
-- immediately preceding window in the user's local time.
current_streak AS (
    SELECT
        streak AS current_streak,
        start_date AS current_streak_start_date,
        end_date AS current_streak_end_date,
        window,
        user_id
    FROM streaks
    WHERE CASE
        WHEN window = 'WEEK' THEN
            DATE(LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), 'weekday 0', '-14 day') = current_streak_end_date
            OR DATE(LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), 'weekday 0', '-7 day') = current_streak_end_date
        WHEN window = 'DAY' THEN
            DATE(LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), '-1 day') = current_streak_end_date
            OR DATE(LOCAL_DATE(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone)) = current_streak_end_date
    END
    GROUP BY user_id, window
)

INSERT INTO user_streaks
SELECT
    max_streak.user_id,
    max_streak.window,
    IFNULL(max_streak, 0) AS max_streak,
    IFNULL(max_streak_start_date, 'N/A') AS max_streak_start_date,
    IFNULL(max_streak_end_date, 'N/A') AS max_streak_end_date,
    -- Users without a running streak have no current_streak row (LEFT JOIN),
    -- so the values default to 0 / 'N/A'.
    IFNULL(current_streak.current_streak, 0) AS current_streak,
    IFNULL(current_streak.current_streak_start_date, 'N/A') AS current_streak_start_date,
    IFNULL(current_streak.current_streak_end_date, 'N/A') AS current_streak_end_date,
    outdated_users.last_timezone AS last_timezone,
    outdated_users.last_seen AS last_seen,
    outdated_users.last_record AS last_record,
    outdated_users.last_calculated AS last_calculated
FROM max_streak
JOIN outdated_users ON max_streak.user_id = outdated_users.user_id
LEFT JOIN current_streak ON
    current_streak.user_id = max_streak.user_id
    AND current_streak.window = max_streak.window;

View File

@ -1,296 +0,0 @@
---------------------------------------------------------------
---------------------------- Views ----------------------------
---------------------------------------------------------------
--------------------------------
--------- User Streaks ---------
--------------------------------
CREATE VIEW view_user_streaks AS
WITH document_windows AS (
SELECT
activity.user_id,
users.timezone,
DATE(
LOCAL_TIME(activity.start_time, users.timezone),
'weekday 0', '-7 day'
) AS weekly_read,
DATE(LOCAL_TIME(activity.start_time, users.timezone)) AS daily_read
FROM activity
LEFT JOIN users ON users.id = activity.user_id
GROUP BY activity.user_id, weekly_read, daily_read
),
weekly_partitions AS (
SELECT
user_id,
timezone,
'WEEK' AS "window",
weekly_read AS read_window,
row_number() OVER (
PARTITION BY user_id ORDER BY weekly_read DESC
) AS seqnum
FROM document_windows
GROUP BY user_id, weekly_read
),
daily_partitions AS (
SELECT
user_id,
timezone,
'DAY' AS "window",
daily_read AS read_window,
row_number() OVER (
PARTITION BY user_id ORDER BY daily_read DESC
) AS seqnum
FROM document_windows
GROUP BY user_id, daily_read
),
streaks AS (
SELECT
COUNT(*) AS streak,
MIN(read_window) AS start_date,
MAX(read_window) AS end_date,
window,
user_id,
timezone
FROM daily_partitions
GROUP BY
timezone,
user_id,
DATE(read_window, '+' || seqnum || ' day')
UNION ALL
SELECT
COUNT(*) AS streak,
MIN(read_window) AS start_date,
MAX(read_window) AS end_date,
window,
user_id,
timezone
FROM weekly_partitions
GROUP BY
timezone,
user_id,
DATE(read_window, '+' || (seqnum * 7) || ' day')
),
max_streak AS (
SELECT
MAX(streak) AS max_streak,
start_date AS max_streak_start_date,
end_date AS max_streak_end_date,
window,
user_id
FROM streaks
GROUP BY user_id, window
),
current_streak AS (
SELECT
streak AS current_streak,
start_date AS current_streak_start_date,
end_date AS current_streak_end_date,
window,
user_id
FROM streaks
WHERE CASE
WHEN window = "WEEK" THEN
DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), 'weekday 0', '-14 day') = current_streak_end_date
OR DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), 'weekday 0', '-7 day') = current_streak_end_date
WHEN window = "DAY" THEN
DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone), '-1 day') = current_streak_end_date
OR DATE(LOCAL_TIME(STRFTIME('%Y-%m-%dT%H:%M:%SZ', 'now'), timezone)) = current_streak_end_date
END
GROUP BY user_id, window
)
SELECT
max_streak.user_id,
max_streak.window,
IFNULL(max_streak, 0) AS max_streak,
IFNULL(max_streak_start_date, "N/A") AS max_streak_start_date,
IFNULL(max_streak_end_date, "N/A") AS max_streak_end_date,
IFNULL(current_streak, 0) AS current_streak,
IFNULL(current_streak_start_date, "N/A") AS current_streak_start_date,
IFNULL(current_streak_end_date, "N/A") AS current_streak_end_date
FROM max_streak
LEFT JOIN current_streak ON
current_streak.user_id = max_streak.user_id
AND current_streak.window = max_streak.window;
--------------------------------
------- Document Stats ---------
--------------------------------
CREATE VIEW view_document_user_statistics AS
WITH intermediate_ga AS (
SELECT
ga1.id AS row_id,
ga1.user_id,
ga1.document_id,
ga1.duration,
ga1.start_time,
ga1.start_percentage,
ga1.end_percentage,
-- Find Overlapping Events (Assign Unique ID)
(
SELECT MIN(id)
FROM activity AS ga2
WHERE
ga1.document_id = ga2.document_id
AND ga1.user_id = ga2.user_id
AND ga1.start_percentage <= ga2.end_percentage
AND ga1.end_percentage >= ga2.start_percentage
) AS group_leader
FROM activity AS ga1
),
grouped_activity AS (
SELECT
user_id,
document_id,
MAX(start_time) AS start_time,
MIN(start_percentage) AS start_percentage,
MAX(end_percentage) AS end_percentage,
MAX(end_percentage) - MIN(start_percentage) AS read_percentage,
SUM(duration) AS duration
FROM intermediate_ga
GROUP BY group_leader
),
current_progress AS (
SELECT
user_id,
document_id,
COALESCE((
SELECT percentage
FROM document_progress AS dp
WHERE
dp.user_id = iga.user_id
AND dp.document_id = iga.document_id
ORDER BY created_at DESC
LIMIT 1
), end_percentage) AS percentage
FROM intermediate_ga AS iga
GROUP BY user_id, document_id
HAVING MAX(start_time)
)
SELECT
ga.document_id,
ga.user_id,
cp.percentage,
MAX(start_time) AS last_read,
SUM(read_percentage) AS read_percentage,
-- All Time WPM
SUM(duration) AS total_time_seconds,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
AS total_words_read,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
/ (SUM(duration) / 60.0) AS total_wpm,
-- Yearly WPM
SUM(CASE WHEN start_time >= DATE('now', '-1 year') THEN duration ELSE 0 END)
AS yearly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-1 year') THEN read_percentage
ELSE 0
END
)
)
AS yearly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-1 year') THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN start_time >= DATE('now', '-1 year') THEN duration
END
)
/ 60.0
), 0.0)
AS yearly_wpm,
-- Monthly WPM
SUM(
CASE WHEN start_time >= DATE('now', '-1 month') THEN duration ELSE 0 END
)
AS monthly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-1 month') THEN read_percentage
ELSE 0
END
)
)
AS monthly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-1 month') THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN start_time >= DATE('now', '-1 month') THEN duration
END
)
/ 60.0
), 0.0)
AS monthly_wpm,
-- Weekly WPM
SUM(CASE WHEN start_time >= DATE('now', '-7 days') THEN duration ELSE 0 END)
AS weekly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-7 days') THEN read_percentage
ELSE 0
END
)
)
AS weekly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN start_time >= DATE('now', '-7 days') THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN start_time >= DATE('now', '-7 days') THEN duration
END
)
/ 60.0
), 0.0)
AS weekly_wpm
FROM grouped_activity AS ga
INNER JOIN
current_progress AS cp
ON ga.user_id = cp.user_id AND ga.document_id = cp.document_id
INNER JOIN
documents AS d
ON ga.document_id = d.id
GROUP BY ga.document_id, ga.user_id
ORDER BY total_wpm DESC;

View File

@ -3,6 +3,7 @@ package search
import (
"fmt"
"io"
"net/url"
"regexp"
"strings"
@ -11,50 +12,14 @@ import (
var commentRE = regexp.MustCompile(`(?s)<!--(.*?)-->`)
// parseAnnasArchiveDownloadURL reads an HTML page from body, closes it, and
// returns the download URL derived from the first link matched by the
// selector "body > table > tbody > tr > td > a".
//
// The link's href has backslashes replaced with forward slashes and is then
// appended to "http://libgen.li/". An error is returned when the page cannot
// be parsed or when no matching link with an href exists.
func parseAnnasArchiveDownloadURL(body io.ReadCloser) (string, error) {
	// Parse
	defer body.Close()
	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		// Surface parse failures instead of calling Find on an unusable
		// document.
		return "", err
	}

	// Return Download URL
	downloadPath, exists := doc.Find("body > table > tbody > tr > td > a").Attr("href")
	if !exists {
		return "", fmt.Errorf("Download URL not found")
	}

	// Possible Funky URL
	downloadPath = strings.ReplaceAll(downloadPath, "\\", "/")
	return fmt.Sprintf("http://libgen.li/%s", downloadPath), nil
}
// getAnnasArchiveBookSelection parses potentially commented out HTML. For some reason
// Annas Archive comments out blocks "below the fold". They aren't rendered until you
// scroll. This attempts to parse the commented out HTML.
func getAnnasArchiveBookSelection(rawBook *goquery.Selection) *goquery.Selection {
rawHTML, err := rawBook.Html()
func searchAnnasArchive(query string) ([]SearchItem, error) {
searchURL := "https://annas-archive.org/search?index=&q=%s&ext=epub&sort=&lang=en"
url := fmt.Sprintf(searchURL, url.QueryEscape(query))
body, err := getPage(url)
if err != nil {
return rawBook
return nil, err
}
strippedHTML := strings.TrimSpace(rawHTML)
if !strings.HasPrefix(strippedHTML, "<!--") || !strings.HasSuffix(strippedHTML, "-->") {
return rawBook
}
allMatches := commentRE.FindAllStringSubmatch(strippedHTML, -1)
if len(allMatches) != 1 || len(allMatches[0]) != 2 {
return rawBook
}
captureGroup := allMatches[0][1]
docReader := strings.NewReader(captureGroup)
doc, err := goquery.NewDocumentFromReader(docReader)
if err != nil {
return rawBook
}
return doc.Selection
return parseAnnasArchive(body)
}
func parseAnnasArchive(body io.ReadCloser) ([]SearchItem, error) {
@ -107,3 +72,32 @@ func parseAnnasArchive(body io.ReadCloser) ([]SearchItem, error) {
// Return Results
return allEntries, nil
}
// getAnnasArchiveBookSelection unwraps a search result whose markup has been
// commented out. Annas Archive ships "below the fold" entries inside an HTML
// comment and only renders them on scroll, so the inner HTML of such an entry
// is a single "<!-- ... -->" block.
//
// When the inner HTML of rawBook is exactly one such comment, the comment
// body is parsed and the resulting document selection is returned. In every
// other case (HTML cannot be read, not wrapped in a comment, unexpected match
// count, or parse failure) rawBook is returned unchanged.
func getAnnasArchiveBookSelection(rawBook *goquery.Selection) *goquery.Selection {
	inner, err := rawBook.Html()
	if err != nil {
		return rawBook
	}

	// Only handle content that is wrapped in a comment from start to end.
	trimmed := strings.TrimSpace(inner)
	wrapped := strings.HasPrefix(trimmed, "<!--") && strings.HasSuffix(trimmed, "-->")
	if !wrapped {
		return rawBook
	}

	// Expect exactly one comment, yielding the full match plus one capture.
	matches := commentRE.FindAllStringSubmatch(trimmed, -1)
	if len(matches) != 1 || len(matches[0]) != 2 {
		return rawBook
	}

	// Parse the comment body as its own document.
	parsed, err := goquery.NewDocumentFromReader(strings.NewReader(matches[0][1]))
	if err != nil {
		return rawBook
	}
	return parsed.Selection
}

69
search/downloaders.go Normal file
View File

@ -0,0 +1,69 @@
package search
import (
"errors"
"fmt"
"strings"
"github.com/PuerkitoBio/goquery"
)
// getLibGenDownloadURL fetches the libgen.li ads page for md5 and returns the
// download URL found on it as a single-element slice. The Source argument is
// ignored.
//
// An error is returned when the page cannot be fetched or parsed, or when the
// expected link is missing.
func getLibGenDownloadURL(md5 string, _ Source) ([]string, error) {
	// Fetch the ads page for this hash.
	page, err := getPage("http://libgen.li/ads.php?md5=" + md5)
	if err != nil {
		return nil, err
	}
	defer page.Close()

	// Build a queryable document from the response.
	parsed, err := goquery.NewDocumentFromReader(page)
	if err != nil {
		return nil, err
	}

	// Locate the download anchor and read its target.
	anchor := parsed.Find("body > table > tbody > tr > td > a")
	href, ok := anchor.Attr("href")
	if !ok {
		return nil, fmt.Errorf("Download URL not found")
	}

	// The href may contain backslashes; normalise them to forward slashes.
	href = strings.ReplaceAll(href, "\\", "/")
	return []string{"http://libgen.li/" + href}, nil
}
// getLibraryDownloadURL fetches the library.lol info page for md5 and returns
// the download link found on it as a single-element slice.
//
// Fiction and Annas Archive sources use the "fiction" section, non-fiction
// uses "main"; any other source yields an "invalid source" error. An error is
// also returned when the page cannot be fetched or parsed, or when the link
// is missing.
func getLibraryDownloadURL(md5 string, source Source) ([]string, error) {
	// Pick the site section that matches the source.
	var section string
	switch source {
	case SOURCE_LIBGEN_FICTION, SOURCE_ANNAS_ARCHIVE:
		section = "fiction"
	case SOURCE_LIBGEN_NON_FICTION:
		section = "main"
	default:
		return nil, errors.New("invalid source")
	}

	// Fetch the info page.
	page, err := getPage("http://library.lol/" + section + "/" + md5)
	if err != nil {
		return nil, err
	}
	defer page.Close()

	// Build a queryable document from the response.
	parsed, err := goquery.NewDocumentFromReader(page)
	if err != nil {
		return nil, err
	}

	// Alternative selector kept for reference: "#download [href*=cloudflare]"
	link, ok := parsed.Find("#download h2 a").Attr("href")
	if !ok {
		return nil, errors.New("Download URL not found")
	}
	return []string{link}, nil
}

View File

@ -3,12 +3,23 @@ package search
import (
"fmt"
"io"
"net/url"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// searchLibGenFiction requests the libgen.is fiction search page for query
// (query-escaped, English, epub) and hands the response body to
// parseLibGenFiction. A fetch error is returned as-is.
func searchLibGenFiction(query string) ([]SearchItem, error) {
	const endpoint = "https://libgen.is/fiction/?q=%s&language=English&format=epub"

	page, err := getPage(fmt.Sprintf(endpoint, url.QueryEscape(query)))
	if err != nil {
		return nil, err
	}
	return parseLibGenFiction(page)
}
func parseLibGenFiction(body io.ReadCloser) ([]SearchItem, error) {
// Parse
defer body.Close()
@ -62,6 +73,16 @@ func parseLibGenFiction(body io.ReadCloser) ([]SearchItem, error) {
return allEntries, nil
}
// searchLibGenNonFiction requests the libgen.is general search page for query
// (query-escaped) and hands the response body to parseLibGenNonFiction. A
// fetch error is returned as-is.
func searchLibGenNonFiction(query string) ([]SearchItem, error) {
	const endpoint = "https://libgen.is/search.php?req=%s"

	page, err := getPage(fmt.Sprintf(endpoint, url.QueryEscape(query)))
	if err != nil {
		return nil, err
	}
	return parseLibGenNonFiction(page)
}
func parseLibGenNonFiction(body io.ReadCloser) ([]SearchItem, error) {
// Parse
defer body.Close()
@ -106,18 +127,3 @@ func parseLibGenNonFiction(body io.ReadCloser) ([]SearchItem, error) {
// Return Results
return allEntries, nil
}
// parseLibGenDownloadURL reads an HTML page from body, closes it, and returns
// the href of the first link matched by the selector "#download h2 a".
//
// An error is returned when the page cannot be parsed or when no matching
// link with an href exists.
func parseLibGenDownloadURL(body io.ReadCloser) (string, error) {
	// Parse
	defer body.Close()
	doc, err := goquery.NewDocumentFromReader(body)
	if err != nil {
		// Surface parse failures instead of calling Find on an unusable
		// document.
		return "", err
	}

	// Return Download URL
	// downloadURL, _ := doc.Find("#download [href*=cloudflare]").Attr("href")
	downloadURL, exists := doc.Find("#download h2 a").Attr("href")
	if !exists {
		return "", fmt.Errorf("Download URL not found")
	}

	return downloadURL, nil
}

22
search/progress.go Normal file
View File

@ -0,0 +1,22 @@
package search
// writeCounter is a write sink that tallies the bytes passed through it and
// reports progress as a percentage of Total.
type writeCounter struct {
	// Total is the expected overall byte count.
	Total int64
	// Current is the running count of bytes seen so far.
	Current int64
	// ProgressFunction, when set, receives the completion percentage after
	// each write.
	ProgressFunction func(float32)
}

// Write adds len(p) to the running count, reports progress, and always
// succeeds with the full length of p. The bytes themselves are not stored.
func (wc *writeCounter) Write(p []byte) (int, error) {
	written := len(p)
	wc.Current += int64(written)
	wc.flushProgress()
	return written, nil
}

// flushProgress invokes ProgressFunction with Current as a percentage of
// Total. Nothing is reported when no callback is set or when Total is below
// 100000 bytes.
func (wc *writeCounter) flushProgress() {
	if wc.ProgressFunction != nil && wc.Total >= 100000 {
		wc.ProgressFunction(float32(wc.Current) * 100 / float32(wc.Total))
	}
}

View File

@ -2,17 +2,18 @@ package search
import (
"crypto/tls"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"os"
"time"
log "github.com/sirupsen/logrus"
"reichard.io/antholume/metadata"
)
const userAgent string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0"
const userAgent string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
type Cadence string
@ -21,13 +22,6 @@ const (
CADENCE_TOP_MONTH Cadence = "m"
)
type BookType int
const (
BOOK_FICTION BookType = iota
BOOK_NON_FICTION
)
type Source string
const (
@ -47,108 +41,58 @@ type SearchItem struct {
UploadDate string
}
type sourceDef struct {
searchURL string
downloadURL string
parseSearchFunc func(io.ReadCloser) ([]SearchItem, error)
parseDownloadFunc func(io.ReadCloser) (string, error)
type searchFunc func(query string) (searchResults []SearchItem, err error)
type downloadFunc func(md5 string, source Source) (downloadURL []string, err error)
var searchDefs = map[Source]searchFunc{
SOURCE_ANNAS_ARCHIVE: searchAnnasArchive,
SOURCE_LIBGEN_FICTION: searchLibGenFiction,
SOURCE_LIBGEN_NON_FICTION: searchLibGenNonFiction,
}
var sourceDefs = map[Source]sourceDef{
SOURCE_ANNAS_ARCHIVE: {
searchURL: "https://annas-archive.org/search?index=&q=%s&ext=epub&sort=&lang=en",
downloadURL: "http://libgen.li/ads.php?md5=%s",
parseSearchFunc: parseAnnasArchive,
parseDownloadFunc: parseAnnasArchiveDownloadURL,
},
SOURCE_LIBGEN_FICTION: {
searchURL: "https://libgen.is/fiction/?q=%s&language=English&format=epub",
downloadURL: "http://libgen.li/ads.php?md5=%s",
parseSearchFunc: parseLibGenFiction,
parseDownloadFunc: parseAnnasArchiveDownloadURL,
},
SOURCE_LIBGEN_NON_FICTION: {
searchURL: "https://libgen.is/search.php?req=%s",
downloadURL: "http://libgen.li/ads.php?md5=%s",
parseSearchFunc: parseLibGenNonFiction,
parseDownloadFunc: parseAnnasArchiveDownloadURL,
},
var downloadFuncs = []downloadFunc{
getLibGenDownloadURL,
getLibraryDownloadURL,
}
func SearchBook(query string, source Source) ([]SearchItem, error) {
def := sourceDefs[source]
log.Debug("Source: ", def)
url := fmt.Sprintf(def.searchURL, url.QueryEscape(query))
body, err := getPage(url)
if err != nil {
return nil, err
searchFunc, found := searchDefs[source]
if !found {
return nil, fmt.Errorf("invalid source: %s", source)
}
return def.parseSearchFunc(body)
log.Debug("Source: ", source)
return searchFunc(query)
}
func SaveBook(id string, source Source) (string, error) {
def := sourceDefs[source]
log.Debug("Source: ", def)
url := fmt.Sprintf(def.downloadURL, id)
func SaveBook(md5 string, source Source, progressFunc func(float32)) (string, *metadata.MetadataInfo, error) {
for _, f := range downloadFuncs {
downloadURLs, err := f(md5, source)
if err != nil {
log.Error("failed to acquire download urls")
continue
}
body, err := getPage(url)
if err != nil {
return "", err
for _, bookURL := range downloadURLs {
// Download File
log.Info("Downloading Book: ", bookURL)
fileName, err := downloadBook(bookURL, progressFunc)
if err != nil {
log.Error("Book URL API Failure: ", err)
continue
}
// Get Metadata
metadata, err := metadata.GetMetadata(fileName)
if err != nil {
log.Error("Book Metadata Failure: ", err)
continue
}
return fileName, metadata, nil
}
}
bookURL, err := def.parseDownloadFunc(body)
if err != nil {
log.Error("Parse Download URL Error: ", err)
return "", fmt.Errorf("Download Failure")
}
// Create File
tempFile, err := os.CreateTemp("", "book")
if err != nil {
log.Error("File Create Error: ", err)
return "", fmt.Errorf("File Failure")
}
defer tempFile.Close()
// Download File
log.Info("Downloading Book: ", bookURL)
resp, err := downloadBook(bookURL)
if err != nil {
os.Remove(tempFile.Name())
log.Error("Book URL API Failure: ", err)
return "", fmt.Errorf("API Failure")
}
defer resp.Body.Close()
// Copy File to Disk
log.Info("Saving Book")
_, err = io.Copy(tempFile, resp.Body)
if err != nil {
os.Remove(tempFile.Name())
log.Error("File Copy Error: ", err)
return "", fmt.Errorf("File Failure")
}
return tempFile.Name(), nil
}
func GetBookURL(id string, bookType BookType) (string, error) {
// Derive Info URL
var infoURL string
if bookType == BOOK_FICTION {
infoURL = "http://library.lol/fiction/" + id
} else if bookType == BOOK_NON_FICTION {
infoURL = "http://library.lol/main/" + id
}
// Parse & Derive Download URL
body, err := getPage(infoURL)
if err != nil {
return "", err
}
// downloadURL := parseLibGenDownloadURL(body)
return parseLibGenDownloadURL(body)
return "", nil, errors.New("failed to download book")
}
func getPage(page string) (io.ReadCloser, error) {
@ -162,8 +106,6 @@ func getPage(page string) (io.ReadCloser, error) {
if err != nil {
return nil, err
}
// Set User-Agent
req.Header.Set("User-Agent", userAgent)
// Do Request
@ -176,7 +118,7 @@ func getPage(page string) (io.ReadCloser, error) {
return resp.Body, err
}
func downloadBook(bookURL string) (*http.Response, error) {
func downloadBook(bookURL string, progressFunc func(float32)) (string, error) {
log.Debug("URL: ", bookURL)
// Allow Insecure
@ -189,11 +131,33 @@ func downloadBook(bookURL string) (*http.Response, error) {
// Start Request
req, err := http.NewRequest("GET", bookURL, nil)
if err != nil {
return nil, err
return "", err
}
// Set User-Agent
req.Header.Set("User-Agent", userAgent)
return client.Do(req)
// Perform API Request
resp, err := client.Do(req)
if err != nil {
return "", err
}
// Create File
tempFile, err := os.CreateTemp("", "book")
if err != nil {
log.Error("File Create Error: ", err)
return "", fmt.Errorf("failed to create temp file: %w", err)
}
defer tempFile.Close()
// Copy File to Disk
log.Info("Saving Book")
counter := &writeCounter{Total: resp.ContentLength, ProgressFunction: progressFunc}
_, err = io.Copy(tempFile, io.TeeReader(resp.Body, counter))
if err != nil {
os.Remove(tempFile.Name())
log.Error("File Copy Error: ", err)
return "", fmt.Errorf("failed to copy response to temp file: %w", err)
}
return tempFile.Name(), nil
}