fix(sql): document user stats
Some checks reported errors
continuous-integration/drone/push Build is passing
continuous-integration/drone Build was killed

This commit is contained in:
Evan Reichard 2025-01-25 15:03:07 -05:00
parent f6dd8cee50
commit acf4119d9a

View File

@ -1,54 +1,84 @@
WITH new_activity AS (
WITH grouped_activity AS (
SELECT
document_id,
user_id
FROM activity
WHERE
created_at > COALESCE(
(SELECT MAX(last_seen) FROM document_user_statistics),
'1970-01-01T00:00:00Z'
)
GROUP BY user_id, document_id
),
intermediate_ga AS (
SELECT
ga.id AS row_id,
ga.user_id,
ga.document_id,
ga.duration,
ga.start_time,
ga.start_percentage,
ga.end_percentage,
ga.created_at,
MAX(ga.created_at) AS created_at,
MAX(ga.start_time) AS start_time,
MIN(ga.start_percentage) AS start_percentage,
MAX(ga.end_percentage) AS end_percentage,
-- Total Duration & Percentage
SUM(ga.duration) AS total_time_seconds,
SUM(ga.end_percentage - ga.start_percentage) AS total_read_percentage,
-- Yearly Duration
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.duration
ELSE 0
END
)
AS yearly_time_seconds,
-- Yearly Percentage
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.end_percentage - ga.start_percentage
ELSE 0
END
)
AS yearly_read_percentage,
-- Monthly Duration
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.duration
ELSE 0
END
)
AS monthly_time_seconds,
-- Monthly Percentage
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.end_percentage - ga.start_percentage
ELSE 0
END
)
AS monthly_read_percentage,
-- Weekly Duration
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.duration
ELSE 0
END
)
AS weekly_time_seconds,
-- Weekly Percentage
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.end_percentage - ga.start_percentage
ELSE 0
END
)
AS weekly_read_percentage
-- Find Overlapping Events (Assign Unique ID)
(
SELECT MIN(id)
FROM activity AS overlap
WHERE
ga.document_id = overlap.document_id
AND ga.user_id = overlap.user_id
AND ga.start_percentage <= overlap.end_percentage
AND ga.end_percentage >= overlap.start_percentage
) AS group_leader
FROM activity AS ga
INNER JOIN new_activity AS na
WHERE na.user_id = ga.user_id AND na.document_id = ga.document_id
),
grouped_activity AS (
SELECT
user_id,
document_id,
MAX(created_at) AS created_at,
MAX(start_time) AS start_time,
MIN(start_percentage) AS start_percentage,
MAX(end_percentage) AS end_percentage,
MAX(end_percentage) - MIN(start_percentage) AS read_percentage,
SUM(duration) AS duration
FROM intermediate_ga
GROUP BY group_leader
GROUP BY ga.user_id, ga.document_id
),
current_progress AS (
@ -56,17 +86,15 @@ current_progress AS (
user_id,
document_id,
COALESCE((
SELECT percentage
SELECT dp.percentage
FROM document_progress AS dp
WHERE
dp.user_id = iga.user_id
AND dp.document_id = iga.document_id
ORDER BY created_at DESC
ORDER BY dp.created_at DESC
LIMIT 1
), end_percentage) AS percentage
FROM intermediate_ga AS iga
GROUP BY user_id, document_id
HAVING MAX(start_time)
FROM grouped_activity AS iga
)
INSERT INTO document_user_statistics
@ -76,145 +104,40 @@ SELECT
cp.percentage,
MAX(ga.start_time) AS last_read,
MAX(ga.created_at) AS last_seen,
SUM(ga.read_percentage) AS read_percentage,
SUM(ga.total_read_percentage) AS read_percentage,
-- All Time WPM
SUM(ga.duration) AS total_time_seconds,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
SUM(ga.total_time_seconds) AS total_time_seconds,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(ga.total_read_percentage))
AS total_words_read,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage))
/ (SUM(ga.duration) / 60.0) AS total_wpm,
(CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(ga.total_read_percentage))
/ (SUM(ga.total_time_seconds) / 60.0) AS total_wpm,
-- Yearly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.duration
ELSE 0
END
)
AS yearly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN read_percentage
ELSE 0
END
)
)
ga.yearly_time_seconds,
CAST(COALESCE(d.words, 0.0) AS REAL) * ga.yearly_read_percentage
AS yearly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 year')
THEN ga.duration
END
)
/ 60.0
), 0.0)
COALESCE(
(CAST(COALESCE(d.words, 0.0) AS REAL) * ga.yearly_read_percentage)
/ (ga.yearly_time_seconds / 60), 0.0)
AS yearly_wpm,
-- Monthly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.duration
ELSE 0
END
)
AS monthly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN read_percentage
ELSE 0
END
)
)
-- Monthly WPM
ga.monthly_time_seconds,
CAST(COALESCE(d.words, 0.0) AS REAL) * ga.monthly_read_percentage
AS monthly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-1 month')
THEN ga.duration
END
)
/ 60.0
), 0.0)
COALESCE(
(CAST(COALESCE(d.words, 0.0) AS REAL) * ga.monthly_read_percentage)
/ (ga.monthly_time_seconds / 60), 0.0)
AS monthly_wpm,
-- Weekly WPM
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.duration
ELSE 0
END
)
AS weekly_time_seconds,
(
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN read_percentage
ELSE 0
END
)
)
-- Weekly WPM
ga.weekly_time_seconds,
CAST(COALESCE(d.words, 0.0) AS REAL) * ga.weekly_read_percentage
AS weekly_words_read,
COALESCE((
CAST(COALESCE(d.words, 0.0) AS REAL)
* SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN read_percentage
END
)
)
/ (
SUM(
CASE
WHEN
ga.start_time >= DATE('now', '-7 days')
THEN ga.duration
END
)
/ 60.0
), 0.0)
COALESCE(
(CAST(COALESCE(d.words, 0.0) AS REAL) * ga.weekly_read_percentage)
/ (ga.weekly_time_seconds / 60), 0.0)
AS weekly_wpm
FROM grouped_activity AS ga