From acf4119d9ab6409a0cd35bea3e030970fc44ccbe Mon Sep 17 00:00:00 2001 From: Evan Reichard Date: Sat, 25 Jan 2025 15:03:07 -0500 Subject: [PATCH] fix(sql): document user stats --- database/document_user_statistics.sql | 279 ++++++++++---------------- 1 file changed, 101 insertions(+), 178 deletions(-) diff --git a/database/document_user_statistics.sql b/database/document_user_statistics.sql index 332842a..db4aab0 100644 --- a/database/document_user_statistics.sql +++ b/database/document_user_statistics.sql @@ -1,54 +1,84 @@ -WITH new_activity AS ( +WITH grouped_activity AS ( SELECT - document_id, - user_id - FROM activity - WHERE - created_at > COALESCE( - (SELECT MAX(last_seen) FROM document_user_statistics), - '1970-01-01T00:00:00Z' - ) - GROUP BY user_id, document_id -), - -intermediate_ga AS ( - SELECT - ga.id AS row_id, ga.user_id, ga.document_id, - ga.duration, - ga.start_time, - ga.start_percentage, - ga.end_percentage, - ga.created_at, + MAX(ga.created_at) AS created_at, + MAX(ga.start_time) AS start_time, + MIN(ga.start_percentage) AS start_percentage, + MAX(ga.end_percentage) AS end_percentage, + + -- Total Duration & Percentage + SUM(ga.duration) AS total_time_seconds, + SUM(ga.end_percentage - ga.start_percentage) AS total_read_percentage, + + -- Yearly Duration + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-1 year') + THEN ga.duration + ELSE 0 + END + ) + AS yearly_time_seconds, + + -- Yearly Percentage + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-1 year') + THEN ga.end_percentage - ga.start_percentage + ELSE 0 + END + ) + AS yearly_read_percentage, + + -- Monthly Duration + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-1 month') + THEN ga.duration + ELSE 0 + END + ) + AS monthly_time_seconds, + + -- Monthly Percentage + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-1 month') + THEN ga.end_percentage - ga.start_percentage + ELSE 0 + END + ) + AS monthly_read_percentage, + + -- Weekly Duration + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-7 days') + THEN ga.duration + ELSE 0 + END + ) + AS weekly_time_seconds, + + -- Weekly Percentage + SUM( + CASE + WHEN + ga.start_time >= DATE('now', '-7 days') + THEN ga.end_percentage - ga.start_percentage + ELSE 0 + END + ) + AS weekly_read_percentage - -- Find Overlapping Events (Assign Unique ID) - ( - SELECT MIN(id) - FROM activity AS overlap - WHERE - ga.document_id = overlap.document_id - AND ga.user_id = overlap.user_id - AND ga.start_percentage <= overlap.end_percentage - AND ga.end_percentage >= overlap.start_percentage - ) AS group_leader FROM activity AS ga - INNER JOIN new_activity AS na - WHERE na.user_id = ga.user_id AND na.document_id = ga.document_id -), - -grouped_activity AS ( - SELECT - user_id, - document_id, - MAX(created_at) AS created_at, - MAX(start_time) AS start_time, - MIN(start_percentage) AS start_percentage, - MAX(end_percentage) AS end_percentage, - MAX(end_percentage) - MIN(start_percentage) AS read_percentage, - SUM(duration) AS duration - FROM intermediate_ga - GROUP BY group_leader + GROUP BY ga.user_id, ga.document_id ), current_progress AS ( @@ -56,17 +86,15 @@ current_progress AS ( user_id, document_id, COALESCE(( - SELECT percentage + SELECT dp.percentage FROM document_progress AS dp WHERE dp.user_id = iga.user_id AND dp.document_id = iga.document_id - ORDER BY created_at DESC + ORDER BY dp.created_at DESC LIMIT 1 ), end_percentage) AS percentage - FROM intermediate_ga AS iga - GROUP BY user_id, document_id - HAVING MAX(start_time) + FROM grouped_activity AS iga ) INSERT INTO document_user_statistics @@ -76,145 +104,40 @@ SELECT cp.percentage, MAX(ga.start_time) AS last_read, MAX(ga.created_at) AS last_seen, - SUM(ga.read_percentage) AS read_percentage, + SUM(ga.total_read_percentage) AS read_percentage, -- All Time WPM - SUM(ga.duration) AS total_time_seconds, - (CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage)) + SUM(ga.total_time_seconds) AS total_time_seconds, + (CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(ga.total_read_percentage)) AS total_words_read, - (CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(read_percentage)) - / (SUM(ga.duration) / 60.0) AS total_wpm, + (CAST(COALESCE(d.words, 0.0) AS REAL) * SUM(ga.total_read_percentage)) + / (SUM(ga.total_time_seconds) / 60.0) AS total_wpm, -- Yearly WPM - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 year') - THEN ga.duration - ELSE 0 - END - ) - AS yearly_time_seconds, - ( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 year') - THEN read_percentage - ELSE 0 - END - ) - ) + ga.yearly_time_seconds, + CAST(COALESCE(d.words, 0.0) AS REAL) * ga.yearly_read_percentage AS yearly_words_read, - COALESCE(( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 year') - THEN read_percentage - END - ) - ) - / ( - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 year') - THEN ga.duration - END - ) - / 60.0 - ), 0.0) + COALESCE( + (CAST(COALESCE(d.words, 0.0) AS REAL) * ga.yearly_read_percentage) + / (ga.yearly_time_seconds / 60), 0.0) AS yearly_wpm, - -- Monthly WPM - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 month') - THEN ga.duration - ELSE 0 - END - ) - AS monthly_time_seconds, - ( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 month') - THEN read_percentage - ELSE 0 - END - ) - ) + -- Monthly WPM + ga.monthly_time_seconds, + CAST(COALESCE(d.words, 0.0) AS REAL) * ga.monthly_read_percentage AS monthly_words_read, - COALESCE(( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 month') - THEN read_percentage - END - ) - ) - / ( - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-1 month') - THEN ga.duration - END - ) - / 60.0 - ), 0.0) + COALESCE( + (CAST(COALESCE(d.words, 0.0) AS REAL) * ga.monthly_read_percentage) + / (ga.monthly_time_seconds / 60), 0.0) AS monthly_wpm, - -- Weekly WPM - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-7 days') - THEN ga.duration - ELSE 0 - END - ) - AS weekly_time_seconds, - ( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-7 days') - THEN read_percentage - ELSE 0 - END - ) - ) + -- Weekly WPM + ga.weekly_time_seconds, + CAST(COALESCE(d.words, 0.0) AS REAL) * ga.weekly_read_percentage AS weekly_words_read, - COALESCE(( - CAST(COALESCE(d.words, 0.0) AS REAL) - * SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-7 days') - THEN read_percentage - END - ) - ) - / ( - SUM( - CASE - WHEN - ga.start_time >= DATE('now', '-7 days') - THEN ga.duration - END - ) - / 60.0 - ), 0.0) + COALESCE( + (CAST(COALESCE(d.words, 0.0) AS REAL) * ga.weekly_read_percentage) + / (ga.weekly_time_seconds / 60), 0.0) AS weekly_wpm FROM grouped_activity AS ga