From 1b8b5060f1ca8d540ebf6a071617aae3cd725185 Mon Sep 17 00:00:00 2001
From: Evan Reichard
Date: Thu, 26 Oct 2023 20:20:58 -0400
Subject: [PATCH] [fix] server word count, [add] client word count

---
 assets/reader/index.js    | 23 ++++++++++++++++++++++-
 metadata/epub.go          | 31 +++----------------------------
 metadata/metadata_test.go |  2 +-
 3 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/assets/reader/index.js b/assets/reader/index.js
index 8baf329..d335c38 100644
--- a/assets/reader/index.js
+++ b/assets/reader/index.js
@@ -45,9 +45,15 @@ class EBookReader {
    * Load progress and generate locations
    **/
   async setupReader() {
+    // Get Word Count (If Needed)
+    if (this.bookState.words == 0)
+      this.bookState.words = await this.countWords();
+
     // Load Progress
     let { cfi } = await this.getCFIFromXPath(this.bookState.progress);
-    if (!cfi) this.bookState.currentWord = 0;
+    this.bookState.currentWord = cfi
+      ? this.bookState.percentage * (this.bookState.words / 100)
+      : 0;
 
     let getStats = function () {
       // Start Timer
@@ -1038,6 +1044,21 @@ class EBookReader {
     );
   }
 
+  /**
+   * Count the words of the book. Useful for keeping a more accurate track
+   * of progress percentage. Implementation returns the same number as the
+   * server side implementation.
+   **/
+  countWords() {
+    // Iterate over each item in the spine, render, and count words.
+    return this.book.spine.spineItems.reduce(async (totalCount, item) => {
+      let currentCount = await totalCount;
+      let newDoc = await item.load(this.book.load.bind(this.book));
+      let itemCount = newDoc.innerText.trim().split(/\s+/).length;
+      return currentCount + itemCount;
+    }, 0);
+  }
+
   /**
    * Save settings to localStorage
    **/
diff --git a/metadata/epub.go b/metadata/epub.go
index e5904a1..b518ba8 100644
--- a/metadata/epub.go
+++ b/metadata/epub.go
@@ -1,11 +1,10 @@
 package metadata
 
 import (
-	"io"
 	"strings"
 
+	"github.com/PuerkitoBio/goquery"
 	"github.com/taylorskalyo/goreader/epub"
-	"golang.org/x/net/html"
 )
 
 func getEPUBMetadata(filepath string) (*MetadataInfo, error) {
@@ -32,33 +31,9 @@ func countEPUBWords(filepath string) (int64, error) {
 	var completeCount int64
 	for _, item := range rf.Spine.Itemrefs {
 		f, _ := item.Open()
-		tokenizer := html.NewTokenizer(f)
-		newCount, err := countTokenizerWords(*tokenizer)
-		if err != nil {
-			return 0, err
-		}
-		completeCount = completeCount + newCount
+		doc, _ := goquery.NewDocumentFromReader(f)
+		completeCount = completeCount + int64(len(strings.Fields(doc.Text())))
 	}
 
 	return completeCount, nil
 }
-
-func countTokenizerWords(tokenizer html.Tokenizer) (int64, error) {
-	var err error
-	var totalWords int64
-	for {
-		tokenType := tokenizer.Next()
-		token := tokenizer.Token()
-		if tokenType == html.TextToken {
-			currStr := string(token.Data)
-			totalWords = totalWords + int64(len(strings.Fields(currStr)))
-		} else if tokenType == html.ErrorToken {
-			err = tokenizer.Err()
-		}
-		if err == io.EOF {
-			return totalWords, nil
-		} else if err != nil {
-			return 0, err
-		}
-	}
-}
diff --git a/metadata/metadata_test.go b/metadata/metadata_test.go
index 322591b..e4a47a4 100644
--- a/metadata/metadata_test.go
+++ b/metadata/metadata_test.go
@@ -5,7 +5,7 @@ import (
 )
 
 func TestGetWordCount(t *testing.T) {
-	var want int64 = 30477
+	var want int64 = 30080
 
 	wordCount, err := countEPUBWords("../_test_files/alice.epub")
 	if wordCount != want {