[fix] server word count, [add] client word count
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
@@ -1,11 +1,10 @@
|
||||
package metadata
|
||||
|
||||
import (
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/taylorskalyo/goreader/epub"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
func getEPUBMetadata(filepath string) (*MetadataInfo, error) {
|
||||
@@ -32,33 +31,9 @@ func countEPUBWords(filepath string) (int64, error) {
|
||||
var completeCount int64
|
||||
for _, item := range rf.Spine.Itemrefs {
|
||||
f, _ := item.Open()
|
||||
tokenizer := html.NewTokenizer(f)
|
||||
newCount, err := countTokenizerWords(*tokenizer)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
completeCount = completeCount + newCount
|
||||
doc, _ := goquery.NewDocumentFromReader(f)
|
||||
completeCount = completeCount + int64(len(strings.Fields(doc.Text())))
|
||||
}
|
||||
|
||||
return completeCount, nil
|
||||
}
|
||||
|
||||
func countTokenizerWords(tokenizer html.Tokenizer) (int64, error) {
|
||||
var err error
|
||||
var totalWords int64
|
||||
for {
|
||||
tokenType := tokenizer.Next()
|
||||
token := tokenizer.Token()
|
||||
if tokenType == html.TextToken {
|
||||
currStr := string(token.Data)
|
||||
totalWords = totalWords + int64(len(strings.Fields(currStr)))
|
||||
} else if tokenType == html.ErrorToken {
|
||||
err = tokenizer.Err()
|
||||
}
|
||||
if err == io.EOF {
|
||||
return totalWords, nil
|
||||
} else if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
)
|
||||
|
||||
func TestGetWordCount(t *testing.T) {
|
||||
var want int64 = 30477
|
||||
var want int64 = 30080
|
||||
wordCount, err := countEPUBWords("../_test_files/alice.epub")
|
||||
|
||||
if wordCount != want {
|
||||
|
||||
Reference in New Issue
Block a user