diff --git a/api/api.go b/api/api.go index c26650b..9ef9fb1 100644 --- a/api/api.go +++ b/api/api.go @@ -82,6 +82,7 @@ func (api *API) registerWebAppRoutes() { render.AddFromFilesFuncs("login", helperFuncs, "templates/login.html") render.AddFromFilesFuncs("home", helperFuncs, "templates/base.html", "templates/home.html") + render.AddFromFilesFuncs("search", helperFuncs, "templates/base.html", "templates/search.html") render.AddFromFilesFuncs("settings", helperFuncs, "templates/base.html", "templates/settings.html") render.AddFromFilesFuncs("activity", helperFuncs, "templates/base.html", "templates/activity.html") render.AddFromFilesFuncs("documents", helperFuncs, "templates/base.html", "templates/documents.html") @@ -107,6 +108,12 @@ func (api *API) registerWebAppRoutes() { api.Router.POST("/documents/:document/edit", api.authWebAppMiddleware, api.editDocument) api.Router.POST("/documents/:document/identify", api.authWebAppMiddleware, api.identifyDocument) api.Router.POST("/documents/:document/delete", api.authWebAppMiddleware, api.deleteDocument) + + // Behind Configuration Flag + if api.Config.SearchEnabled { + api.Router.GET("/search", api.authWebAppMiddleware, api.createAppResourcesRoute("search")) + api.Router.POST("/search", api.authWebAppMiddleware, api.saveNewDocument) + } } func (api *API) registerKOAPIRoutes(apiGroup *gin.RouterGroup) { diff --git a/api/app-routes.go b/api/app-routes.go index d202e72..60da961 100644 --- a/api/app-routes.go +++ b/api/app-routes.go @@ -17,6 +17,8 @@ import ( "golang.org/x/exp/slices" "reichard.io/bbank/database" "reichard.io/bbank/metadata" + "reichard.io/bbank/search" + "reichard.io/bbank/utils" ) type queryParams struct { @@ -25,6 +27,11 @@ type queryParams struct { Document *string `form:"document"` } +type searchParams struct { + Query *string `form:"query"` + BookType *string `form:"book_type"` +} + type requestDocumentEdit struct { Title *string `form:"title"` Author *string `form:"author"` @@ -48,6 +55,13 @@ type 
requestSettingsEdit struct { TimeOffset *string `form:"time_offset"` } +type requestDocumentAdd struct { + ID *string `form:"id"` + Title *string `form:"title"` + Author *string `form:"author"` + BookType *string `form:"book_type"` +} + func (api *API) webManifest(c *gin.Context) { c.Header("Content-Type", "application/manifest+json") c.File("./assets/manifest.json") @@ -60,6 +74,7 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any templateVarsBase = args[0] } templateVarsBase["RouteName"] = routeName + templateVarsBase["SearchEnabled"] = api.Config.SearchEnabled return func(c *gin.Context) { var userID string @@ -174,6 +189,26 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any }, "Devices": devices, } + } else if routeName == "search" { + var sParams searchParams + c.BindQuery(&sParams) + + // Only Handle Query + if sParams.BookType != nil && !slices.Contains([]string{"NON_FICTION", "FICTION"}, *sParams.BookType) { + templateVars["SearchErrorMessage"] = "Invalid Book Type" + } else if sParams.Query != nil && *sParams.Query == "" { + templateVars["SearchErrorMessage"] = "Invalid Query" + } else if sParams.BookType != nil && sParams.Query != nil { + var bType search.BookType = search.BOOK_FICTION + if *sParams.BookType == "NON_FICTION" { + bType = search.BOOK_NON_FICTION + } + + // Search + searchResults := search.SearchBook(*sParams.Query, bType) + templateVars["Data"] = searchResults + templateVars["BookType"] = *sParams.BookType + } } else if routeName == "login" { templateVars["RegistrationEnabled"] = api.Config.RegistrationEnabled } @@ -430,7 +465,8 @@ func (api *API) identifyDocument(c *gin.Context) { // Template Variables templateVars := gin.H{ - "RelBase": "../../", + "RelBase": "../../", + "SearchEnabled": api.Config.SearchEnabled, } // Get Metadata @@ -479,6 +515,103 @@ func (api *API) identifyDocument(c *gin.Context) { c.HTML(http.StatusOK, "document", templateVars) } +func (api *API) 
saveNewDocument(c *gin.Context) { + var rDocAdd requestDocumentAdd + if err := c.ShouldBind(&rDocAdd); err != nil { + log.Error("[saveNewDocument] Invalid Form Bind") + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "Invalid Request"}) + return + } + + // Validate Form Exists + if rDocAdd.ID == nil || + rDocAdd.BookType == nil || + rDocAdd.Title == nil || + rDocAdd.Author == nil { + log.Error("[saveNewDocument] Missing Form Values") + c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "Invalid Request"}) + return + } + + var bType search.BookType = search.BOOK_FICTION + if *rDocAdd.BookType == "NON_FICTION" { + bType = search.BOOK_NON_FICTION + } + + // Save Book + tempFilePath, err := search.SaveBook(*rDocAdd.ID, bType) + if err != nil { + log.Warn("[saveNewDocument] Temp File Error: ", err) + c.AbortWithStatus(http.StatusBadRequest) + return + } + + // Calculate Partial MD5 ID + partialMD5, err := utils.CalculatePartialMD5(tempFilePath) + if err != nil { + log.Warn("[saveNewDocument] Partial MD5 Error: ", err) + c.AbortWithStatus(http.StatusBadRequest) + return + } + + // Derive Extension on MIME + fileMime, err := mimetype.DetectFile(tempFilePath) + fileExtension := fileMime.Extension() + + // Derive Filename + var fileName string + if *rDocAdd.Author != "" { + fileName = fileName + *rDocAdd.Author + } else { + fileName = fileName + "Unknown" + } + + if *rDocAdd.Title != "" { + fileName = fileName + " - " + *rDocAdd.Title + } else { + fileName = fileName + " - Unknown" + } + + // Remove Slashes + fileName = strings.ReplaceAll(fileName, "/", "") + + // Derive & Sanitize File Name + fileName = "." 
+ filepath.Clean(fmt.Sprintf("/%s [%s]%s", fileName, partialMD5, fileExtension)) + + // Generate Storage Path + safePath := filepath.Join(api.Config.DataPath, "documents", fileName) + + // Move File + if err := os.Rename(tempFilePath, safePath); err != nil { + log.Warn("[saveNewDocument] Move Temp File Error: ", err) + c.AbortWithStatus(http.StatusBadRequest) + return + } + + // Get MD5 Hash + fileHash, err := getFileMD5(safePath) + if err != nil { + log.Error("[saveNewDocument] Hash Failure:", err) + c.AbortWithStatus(http.StatusBadRequest) + return + } + + // Upsert Document + if _, err = api.DB.Queries.UpsertDocument(api.DB.Ctx, database.UpsertDocumentParams{ + ID: partialMD5, + Title: rDocAdd.Title, + Author: rDocAdd.Author, + Md5: fileHash, + Filepath: &fileName, + }); err != nil { + log.Error("[saveNewDocument] UpsertDocument DB Error:", err) + c.AbortWithStatus(http.StatusBadRequest) + return + } + + c.Redirect(http.StatusFound, fmt.Sprintf("./documents/%s", partialMD5)) +} + func (api *API) editSettings(c *gin.Context) { rUser, _ := c.Get("AuthorizedUser") @@ -555,7 +688,8 @@ func (api *API) editSettings(c *gin.Context) { "Settings": gin.H{ "TimeOffset": *user.TimeOffset, }, - "Devices": devices, + "Devices": devices, + "SearchEnabled": api.Config.SearchEnabled, } c.HTML(http.StatusOK, "settings", templateVars) diff --git a/config/config.go b/config/config.go index 4ef4b7e..aeb0e9c 100644 --- a/config/config.go +++ b/config/config.go @@ -22,6 +22,7 @@ type Config struct { // Miscellaneous Settings RegistrationEnabled bool CookieSessionKey string + SearchEnabled bool } func Load() *Config { @@ -35,6 +36,7 @@ func Load() *Config { ListenPort: getEnv("LISTEN_PORT", "8585"), CookieSessionKey: trimLowerString(getEnv("COOKIE_SESSION_KEY", "")), RegistrationEnabled: trimLowerString(getEnv("REGISTRATION_ENABLED", "false")) == "true", + SearchEnabled: trimLowerString(getEnv("SEARCH_ENABLED", "false")) == "true", } } diff --git a/go.mod b/go.mod index 
2cc0c9f..570a813 100644 --- a/go.mod +++ b/go.mod @@ -17,6 +17,8 @@ require ( ) require ( + github.com/PuerkitoBio/goquery v1.8.1 // indirect + github.com/andybalholm/cascadia v1.3.1 // indirect github.com/aymerick/douceur v0.2.0 // indirect github.com/bytedance/sonic v1.10.0 // indirect github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d // indirect diff --git a/go.sum b/go.sum index a9d3d2d..c406e60 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= github.com/alexedwards/argon2id v0.0.0-20230305115115-4b3c3280a736 h1:qZaEtLxnqY5mJ0fVKbk31NVhlgi0yrKm51Pq/I5wcz4= github.com/alexedwards/argon2id v0.0.0-20230305115115-4b3c3280a736/go.mod h1:mTeFRcTdnpzOlRjMoFYC/80HwVUreupyAiqPkCZQOXc= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= github.com/antonlindstrom/pgstore v0.0.0-20200229204646-b08ebf1105e0/go.mod h1:2Ti6VUHVxpC0VSmTZzEvpzysnaGAfGBOoMIz5ykPyyw= github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= @@ -174,8 +178,10 @@ golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod 
h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= @@ -187,6 +193,7 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -218,6 +225,7 @@ golang.org/x/tools v0.13.0 h1:Iey4qkscZuv0VvIt8E0neZjtPVQFSc870HQ448QgEmQ= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 
google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= diff --git a/search/search.go b/search/search.go new file mode 100644 index 0000000..5392dad --- /dev/null +++ b/search/search.go @@ -0,0 +1,253 @@ +package search + +import ( + "errors" + "io" + "net/http" + "net/url" + "os" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + log "github.com/sirupsen/logrus" +) + +type Cadence string + +const ( + TOP_YEAR Cadence = "y" + TOP_MONTH Cadence = "m" +) + +type BookType int + +const ( + BOOK_FICTION BookType = iota + BOOK_NON_FICTION +) + +type SearchItem struct { + ID string + Title string + Author string + Language string + Series string + FileType string + FileSize string + UploadDate string +} + +func SearchBook(query string, bookType BookType) (allEntries []SearchItem) { + log.Info(query) + if bookType == BOOK_FICTION { + // Search Fiction + url := "https://libgen.is/fiction/?q=" + url.QueryEscape(query) + "&language=English&format=epub" + body := getPage(url) + allEntries = parseLibGenFiction(body) + } else if bookType == BOOK_NON_FICTION { + // Search NonFiction + url := "https://libgen.is/search.php?req=" + url.QueryEscape(query) + body := getPage(url) + allEntries = parseLibGenNonFiction(body) + } + + return +} + +func GoodReadsMostRead(c Cadence) []SearchItem { + body := getPage("https://www.goodreads.com/book/most_read?category=all&country=US&duration=" + string(c)) + return parseGoodReads(body) +} + +func GetBookURL(id string, bookType BookType) string { + // Derive Info URL + var infoURL string + if bookType == BOOK_FICTION { + infoURL = "http://library.lol/fiction/" + id + } else if bookType == BOOK_NON_FICTION { + infoURL = "http://library.lol/main/" + id + } + + // Parse & Derive Download URL + body := getPage(infoURL) + + // downloadURL := parseLibGenDownloadURL(body) + return parseLibGenDownloadURL(body) +} + +func SaveBook(id string, bookType BookType) (string, error) { + // Derive Info URL + var infoURL string 
+ if bookType == BOOK_FICTION { + infoURL = "http://library.lol/fiction/" + id + } else if bookType == BOOK_NON_FICTION { + infoURL = "http://library.lol/main/" + id + } + + // Parse & Derive Download URL + body := getPage(infoURL) + bookURL := parseLibGenDownloadURL(body) + + // Create File + tempFile, err := os.CreateTemp("", "book") + if err != nil { + log.Error("[SaveBook] File Create Error: ", err) + return "", errors.New("File Failure") + } + defer tempFile.Close() + + // Download File + log.Info("[SaveBook] Downloading Book") + resp, err := http.Get(bookURL) + if err != nil { + os.Remove(tempFile.Name()) + log.Error("[SaveBook] Cover URL API Failure") + return "", errors.New("API Failure") + } + defer resp.Body.Close() + + // Copy File to Disk + log.Info("[SaveBook] Saving Book") + _, err = io.Copy(tempFile, resp.Body) + if err != nil { + os.Remove(tempFile.Name()) + log.Error("[SaveBook] File Copy Error") + return "", errors.New("File Failure") + } + + return tempFile.Name(), nil +} + +func getPage(page string) io.ReadCloser { + resp, _ := http.Get(page) + return resp.Body +} + +func parseLibGenFiction(body io.ReadCloser) []SearchItem { + // Parse + defer body.Close() + doc, _ := goquery.NewDocumentFromReader(body) + + // Normalize Results + var allEntries []SearchItem + doc.Find("table.catalog tbody > tr").Each(func(ix int, rawBook *goquery.Selection) { + + // Parse File Details + fileItem := rawBook.Find("td:nth-child(5)") + fileDesc := fileItem.Text() + fileDescSplit := strings.Split(fileDesc, "/") + fileType := strings.ToLower(strings.TrimSpace(fileDescSplit[0])) + fileSize := strings.TrimSpace(fileDescSplit[1]) + + // Parse Upload Date + uploadedRaw, _ := fileItem.Attr("title") + uploadedDateRaw := strings.Split(uploadedRaw, "Uploaded at ")[1] + uploadDate, _ := time.Parse("2006-01-02 15:04:05", uploadedDateRaw) + + // Parse MD5 + editHref, _ := rawBook.Find("td:nth-child(7) a").Attr("href") + hrefArray := strings.Split(editHref, "/") + id := 
hrefArray[len(hrefArray)-1] + + // Parse Other Details + title := rawBook.Find("td:nth-child(3) p a").Text() + author := rawBook.Find(".catalog_authors li a").Text() + language := rawBook.Find("td:nth-child(4)").Text() + series := rawBook.Find("td:nth-child(2)").Text() + + item := SearchItem{ + ID: id, + Title: title, + Author: author, + Series: series, + Language: language, + FileType: fileType, + FileSize: fileSize, + UploadDate: uploadDate.Format(time.RFC3339), + } + + allEntries = append(allEntries, item) + }) + + // Return Results + return allEntries +} + +func parseLibGenNonFiction(body io.ReadCloser) []SearchItem { + // Parse + defer body.Close() + doc, _ := goquery.NewDocumentFromReader(body) + + // Normalize Results + var allEntries []SearchItem + doc.Find("table.c tbody > tr:nth-child(n + 2)").Each(func(ix int, rawBook *goquery.Selection) { + + // Parse Type & Size + fileSize := strings.ToLower(strings.TrimSpace(rawBook.Find("td:nth-child(8)").Text())) + fileType := strings.ToLower(strings.TrimSpace(rawBook.Find("td:nth-child(9)").Text())) + + // Parse MD5 + titleRaw := rawBook.Find("td:nth-child(3) [id]") + editHref, _ := titleRaw.Attr("href") + hrefArray := strings.Split(editHref, "?md5=") + id := hrefArray[1] + + // Parse Other Details + title := titleRaw.Text() + author := rawBook.Find("td:nth-child(2)").Text() + language := rawBook.Find("td:nth-child(7)").Text() + series := rawBook.Find("td:nth-child(3) [href*='column=series']").Text() + + item := SearchItem{ + ID: id, + Title: title, + Author: author, + Series: series, + Language: language, + FileType: fileType, + FileSize: fileSize, + } + + allEntries = append(allEntries, item) + }) + + // Return Results + return allEntries +} + +func parseLibGenDownloadURL(body io.ReadCloser) string { + // Parse + defer body.Close() + doc, _ := goquery.NewDocumentFromReader(body) + + // Return Download URL + // downloadURL, _ := doc.Find("#download [href*=cloudflare]").Attr("href") + downloadURL, _ := 
doc.Find("#download h2 a").Attr("href") + + return downloadURL +} + +func parseGoodReads(body io.ReadCloser) []SearchItem { + // Parse + defer body.Close() + doc, _ := goquery.NewDocumentFromReader(body) + + // Normalize Results + var allEntries []SearchItem + + doc.Find("[itemtype=\"http://schema.org/Book\"]").Each(func(ix int, rawBook *goquery.Selection) { + title := rawBook.Find(".bookTitle span").Text() + author := rawBook.Find(".authorName span").Text() + + item := SearchItem{ + Title: title, + Author: author, + } + + allEntries = append(allEntries, item) + }) + + // Return Results + return allEntries +} diff --git a/templates/activity.html b/templates/activity.html index bf33603..5fd0198 100644 --- a/templates/activity.html +++ b/templates/activity.html @@ -33,6 +33,12 @@
+ {{ if not .Data }} ++ | + Document + | ++ Series + | ++ Type + | ++ Size + | + +|
---|---|---|---|---|---|
No Results | +|||||
+ + | ++ {{ $item.Author }} - {{ $item.Title }} + | +
+ {{ or $item.Series "N/A" }} + |
+
+ {{ or $item.FileType "N/A" }} + |
+
+ {{ or $item.FileSize "N/A" }} + |
+
+ {{ or $item.UploadDate "N/A" }} + |
+
{{ $device.DeviceName }}
// CalculatePartialMD5 returns the hex-encoded MD5 digest of a fixed set of
// samples taken from the file at filePath, reimplementing KOReader's partial
// MD5 calculation.
//
// Up to 1024 bytes are sampled at offset 0 and then at offsets 1024<<(2*i)
// for i = 0..10. Sampling stops at the first offset that reaches the end of
// the file. A sample cut short by the end of the file still contributes the
// bytes that were read, so files ending inside a sample window are hashed
// over their real content.
//
// An error is returned when the file cannot be opened or a read fails for
// any reason other than reaching the end of the file.
func CalculatePartialMD5(filePath string) (string, error) {
	file, err := os.Open(filePath)
	if err != nil {
		return "", err
	}
	defer file.Close()

	const (
		step int64 = 1024
		size int64 = 1024
	)

	var buf bytes.Buffer
	sample := make([]byte, size)

	for i := -1; i <= 10; i++ {
		// The first iteration (i == -1) samples the start of the file.
		var offset int64
		if i >= 0 {
			offset = step << uint(2*i)
		}

		// ReadAt reports io.EOF together with a short count when the file
		// ends inside the window; keep those bytes before stopping.
		n, err := file.ReadAt(sample, offset)
		buf.Write(sample[:n])
		if err == io.EOF {
			break
		}
		if err != nil {
			return "", err
		}
	}

	return fmt.Sprintf("%x", md5.Sum(buf.Bytes())), nil
}