From ca1cce1ff1bb39b67d05119c364df205108295b1 Mon Sep 17 00:00:00 2001 From: Evan Reichard Date: Sat, 25 Nov 2023 18:38:18 -0500 Subject: [PATCH] [add] opds search, [fix] opds urls, [add] log level env var --- .gitignore | 1 + README.md | 1 + api/api.go | 7 +- api/app-routes.go | 46 +++------ api/opds-routes.go | 66 +++++++++--- config/config.go | 15 ++- search/search.go | 229 +++++++++++++++++++++++++++++------------- templates/search.html | 16 +-- 8 files changed, 250 insertions(+), 131 deletions(-) diff --git a/.gitignore b/.gitignore index 67e0ef5..2913e15 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +TODO.md .DS_Store data/ build/ diff --git a/README.md b/README.md index a941b6e..5e2e3d5 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,7 @@ The service is now accessible at: `http://localhost:8585`. I recommend registeri | CONFIG_PATH | /config | Directory where to store SQLite's DB | | DATA_PATH | /data | Directory where to store the documents and cover metadata | | LISTEN_PORT | 8585 | Port the server listens at | +| LOG_LEVEL | info | Set server log level | | REGISTRATION_ENABLED | false | Whether to allow registration (applies to both WebApp & KOSync API) | | COOKIE_SESSION_KEY | | Optional secret cookie session key (auto generated if not provided) | | COOKIE_SECURE | true | Set Cookie `Secure` attribute (i.e. only works over HTTPS) | diff --git a/api/api.go b/api/api.go index f064a6f..d77e644 100644 --- a/api/api.go +++ b/api/api.go @@ -163,11 +163,12 @@ func (api *API) registerOPDSRoutes(apiGroup *gin.RouterGroup) { opdsGroup := apiGroup.Group("/opds") // OPDS Routes - opdsGroup.GET("", api.authOPDSMiddleware, api.opdsDocuments) - opdsGroup.GET("/", api.authOPDSMiddleware, api.opdsDocuments) + opdsGroup.GET("", api.authOPDSMiddleware, api.opdsEntry) + opdsGroup.GET("/", api.authOPDSMiddleware, api.opdsEntry) + opdsGroup.GET("/search.xml", api.authOPDSMiddleware, api.opdsSearchDescription) + opdsGroup.GET("/documents", api.authOPDSMiddleware, api.opdsDocuments) opdsGroup.GET("/documents/:document/cover", api.authOPDSMiddleware, api.getDocumentCover) opdsGroup.GET("/documents/:document/file", api.authOPDSMiddleware, api.downloadDocument) - opdsGroup.GET("/search.xml", api.authOPDSMiddleware, api.opdsSearchDescription) } func generateToken(n int) ([]byte, error) { diff --git a/api/app-routes.go b/api/app-routes.go index 85e9f1d..5c90c24 100644 --- a/api/app-routes.go +++ b/api/app-routes.go @@ -32,8 +32,8 @@ type queryParams struct { } type searchParams struct { - Query *string `form:"query"` - BookType *string `form:"book_type"` + Query *string `form:"query"` + Source *search.Source `form:"source"` } type requestDocumentUpload struct { @@ -64,10 +64,10 @@ type requestSettingsEdit struct { } type requestDocumentAdd struct { - ID *string `form:"id"` - Title *string `form:"title"` - Author *string `form:"author"` - BookType *string `form:"book_type"` + ID string `form:"id"` + Title *string `form:"title"` + Author *string `form:"author"` + Source search.Source `form:"source"` } func (api *API) webManifest(c *gin.Context) { @@ -240,25 +240,18 @@ func (api *API) createAppResourcesRoute(routeName string, args ...map[string]any c.BindQuery(&sParams) // Only Handle Query - if sParams.BookType != nil && !slices.Contains([]string{"NON_FICTION", "FICTION"}, *sParams.BookType) { - templateVars["SearchErrorMessage"] = "Invalid Book Type" - } else if sParams.Query != nil && *sParams.Query == "" { - templateVars["SearchErrorMessage"] = "Invalid Query" - } else if sParams.BookType != nil && sParams.Query != nil { - var bType search.BookType = search.BOOK_FICTION - if *sParams.BookType == "NON_FICTION" { - bType = search.BOOK_NON_FICTION - } - + if sParams.Query != nil && sParams.Source != nil { // Search - searchResults, err := search.SearchBook(*sParams.Query, bType) + searchResults, err := search.SearchBook(*sParams.Query, *sParams.Source) if err != nil { errorPage(c, http.StatusInternalServerError, fmt.Sprintf("Search Error: %v", err)) return } templateVars["Data"] = searchResults - templateVars["BookType"] = *sParams.BookType + templateVars["Source"] = *sParams.Source + } else if sParams.Query != nil || sParams.Source != nil { + templateVars["SearchErrorMessage"] = "Invalid Query" } } else if routeName == "login" { templateVars["RegistrationEnabled"] = api.Config.RegistrationEnabled @@ -762,23 +755,8 @@ func (api *API) saveNewDocument(c *gin.Context) { return } - // Validate Form Exists - if rDocAdd.ID == nil || - rDocAdd.BookType == nil || - rDocAdd.Title == nil || - rDocAdd.Author == nil { - log.Error("[saveNewDocument] Missing Form Values") - errorPage(c, http.StatusBadRequest, "Invalid or missing form values.") - return - } - - var bType search.BookType = search.BOOK_FICTION - if *rDocAdd.BookType == "NON_FICTION" { - bType = search.BOOK_NON_FICTION - } - // Save Book - tempFilePath, err := search.SaveBook(*rDocAdd.ID, bType) + tempFilePath, err := search.SaveBook(rDocAdd.ID, rDocAdd.Source) if err != nil { log.Warn("[saveNewDocument] Temp File Error: ", err) errorPage(c, http.StatusInternalServerError, "Unable to save file.") diff --git a/api/opds-routes.go b/api/opds-routes.go index 87c65f2..655bacd 100644 --- a/api/opds-routes.go +++ b/api/opds-routes.go @@ -26,6 +26,40 @@ var mimeMapping map[string]string = map[string]string{ "lit": "application/x-ms-reader", } +func (api *API) opdsEntry(c *gin.Context) { + // Build & Return XML + mainFeed := &opds.Feed{ + Title: "AnthoLume OPDS Server", + Updated: time.Now().UTC(), + Links: []opds.Link{ + { + Title: "Search AnthoLume", + Rel: "search", + TypeLink: "application/opensearchdescription+xml", + Href: "/api/opds/search.xml", + }, + }, + + Entries: []opds.Entry{ + { + Title: "AnthoLume - All Documents", + Content: &opds.Content{ + Content: "AnthoLume - All Documents", + ContentType: "text", + }, + Links: []opds.Link{ + { + Href: "/api/opds/documents?limit=100", + TypeLink: "application/atom+xml;type=feed;profile=opds-catalog", + }, + }, + }, + }, + } + + c.XML(http.StatusOK, mainFeed) +} + func (api *API) opdsDocuments(c *gin.Context) { var userID string if rUser, _ := c.Get("AuthorizedUser"); rUser != nil { @@ -35,9 +69,17 @@ func (api *API) opdsDocuments(c *gin.Context) { // Potential URL Parameters qParams := bindQueryParams(c) + // Possible Query + var query *string + if qParams.Search != nil && *qParams.Search != "" { + search := "%" + *qParams.Search + "%" + query = &search + } + // Get Documents documents, err := api.DB.Queries.GetDocumentsWithStats(api.DB.Ctx, database.GetDocumentsWithStatsParams{ UserID: userID, + Query: query, Offset: (*qParams.Page - 1) * *qParams.Limit, Limit: *qParams.Limit, }) @@ -71,7 +113,7 @@ func (api *API) opdsDocuments(c *gin.Context) { } item := opds.Entry{ - Title: fmt.Sprintf("[%3d%%] %s", int(doc.Percentage), title), + Title: title, Author: []opds.Author{ { Name: author, @@ -84,12 +126,12 @@ func (api *API) opdsDocuments(c *gin.Context) { Links: []opds.Link{ { Rel: "http://opds-spec.org/acquisition", - Href: fmt.Sprintf("./documents/%s/file", doc.ID), + Href: fmt.Sprintf("/api/opds/documents/%s/file", doc.ID), TypeLink: mimeMapping[fileType], }, { Rel: "http://opds-spec.org/image", - Href: fmt.Sprintf("./documents/%s/cover", doc.ID), + Href: fmt.Sprintf("/api/opds/documents/%s/cover", doc.ID), TypeLink: "image/jpeg", }, }, @@ -99,19 +141,15 @@ func (api *API) opdsDocuments(c *gin.Context) { } } + feedTitle := "All Documents" + if query != nil { + feedTitle = "Search Results" + } + // Build & Return XML searchFeed := &opds.Feed{ - Title: "All Documents", + Title: feedTitle, Updated: time.Now().UTC(), - // TODO - // Links: []opds.Link{ - // { - // Title: "Search AnthoLume", - // Rel: "search", - // TypeLink: "application/opensearchdescription+xml", - // Href: "search.xml", - // }, - // }, Entries: allEntries, } @@ -122,7 +160,7 @@ func (api *API) opdsSearchDescription(c *gin.Context) { rawXML := ` Search AnthoLume Search AnthoLume - + ` c.Data(http.StatusOK, "application/xml", []byte(rawXML)) } diff --git a/config/config.go b/config/config.go index a9f7785..77c4a85 100644 --- a/config/config.go +++ b/config/config.go @@ -3,6 +3,8 @@ package config import ( "os" "strings" + + log "github.com/sirupsen/logrus" ) type Config struct { @@ -22,6 +24,7 @@ type Config struct { RegistrationEnabled bool SearchEnabled bool DemoMode bool + LogLevel string // Cookie Settings CookieSessionKey string @@ -30,7 +33,7 @@ type Config struct { } func Load() *Config { - return &Config{ + c := &Config{ Version: "0.0.1", DBType: trimLowerString(getEnv("DATABASE_TYPE", "SQLite")), DBName: trimLowerString(getEnv("DATABASE_NAME", "antholume")), @@ -41,9 +44,19 @@ func Load() *Config { DemoMode: trimLowerString(getEnv("DEMO_MODE", "false")) == "true", SearchEnabled: trimLowerString(getEnv("SEARCH_ENABLED", "false")) == "true", CookieSessionKey: trimLowerString(getEnv("COOKIE_SESSION_KEY", "")), + LogLevel: trimLowerString(getEnv("LOG_LEVEL", "info")), CookieSecure: trimLowerString(getEnv("COOKIE_SECURE", "true")) == "true", CookieHTTPOnly: trimLowerString(getEnv("COOKIE_HTTP_ONLY", "true")) == "true", } + + // Log Level + ll, err := log.ParseLevel(c.LogLevel) + if err != nil { + ll = log.InfoLevel + } + log.SetLevel(ll) + + return c } func getEnv(key, fallback string) string { diff --git a/search/search.go b/search/search.go index 595ac3e..7c25a0a 100644 --- a/search/search.go +++ b/search/search.go @@ -2,6 +2,7 @@ package search import ( "errors" + "fmt" "io" "net/http" "net/url" @@ -16,8 +17,8 @@ import ( type Cadence string const ( - TOP_YEAR Cadence = "y" - TOP_MONTH Cadence = "m" + CADENCE_TOP_YEAR Cadence = "y" + CADENCE_TOP_MONTH Cadence = "m" ) type BookType int @@ -27,6 +28,14 @@ const ( BOOK_NON_FICTION ) +type Source string + +const ( + SOURCE_ANNAS_ARCHIVE Source = "Annas Archive" + SOURCE_LIBGEN_FICTION Source = "LibGen Fiction" + SOURCE_LIBGEN_NON_FICTION Source = "LibGen Non-fiction" +) + type SearchItem struct { ID string Title string @@ -38,26 +47,89 @@ type SearchItem struct { UploadDate string } -func SearchBook(query string, bookType BookType) ([]SearchItem, error) { - if bookType == BOOK_FICTION { - // Search Fiction - url := "https://libgen.is/fiction/?q=" + url.QueryEscape(query) + "&language=English&format=epub" - body, err := getPage(url) - if err != nil { - return nil, err - } - return parseLibGenFiction(body) - } else if bookType == BOOK_NON_FICTION { - // Search NonFiction - url := "https://libgen.is/search.php?req=" + url.QueryEscape(query) - body, err := getPage(url) - if err != nil { - return nil, err - } - return parseLibGenNonFiction(body) - } else { - return nil, errors.New("Invalid Book Type") +type sourceDef struct { + searchURL string + downloadURL string + parseSearchFunc func(io.ReadCloser) ([]SearchItem, error) + parseDownloadFunc func(io.ReadCloser) (string, error) +} + +var sourceDefs = map[Source]sourceDef{ + SOURCE_ANNAS_ARCHIVE: { + searchURL: "https://annas-archive.org/search?index=&q=%s&ext=epub&sort=&lang=en", + downloadURL: "http://libgen.li/ads.php?md5=%s", + parseSearchFunc: parseAnnasArchive, + parseDownloadFunc: parseAnnasArchiveDownloadURL, + }, + SOURCE_LIBGEN_FICTION: { + searchURL: "https://libgen.is/fiction/?q=%s&language=English&format=epub", + downloadURL: "http://library.lol/fiction/%s", + parseSearchFunc: parseLibGenFiction, + parseDownloadFunc: parseLibGenDownloadURL, + }, + SOURCE_LIBGEN_NON_FICTION: { + searchURL: "https://libgen.is/search.php?req=%s", + downloadURL: "http://library.lol/main/%s", + parseSearchFunc: parseLibGenNonFiction, + parseDownloadFunc: parseLibGenDownloadURL, + }, +} + +func SearchBook(query string, source Source) ([]SearchItem, error) { + def := sourceDefs[source] + log.Debug("[SearchBook] Source: ", def) + url := fmt.Sprintf(def.searchURL, url.QueryEscape(query)) + body, err := getPage(url) + if err != nil { + return nil, err } + return def.parseSearchFunc(body) +} + +func SaveBook(id string, source Source) (string, error) { + def := sourceDefs[source] + log.Debug("[SaveBook] Source: ", def) + url := fmt.Sprintf(def.downloadURL, id) + + body, err := getPage(url) + if err != nil { + return "", err + } + + bookURL, err := def.parseDownloadFunc(body) + if err != nil { + log.Error("[SaveBook] Parse Download URL Error: ", err) + return "", errors.New("Download Failure") + } + + // Create File + tempFile, err := os.CreateTemp("", "book") + if err != nil { + log.Error("[SaveBook] File Create Error: ", err) + return "", errors.New("File Failure") + } + defer tempFile.Close() + + // Download File + log.Info("[SaveBook] Downloading Book: ", bookURL) + resp, err := http.Get(bookURL) + if err != nil { + os.Remove(tempFile.Name()) + log.Error("[SaveBook] Cover URL API Failure") + return "", errors.New("API Failure") + } + defer resp.Body.Close() + + // Copy File to Disk + log.Info("[SaveBook] Saving Book") + _, err = io.Copy(tempFile, resp.Body) + if err != nil { + os.Remove(tempFile.Name()) + log.Error("[SaveBook] File Copy Error") + return "", errors.New("File Failure") + } + + return tempFile.Name(), nil } func GoodReadsMostRead(c Cadence) ([]SearchItem, error) { @@ -87,57 +159,9 @@ func GetBookURL(id string, bookType BookType) (string, error) { return parseLibGenDownloadURL(body) } -func SaveBook(id string, bookType BookType) (string, error) { - // Derive Info URL - var infoURL string - if bookType == BOOK_FICTION { - infoURL = "http://library.lol/fiction/" + id - } else if bookType == BOOK_NON_FICTION { - infoURL = "http://library.lol/main/" + id - } - - // Parse & Derive Download URL - body, err := getPage(infoURL) - if err != nil { - return "", err - } - bookURL, err := parseLibGenDownloadURL(body) - if err != nil { - log.Error("[SaveBook] Parse Download URL Error: ", err) - return "", errors.New("Download Failure") - } - - // Create File - tempFile, err := os.CreateTemp("", "book") - if err != nil { - log.Error("[SaveBook] File Create Error: ", err) - return "", errors.New("File Failure") - } - defer tempFile.Close() - - // Download File - log.Info("[SaveBook] Downloading Book") - resp, err := http.Get(bookURL) - if err != nil { - os.Remove(tempFile.Name()) - log.Error("[SaveBook] Cover URL API Failure") - return "", errors.New("API Failure") - } - defer resp.Body.Close() - - // Copy File to Disk - log.Info("[SaveBook] Saving Book") - _, err = io.Copy(tempFile, resp.Body) - if err != nil { - os.Remove(tempFile.Name()) - log.Error("[SaveBook] File Copy Error") - return "", errors.New("File Failure") - } - - return tempFile.Name(), nil -} - func getPage(page string) (io.ReadCloser, error) { + log.Debug("[getPage] ", page) + // Set 10s Timeout client := http.Client{ Timeout: 10 * time.Second, @@ -292,3 +316,66 @@ func parseGoodReads(body io.ReadCloser) ([]SearchItem, error) { // Return Results return allEntries, nil } + +func parseAnnasArchiveDownloadURL(body io.ReadCloser) (string, error) { + // Parse + defer body.Close() + doc, _ := goquery.NewDocumentFromReader(body) + + // Return Download URL + downloadURL, exists := doc.Find("body > table > tbody > tr > td > a").Attr("href") + if exists == false { + return "", errors.New("Download URL not found") + } + + return "http://libgen.li/" + downloadURL, nil +} + +func parseAnnasArchive(body io.ReadCloser) ([]SearchItem, error) { + // Parse + defer body.Close() + doc, err := goquery.NewDocumentFromReader(body) + if err != nil { + return nil, err + } + + // Normalize Results + var allEntries []SearchItem + doc.Find("form > div.w-full > div.w-full > div > div.justify-center").Each(func(ix int, rawBook *goquery.Selection) { + // Parse Details + details := rawBook.Find("div:nth-child(2) > div:nth-child(1)").Text() + detailsSplit := strings.Split(details, ", ") + + // Invalid Details + if len(detailsSplit) < 3 { + return + } + + language := detailsSplit[0] + fileType := detailsSplit[1] + fileSize := detailsSplit[2] + + // Get Title & Author + title := rawBook.Find("h3").Text() + author := rawBook.Find("div:nth-child(2) > div:nth-child(4)").Text() + + // Parse MD5 + itemHref, _ := rawBook.Find("a").Attr("href") + hrefArray := strings.Split(itemHref, "/") + id := hrefArray[len(hrefArray)-1] + + item := SearchItem{ + ID: id, + Title: title, + Author: author, + Language: language, + FileType: fileType, + FileSize: fileSize, + } + + allEntries = append(allEntries, item) + }) + + // Return Results + return allEntries, nil +} diff --git a/templates/search.html b/templates/search.html index 06eafe2..5b8bbd0 100644 --- a/templates/search.html +++ b/templates/search.html @@ -36,7 +36,6 @@ /> -
@@ -136,9 +136,9 @@