From 0a1dfeab655763a4b936c0b05b922826f0b1b1b7 Mon Sep 17 00:00:00 2001 From: Evan Reichard Date: Tue, 13 Aug 2024 22:32:16 -0400 Subject: [PATCH] fix(search): set user agent for dl --- metadata/metadata.go | 2 +- search/anna.go | 6 +++--- search/search.go | 41 ++++++++++++++++++++++++++--------------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/metadata/metadata.go b/metadata/metadata.go index f92c911..d3444c2 100644 --- a/metadata/metadata.go +++ b/metadata/metadata.go @@ -87,7 +87,7 @@ func GetWordCount(filepath string) (*int64, error) { } return &totalWords, nil } else { - return nil, fmt.Errorf("Invalid extension") + return nil, fmt.Errorf("Invalid extension: %s", fileExtension) } } diff --git a/search/anna.go b/search/anna.go index 3473c7e..aa57312 100644 --- a/search/anna.go +++ b/search/anna.go @@ -17,15 +17,15 @@ func parseAnnasArchiveDownloadURL(body io.ReadCloser) (string, error) { doc, _ := goquery.NewDocumentFromReader(body) // Return Download URL - downloadURL, exists := doc.Find("body > table > tbody > tr > td > a").Attr("href") + downloadPath, exists := doc.Find("body > table > tbody > tr > td > a").Attr("href") if !exists { return "", fmt.Errorf("Download URL not found") } // Possible Funky URL - downloadURL = strings.ReplaceAll(downloadURL, "\\", "/") + downloadPath = strings.ReplaceAll(downloadPath, "\\", "/") - return downloadURL, nil + return fmt.Sprintf("http://libgen.li/%s", downloadPath), nil } // getAnnasArchiveBookSelection parses potentially commented out HTML. For some reason diff --git a/search/search.go b/search/search.go index ebba671..e1e425d 100644 --- a/search/search.go +++ b/search/search.go @@ -12,7 +12,7 @@ import ( log "github.com/sirupsen/logrus" ) -const userAgent string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0" +const userAgent string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0" type Cadence string @@ -57,21 +57,21 @@ type sourceDef struct { var sourceDefs = map[Source]sourceDef{ SOURCE_ANNAS_ARCHIVE: { searchURL: "https://annas-archive.org/search?index=&q=%s&ext=epub&sort=&lang=en", - downloadURL: "http://library.lol/fiction/%s", + downloadURL: "http://libgen.li/ads.php?md5=%s", parseSearchFunc: parseAnnasArchive, - parseDownloadFunc: parseLibGenDownloadURL, + parseDownloadFunc: parseAnnasArchiveDownloadURL, }, SOURCE_LIBGEN_FICTION: { searchURL: "https://libgen.is/fiction/?q=%s&language=English&format=epub", - downloadURL: "http://library.lol/fiction/%s", + downloadURL: "http://libgen.li/ads.php?md5=%s", parseSearchFunc: parseLibGenFiction, - parseDownloadFunc: parseLibGenDownloadURL, + parseDownloadFunc: parseAnnasArchiveDownloadURL, }, SOURCE_LIBGEN_NON_FICTION: { searchURL: "https://libgen.is/search.php?req=%s", - downloadURL: "http://library.lol/main/%s", + downloadURL: "http://libgen.li/ads.php?md5=%s", parseSearchFunc: parseLibGenNonFiction, - parseDownloadFunc: parseLibGenDownloadURL, + parseDownloadFunc: parseAnnasArchiveDownloadURL, }, } @@ -155,12 +155,19 @@ func getPage(page string) (io.ReadCloser, error) { log.Debug("URL: ", page) // Set 10s Timeout - client := http.Client{ - Timeout: 10 * time.Second, + client := http.Client{Timeout: 10 * time.Second} + + // Start Request + req, err := http.NewRequest("GET", page, nil) + if err != nil { + return nil, err } - // Get Page - resp, err := client.Get(page) + // Set User-Agent + req.Header.Set("User-Agent", userAgent) + + // Do Request + resp, err := client.Do(req) if err != nil { return nil, err } @@ -170,10 +177,14 @@ func getPage(page string) (io.ReadCloser, error) { } func downloadBook(bookURL string) (*http.Response, error) { + log.Debug("URL: ", bookURL) + // Allow Insecure - client := &http.Client{Transport: &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, - }} + client := &http.Client{ + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, + }, + } // Start Request req, err := http.NewRequest("GET", bookURL, nil) @@ -181,7 +192,7 @@ func downloadBook(bookURL string) (*http.Response, error) { return nil, err } - // Set UserAgent + // Set User-Agent req.Header.Set("User-Agent", userAgent) return client.Do(req)