[fix] wonky xpath issues

2023-11-06 07:12:24 -05:00
parent ffc5462326
commit 5cc1e2d71c
1 changed files with 27 additions and 3 deletions
@@ -1021,7 +1021,7 @@ class EBookReader {
        .find((item) => item.sectionIndex == spinePosition)?.document ||
      sectionItem.document;

-    // Derive XPath & Namespace
+    // Derive Namespace & XPath
    let namespaceURI = docItem.documentElement.namespaceURI;
    let remainingXPath = xpath
      // Replace with new base
@@ -1031,6 +1031,11 @@ class EBookReader {
      // Remove potential trailing `text()`
      .replace(/\/text\(\)(\[\d+\])?$/, "");

+    // XPath to CSS Selector
+    let derivedSelector = remainingXPath
+      .replace(/^\/html\/body/, "body")
+      .replace(/\/(\w+)\[(\d+)\]/g, " $1:nth-of-type($2)");
+
    // Validate Namespace
    if (namespaceURI) remainingXPath = remainingXPath.replaceAll("/", "/ns:");

@@ -1047,8 +1052,27 @@ class EBookReader {
      }
    );

-    // Get Element & CFI
-    let element = docSearch.iterateNext();
+    /**
+     * There are two ways to do this. One via XPath, and the other via derived
+     * CSS selectors. Unfortunately it seems like KOReader's XPath implementation
+     * is a little wonky, which makes the CSS selector fallback necessary.
+     *
+     * For example the following XPath was generated by KOReader:
+     *     "/body/DocFragment[19]/body/h1/img.0"
+     *
+     * In reality, the XPath should have been (note the 'a'):
+     *     "/body/DocFragment[19]/body/h1/a/img.0"
+     *
+     * Because of this, `docItem.evaluate` will not find the element. As a
+     * fallback, we derive a CSS selector from the XPath. This should be fully
+     * comprehensive; AFAICT KOReader only creates XPaths referencing HTML tag
+     * names and indexes.
+     **/
+
+    // Get Element & CFI (XPath -> CSS Selector Fallback)
+    let element =
+      docSearch.iterateNext() || docItem.querySelector(derivedSelector);
+
    let cfi = sectionItem.cfiFromElement(element);

    return { cfi, element };