[fix] wonky xpath issues

2023-11-06 07:12:24 -05:00
parent ffc5462326
commit 5cc1e2d71c
1 changed files with 27 additions and 3 deletions
@@ -1021,7 +1021,7 @@ class EBookReader {
        .find((item) => item.sectionIndex == spinePosition)?.document ||
      sectionItem.document;
-    // Derive XPath & Namespace
+    // Derive Namespace & XPath
    let namespaceURI = docItem.documentElement.namespaceURI;
    let remainingXPath = xpath
      // Replace with new base
@@ -1031,6 +1031,11 @@ class EBookReader {
      // Remove potential trailing `text()`
      .replace(/\/text\(\)(\[\d+\])?$/, "");
    // XPath to CSS Selector
    let derivedSelector = remainingXPath
      .replace(/^\/html\/body/, "body")
      .replace(/\/(\w+)\[(\d+)\]/g, " $1:nth-of-type($2)");
    // Validate Namespace
    if (namespaceURI) remainingXPath = remainingXPath.replaceAll("/", "/ns:");
@@ -1047,8 +1052,27 @@ class EBookReader {
      }
    );
-    // Get Element & CFI
+    /**
-    let element = docSearch.iterateNext();
+     * There are two ways to do this. One via XPath, and the other via derived
     * CSS selectors. Unfortunately, KOReader's XPath implementation appears to
     * be a little wonky, which makes the CSS selector fallback necessary.
     *
     * For example the following XPath was generated by KOReader:
     *     "/body/DocFragment[19]/body/h1/img.0"
     *
     * In reality, the XPath should have been (note the 'a'):
     *     "/body/DocFragment[19]/body/h1/a/img.0"
     *
     * Because of the above, `docItem.evaluate` will not find the element. As
     * an alternative, we derive a CSS selector from the XPath. This should be
     * fully comprehensive; AFAICT KOReader only creates XPaths referencing
     * HTML tag names and indexes.
     **/
    // Get Element & CFI (XPath -> CSS Selector Fallback)
    let element =
      docSearch.iterateNext() || docItem.querySelector(derivedSelector);
    let cfi = sectionItem.cfiFromElement(element);
    return { cfi, element };