uxjulia · uxjulia · Jun 13, 2026 · Jun 9, 2026 · Jun 10, 2026 · Jun 12, 2026
diff --git a/lib/KOReaderSync/ProgressMapper.cpp b/lib/KOReaderSync/ProgressMapper.cpp
@@ -30,8 +30,7 @@ int parseIndex(const std::string& xpath, const char* prefix, bool last = false)
 
 int parseCharOffset(const std::string& xpath) {
   const size_t textPos = xpath.rfind("text()");
-  if (textPos == std::string::npos) return 0;
-  const size_t dotPos = xpath.find('.', textPos);
+  const size_t dotPos = (textPos != std::string::npos) ? xpath.find('.', textPos) : xpath.rfind('.');
   if (dotPos == std::string::npos || dotPos + 1 >= xpath.size()) return 0;
   int val = 0;
   for (size_t i = dotPos + 1; i < xpath.size(); i++) {
@@ -104,7 +103,13 @@ bool isChapterStartXPath(const std::string& xpath) {
   if (dotPos == std::string::npos || dotPos <= bodyContentStart || dotPos + 1 >= xpath.size()) {
     return false;
   }
-  if (xpath.find('/', bodyContentStart) != std::string::npos) {
+  size_t terminalEnd = dotPos;
+  static constexpr char kTextNode[] = "/text()";
+  const size_t textNodePos = xpath.rfind(kTextNode, dotPos);
+  if (textNodePos != std::string::npos && textNodePos >= bodyContentStart) {
+    terminalEnd = textNodePos;
+  }
+  if (xpath.find('/', bodyContentStart) < terminalEnd) {
     return false;
   }
 
@@ -122,7 +127,7 @@ struct XPathStep {
 
 static constexpr int MAX_XPATH_DEPTH = 16;
 
-// Parse the XPath segment between /body/DocFragment[N]/body/ and text()[N].offset
+// Parse the XPath segment between /body/DocFragment[N]/body/ and the terminal position
 // into an ordered sequence of steps. Returns step count, 0 on failure.
 // Example input: "/body/DocFragment[1]/body/div[1]/ul/li[4]/text()[1].51"
 // Fills steps with: {div,1}, {ul,1}, {li,4}
@@ -136,13 +141,20 @@ int parseXPathSteps(const std::string& xpath, XPathStep steps[MAX_XPATH_DEPTH])
   if (xpath.compare(afterBracket + 1, strlen(kBody), kBody) != 0) return 0;
   size_t pos = afterBracket + 1 + strlen(kBody);
 
-  const size_t textPos = xpath.rfind("/text()");
-  if (textPos == std::string::npos || textPos <= pos) return 0;
+  size_t stepsEnd = xpath.rfind("/text()");
+  if (stepsEnd == std::string::npos) {
+    stepsEnd = xpath.rfind('.');
+    if (stepsEnd == std::string::npos || stepsEnd <= pos || stepsEnd + 1 >= xpath.size()) return 0;
+    for (size_t i = stepsEnd + 1; i < xpath.size(); i++) {
+      if (xpath[i] < '0' || xpath[i] > '9') return 0;
+    }
+  }
+  if (stepsEnd <= pos) return 0;
 
   int count = 0;
-  while (pos < textPos && count < MAX_XPATH_DEPTH) {
+  while (pos < stepsEnd && count < MAX_XPATH_DEPTH) {
     const size_t slash = xpath.find('/', pos);
-    const size_t segEnd = (slash < textPos) ? slash : textPos;
+    const size_t segEnd = (slash < stepsEnd) ? slash : stepsEnd;
 
     XPathStep& step = steps[count];
     const size_t bracket = xpath.find('[', pos);
@@ -166,7 +178,7 @@ int parseXPathSteps(const std::string& xpath, XPathStep steps[MAX_XPATH_DEPTH])
     }
 
     count++;
-    pos = (slash < textPos) ? slash + 1 : textPos;
+    pos = (slash < stepsEnd) ? slash + 1 : stepsEnd;
   }
   return count;
 }
@@ -223,10 +235,148 @@ class ParagraphStreamer final : public Print {
   char capturedAnchorId[MAX_ANCHOR_ID] = {};
   int capturedAnchorIdLen = 0;
   bool capturingAnchorTag = false;
-  enum IdScanState { ID_SCAN, ID_I, ID_D, ID_EQ, ID_IN_VALUE_D, ID_IN_VALUE_S } idState = ID_SCAN;
+  enum AnchorAttrState {
+    ATTR_FIND_NAME,
+    ATTR_READ_NAME,
+    ATTR_AFTER_NAME,
+    ATTR_BEFORE_VALUE,
+    ATTR_CAPTURE_D,
+    ATTR_CAPTURE_S
+  } attrState = ATTR_FIND_NAME;
+  uint8_t attrNameLen = 0;
+  bool currentAttrIsId = false;
   bool inAttrQuote =
       false;  // true while inside a quoted attribute value (prevents '/' from being treated as self-close)
   char attrQuoteChar = 0;
+  uint8_t nonVisibleDepth = 0;
+
+  bool isNonVisibleTag() const {
+    return strcasecmp(tagName, "head") == 0 || strcasecmp(tagName, "style") == 0 ||
+           strcasecmp(tagName, "script") == 0 || strcasecmp(tagName, "title") == 0;
+  }
+
+  static bool isAttrWhitespace(uint8_t c) { return c == ' ' || c == '\t' || c == '\n' || c == '\r'; }
+
+  static bool isAttrNameChar(uint8_t c) {
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' || c == '-' ||
+           c == ':' || c == '.';
+  }
+
+  void resetAnchorAttrScan() {
+    attrState = ATTR_FIND_NAME;
+    attrNameLen = 0;
+    currentAttrIsId = false;
+  }
+
+  void finishCapturedAnchorId() {
+    capturedAnchorId[capturedAnchorIdLen] = '\0';
+    capturingAnchorTag = false;
+    resetAnchorAttrScan();
+  }
+
+  void beginAnchorIdScan() {
+    capturingAnchorTag = true;
+    resetAnchorAttrScan();
+  }
+
+  void endAnchorIdScan() {
+    if (capturingAnchorTag) {
+      capturedAnchorIdLen = 0;
+    }
+    capturingAnchorTag = false;
+    resetAnchorAttrScan();
+  }
+
+  void appendCapturedAnchorId(uint8_t c) {
+    if (capturedAnchorIdLen + 1 < MAX_ANCHOR_ID) {
+      capturedAnchorId[capturedAnchorIdLen++] = c;
+    }
+  }
+
+  void scanAnchorAttribute(uint8_t c) {
+    switch (attrState) {
+      case ATTR_FIND_NAME:
+        if (isAttrNameChar(c)) {
+          attrState = ATTR_READ_NAME;
+          attrNameLen = 1;
+          currentAttrIsId = c == 'i';
+        }
+        break;
+      case ATTR_READ_NAME:
+        if (isAttrNameChar(c)) {
+          if (attrNameLen == 1) {
+            currentAttrIsId = currentAttrIsId && c == 'd';
+          } else {
+            currentAttrIsId = false;
+          }
+          attrNameLen++;
+        } else {
+          currentAttrIsId = currentAttrIsId && attrNameLen == 2;
+          if (isAttrWhitespace(c)) {
+            attrState = ATTR_AFTER_NAME;
+          } else if (c == '=') {
+            attrState = ATTR_BEFORE_VALUE;
+          } else {
+            resetAnchorAttrScan();
+          }
+        }
+        break;
+      case ATTR_AFTER_NAME:
+        if (isAttrWhitespace(c)) {
+          break;
+        }
+        if (c == '=') {
+          attrState = ATTR_BEFORE_VALUE;
+        } else if (isAttrNameChar(c)) {
+          attrState = ATTR_READ_NAME;
+          attrNameLen = 1;
+          currentAttrIsId = c == 'i';
+        } else {
+          resetAnchorAttrScan();
+        }
+        break;
+      case ATTR_BEFORE_VALUE:
+        if (isAttrWhitespace(c)) {
+          break;
+        }
+        if (currentAttrIsId && c == '"') {
+          capturedAnchorIdLen = 0;
+          attrState = ATTR_CAPTURE_D;
+        } else if (currentAttrIsId && c == '\'') {
+          capturedAnchorIdLen = 0;
+          attrState = ATTR_CAPTURE_S;
+        } else if (c == '"') {
+          attrState = ATTR_CAPTURE_D;
+        } else if (c == '\'') {
+          attrState = ATTR_CAPTURE_S;
+        } else {
+          resetAnchorAttrScan();
+        }
+        break;
+      case ATTR_CAPTURE_D:
+        if (c == '"') {
+          if (currentAttrIsId) {
+            finishCapturedAnchorId();
+          } else {
+            resetAnchorAttrScan();
+          }
+        } else if (currentAttrIsId) {
+          appendCapturedAnchorId(c);
+        }
+        break;
+      case ATTR_CAPTURE_S:
+        if (c == '\'') {
+          if (currentAttrIsId) {
+            finishCapturedAnchorId();
+          } else {
+            resetAnchorAttrScan();
+          }
+        } else if (currentAttrIsId) {
+          appendCapturedAnchorId(c);
+        }
+        break;
+    }
+  }
 
   void onVisibleCodepoint() {
     totalVisChars++;
@@ -286,15 +436,19 @@ class ParagraphStreamer final : public Print {
   void onOpenTag() {
     htmlDepth++;
 
+    if (nonVisibleDepth > 0 || isNonVisibleTag()) {
+      nonVisibleDepth++;
+      return;
+    }
+
     if (stepCount == 0) {
       if (strcasecmp(tagName, "p") == 0) onLegacyP();
       return;
     }
 
-    // Capture <a id> inside the fully-matched element even after target char is found
+    // Capture a child <a id> inside the fully-matched element even after target char is found.
     if (revPFound && matchedDepth == stepCount && capturedAnchorIdLen == 0 && strcasecmp(tagName, "a") == 0) {
-      capturingAnchorTag = true;
-      idState = ID_SCAN;
+      beginAnchorIdScan();
     }
 
     if (revDone) return;
@@ -315,6 +469,7 @@ class ParagraphStreamer final : public Print {
           stepEnteredAtDepth[matchedDepth] = htmlDepth;
           matchedDepth++;
           if (matchedDepth == stepCount) {
+            beginAnchorIdScan();
             paragraphAtMatch = pCount;
             liCountAtMatch = liCount;
             revPFound = true;
@@ -332,6 +487,12 @@ class ParagraphStreamer final : public Print {
   }
 
   void onCloseTag() {
+    if (nonVisibleDepth > 0) {
+      nonVisibleDepth--;
+      if (htmlDepth > 0) htmlDepth--;
+      return;
+    }
+
     // Legacy mode: each direct child element closing advances the text node index.
     if (stepCount == 0 && revPFound && !revDone && paragraphHtmlDepth >= 0 && htmlDepth == paragraphHtmlDepth + 1) {
       currentTextNode++;
@@ -419,42 +580,12 @@ class ParagraphStreamer final : public Print {
           attrQuoteChar = 0;
         }
         if (capturingAnchorTag) {
-          switch (idState) {
-            case ID_SCAN:
-              idState = (c == 'i' || c == 'I') ? ID_I : ID_SCAN;
-              break;
-            case ID_I:
-              idState = (c == 'd' || c == 'D') ? ID_D : ID_SCAN;
-              break;
-            case ID_D:
-              idState = (c == '=') ? ID_EQ : ID_SCAN;
-              break;
-            case ID_EQ:
-              if (c == '"')
-                idState = ID_IN_VALUE_D;
-              else if (c == '\'')
-                idState = ID_IN_VALUE_S;
-              break;
-            case ID_IN_VALUE_D:
-              if (c == '"') {
-                capturedAnchorId[capturedAnchorIdLen] = '\0';
-                capturingAnchorTag = false;
-              } else if (capturedAnchorIdLen + 1 < MAX_ANCHOR_ID)
-                capturedAnchorId[capturedAnchorIdLen++] = c;
-              break;
-            case ID_IN_VALUE_S:
-              if (c == '\'') {
-                capturedAnchorId[capturedAnchorIdLen] = '\0';
-                capturingAnchorTag = false;
-              } else if (capturedAnchorIdLen + 1 < MAX_ANCHOR_ID)
-                capturedAnchorId[capturedAnchorIdLen++] = c;
-              break;
-          }
+          scanAnchorAttribute(c);
         }
         // Only treat '/' as self-closing when outside a quoted attribute value.
         if (c == '/' && !inAttrQuote) {
+          endAnchorIdScan();
           onCloseTag();
-          capturingAnchorTag = false;
         }
         break;
     }
@@ -512,10 +643,13 @@ class ParagraphStreamer final : public Print {
       tagNameLen = 0;
       tagIsClose = false;
       capturingAnchorTag = false;
-      idState = ID_SCAN;
+      resetAnchorAttrScan();
       inAttrQuote = false;
       attrQuoteChar = 0;
     } else if (c == '>') {
+      if (tagState == TAG_ATTRS) {
+        endAnchorIdScan();
+      }
       globalInTag = false;
       inAttrQuote = false;
       if (tagState == TAG_IN_NAME && tagNameLen > 0) {
@@ -529,6 +663,9 @@ class ParagraphStreamer final : public Print {
       tagState = TAG_IDLE;
     } else if (globalInTag) {
       processByteInTag(c);
+    } else if (nonVisibleDepth > 0) {
+      // Ignore head/style/script/title text. KOReader XPaths are body-relative, and CSS text
+      // should not contribute to intra-spine progress.
     } else {
       if (c == '&') {
         globalInEntity = true;
@@ -762,4 +899,4 @@ std::string ProgressMapper::generateXPath(const std::shared_ptr<Epub>& epub, int
 
   const int p = s.paragraphCount();
   return (p > 0) ? base + "/p[" + std::to_string(p) + "]" : base;
-}
+}