uxjulia · uxjulia · Jun 15, 2026 · Jun 15, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -16,6 +16,7 @@ jobs:
     steps:
       - uses: actions/checkout@v6
         with:
+          token: ${{ secrets.RELEASE_PAT }}
           submodules: recursive
           fetch-depth: 0
 

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## [Unreleased]
 
 ### Added
+- EPUB layout now supports CJK book text with SD-card fonts, including character-level wrapping and basic CJK punctuation line-break rules.
 
 ### Changed
 

diff --git a/lib/Epub/Epub/ParsedText.cpp b/lib/Epub/Epub/ParsedText.cpp
@@ -143,6 +143,102 @@ bool isWordCharacter(uint32_t cp) {
 
 }  // namespace
 
+// Ensures `words` and every parallel per-token vector can hold at least `requiredSize`
+// entries. Returns immediately when `words` already has that much capacity.
+void ParsedText::reserveWordCapacity(const size_t requiredSize) {
+  constexpr size_t MIN_WORD_CAPACITY = 16;
+
+  const size_t currentCapacity = words.capacity();
+  if (requiredSize <= currentCapacity) {
+    return;
+  }
+
+  // Double the current capacity, fall back to the exact requirement when doubling
+  // is still too small, and never reserve fewer than MIN_WORD_CAPACITY slots.
+  const size_t doubledCapacity = currentCapacity * 2;
+  const size_t grownCapacity = doubledCapacity < requiredSize ? requiredSize : doubledCapacity;
+  const size_t targetCapacity = grownCapacity < MIN_WORD_CAPACITY ? MIN_WORD_CAPACITY : grownCapacity;
+
+  // All per-token vectors are reserved to the same size as `words`.
+  words.reserve(targetCapacity);
+  wordStyles.reserve(targetCapacity);
+  wordContinues.reserve(targetCapacity);
+  wordNoBreakBefore.reserve(targetCapacity);
+  wordIsBionicSuffix.reserve(targetCapacity);
+  wordIsGuideDot.reserve(targetCapacity);
+  wordBackgroundBlack.reserve(targetCapacity);
+}
+
+// Appends one token together with its per-token attributes, adding exactly one entry to
+// `words` and to each parallel vector. Empty tokens are ignored and nothing is appended.
+//
+//   token           - text of the token; copied into `words`
+//   style           - font style recorded for the token
+//   continues       - stored in wordContinues (token attaches to the previous one, no space before it)
+//   noBreakBefore   - stored in wordNoBreakBefore (line breaker cannot start a line at this token)
+//   bionicSuffix    - stored in wordIsBionicSuffix
+//   guideDot        - stored in wordIsGuideDot
+//   backgroundBlack - stored in wordBackgroundBlack
+void ParsedText::pushToken(std::string_view token, const EpdFontFamily::Style style, const bool continues,
+                           const bool noBreakBefore, const bool bionicSuffix, const bool guideDot,
+                           const bool backgroundBlack) {
+  if (!token.empty()) {
+    words.emplace_back(token);
+    wordStyles.push_back(style);
+    wordContinues.push_back(continues);
+    wordNoBreakBefore.push_back(noBreakBefore);
+    wordIsBionicSuffix.push_back(bionicSuffix);
+    wordIsGuideDot.push_back(guideDot);
+    wordBackgroundBlack.push_back(backgroundBlack);
+  }
+}
+
+// Splits `word` into tokens when it contains at least one CJK-breakable codepoint.
+//
+// Every CJK-breakable codepoint becomes its own token; each run of other codepoints between
+// them is kept together as a single token. Returns false, without appending anything, when the
+// word contains no CJK-breakable codepoint, so the caller can use its regular path.
+//
+// Flags written per token:
+//   - continues:     `attachToPrevious` for the first emitted token, true for all later ones.
+//   - noBreakBefore: set when the first token attaches to the previous word, when the preceding
+//                    token was CJK opening punctuation, or when the token itself is CJK closing
+//                    punctuation.
+bool ParsedText::addCjkAwareWord(std::string_view word, const EpdFontFamily::Style baseStyle,
+                                 const bool attachToPrevious, const bool backgroundBlack) {
+  const auto* const wordBegin = reinterpret_cast<const unsigned char*>(word.data());
+  const auto* const wordEnd = wordBegin + word.size();
+
+  // Detection pass: stop at the first CJK-breakable codepoint.
+  bool foundCjk = false;
+  for (const unsigned char* probe = wordBegin; probe < wordEnd && !foundCjk;) {
+    foundCjk = utf8IsCjkBreakable(utf8NextCodepoint(&probe));
+  }
+  if (!foundCjk) {
+    return false;
+  }
+
+  // One slot per byte is an upper bound on the number of tokens this word can produce.
+  reserveWordCapacity(words.size() + word.size());
+
+  const unsigned char* runStart = wordBegin;  // start of the pending non-CJK run
+  bool firstPending = true;                   // no token has been emitted for this word yet
+  bool glueToOpening = false;                 // previous token was CJK opening punctuation
+
+  // No-break state inherited from the word's attachment or from a preceding opening punctuation.
+  auto inheritedNoBreak = [&]() { return (firstPending && attachToPrevious) || glueToOpening; };
+
+  // Pushes the bytes in [from, to) as one token and resets the per-token state.
+  auto emitToken = [&](const unsigned char* from, const unsigned char* to, const bool noBreakBefore) {
+    const bool continues = !firstPending || attachToPrevious;
+    pushToken(std::string_view(reinterpret_cast<const char*>(from), to - from), baseStyle, continues, noBreakBefore,
+              false, false, backgroundBlack);
+    firstPending = false;
+    glueToOpening = false;
+  };
+
+  // Emits the pending non-CJK run ending at `runEnd`, if it is non-empty.
+  auto flushRun = [&](const unsigned char* runEnd) {
+    if (runStart < runEnd) {
+      emitToken(runStart, runEnd, inheritedNoBreak());
+    }
+  };
+
+  for (const unsigned char* cursor = wordBegin; cursor < wordEnd;) {
+    const unsigned char* const glyphStart = cursor;
+    const uint32_t cp = utf8NextCodepoint(&cursor);
+    if (utf8IsCjkBreakable(cp)) {
+      // Close the run that precedes this codepoint, then emit the codepoint on its own.
+      flushRun(glyphStart);
+      emitToken(glyphStart, cursor, inheritedNoBreak() || utf8IsCjkClosingPunctuation(cp));
+      glueToOpening = utf8IsCjkOpeningPunctuation(cp);
+      runStart = cursor;
+    }
+  }
+
+  // Trailing non-CJK text after the last CJK codepoint.
+  flushRun(wordEnd);
+  return true;
+}
+
 void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle, const bool underline,
                          const bool attachToPrevious, const bool backgroundBlack) {
   if (word.empty()) return;
@@ -154,24 +250,21 @@ void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle,
   const bool wordStartsRtl = !hasRtlWord && mayContainRtlBytes(word.c_str()) &&
                              BidiUtils::startsWithRtl(word.c_str(), RTL_PER_WORD_PROBE_DEPTH);
 
+  if (addCjkAwareWord(word, baseStyle, attachToPrevious, backgroundBlack)) {
+    if (wordStartsRtl) {
+      hasRtlWord = true;
+    }
+    return;
+  }
+
   // GUIDE READING: insert middle dot (U+00B7) between non-continuation words.
   if (guideReadingEnabled && !attachToPrevious && !words.empty()) {
-    words.emplace_back("\xc2\xb7");
-    wordStyles.push_back(EpdFontFamily::REGULAR);
-    wordContinues.push_back(false);
-    wordIsBionicSuffix.push_back(false);
-    wordIsGuideDot.push_back(true);
-    wordBackgroundBlack.push_back(false);
+    pushToken("\xc2\xb7", EpdFontFamily::REGULAR, false, true, false, true, false);
   }
 
   // Already-bold text should stay fully bold; bionic splitting would make its suffix regular later.
   if (!this->bionicReadingEnabled || (baseStyle & EpdFontFamily::BOLD) != 0) {
-    words.push_back(std::move(word));
-    wordStyles.push_back(baseStyle);
-    wordContinues.push_back(attachToPrevious);
-    wordIsBionicSuffix.push_back(false);
-    wordIsGuideDot.push_back(false);
-    wordBackgroundBlack.push_back(backgroundBlack);
+    pushToken(word, baseStyle, attachToPrevious, attachToPrevious, false, false, backgroundBlack);
     if (wordStartsRtl) {
       hasRtlWord = true;
     }
@@ -183,39 +276,14 @@ void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle,
   // Pre-reserve capacity to prevent mid-word heap reallocations.
   size_t maxPossibleNewTokens = word.length();
   size_t requiredSize = words.size() + maxPossibleNewTokens;
-
-  if (words.capacity() < requiredSize) {
-    // Emulate standard geometric growth (doubling) to ensure we don't reallocate on every word.
-    size_t newCapacity = words.capacity() * 2;
-
-    // Ensure the doubled capacity is actually enough for this specific word
-    if (newCapacity < requiredSize) {
-      newCapacity = requiredSize;
-    }
-    // Set a sensible minimum starting size so the first few words don't trigger tiny reallocations
-    if (newCapacity < 16) {
-      newCapacity = 16;
-    }
-
-    words.reserve(newCapacity);
-    wordStyles.reserve(newCapacity);
-    wordContinues.reserve(newCapacity);
-    wordIsBionicSuffix.reserve(newCapacity);
-    wordIsGuideDot.reserve(newCapacity);
-    wordBackgroundBlack.reserve(newCapacity);
-  }
+  reserveWordCapacity(requiredSize);
 
   // Lambda helper to process and push individual sub-segments of the string
   // Use std::string_view to avoid heap allocations when slicing
   auto processSegment = [&](std::string_view segment, bool isWord, bool attach) {
     if (!isWord) {
       // Punctuation and Numbers stay regular
-      words.emplace_back(segment);
-      wordStyles.push_back(baseStyle);
-      wordContinues.push_back(attach);
-      wordIsBionicSuffix.push_back(false);
-      wordIsGuideDot.push_back(false);
-      wordBackgroundBlack.push_back(backgroundBlack);
+      pushToken(segment, baseStyle, attach, attach, false, false, backgroundBlack);
     } else {
       size_t charCount = 0;
       const unsigned char* countPtr = reinterpret_cast<const unsigned char*>(segment.data());
@@ -233,12 +301,8 @@ void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle,
 
       if (targetBoldChars >= charCount) {
         // Whole segment is bold - no suffix split needed
-        words.emplace_back(segment);
-        wordStyles.push_back(static_cast<EpdFontFamily::Style>(baseStyle | EpdFontFamily::BOLD));
-        wordContinues.push_back(attach);
-        wordIsBionicSuffix.push_back(false);
-        wordIsGuideDot.push_back(false);
-        wordBackgroundBlack.push_back(backgroundBlack);
+        pushToken(segment, static_cast<EpdFontFamily::Style>(baseStyle | EpdFontFamily::BOLD), attach, attach, false,
+                  false, backgroundBlack);
       } else {
         countPtr = reinterpret_cast<const unsigned char*>(segment.data());
         for (size_t i = 0; i < targetBoldChars; ++i) {
@@ -247,20 +311,12 @@ void ParsedText::addWord(std::string word, const EpdFontFamily::Style fontStyle,
         size_t splitByteOffset = countPtr - reinterpret_cast<const unsigned char*>(segment.data());
 
         // Bold prefix
-        words.emplace_back(segment.substr(0, splitByteOffset));
-        wordStyles.push_back(static_cast<EpdFontFamily::Style>(baseStyle | EpdFontFamily::BOLD));
-        wordContinues.push_back(attach);
-        wordIsBionicSuffix.push_back(false);
-        wordIsGuideDot.push_back(false);
-        wordBackgroundBlack.push_back(backgroundBlack);
+        pushToken(segment.substr(0, splitByteOffset),
+                  static_cast<EpdFontFamily::Style>(baseStyle | EpdFontFamily::BOLD), attach, attach, false, false,
+                  backgroundBlack);
 
         // Regular suffix - marked so extractLine can merge it back into one TextBlock entry
-        words.emplace_back(segment.substr(splitByteOffset));
-        wordStyles.push_back(baseStyle);
-        wordContinues.push_back(true);
-        wordIsBionicSuffix.push_back(true);
-        wordIsGuideDot.push_back(false);
-        wordBackgroundBlack.push_back(backgroundBlack);
+        pushToken(segment.substr(splitByteOffset), baseStyle, true, true, true, false, backgroundBlack);
       }
     }
   };
@@ -381,6 +437,7 @@ void ParsedText::layoutAndExtractLines(const GfxRenderer& renderer, const int fo
     words.erase(words.begin(), words.begin() + consumed);
     wordStyles.erase(wordStyles.begin(), wordStyles.begin() + consumed);
     wordContinues.erase(wordContinues.begin(), wordContinues.begin() + consumed);
+    wordNoBreakBefore.erase(wordNoBreakBefore.begin(), wordNoBreakBefore.begin() + consumed);
     wordIsBionicSuffix.erase(wordIsBionicSuffix.begin(), wordIsBionicSuffix.begin() + consumed);
     wordIsGuideDot.erase(wordIsGuideDot.begin(), wordIsGuideDot.begin() + consumed);
     wordBackgroundBlack.erase(wordBackgroundBlack.begin(), wordBackgroundBlack.begin() + consumed);
@@ -405,7 +462,7 @@ std::vector<size_t> ParsedText::computeLineBreaks(const GfxRenderer& renderer, c
   }
 
   auto nextTokenAttaches = [&](const size_t index, const size_t totalWordCount) {
-    return index + 1 < totalWordCount && (continuesVec[index + 1] || wordIsGuideDot[index + 1]);
+    return index + 1 < totalWordCount && (wordNoBreakBefore[index + 1] || wordIsGuideDot[index + 1]);
   };
 
   const int firstLineIndent = resolveFirstLineIndent(true, renderer, fontId);
@@ -524,7 +581,7 @@ std::vector<size_t> ParsedText::computeHyphenatedLineBreaks(const GfxRenderer& r
   size_t currentIndex = 0;
   bool isFirstLine = true;
   auto currentTokenAttaches = [&](const size_t index) {
-    return index < wordWidths.size() && (continuesVec[index] || wordIsGuideDot[index]);
+    return index < wordWidths.size() && (wordNoBreakBefore[index] || wordIsGuideDot[index]);
   };
 
   while (currentIndex < wordWidths.size()) {
@@ -652,6 +709,7 @@ bool ParsedText::hyphenateWordAtIndex(const size_t wordIndex, const int availabl
   // The hyphen remainder is neither a bionic suffix nor a guide dot - it starts fresh on the next line.
   wordIsBionicSuffix.insert(wordIsBionicSuffix.begin() + wordIndex + 1, false);
   wordIsGuideDot.insert(wordIsGuideDot.begin() + wordIndex + 1, false);
+  wordNoBreakBefore.insert(wordNoBreakBefore.begin() + wordIndex + 1, false);
 
   // Continuation flag handling after splitting a word into prefix + remainder.
   //
@@ -726,6 +784,7 @@ bool ParsedText::splitPathologicalTokenAtIndex(const size_t wordIndex, const int
   wordBackgroundBlack.insert(wordBackgroundBlack.begin() + wordIndex + 1, wordBackgroundBlack[wordIndex]);
   wordIsBionicSuffix.insert(wordIsBionicSuffix.begin() + wordIndex + 1, false);
   wordIsGuideDot.insert(wordIsGuideDot.begin() + wordIndex + 1, false);
+  wordNoBreakBefore.insert(wordNoBreakBefore.begin() + wordIndex + 1, false);
   wordContinues.insert(wordContinues.begin() + wordIndex + 1, false);
 
   wordWidths[wordIndex] = static_cast<uint16_t>(chosenWidth);

diff --git a/lib/Epub/Epub/ParsedText.h b/lib/Epub/Epub/ParsedText.h
@@ -5,6 +5,7 @@
 #include <functional>
 #include <memory>
 #include <string>
+#include <string_view>
 #include <vector>
 
 #include "blocks/BlockStyle.h"
@@ -16,6 +17,7 @@ class ParsedText {
   std::vector<std::string> words;
   std::vector<EpdFontFamily::Style> wordStyles;
   std::vector<bool> wordContinues;       // true = word attaches to previous (no space before it)
+  std::vector<bool> wordNoBreakBefore;   // true = line breaker cannot start a line at this token
   std::vector<bool> wordIsBionicSuffix;  // true = token is the regular tail of a bionic bold-prefix split
   std::vector<bool> wordIsGuideDot;      // true = token is a guide dot (U+00B7) inserted between words
   std::vector<uint8_t> wordBackgroundBlack;
@@ -35,6 +37,11 @@ class ParsedText {
   std::vector<uint8_t> reorderedBackgroundBlackScratch;
   std::vector<uint16_t> visualOrderScratch;
 
+  void reserveWordCapacity(size_t requiredSize);
+  void pushToken(std::string_view token, EpdFontFamily::Style style, bool continues, bool noBreakBefore,
+                 bool bionicSuffix, bool guideDot, bool backgroundBlack);
+  bool addCjkAwareWord(std::string_view word, EpdFontFamily::Style baseStyle, bool attachToPrevious,
+                       bool backgroundBlack);
   int resolveFirstLineIndent(bool isFirstLine, const GfxRenderer& renderer, int fontId) const;
   std::vector<size_t> computeLineBreaks(const GfxRenderer& renderer, int fontId, int pageWidth,
                                         std::vector<uint16_t>& wordWidths, std::vector<bool>& continuesVec);

diff --git a/lib/Epub/Epub/Section.cpp b/lib/Epub/Epub/Section.cpp
@@ -5,6 +5,7 @@
 #include <Logging.h>
 #include <MemoryBudget.h>
 #include <Serialization.h>
+#include <Utf8.h>
 
 #include "Epub/css/CssParser.h"
 #include "Page.h"
@@ -13,7 +14,7 @@
 
 namespace {
 constexpr uint32_t SECTION_CACHE_MAGIC = 0x535843FF;  // bytes: 0xFF, "CXS"
-constexpr uint8_t SECTION_FILE_VERSION = 40;
+constexpr uint8_t SECTION_FILE_VERSION = 41;
 constexpr uint8_t INITIAL_PAGE_LUT_RESERVE = 32;
 constexpr uint32_t HEADER_SIZE = sizeof(SECTION_CACHE_MAGIC) + sizeof(uint8_t) + sizeof(int) + sizeof(float) +
                                  sizeof(bool) + sizeof(bool) + sizeof(uint8_t) + sizeof(uint16_t) + sizeof(uint16_t) +
@@ -26,6 +27,10 @@ struct PageLutEntry {
   uint16_t paragraphIndex;
   uint16_t listItemIndex;
 };
+
+// Returns true when `epub` is non-null and its language tag is classified as CJK by
+// utf8LanguageTagIsCjk(). Callers use the result to switch off bionic and guide reading.
+bool shouldDisableLatinReadingAssistForCjk(const std::shared_ptr<Epub>& epub) {
+  if (!epub) {
+    return false;
+  }
+  return utf8LanguageTagIsCjk(epub->getLanguage());
+}
 }  // namespace
 
 uint32_t Section::onPageComplete(std::unique_ptr<Page> page) {
@@ -89,6 +94,9 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con
                               const uint16_t viewportWidth, const uint16_t viewportHeight,
                               const bool hyphenationEnabled, const bool embeddedStyle, const uint8_t imageRendering,
                               const bool bionicReadingEnabled, const bool guideReadingEnabled) {
+  const bool cjkOptimized = shouldDisableLatinReadingAssistForCjk(epub);
+  const bool effectiveBionicReadingEnabled = bionicReadingEnabled && !cjkOptimized;
+  const bool effectiveGuideReadingEnabled = guideReadingEnabled && !cjkOptimized;
   if (!Storage.openFileForRead("SCT", filePath, file)) {
     return false;
   }
@@ -155,7 +163,8 @@ bool Section::loadSectionFile(const int fontId, const float lineCompression, con
         paragraphAlignment != fileParagraphAlignment || viewportWidth != fileViewportWidth ||
         viewportHeight != fileViewportHeight || hyphenationEnabled != fileHyphenationEnabled ||
         embeddedStyle != fileEmbeddedStyle || imageRendering != fileImageRendering ||
-        bionicReadingEnabled != fileBionicReadingEnabled || guideReadingEnabled != fileGuideReadingEnabled) {
+        effectiveBionicReadingEnabled != fileBionicReadingEnabled ||
+        effectiveGuideReadingEnabled != fileGuideReadingEnabled) {
       file.close();
       LOG_ERR("SCT", "Deserialization failed: Parameters do not match");
       clearCache();
@@ -203,9 +212,16 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
   const auto tmpSectionPath = filePath + ".tmp";
   pageCount = 0;
   if (layoutAbortedForLowMemory) *layoutAbortedForLowMemory = false;
+  const bool cjkOptimized = shouldDisableLatinReadingAssistForCjk(epub);
+  const bool effectiveBionicReadingEnabled = bionicReadingEnabled && !cjkOptimized;
+  const bool effectiveGuideReadingEnabled = guideReadingEnabled && !cjkOptimized;
   LOG_DBG("SCT", "Create section start: spine=%d viewport=%ux%u image=%u bionic=%u guide=%u free=%u maxAlloc=%u",
-          spineIndex, viewportWidth, viewportHeight, imageRendering, bionicReadingEnabled, guideReadingEnabled,
-          ESP.getFreeHeap(), ESP.getMaxAllocHeap());
+          spineIndex, viewportWidth, viewportHeight, imageRendering, effectiveBionicReadingEnabled,
+          effectiveGuideReadingEnabled, ESP.getFreeHeap(), ESP.getMaxAllocHeap());
+  if (cjkOptimized && (bionicReadingEnabled || guideReadingEnabled)) {
+    LOG_DBG("SCT", "CJK language detected (%s); disabling bionic/guide reading for section layout",
+            epub->getLanguage().c_str());
+  }
 
   // Create cache directory if it doesn't exist
   {
@@ -260,7 +276,7 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
   }
   if (!writeSectionFileHeader(fontId, lineCompression, extraParagraphSpacing, forceParagraphIndents, paragraphAlignment,
                               viewportWidth, viewportHeight, hyphenationEnabled, embeddedStyle, imageRendering,
-                              bionicReadingEnabled, guideReadingEnabled)) {
+                              effectiveBionicReadingEnabled, effectiveGuideReadingEnabled)) {
     LOG_ERR("SCT", "Failed to write section header");
     file.close();
     Storage.remove(tmpSectionPath.c_str());
@@ -308,7 +324,8 @@ bool Section::createSectionFile(const int fontId, const float lineCompression, c
 
   ChapterHtmlSlimParser visitor(
       epub, tmpHtmlPath, renderer, fontId, lineCompression, extraParagraphSpacing, forceParagraphIndents,
-      paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, bionicReadingEnabled, guideReadingEnabled,
+      paragraphAlignment, viewportWidth, viewportHeight, hyphenationEnabled, effectiveBionicReadingEnabled,
+      effectiveGuideReadingEnabled,
       [this, &lut](std::unique_ptr<Page> page, const uint16_t paragraphIndex, const uint16_t listItemIndex) {
         lut.push_back({this->onPageComplete(std::move(page)), paragraphIndex, listItemIndex});
       },

diff --git a/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp b/lib/Epub/Epub/parsers/ChapterHtmlSlimParser.cpp
@@ -1831,7 +1831,7 @@ void XMLCALL ChapterHtmlSlimParser::characterData(void* userData, const XML_Char
     }
 
     // If we're about to run out of space, then cut the word off and start a new one.
-    // For CJK text (no spaces), this is the primary word-breaking mechanism.
+    // ParsedText performs CJK-aware tokenization once a complete UTF-8 chunk has been flushed here.
     // We must avoid splitting multi-byte UTF-8 sequences across word boundaries,
     // otherwise the trailing bytes become orphaned continuation bytes that the
     // decoder can't interpret.