diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..5d686a63 --- /dev/null +++ b/.env.example @@ -0,0 +1,8 @@ +# Ollama Configuration +# Sign in once with: ollama signin +# Example endpoint: http://localhost:11434/v1 +OLLAMA_BASE_URL=http://localhost:11434/v1 +# Ollama's local OpenAI-compatible endpoint usually accepts this placeholder key +OLLAMA_API_KEY=ollama +OLLAMA_MODEL=llama3 +OLLAMA_CLOUD_MODEL=gpt-oss:120b-cloud diff --git a/.gitignore b/.gitignore index 9e1d25d4..566b28ff 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,38 @@ wheels/ # Virtual environments .venv +# IDE settings +.vscode/ +.idea/ + # Custom -*_data/ *.epub + +# Books directory (but keep the folder structure) +books/* +!books/.gitkeep + +# Temp directory for uploads +temp/ +uvtmp/ +.codex-commit-work/ + +# AI Features & Data +.env +reader_data.db +test.db + +# Backup files +backups/ +*.db.backup + +# Export files +reader_data_*.json +highlights_*.csv +ai_analyses_*.csv +report_*.txt + +# OS files +.DS_Store +Thumbs.db +desktop.ini diff --git a/README.md b/README.md index 5d868d7b..2ef4ae35 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,241 @@ -# reader 3 +# Reader3 - EPUB Reader with AI Analysis -![reader3](reader3.png) +A lightweight, self-hosted EPUB reader with integrated AI analysis capabilities. -A lightweight, self-hosted EPUB reader that lets you read through EPUB books one chapter at a time. This makes it very easy to copy paste the contents of a chapter to an LLM, to read along. Basically - get epub books (e.g. [Project Gutenberg](https://www.gutenberg.org/) has many), open them up in this reader, copy paste text around to your favorite LLM, and read together and along. +## Features -This project was 90% vibe coded just to illustrate how one can very easily [read books together with LLMs](https://x.com/karpathy/status/1990577951671509438). 
I'm not going to support it in any way, it's provided here as is for other people's inspiration and I don't intend to improve it. Code is ephemeral now and libraries are over, ask your LLM to change it in whatever way you like. +### Reading Experience +- 📚 **Clean Layout** - Three-column design (TOC, Content, AI Panel) +- 📖 **Sticky Navigation** - Top navigation bar stays visible while scrolling +- ⌨️ **Keyboard Shortcuts** - Arrow keys for prev/next chapter, ESC to close panels +- 🔗 **Internal Links** - Footnotes and author comments open in modal popups +- 🎯 **Clickable Covers** - Click book covers to start reading instantly -## Usage +### AI & Annotations +- 🤖 **AI Analysis** - Right-click on text for fact-checking or discussion (Ollama local or Cloud) +- 💬 **Personal Comments** - Add your own notes without AI (no API cost) +- 💾 **Manual Save** - Choose what to save to avoid clutter +- ✨ **Color-Coded Highlights** - Yellow (fact check), Blue (discussion), Green (comments) +- 🏷️ **Smart Tooltips** - Hover over highlights to see type +- 🗑️ **Edit & Delete** - Manage all your highlights and comments +- 🎨 **Markdown Support** - AI responses render with proper formatting + +### Library & Organization +- 📝 **Highlights View** - See all your notes and analyses for each book +- 📤 **Export to Markdown** - Export highlights with AI context warnings +- 🌐 **Web Upload** - Upload EPUB files via click or drag & drop +- 🖼️ **Cover Images** - Automatic cover extraction and display +- 🔍 **Search & Filters** - Search by title/author, filter by title initial, or show unfinished books only +- ✅ **Completion Tracking** - Mark books complete and keep completed titles visually distinct +- 🔤 **Mixed-Language Sorting** - English titles sort alphabetically, Chinese titles sort by pinyin initials +- 📏 **Estimated Word Count** - Each book card shows a quick reading-length estimate +- ⚙️ **Compact Library Settings** - AI provider and view controls are tucked into a collapsible settings 
panel +- 🗂️ **Organized Storage** - All books in `books/` directory, data in SQLite + +## Quick Start + +### 1. Configure Ollama + +Edit `.env` file: +```bash +# Ollama +OLLAMA_BASE_URL=http://localhost:11434/v1 +OLLAMA_API_KEY=ollama +OLLAMA_MODEL=llama3 +OLLAMA_CLOUD_MODEL=gpt-oss:120b-cloud +``` -The project uses [uv](https://docs.astral.sh/uv/). So for example, download [Dracula EPUB3](https://www.gutenberg.org/ebooks/345) to this directory as `dracula.epub`, then: +Then sign your Ollama daemon into Ollama Cloud once: ```bash -uv run reader3.py dracula.epub +ollama signin ``` -This creates the directory `dracula_data`, which registers the book to your local library. We can then run the server: +### 2. Add Books + +**Option A: Upload via Web Interface (Recommended)** +1. Start server: `uv run server.py` +2. Open http://127.0.0.1:8123 +3. Click the "+" card OR drag & drop EPUB file +4. Wait for automatic processing + +The upload path processes EPUBs with the same Python interpreter running the server, so adding books does not depend on a separate `uv` executable being available at request time. + +**Option B: Command Line** +```bash +uv run reader3.py your_book.epub +``` + +### 3. Start Server ```bash uv run server.py ``` -And visit [localhost:8123](http://localhost:8123/) to see your current Library. You can easily add more books, or delete them from your library by deleting the folder. It's not supposed to be complicated or complex. +The server listens on `0.0.0.0:8123` by default so other devices on your LAN can reach it. +You can override that with: + +```bash +READER_HOST=0.0.0.0 READER_PORT=8123 uv run server.py +``` + +### 4. Read and Analyze + +1. Open http://127.0.0.1:8123 +2. Select a book +3. Right-click on text → Choose analysis type +4. Review AI response in side panel +5. Save if important +6. Highlights appear on next visit! 
+ +## Usage + +### AI Analysis +- Select text → Right-click → Choose: + - **📋 Fact Check** - Verify facts and get context + - **💡 Discussion** - Deep analysis and insights + - **💬 Add Comment** - Your personal notes (no AI) +- View response in right panel +- Click "Save" for important insights + +### Highlights +- **Yellow** - Fact checks +- **Blue** - Discussions +- **Green** - Your comments +- Hover to see type, click to view/edit +- All highlights are editable and deletable + +### View & Export Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all your notes and analyses in one page +- Filter by type (Fact Check, Discussion, Comment) +- Export to markdown for AI processing +- Context length warnings for large exports +- Jump directly to any chapter + +### Library Browsing +- Use the alphabet bar under the search field to filter the grid by title initial +- The title filter uses pinyin initials for Chinese books, so `三体` appears under `S` +- Open `Settings` to switch AI Provider or toggle between all books and unfinished books +- Use the ⋮ menu on a book card to mark it complete or incomplete +- Completed books keep a green progress indicator and a dimmed cover treatment + +### Keyboard Shortcuts +- **← →** - Navigate between chapters +- **ESC** - Close panels and modals +- Works anywhere except when typing in text fields + +## Project Structure + +``` +reader3/ +├── reader3.py # EPUB processor +├── server.py # Web server +├── database.py # SQLite operations +├── ai_service.py # AI integration +├── books/ # All book data here +│ └── book_name_data/ +│ ├── book.pkl +│ └── images/ +├── templates/ # HTML templates +├── reader_data.db # SQLite database +└── .env # API configuration +``` + +## Data Management + +### View Your Highlights +- Click ⋮ menu on any book → "View Highlights" +- See all notes, comments, and analyses in one page +- Filter by type and jump to chapters + +### View Database (Advanced) +```bash +uv run check_database.py +``` + +### 
Backup +```bash +# Double-click: backup.bat +# Or manually: +copy reader_data.db backups\reader_data_backup.db +``` + +## Tools + +- `check_database.py` - View raw database contents (advanced) +- `backup.bat` - Quick database backup + +## Why Ollama Cloud? + +- ✅ Uses the same Ollama workflow as local models +- ✅ Lets you use larger hosted models without a local GPU +- ✅ Keeps one provider for both local and cloud modes +- ✅ Works through Ollama's OpenAI-compatible endpoint + +## Troubleshooting + +### API Key Error +1. Check `.env` file exists and has correct key +2. Restart server + +### No Highlights Showing +1. Check browser console (F12) for errors +2. Verify data exists: `uv run check_database.py` +3. Hard refresh (Ctrl+Shift+R) + +### Upload Says A Tool Is Missing +Recent versions process uploads with the server's active Python interpreter. If uploads still fail after pulling changes, restart the server or systemd service so it picks up the new upload path. + +### Server Won't Start +1. Check if port 8123 is available +2. Verify `.env` configuration + +## Run At Startup On Linux + +This repo includes a systemd unit template and installer so the app can start on boot. +The installed service runs the app with `uv run server.py`, matching the normal development command. + +### 1. Install dependencies + +```bash +uv sync +``` + +### 2. Install the systemd service + +```bash +sudo ./scripts/install-systemd-service.sh +``` + +This installs [deploy/reader3.service](deploy/reader3.service), enables it, and starts it immediately. + +### 3. Check service status + +```bash +systemctl status reader3.service +``` + +### 4. Open the port on the machine firewall if needed + +If you use UFW: + +```bash +sudo ufw allow 8123/tcp +``` + +Then browse to `http://<machine-ip>:8123` from another device on your home network (see step 5 for how to find the IP). + +### 5. 
Find the machine IP + +```bash +hostname -I +``` ## License -MIT \ No newline at end of file +MIT + +--- + +**Note**: This project is designed to be simple and hackable. Ask your LLM to modify it however you like! diff --git a/TECHNICAL_CHALLENGES.md b/TECHNICAL_CHALLENGES.md new file mode 100644 index 00000000..7f1a7175 --- /dev/null +++ b/TECHNICAL_CHALLENGES.md @@ -0,0 +1,193 @@ +# Technical Challenges Solved + +This document outlines the key technical challenges we encountered and solved while building this AI-powered EPUB reader. + +## 1. EPUB Cover Image Extraction + +**Challenge**: Cover images weren't being extracted from EPUB files. Some books had covers marked as `ITEM_COVER` type instead of `ITEM_IMAGE`, causing them to be skipped. + +**Solution**: +- Modified image extraction to handle both `ITEM_COVER` and `ITEM_IMAGE` types +- Implemented multi-method cover detection: check ITEM_COVER type → search by filename pattern → use first large image as fallback +- Added size filtering (>10KB) to avoid using small icons as covers + +**Code**: `reader3.py` lines 190-230 + +## 2. Multi-Paragraph Text Highlighting + +**Challenge**: When users highlighted text spanning multiple paragraphs, the highlight wouldn't display because wrapping `<p>` tags in a `<span>` creates invalid HTML that browsers reject. + +**Solution**: +- Detect when highlighted text spans block elements +- Apply highlight class directly to the paragraph elements instead of wrapping +- Use Range API with whitespace-tolerant regex matching to handle text across multiple elements +- Normalize whitespace in search patterns to handle variations in HTML structure + +**Code**: `templates/reader.html` - `applyHighlights()` and `findTextRange()` functions + +## 3. FastAPI Route Ordering for Image Serving + +**Challenge**: Image URLs like `/read/{book_id}/images/{image_name}` were returning 404 because the catch-all route `/read/{book_id}/{chapter_ref:path}` was matching first. + +**Solution**: +- Moved the specific image route definition before the generic chapter route +- FastAPI matches routes in order, so more specific routes must come first +- Also fixed path handling to preserve spaces in book folder names (removed incorrect `os.path.basename()` usage) + +**Code**: `server.py` - route ordering around line 125-175 + +## 4. Reading Progress with Precise Scroll Position + +**Challenge**: +- `scrollTop` was always returning 0 when read directly +- `beforeunload` event doesn't fire reliably +- Need to track exact scroll position within chapters, not just chapter numbers + +**Solution**: +- Use scroll event listener to continuously track `currentScrollPosition` variable +- Intercept navigation clicks with `preventDefault()` to ensure save completes before navigation +- Add `pagehide` event as backup for mobile browsers +- Store both chapter index and scroll position in database +- Implement retry mechanism for scroll restoration to handle content loading delays + +**Code**: `templates/reader.html` - scroll tracking and `saveProgress()` function + +## 5. Database Schema Migration + +**Challenge**: Adding `scroll_position` column to existing `reading_progress` table without breaking existing data. 
+ +**Solution**: +- Created migration script that checks if column exists before adding +- Used `ALTER TABLE ADD COLUMN` with `DEFAULT 0` for backward compatibility +- Gracefully handles both new installations and existing databases + +**Code**: `migrate_progress.py` + +## 6. AI Prompt Engineering for Reading Context + +**Challenge**: Generic AI prompts weren't providing useful reading assistance. Needed different types of help for different reading scenarios. + +**Solution**: +- Split into two distinct functions: + - **解释说明 (Explanation)**: Quick lookups for terms, people, events, concepts + - **深入讨论 (Discussion)**: Academic analysis with theoretical frameworks and critical thinking +- Structured prompts with clear dimensions (论点解析, 理论视角, 批判思考, 启发问题) +- Removed context parameter from fact-check to keep it focused and fast + +**Code**: `ai_service.py` - `fact_check()` and `discuss()` methods + +## 7. Dark Mode Implementation + +**Challenge**: Implementing comprehensive dark mode across all pages with proper contrast and readability. + +**Solution**: +- Used CSS class toggle (`body.dark-mode`) instead of media queries for user control +- Defined dark mode colors for every UI element including highlights, progress bars, modals +- Persisted theme preference in localStorage +- Synchronized theme across all pages (library, reader, highlights) +- Used `!important` for highlight colors to override inline styles + +**Code**: All template files - CSS dark mode sections + +## 8. TOC Auto-Scroll to Active Item + +**Challenge**: When opening a book mid-way through, the TOC sidebar didn't show the current chapter, requiring manual scrolling. + +**Solution**: +- Calculate active TOC item position using `offsetTop` +- Scroll sidebar to center the active item in viewport +- Execute after DOM load to ensure elements are rendered + +**Code**: `templates/reader.html` - TOC auto-scroll in DOMContentLoaded + +## 9. 
Book Detection Without Naming Convention + +**Challenge**: Initially required `_data` suffix in folder names, limiting flexibility and creating ugly folder names. + +**Solution**: +- Changed detection from filename pattern matching to presence of `book.pkl` file +- Updated library scanning to check for file existence instead of name patterns +- Maintained backward compatibility with old `_data` folders + +**Code**: `server.py` - `library_view()` function + +## 10. Whitespace-Tolerant Text Matching + +**Challenge**: Saved highlights couldn't be found when text spanned multiple paragraphs due to whitespace differences (newlines, multiple spaces). + +**Solution**: +- Created regex pattern that replaces `\s+` in search text with `\s+` pattern +- Allows flexible matching of any whitespace sequence +- Escapes special regex characters in user text before pattern creation +- Falls back to exact match first for performance + +**Code**: `templates/reader.html` - `findTextRange()` function + +## 11. Mixed English/Chinese Library Sorting + +**Challenge**: The library needed title-based navigation that felt natural for both English and Chinese books. A plain Unicode sort would scatter Chinese titles in a way that was hard to browse. + +**Solution**: +- Normalized titles before sorting by stripping leading symbols and common English articles (`the`, `a`, `an`) +- Added pinyin transliteration for Chinese titles using `pypinyin` +- Derived a stable title-group key from the transliterated form so English and Chinese books share the same alphabet filter model +- Kept a fallback path when transliteration is unavailable so the library still renders safely + +**Code**: `server.py` - `normalize_title_for_sort()`, `transliterate_for_sort()`, `title_group_key()` + +## 12. 
Upload Processing Without Runtime `uv` + +**Challenge**: Uploading books through the web UI failed in environments where the server was running correctly but the `uv` executable was not available in the request-time PATH. + +**Solution**: +- Replaced the upload subprocess call from `uv run reader3.py ...` to `sys.executable reader3.py ...` +- Ensured uploaded books are processed by the exact same Python environment that is already running the FastAPI app +- Removed a brittle runtime dependency while keeping the normal CLI workflow intact + +**Code**: `server.py` - `/upload` endpoint + +## 13. Flat Library Navigation With Alphabet Filter + +**Challenge**: Sectioned alphabetical grouping made the landing page feel heavier than necessary, but the library still needed faster navigation as the number of books grew. + +**Solution**: +- Flattened the card grid back to a single list for simpler scanning +- Turned the alphabet bar into an active filter rather than a jump list +- Combined title-initial filtering with existing search and unfinished-only filtering in one client-side pass +- Moved less-frequently used controls into a collapsible settings panel to reduce clutter at the top of the page + +**Code**: `templates/library.html` - alphabet filter UI and `filterBooks()` + +--- + +## Key Technologies Used + +- **FastAPI**: Async web framework with automatic API documentation +- **SQLite**: Lightweight database for highlights and progress +- **ebooklib**: EPUB parsing and extraction +- **BeautifulSoup**: HTML processing and cleaning +- **MathJax**: Mathematical equation rendering +- **Marked.js**: Markdown rendering for AI responses +- **Jinja2**: Server-side templating +- **Vanilla JavaScript**: No framework dependencies for frontend + +## Architecture Decisions + +1. **Server-side rendering** for initial page load (SEO-friendly, fast first paint) +2. **Client-side interactivity** for highlights and AI features (responsive UX) +3. 
**SQLite for data** (simple, portable, no separate database server) +4. **Pickle for book data** (fast serialization, preserves Python objects) +5. **localStorage for preferences** (theme, font settings persist across sessions) +6. **Event-driven progress saving** (reliable, doesn't interfere with reading) + +## Performance Optimizations + +- **LRU cache** for book loading (avoid repeated disk reads) +- **Lazy AI service initialization** (only load when needed) +- **Async/await** throughout (non-blocking I/O) +- **keepalive flag** on fetch requests (ensures completion on page unload) +- **Debounced scroll tracking** (via event listener, not polling) + +--- + +*This document serves as a reference for understanding the technical depth and problem-solving approaches used in this project.* diff --git a/ai_service.py b/ai_service.py new file mode 100644 index 00000000..627020db --- /dev/null +++ b/ai_service.py @@ -0,0 +1,132 @@ +""" +AI service for fact-checking and discussion. +Supports Ollama local and Ollama Cloud providers. 
class AIService:
    """Client for Ollama's OpenAI-compatible chat endpoint (local and cloud).

    Both providers share one base URL and API key; the provider only selects
    the default model name and the system prompt that is sent with a request.
    Configuration comes from the constructor arguments first, then from the
    ``OLLAMA_*`` environment variables, then from built-in defaults.
    """

    _DEFAULT_BASE_URL = "http://localhost:11434/v1"
    _DEFAULT_API_KEY = "ollama"
    _DEFAULT_MODEL = "llama3"
    _DEFAULT_CLOUD_MODEL = "gpt-oss:120b-cloud"
    _SUPPORTED_PROVIDERS = ("ollama", "ollama_cloud")

    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
        """Resolve connection settings.

        Args:
            api_key: Bearer token; falls back to ``OLLAMA_API_KEY``.
            base_url: Endpoint root; falls back to ``OLLAMA_BASE_URL``.
        """
        # `or` (rather than a getenv default) so that a variable that is set
        # but empty, e.g. `OLLAMA_MODEL=` in .env, still gets the default
        # instead of producing an empty URL/model/key.
        self.ollama_base_url = (
            base_url or os.getenv("OLLAMA_BASE_URL") or self._DEFAULT_BASE_URL
        )
        self.ollama_api_key = (
            api_key or os.getenv("OLLAMA_API_KEY") or self._DEFAULT_API_KEY
        )
        self.ollama_model = os.getenv("OLLAMA_MODEL") or self._DEFAULT_MODEL
        self.ollama_cloud_model = (
            os.getenv("OLLAMA_CLOUD_MODEL") or self._DEFAULT_CLOUD_MODEL
        )

    def _get_connection_params(self, provider: str, ollama_model: Optional[str]) -> tuple[str, str, str]:
        """Return ``(base_url, api_key, model)`` for the given provider.

        An explicit ``ollama_model`` always wins; otherwise the cloud default
        is used for ``"ollama_cloud"`` and the local default for anything else.
        """
        if provider == "ollama_cloud":
            model = ollama_model or self.ollama_cloud_model
        else:
            model = ollama_model or self.ollama_model

        # Strip trailing slashes so the caller's f"{base_url}/chat/completions"
        # never yields ".../v1//chat/completions".
        return self.ollama_base_url.rstrip("/"), self.ollama_api_key, model

    def _build_messages(self, prompt: str, provider: str) -> list[dict[str, str]]:
        """Build the chat message list: a Chinese-output system prompt + the user prompt.

        The local provider gets the stricter system prompt; every other
        provider value gets the shorter one.
        """
        if provider == "ollama":
            system_prompt = (
                "你是中文阅读助手。必须仅使用简体中文回答。"
                "不要输出英文句子,不要输出英文小标题;如需术语请给出中文解释。"
            )
        else:
            system_prompt = "请使用简体中文回答,保持表达清晰、准确。"

        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]

    async def fact_check(self, text: str, context: str = "",
                         provider: str = "ollama", ollama_model: Optional[str] = None) -> str:
        """Ask the model for a quick explanation / fact check of ``text``.

        Args:
            text: The passage the reader selected.
            context: Accepted for interface compatibility; not included in
                the prompt.
            provider: ``"ollama"`` or ``"ollama_cloud"``.
            ollama_model: Optional model override.

        Returns:
            The model's answer, or an error message string if the call failed.
        """
        prompt = f"""请帮我理解以下内容:

{text}

请根据内容类型提供相应的解释:

**如果是专有名词/概念**:给出清晰的定义和解释
**如果是人物**:介绍其身份、背景和重要性
**如果是历史事件**:说明事件经过、时间、影响
**如果是地点**:介绍其地理位置、特点、相关背景
**如果是数据/事实陈述**:验证准确性,提供来源或背景

要求:
- 简洁明了,重点突出
- 如有错误或争议,明确指出
- 如果内容不完整或无法判断,说明需要更多上下文"""

        return await self._call_api(prompt, provider=provider, ollama_model=ollama_model)

    async def discuss(self, text: str, context: str = "",
                      provider: str = "ollama", ollama_model: Optional[str] = None) -> str:
        """Ask the model for an in-depth, academic-style discussion of ``text``.

        Args:
            text: The passage the reader selected.
            context: Accepted for interface compatibility; not included in
                the prompt.
            provider: ``"ollama"`` or ``"ollama_cloud"``.
            ollama_model: Optional model override.

        Returns:
            The model's answer, or an error message string if the call failed.
        """
        prompt = f"""请对以下文本进行深入的学术性分析和讨论:

{text}

请从以下几个维度展开分析:

**1. 核心论点解析**
- 作者的主要观点是什么?
- 论证逻辑和结构如何?
- 使用了哪些论证方法(举例、类比、引用等)?

**2. 理论与学术视角**
- 这段文本涉及哪些学术领域或理论框架?
- 与哪些经典理论、学派或学者的观点相关?
- 在学术史或思想史上的位置如何?

**3. 批判性思考**
- 论证是否充分?有无逻辑漏洞?
- 是否存在隐含的假设或前提?
- 可能的反驳观点是什么?

**4. 启发性问题**
- 这段文本引发了哪些值得深入思考的问题?
- 如何将这些观点应用到其他领域或情境?
- 对当代有什么启示意义?

要求:
- 保持学术严谨性,但避免过于晦涩
- 提出具有启发性的问题,引导深入思考
- 如涉及专业术语,简要解释
- 鼓励多角度、批判性的思考"""

        return await self._call_api(prompt, provider=provider, ollama_model=ollama_model)

    async def _call_api(self, prompt: str, provider: str = "ollama",
                        ollama_model: Optional[str] = None) -> str:
        """POST ``prompt`` to ``{base_url}/chat/completions`` and return the reply text.

        Failures are reported as a returned message string rather than raised,
        so callers can show the result directly.
        """
        provider = (provider or "ollama").lower()
        if provider not in self._SUPPORTED_PROVIDERS:
            return "不支持的AI提供商。"

        base_url, api_key, model = self._get_connection_params(provider, ollama_model)

        async with httpx.AsyncClient(timeout=60.0) as client:
            try:
                response = await client.post(
                    f"{base_url}/chat/completions",
                    headers={
                        "Authorization": f"Bearer {api_key}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": model,
                        "messages": self._build_messages(prompt, provider),
                        "temperature": 0.7
                    }
                )
                response.raise_for_status()
                data = response.json()
                return data["choices"][0]["message"]["content"]

            except httpx.HTTPError as e:
                return f"API调用失败: {str(e)}"
            except Exception as e:
                # Deliberate best-effort: a malformed payload (missing keys,
                # invalid JSON) is reported as text instead of crashing.
                return f"处理失败: {str(e)}"
"""Print a summary of the reader database contents."""

db_path = "reader_data.db"

_RULE = "=" * 60
_THIN_RULE = "-" * 60


def _report_highlights(cursor) -> None:
    """Print the highlight count and the five most recent highlights."""
    print("📚 Highlights (高亮) 表:")
    print(_THIN_RULE)
    cursor.execute("SELECT COUNT(*) FROM highlights")
    count = cursor.fetchone()[0]
    print(f"总记录数: {count}")

    if count > 0:
        cursor.execute("""
            SELECT id, book_id, chapter_index,
                   substr(selected_text, 1, 50) as text_preview,
                   created_at
            FROM highlights
            ORDER BY created_at DESC
            LIMIT 5
        """)

        print("\n最近的5条记录:")
        for row in cursor.fetchall():
            print(f"\nID: {row[0]}")
            print(f" 书籍: {row[1]}")
            print(f" 章节: {row[2]}")
            print(f" 文本: {row[3]}...")
            print(f" 时间: {row[4]}")


def _report_analyses(cursor) -> None:
    """Print the analysis count and the five most recent AI analyses."""
    print("🤖 AI Analyses (AI分析) 表:")
    print(_THIN_RULE)
    cursor.execute("SELECT COUNT(*) FROM ai_analyses")
    count = cursor.fetchone()[0]
    print(f"总记录数: {count}")

    if count > 0:
        cursor.execute("""
            SELECT id, highlight_id, analysis_type,
                   substr(prompt, 1, 50) as prompt_preview,
                   substr(response, 1, 100) as response_preview,
                   created_at
            FROM ai_analyses
            ORDER BY created_at DESC
            LIMIT 5
        """)

        print("\n最近的5条记录:")
        for row in cursor.fetchall():
            print(f"\nID: {row[0]}")
            print(f" 关联高亮ID: {row[1]}")
            print(f" 分析类型: {row[2]}")
            print(f" 提示: {row[3]}...")
            print(f" 响应: {row[4]}...")
            print(f" 时间: {row[5]}")


def _report_statistics(cursor) -> None:
    """Print how many analyses exist per analysis type."""
    print("📊 统计信息:")
    print(_THIN_RULE)

    cursor.execute("""
        SELECT analysis_type, COUNT(*)
        FROM ai_analyses
        GROUP BY analysis_type
    """)
    stats = cursor.fetchall()

    if stats:
        print("\n按分析类型统计:")
        for row in stats:
            print(f" {row[0]}: {row[1]} 条")
    else:
        print(" 暂无数据")


def main(path: str = db_path) -> None:
    """Print the database report for ``path``.

    A missing file or an unreadable/incomplete database is reported as a
    message instead of a traceback, and the connection is always closed.
    """
    import os  # local import: only this entry point needs it

    print(_RULE)
    print("数据库内容检查")
    print(_RULE)
    print(f"\n数据库位置: {path}")
    print()

    # sqlite3.connect() would silently create an empty database file here,
    # so check first instead of leaving a stray file behind.
    if not os.path.isfile(path):
        print(f"✗ 未找到数据库文件: {path}")
        return

    conn = sqlite3.connect(path)
    try:
        cursor = conn.cursor()
        _report_highlights(cursor)
        print("\n" + _RULE)
        _report_analyses(cursor)
        print("\n" + _RULE)
        _report_statistics(cursor)
    except sqlite3.Error as exc:
        # e.g. the tables have not been created yet
        print(f"✗ 数据库读取失败: {exc}")
        return
    finally:
        conn.close()

    print("\n" + _RULE)
    print("✓ 检查完成")
    print(_RULE)


if __name__ == "__main__":
    main()
@dataclass
class Highlight:
    """A text selection the user highlighted in one chapter of a book."""
    id: Optional[int] = None  # assigned by SQLite on insert
    book_id: str = ""
    chapter_index: int = 0
    selected_text: str = ""
    context_before: str = ""
    context_after: str = ""
    created_at: str = ""  # empty -> Database.save_highlight stamps the current time


@dataclass
class AIAnalysis:
    """An analysis record attached to a highlight."""
    id: Optional[int] = None  # assigned by SQLite on insert
    highlight_id: int = 0
    analysis_type: str = ""  # 'fact_check' or 'discussion'
    prompt: str = ""
    response: str = ""
    created_at: str = ""  # empty -> Database.save_analysis stamps the current time


class _Transaction:
    """Context manager that opens a connection for one unit of work.

    Commits when the body succeeds, rolls back when it raises, and closes the
    connection in both cases so a failing statement cannot leak the handle or
    leave a half-applied multi-statement change behind.
    """

    def __init__(self, db_path: str, *, as_rows: bool = False):
        self._db_path = db_path
        self._as_rows = as_rows
        self._conn = None

    def __enter__(self):
        conn = sqlite3.connect(self._db_path)
        if self._as_rows:
            # Lets callers turn result rows into dicts keyed by column name.
            conn.row_factory = sqlite3.Row
        self._conn = conn
        return conn

    def __exit__(self, exc_type, exc, tb) -> bool:
        conn, self._conn = self._conn, None
        try:
            if exc_type is None:
                conn.commit()
            else:
                conn.rollback()
        finally:
            conn.close()
        return False  # never swallow the caller's exception


class Database:
    """Simple SQLite store for highlights, AI analyses and reading progress.

    Every method opens its own short-lived connection to ``db_path``.
    """

    # Columns added to `reading_progress` after its first release, as
    # (column name, column definition). init_db adds any that are missing.
    _PROGRESS_COLUMN_MIGRATIONS = (
        ("scroll_position", "scroll_position INTEGER DEFAULT 0"),
        ("is_completed", "is_completed INTEGER NOT NULL DEFAULT 0"),
    )

    def __init__(self, db_path: str = "reader_data.db"):
        """Remember ``db_path`` and make sure the schema exists."""
        self.db_path = db_path
        self.init_db()

    def init_db(self):
        """Create tables if they don't exist and add columns older files lack."""
        with _Transaction(self.db_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS highlights (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    book_id TEXT NOT NULL,
                    chapter_index INTEGER NOT NULL,
                    selected_text TEXT NOT NULL,
                    context_before TEXT,
                    context_after TEXT,
                    created_at TEXT NOT NULL
                )
            """)

            conn.execute("""
                CREATE TABLE IF NOT EXISTS ai_analyses (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    highlight_id INTEGER NOT NULL,
                    analysis_type TEXT NOT NULL,
                    prompt TEXT NOT NULL,
                    response TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    FOREIGN KEY (highlight_id) REFERENCES highlights (id)
                )
            """)

            conn.execute("""
                CREATE TABLE IF NOT EXISTS reading_progress (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    book_id TEXT NOT NULL UNIQUE,
                    chapter_index INTEGER NOT NULL,
                    scroll_position INTEGER DEFAULT 0,
                    is_completed INTEGER NOT NULL DEFAULT 0,
                    last_read_at TEXT NOT NULL
                )
            """)

            # CREATE TABLE IF NOT EXISTS leaves a pre-existing table untouched,
            # so bring older files up to the current column set explicitly.
            existing = {
                row[1] for row in conn.execute("PRAGMA table_info(reading_progress)")
            }
            for column, definition in self._PROGRESS_COLUMN_MIGRATIONS:
                if column not in existing:
                    conn.execute(
                        f"ALTER TABLE reading_progress ADD COLUMN {definition}"
                    )

    def save_highlight(self, highlight: Highlight) -> int:
        """Insert ``highlight`` and return its new row ID."""
        with _Transaction(self.db_path) as conn:
            cursor = conn.execute("""
                INSERT INTO highlights (book_id, chapter_index, selected_text,
                                        context_before, context_after, created_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (
                highlight.book_id,
                highlight.chapter_index,
                highlight.selected_text,
                highlight.context_before,
                highlight.context_after,
                highlight.created_at or datetime.now().isoformat()
            ))
            return cursor.lastrowid

    def save_analysis(self, analysis: AIAnalysis) -> int:
        """Insert ``analysis`` and return its new row ID."""
        with _Transaction(self.db_path) as conn:
            cursor = conn.execute("""
                INSERT INTO ai_analyses (
                    highlight_id, analysis_type, prompt, response, created_at
                )
                VALUES (?, ?, ?, ?, ?)
            """, (
                analysis.highlight_id,
                analysis.analysis_type,
                analysis.prompt,
                analysis.response,
                analysis.created_at or datetime.now().isoformat()
            ))
            return cursor.lastrowid

    def get_highlights_for_chapter(
        self, book_id: str, chapter_index: int
    ) -> List[Dict]:
        """Return the highlights of one chapter as dicts, newest first."""
        with _Transaction(self.db_path, as_rows=True) as conn:
            rows = conn.execute("""
                SELECT * FROM highlights
                WHERE book_id = ? AND chapter_index = ?
                ORDER BY created_at DESC
            """, (book_id, chapter_index)).fetchall()
        return [dict(row) for row in rows]

    def get_all_highlights_for_book(self, book_id: str) -> List[Dict]:
        """Return the highlights of every chapter of a book as dicts, newest first."""
        with _Transaction(self.db_path, as_rows=True) as conn:
            rows = conn.execute("""
                SELECT * FROM highlights
                WHERE book_id = ?
                ORDER BY created_at DESC
            """, (book_id,)).fetchall()
        return [dict(row) for row in rows]

    def get_analyses_for_highlight(self, highlight_id: int) -> List[Dict]:
        """Return all analyses attached to a highlight as dicts, newest first."""
        with _Transaction(self.db_path, as_rows=True) as conn:
            rows = conn.execute("""
                SELECT * FROM ai_analyses
                WHERE highlight_id = ?
                ORDER BY created_at DESC
            """, (highlight_id,)).fetchall()
        return [dict(row) for row in rows]

    def update_analysis(self, analysis_id: int, response: str):
        """Replace the response text of an existing analysis (for editing comments)."""
        with _Transaction(self.db_path) as conn:
            conn.execute("""
                UPDATE ai_analyses
                SET response = ?
                WHERE id = ?
            """, (response, analysis_id))

    def delete_analysis(self, analysis_id: int):
        """Delete an analysis, and its highlight if no other analyses remain.

        Does nothing when ``analysis_id`` does not exist. Both deletes happen
        in one transaction, so a failure leaves neither applied.
        """
        with _Transaction(self.db_path) as conn:
            # Look up the owning highlight before the analysis row disappears.
            owner = conn.execute(
                "SELECT highlight_id FROM ai_analyses WHERE id = ?",
                (analysis_id,),
            ).fetchone()
            if owner is None:
                return
            highlight_id = owner[0]

            conn.execute("DELETE FROM ai_analyses WHERE id = ?", (analysis_id,))

            remaining = conn.execute(
                "SELECT COUNT(*) FROM ai_analyses WHERE highlight_id = ?",
                (highlight_id,),
            ).fetchone()[0]
            if remaining == 0:
                conn.execute("DELETE FROM highlights WHERE id = ?", (highlight_id,))

    def delete_highlight(self, highlight_id: int):
        """Delete a highlight and any analyses attached to it."""
        with _Transaction(self.db_path) as conn:
            # Children first: ai_analyses.highlight_id references highlights.id.
            conn.execute(
                "DELETE FROM ai_analyses WHERE highlight_id = ?",
                (highlight_id,),
            )
            conn.execute("DELETE FROM highlights WHERE id = ?", (highlight_id,))

    def save_progress(self, book_id: str, chapter_index: int, scroll_position: int = 0):
        """Save or update the reading position for a book.

        Note that this always writes ``is_completed = 0``: saving a position
        for a book clears its completed flag.
        """
        with _Transaction(self.db_path) as conn:
            conn.execute("""
                INSERT INTO reading_progress (
                    book_id, chapter_index, scroll_position, is_completed, last_read_at
                )
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(book_id) DO UPDATE SET
                    chapter_index = excluded.chapter_index,
                    scroll_position = excluded.scroll_position,
                    is_completed = excluded.is_completed,
                    last_read_at = excluded.last_read_at
            """, (book_id, chapter_index, scroll_position, 0, datetime.now().isoformat()))

    def get_progress(self, book_id: str) -> Optional[Dict]:
        """Return the stored progress for a book, or ``None`` if there is none.

        The dict has the keys ``chapter_index``, ``scroll_position`` and
        ``is_completed`` (converted to ``bool``).
        """
        with _Transaction(self.db_path, as_rows=True) as conn:
            row = conn.execute("""
                SELECT chapter_index, scroll_position, is_completed FROM reading_progress
                WHERE book_id = ?
            """, (book_id,)).fetchone()

        if not row:
            return None

        progress = dict(row)
        progress["is_completed"] = bool(progress.get("is_completed", 0))
        return progress

    def set_completed(self, book_id: str, is_completed: bool):
        """Mark a book as completed or not completed.

        An existing row keeps its chapter and scroll position (the conflict
        clause only touches the flag and timestamp); a book without a row
        gets one at chapter 0, scroll position 0.
        """
        with _Transaction(self.db_path) as conn:
            conn.execute(
                """
                INSERT INTO reading_progress (
                    book_id, chapter_index, scroll_position, is_completed, last_read_at
                )
                VALUES (?, 0, 0, ?, ?)
                ON CONFLICT(book_id) DO UPDATE SET
                    is_completed = excluded.is_completed,
                    last_read_at = excluded.last_read_at
                """,
                (book_id, int(is_completed), datetime.now().isoformat()),
            )
{item.get_name()}") + print(f" Size: {len(item.get_content())} bytes") diff --git a/migrate_progress.py b/migrate_progress.py new file mode 100644 index 00000000..b0642004 --- /dev/null +++ b/migrate_progress.py @@ -0,0 +1,34 @@ +""" +Migration script to add scroll_position column to reading_progress table. +""" +import sqlite3 +import os + +db_path = os.getenv("DATABASE_PATH", "reader_data.db") + +conn = sqlite3.connect(db_path) +cursor = conn.cursor() + +try: + # Check if column exists + cursor.execute("PRAGMA table_info(reading_progress)") + columns = [row[1] for row in cursor.fetchall()] + + if 'scroll_position' not in columns: + print("Adding scroll_position column...") + cursor.execute(""" + ALTER TABLE reading_progress + ADD COLUMN scroll_position INTEGER DEFAULT 0 + """) + conn.commit() + print("✓ Migration completed successfully!") + else: + print("✓ Column already exists, no migration needed.") + +except sqlite3.OperationalError as e: + if "no such table" in str(e): + print("Table doesn't exist yet, will be created on first run.") + else: + print(f"Error: {e}") +finally: + conn.close() diff --git a/pyproject.toml b/pyproject.toml index 31e61793..d1d0e38f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,4 +10,7 @@ dependencies = [ "fastapi>=0.121.2", "jinja2>=3.1.6", "uvicorn>=0.38.0", + "httpx>=0.27.0", + "python-multipart>=0.0.6", + "pypinyin>=0.54.0", ] diff --git a/reader3.py b/reader3.py index d0b9d3f9..07931ee7 100644 --- a/reader3.py +++ b/reader3.py @@ -6,7 +6,7 @@ import pickle import shutil from dataclasses import dataclass, field -from typing import List, Dict, Optional, Any +from typing import List, Dict, Optional from datetime import datetime from urllib.parse import unquote @@ -14,6 +14,22 @@ from ebooklib import epub from bs4 import BeautifulSoup, Comment +# Patch ebooklib: _parse_nav crashes with IndexError when a NAV document +# exists but has no