From e997b23621b5feae23518a9709e17225cde5ff2e Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 07:14:14 +0200 Subject: [PATCH 01/11] Run container with host user --- docker-compose.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 5bdbbfb..9ddcfd2 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,9 @@ -services: - librecrawl: - build: . - container_name: librecrawl - ports: +services: + librecrawl: + build: . + container_name: librecrawl + user: "${UID:-1000}:${GID:-1000}" + ports: # In local mode: only localhost (127.0.0.1:5000) # In production mode: all interfaces (0.0.0.0:5000) for LAN/WAN access - "${HOST_BINDING:-127.0.0.1}:5000:5000" From a5ead727c52f59893554a6018b3fa2a87a9993de Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 09:26:58 +0200 Subject: [PATCH 02/11] Persist data in Docker volume --- docker-compose.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9ddcfd2..17782db 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,9 +7,9 @@ services: # In local mode: only localhost (127.0.0.1:5000) # In production mode: all interfaces (0.0.0.0:5000) for LAN/WAN access - "${HOST_BINDING:-127.0.0.1}:5000:5000" - volumes: - # Persist the user database and settings - - ./data:/app/data + volumes: + # Persist the user database and settings + - librecrawl-data:/app/data environment: - FLASK_APP=main.py - PYTHONUNBUFFERED=1 @@ -49,6 +49,9 @@ services: fi; python main.py $$FLAGS" - # Increase shared memory size for Chrome to prevent crashes - shm_size: '2gb' + # Increase shared memory size for Chrome to prevent crashes + shm_size: '2gb' + +volumes: + librecrawl-data: From 0e02a3a702ae87d490784f55ffd9ec3e0b6e673e Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 09:35:06 +0200 Subject: [PATCH 03/11] Add account management script --- 
manage-accounts.sh | 295 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 295 insertions(+)
create mode 100755 manage-accounts.sh

diff --git a/manage-accounts.sh b/manage-accounts.sh
new file mode 100755
index 0000000..8bc476c
--- /dev/null
+++ b/manage-accounts.sh
@@ -0,0 +1,295 @@
+#!/usr/bin/env bash
+# Interactive menu for managing LibreCrawl user accounts in the running
+# container: list, create, change password, set tier, set verified flag.
+set -euo pipefail
+
+CONTAINER="${LIBRECRAWL_CONTAINER:-librecrawl}"
+
+# Run the embedded Python helper inside the container. The LC_* variables
+# select the action and carry its arguments into the container environment.
+run_python() {
+  docker exec -i \
+    -e LC_ACTION="${LC_ACTION:-}" \
+    -e LC_USERNAME="${LC_USERNAME:-}" \
+    -e LC_EMAIL="${LC_EMAIL:-}" \
+    -e LC_TIER="${LC_TIER:-}" \
+    -e LC_VERIFIED="${LC_VERIFIED:-}" \
+    -e LC_IDENTIFIER="${LC_IDENTIFIER:-}" \
+    -e LC_PASSWORD="${LC_PASSWORD:-}" \
+    "$CONTAINER" python - <<'PY'
+import os
+import secrets
+import sqlite3
+import string
+import sys
+
+sys.path.insert(0, "/app")
+from src.auth_db import hash_password
+
+DB_FILE = "/app/data/users.db"
+VALID_TIERS = {"guest", "user", "extra", "admin"}
+
+
+def connect():
+    """Open the users database with rows addressable by column name."""
+    conn = sqlite3.connect(DB_FILE)
+    conn.row_factory = sqlite3.Row
+    return conn
+
+
+def generated_password(length=20):
+    """Return a random password of `length` characters chosen with `secrets`."""
+    alphabet = string.ascii_letters + string.digits + "!@#$%^&*()-_=+"
+    return "".join(secrets.choice(alphabet) for _ in range(length))
+
+
+def get_user(conn, identifier):
+    """Look a user up by numeric id first, then by username or email."""
+    if identifier.isdigit():
+        row = conn.execute("SELECT * FROM users WHERE id = ?", (int(identifier),)).fetchone()
+        if row:
+            return row
+    return conn.execute(
+        "SELECT * FROM users WHERE username = ? OR email = ?",
+        (identifier, identifier),
+    ).fetchone()
+
+
+action = os.environ.get("LC_ACTION", "")
+
+try:
+    with connect() as conn:
+        if action == "list":
+            rows = conn.execute(
+                """
+                SELECT id, username, email, verified, tier, created_at, last_login
+                FROM users
+                ORDER BY id
+                """
+            ).fetchall()
+            if not rows:
+                print("No users found.")
+            else:
+                print(f"{'ID':<4} {'Username':<20} {'Email':<32} {'Verified':<8} {'Tier':<8} Last login")
+                print("-" * 95)
+                for row in rows:
+                    print(
+                        f"{row['id']:<4} "
+                        f"{row['username']:<20} "
+                        f"{row['email']:<32} "
+                        f"{row['verified']:<8} "
+                        f"{(row['tier'] or 'guest'):<8} "
+                        f"{row['last_login'] or '-'}"
+                    )
+
+        elif action == "create":
+            username = os.environ["LC_USERNAME"].strip()
+            email = os.environ["LC_EMAIL"].strip()
+            tier = os.environ.get("LC_TIER", "user").strip()
+            verified = 1 if os.environ.get("LC_VERIFIED", "1") == "1" else 0
+            password = os.environ.get("LC_PASSWORD") or generated_password()
+
+            if tier not in VALID_TIERS:
+                raise ValueError(f"Invalid tier: {tier}")
+            if len(username) < 3:
+                raise ValueError("Username must be at least 3 characters.")
+            if "@" not in email:
+                raise ValueError("Email must contain @.")
+            if len(password) < 8:
+                raise ValueError("Password must be at least 8 characters.")
+
+            conn.execute(
+                """
+                INSERT INTO users (username, email, password_hash, verified, tier)
+                VALUES (?, ?, ?, ?, ?)
+                """,
+                (username, email, hash_password(password), verified, tier),
+            )
+            print(f"Created user '{username}' with tier '{tier}'.")
+            if not os.environ.get("LC_PASSWORD"):
+                print(f"Generated password: {password}")
+
+        elif action == "password":
+            identifier = os.environ["LC_IDENTIFIER"].strip()
+            password = os.environ.get("LC_PASSWORD") or generated_password()
+            if len(password) < 8:
+                raise ValueError("Password must be at least 8 characters.")
+
+            user = get_user(conn, identifier)
+            if not user:
+                raise ValueError("User not found.")
+
+            conn.execute(
+                "UPDATE users SET password_hash = ? WHERE id = ?",
+                (hash_password(password), user["id"]),
+            )
+            print(f"Password updated for '{user['username']}'.")
+            if not os.environ.get("LC_PASSWORD"):
+                print(f"Generated password: {password}")
+
+        elif action == "tier":
+            identifier = os.environ["LC_IDENTIFIER"].strip()
+            tier = os.environ["LC_TIER"].strip()
+            if tier not in VALID_TIERS:
+                raise ValueError(f"Invalid tier: {tier}")
+
+            user = get_user(conn, identifier)
+            if not user:
+                raise ValueError("User not found.")
+
+            conn.execute("UPDATE users SET tier = ? WHERE id = ?", (tier, user["id"]))
+            print(f"Tier updated for '{user['username']}' to '{tier}'.")
+
+        elif action == "verified":
+            identifier = os.environ["LC_IDENTIFIER"].strip()
+            verified = 1 if os.environ.get("LC_VERIFIED", "1") == "1" else 0
+
+            user = get_user(conn, identifier)
+            if not user:
+                raise ValueError("User not found.")
+
+            conn.execute("UPDATE users SET verified = ? WHERE id = ?", (verified, user["id"]))
+            print(f"User '{user['username']}' is now {'verified' if verified else 'unverified'}.")
+
+        else:
+            raise ValueError("Unknown action.")
+
+except sqlite3.IntegrityError as exc:
+    print(f"Database error: {exc}", file=sys.stderr)
+    sys.exit(1)
+except Exception as exc:
+    print(f"Error: {exc}", file=sys.stderr)
+    sys.exit(1)
+PY
+}
+
+# Abort unless a container named exactly $CONTAINER is currently running.
+require_container() {
+  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$CONTAINER"; then
+    echo "Container '$CONTAINER' is not running." >&2
+    echo "Start it first with: docker compose up -d" >&2
+    exit 1
+  fi
+}
+
+# Prompt twice for a password and print the accepted value on stdout, which
+# callers capture with $(...). Blank lines and error messages go to stderr so
+# they cannot end up inside the captured password.
+read_password() {
+  local first second
+  while true; do
+    read -rsp "Password: " first
+    echo >&2
+    read -rsp "Confirm password: " second
+    echo >&2
+    if [[ "$first" != "$second" ]]; then
+      echo "Passwords do not match." >&2
+      continue
+    fi
+    if (( ${#first} < 8 )); then
+      echo "Password must be at least 8 characters." >&2
+      continue
+    fi
+    printf '%s' "$first"
+    return
+  done
+}
+
+# Prompt until a valid tier is entered and print it on stdout (captured by
+# callers); the error goes to stderr and the offered default stays unchanged.
+choose_tier() {
+  local tier="${1:-user}"
+  while true; do
+    read -rp "Tier (guest/user/extra/admin) [$tier]: " input
+    input="${input:-$tier}"
+    case "$input" in
+      guest|user|extra|admin) printf '%s' "$input"; return ;;
+      *) echo "Invalid tier." >&2 ;;
+    esac
+  done
+}
+
+# Print 1 for y/yes (case-insensitive) and 0 for any other answer.
+choose_verified() {
+  local default="${1:-y}"
+  read -rp "Verified? (y/n) [$default]: " input
+  input="${input:-$default}"
+  case "${input,,}" in
+    y|yes) printf '1' ;;
+    *) printf '0' ;;
+  esac
+}
+
+create_account() {
+  local username email tier verified generate password
+  read -rp "Username: " username
+  read -rp "Email [$username@localhost]: " email
+  email="${email:-$username@localhost}"
+  tier="$(choose_tier user)"
+  echo
+  verified="$(choose_verified y)"
+  echo
+  read -rp "Generate password? (y/n) [y]: " generate
+  generate="${generate:-y}"
+
+  if [[ "${generate,,}" =~ ^(y|yes)$ ]]; then
+    LC_ACTION=create LC_USERNAME="$username" LC_EMAIL="$email" LC_TIER="$tier" LC_VERIFIED="$verified" run_python
+  else
+    password="$(read_password)"
+    LC_ACTION=create LC_USERNAME="$username" LC_EMAIL="$email" LC_TIER="$tier" LC_VERIFIED="$verified" LC_PASSWORD="$password" run_python
+  fi
+}
+
+change_password() {
+  local identifier generate password
+  read -rp "User ID, username, or email: " identifier
+  read -rp "Generate password? (y/n) [y]: " generate
+  generate="${generate:-y}"
+
+  if [[ "${generate,,}" =~ ^(y|yes)$ ]]; then
+    LC_ACTION=password LC_IDENTIFIER="$identifier" run_python
+  else
+    password="$(read_password)"
+    LC_ACTION=password LC_IDENTIFIER="$identifier" LC_PASSWORD="$password" run_python
+  fi
+}
+
+set_tier() {
+  local identifier tier
+  read -rp "User ID, username, or email: " identifier
+  tier="$(choose_tier user)"
+  echo
+  LC_ACTION=tier LC_IDENTIFIER="$identifier" LC_TIER="$tier" run_python
+}
+
+set_verified() {
+  local identifier verified
+  read -rp "User ID, username, or email: " identifier
+  verified="$(choose_verified y)"
+  echo
+  LC_ACTION=verified LC_IDENTIFIER="$identifier" LC_VERIFIED="$verified" run_python
+}
+
+require_container
+
+while true; do
+  echo
+  echo "LibreCrawl account management ($CONTAINER)"
+  echo "1. List users"
+  echo "2. Create account"
+  echo "3. Change password"
+  echo "4. Set tier"
+  echo "5. Set verified/unverified"
+  echo "q. Quit"
+  read -rp "Choice: " choice
+
+  case "${choice,,}" in
+    1) LC_ACTION=list run_python ;;
+    2) create_account ;;
+    3) change_password ;;
+    4) set_tier ;;
+    5) set_verified ;;
+    q) exit 0 ;;
+    *) echo "Invalid choice." ;;
+  esac
+done
From f702c7a864732372eca31611ee995491251780bd Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 09:42:32 +0200 Subject: [PATCH 04/11] Promote verified users from guest tier --- src/auth_db.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/auth_db.py b/src/auth_db.py index 70ab21c..535e8b6 100644 --- a/src/auth_db.py +++ b/src/auth_db.py @@ -516,10 +516,17 @@ def verify_token(token): if datetime.now() > expires_at: return False, "This verification link has expired", None, None - # Mark user as verified - cursor.execute(''' - UPDATE users SET verified = 1 WHERE id = ? - ''', (result['user_id'],)) + # Mark user as verified and promote registered accounts out of + # the guest tier so account-only features like settings unlock. 
+ cursor.execute(''' + UPDATE users + SET verified = 1, + tier = CASE + WHEN tier IS NULL OR tier = 'guest' THEN 'user' + ELSE tier + END + WHERE id = ? + ''', (result['user_id'],)) # Mark token as used cursor.execute(''' From 3d79619cb9f366cd2add2d830450406a59eaaf2d Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 09:53:28 +0200 Subject: [PATCH 05/11] Expose guest login toggle in Compose --- docker-compose.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 17782db..619d216 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,11 +14,13 @@ services: - FLASK_APP=main.py - PYTHONUNBUFFERED=1 # Set to "true" for local mode (no authentication), "false" for production - - LOCAL_MODE=${LOCAL_MODE:-true} - # Set to "true" to disable new user registrations - - REGISTRATION_DISABLED=${REGISTRATION_DISABLED:-false} - # Set to "true" for demo mode (1.5GB per-user memory limit) - - DEMO_MODE=${DEMO_MODE:-false} + - LOCAL_MODE=${LOCAL_MODE:-true} + # Set to "true" to disable new user registrations + - REGISTRATION_DISABLED=${REGISTRATION_DISABLED:-false} + # Set to "true" to disable guest login + - DISABLE_GUEST=${DISABLE_GUEST:-false} + # Set to "true" for demo mode (1.5GB per-user memory limit) + - DEMO_MODE=${DEMO_MODE:-false} # DANGEROUS: Set to "true" to allow login as any username with no password # (username only used to separate sessions). Do NOT use in production. - DANGEROUSLY_SKIP_AUTH=${DANGEROUSLY_SKIP_AUTH:-false} From 3c43b0fd35f6e4d7ee345cebd237bd3d2d9f856f Mon Sep 17 00:00:00 2001 From: root Date: Wed, 27 May 2026 18:16:48 +0200 Subject: [PATCH 06/11] fix: extract author from <link rel="author"> in addition to <meta name="author"> Previously only <meta name="author"> was handled. Many sites use <link rel="author"> which was silently ignored, causing the author field to always be empty for those pages. 
- Add fallback extraction from <link rel="author"> after meta tags loop - <meta name="author"> still takes precedence if both are present - Uses BeautifulSoup's list-aware rel matching, so multi-value rel attributes like rel="nofollow author" are handled correctly Co-Authored-By: Claude Sonnet 4.6 --- src/core/seo_extractor.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/core/seo_extractor.py b/src/core/seo_extractor.py index f1558c4..219d244 100644 --- a/src/core/seo_extractor.py +++ b/src/core/seo_extractor.py @@ -78,6 +78,13 @@ def extract_meta_tags(soup, result): canonical = soup.find('link', attrs={'rel': 'canonical'}) result['canonical_url'] = canonical.get('href', '') if canonical else '' + # Extract author from <link rel="author"> if not already set via <meta name="author"> + # BeautifulSoup treats rel as a list, so rel='author' also matches rel="nofollow author" + if not result.get('author'): + author_link = soup.find('link', rel='author') + if author_link: + result['author'] = author_link.get('href', '') + @staticmethod def extract_opengraph_tags(soup, result): """Extract OpenGraph meta tags""" From a7aa0ba88b835740e2b0c55f4b69255a3046db5b Mon Sep 17 00:00:00 2001 From: root Date: Wed, 27 May 2026 18:28:05 +0200 Subject: [PATCH 07/11] fix(e-e-a-t): use url.author instead of url.meta_author The backend stores the extracted author in result['author'], not result['meta_author']. The wrong field name caused pagesWithAuthor to always be 0 in the E-E-A-T plugin, regardless of page content. 
Co-Authored-By: Claude Sonnet 4.6 --- web/static/plugins/e-e-a-t.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/static/plugins/e-e-a-t.js b/web/static/plugins/e-e-a-t.js index a5140b4..7b4489e 100644 --- a/web/static/plugins/e-e-a-t.js +++ b/web/static/plugins/e-e-a-t.js @@ -327,7 +327,8 @@ LibreCrawlPlugin.register({ } // Check for author information (20 points) - if (url.meta_author || (url.og_tags && url.og_tags.author)) { + // Backend stores the field as 'author' (from or ) + if (url.author || (url.og_tags && url.og_tags.author)) { score += 20; pagesWithAuthor++; urlData.hasAuthor = true; From c2b5636d6a583c4455940ac5ae63d57f1b903cf8 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 27 May 2026 18:35:28 +0200 Subject: [PATCH 08/11] fix: persist author/keywords/generator/theme_color to DB and fix e-e-a-t detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three root causes for author always showing 0: 1. crawl_db.py: 'author', 'keywords', 'generator', 'theme_color' were missing from both the CREATE TABLE schema and the save_url_batch INSERT. After a DB reload these fields were always undefined/null. → Added the 4 columns to the schema, added ALTER TABLE migrations for existing databases, and included them in the batch INSERT. 2. e-e-a-t.js: no fallback for DB-loaded crawls where url.author may be null but url.meta_tags.author is still populated (meta_tags IS saved as JSON). → Added url.meta_tags.author as a fallback check. 
Co-Authored-By: Claude Sonnet 4.6 --- src/crawl_db.py | 22 +++++++++++++++++++--- web/static/plugins/e-e-a-t.js | 5 +++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/crawl_db.py b/src/crawl_db.py index 3a41335..ec95503 100644 --- a/src/crawl_db.py +++ b/src/crawl_db.py @@ -102,6 +102,11 @@ def init_crawl_tables(): external_links INTEGER, internal_links INTEGER, + author TEXT, + keywords TEXT, + generator TEXT, + theme_color TEXT, + response_time REAL, javascript_rendered BOOLEAN DEFAULT 0, error_type TEXT, @@ -118,6 +123,13 @@ def init_crawl_tables(): except sqlite3.OperationalError: pass # Column already exists + # Migration: add author/keywords/generator/theme_color columns + for col in [('author', 'TEXT'), ('keywords', 'TEXT'), ('generator', 'TEXT'), ('theme_color', 'TEXT')]: + try: + cursor.execute(f'ALTER TABLE crawled_urls ADD COLUMN {col[0]} {col[1]}') + except sqlite3.OperationalError: + pass # Column already exists + # Links table cursor.execute(''' CREATE TABLE IF NOT EXISTS crawl_links ( @@ -294,7 +306,11 @@ def save_url_batch(crawl_id, urls): url_data.get('internal_links'), url_data.get('response_time'), url_data.get('javascript_rendered', False), - url_data.get('error_type') + url_data.get('error_type'), + url_data.get('author'), + url_data.get('keywords'), + url_data.get('generator'), + url_data.get('theme_color'), ) rows.append(row) @@ -306,8 +322,8 @@ def save_url_batch(crawl_id, urls): meta_tags, og_tags, twitter_tags, json_ld, analytics, images, hreflang, schema_org, redirects, linked_from, external_links, internal_links, response_time, javascript_rendered, - error_type - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + error_type, author, keywords, generator, theme_color + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
''', rows) print(f"Saved {len(urls)} URLs to database for crawl {crawl_id}") diff --git a/web/static/plugins/e-e-a-t.js b/web/static/plugins/e-e-a-t.js index 7b4489e..122ccaf 100644 --- a/web/static/plugins/e-e-a-t.js +++ b/web/static/plugins/e-e-a-t.js @@ -327,8 +327,9 @@ LibreCrawlPlugin.register({ } // Check for author information (20 points) - // Backend stores the field as 'author' (from or ) - if (url.author || (url.og_tags && url.og_tags.author)) { + // 'author' is the direct field; fall back to meta_tags.author (DB-loaded crawls) + // and og_tags.author (OpenGraph) + if (url.author || (url.meta_tags && url.meta_tags.author) || (url.og_tags && url.og_tags.author)) { score += 20; pagesWithAuthor++; urlData.hasAuthor = true; From fa74569387dd4051c21b500730013797028a513a Mon Sep 17 00:00:00 2001 From: root Date: Wed, 27 May 2026 19:19:35 +0200 Subject: [PATCH 09/11] feat: add DE/EN i18n with language toggle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a lightweight translation system to LibreCrawl: - web/static/js/i18n.js — translation engine Uses data-i18n / data-i18n-placeholder / data-i18n-title attributes. Language preference is persisted in localStorage. Default language: German (de). - web/static/locales/de.json — ~200 German strings - web/static/locales/en.json — ~200 English strings (fallback) - login.html, register.html, index.html All user-visible text marked with data-i18n attributes. DE/EN toggle button added to header (main app) and top-right corner (login/register pages). JS validation and button state messages also use i18n.t(). 
--- docker-compose.yml | 37 +- web/static/js/i18n.js | 85 ++ web/static/locales/de.json | 294 +++++ web/static/locales/en.json | 294 +++++ web/templates/index.html | 2025 ++++++++++++++++++----------------- web/templates/login.html | 645 +++++------ web/templates/register.html | 606 ++++++----- 7 files changed, 2348 insertions(+), 1638 deletions(-) create mode 100644 web/static/js/i18n.js create mode 100644 web/static/locales/de.json create mode 100644 web/static/locales/en.json diff --git a/docker-compose.yml b/docker-compose.yml index 619d216..4ced2f3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,30 +9,33 @@ services: - "${HOST_BINDING:-127.0.0.1}:5000:5000" volumes: # Persist the user database and settings + # Use a named volume so Docker preserves the /app/data ownership from + # the image. A root-owned bind mount can prevent SQLite from creating + # users.db when the app runs as the non-root librecrawl user. - librecrawl-data:/app/data environment: - FLASK_APP=main.py - PYTHONUNBUFFERED=1 # Set to "true" for local mode (no authentication), "false" for production - - LOCAL_MODE=${LOCAL_MODE:-true} - # Set to "true" to disable new user registrations - - REGISTRATION_DISABLED=${REGISTRATION_DISABLED:-false} + - LOCAL_MODE=${LOCAL_MODE:-true} + # Set to "true" to disable new user registrations + - REGISTRATION_DISABLED=${REGISTRATION_DISABLED:-false} # Set to "true" to disable guest login - DISABLE_GUEST=${DISABLE_GUEST:-false} - # Set to "true" for demo mode (1.5GB per-user memory limit) - - DEMO_MODE=${DEMO_MODE:-false} - # DANGEROUS: Set to "true" to allow login as any username with no password - # (username only used to separate sessions). Do NOT use in production. 
- - DANGEROUSLY_SKIP_AUTH=${DANGEROUSLY_SKIP_AUTH:-false} - # Uncomment these for SMTP sending - # - SMTP_HOST=${SMTP_HOST} - # - SMTP_PORT=${SMTP_PORT} - # - SMTP_USER=${SMTP_USER} - # - SMTP_PASSWORD=${SMTP_PASSWORD} - # - SMTP_FROM=${SMTP_FROM} - # - SMTP_FROM_NAME=${SMTP_FROM_NAME} - # Add this for correct links in the verification emails - # - MAIN_APP_URL=${MAIN_APP_URL} + # Set to "true" for demo mode (1.5GB per-user memory limit) + - DEMO_MODE=${DEMO_MODE:-false} + # DANGEROUS: Set to "true" to allow login as any username with no password + # (username only used to separate sessions). Do NOT use in production. + - DANGEROUSLY_SKIP_AUTH=${DANGEROUSLY_SKIP_AUTH:-false} + # Uncomment these for SMTP sending + # - SMTP_HOST=${SMTP_HOST} + # - SMTP_PORT=${SMTP_PORT} + # - SMTP_USER=${SMTP_USER} + # - SMTP_PASSWORD=${SMTP_PASSWORD} + # - SMTP_FROM=${SMTP_FROM} + # - SMTP_FROM_NAME=${SMTP_FROM_NAME} + # Add this for correct links in the verification emails + # - MAIN_APP_URL=${MAIN_APP_URL} restart: unless-stopped command: > sh -c " 
diff --git a/web/static/js/i18n.js b/web/static/js/i18n.js
new file mode 100644
index 0000000..9280f1b
--- /dev/null
+++ b/web/static/js/i18n.js
@@ -0,0 +1,85 @@
+/**
+ * LibreCrawl i18n — lightweight translation engine
+ * Usage in HTML: data-i18n="key" → sets textContent
+ *                data-i18n-placeholder="key" → sets placeholder
+ *                data-i18n-title="key" → sets title attribute
+ *                data-i18n-html="key" → sets innerHTML (use sparingly)
+ * Usage in JS: i18n.t('key') or i18n.t('key', { name: value }) for {name} placeholders
+ */
+const i18n = (() => {
+  const STORAGE_KEY = 'librecrawl_lang';
+  const DEFAULT_LANG = 'de';
+  let translations = {};
+  let currentLang = localStorage.getItem(STORAGE_KEY) || DEFAULT_LANG;
+  // Fetch the locale file for `lang`, persist the choice and re-render; on failure only warn.
+  async function load(lang) {
+    try {
+      const res = await fetch(`/static/locales/${lang}.json?v=${Date.now()}`);
+      if (!res.ok) throw new Error(`Failed to load ${lang}.json`);
+      translations = await res.json();
+      currentLang = lang;
+      localStorage.setItem(STORAGE_KEY, lang);
+      apply();
+      updateToggleButtons();
+    } catch (e) {
+      console.warn('i18n: could not load', lang, e);
+    }
+  }
+  // Translate `key`, filling {name} placeholders from `vars`; returns the key itself if untranslated.
+  function t(key, vars) {
+    let str = translations[key] || key;
+    if (vars) {
+      Object.entries(vars).forEach(([k, v]) => {
+        str = str.split(`{${k}}`).join(String(v)); // literal replace: no regex or $-pattern expansion
+      });
+    }
+    return str;
+  }
+  // Write translations into every data-i18n* element under `root` (default: the whole document).
+  function apply(root) {
+    const scope = root || document;
+    scope.querySelectorAll('[data-i18n]').forEach(el => {
+      const val = translations[el.dataset.i18n];
+      if (val !== undefined) el.textContent = val;
+    });
+    scope.querySelectorAll('[data-i18n-placeholder]').forEach(el => {
+      const val = translations[el.dataset.i18nPlaceholder];
+      if (val !== undefined) el.placeholder = val;
+    });
+    scope.querySelectorAll('[data-i18n-title]').forEach(el => {
+      const val = translations[el.dataset.i18nTitle];
+      if (val !== undefined) el.title = val;
+    });
+    scope.querySelectorAll('[data-i18n-html]').forEach(el => {
+      const val = translations[el.dataset.i18nHtml];
+      if (val !== undefined) el.innerHTML = val;
+    });
+    if (!root) document.documentElement.lang = currentLang;
+  }
+  // Label every .i18n-toggle button with the language it will switch to.
+  function updateToggleButtons() {
+    document.querySelectorAll('.i18n-toggle').forEach(btn => {
+      btn.textContent = currentLang === 'de' ? '🇬🇧 EN' : '🇩🇪 DE';
+      btn.title = currentLang === 'de' ? 'Switch to English' : 'Auf Deutsch wechseln';
+    });
+  }
+  // Switch between German and English.
+  function toggle() {
+    load(currentLang === 'de' ? 'en' : 'de');
+  }
+  // Return the language code that is currently active.
+  function getLang() { return currentLang; }
+
+  // Auto-init on DOM ready
+  function init() {
+    load(currentLang);
+  }
+
+  if (document.readyState === 'loading') {
+    document.addEventListener('DOMContentLoaded', init);
+  } else {
+    init();
+  }
+
+  return { t, load, apply, toggle, getLang };
+})();
diff --git a/web/static/locales/de.json b/web/static/locales/de.json new file mode 100644 index 0000000..9256479 --- /dev/null +++ b/web/static/locales/de.json @@ -0,0 +1,294 @@ +{ + "login.title": "Anmelden – LibreCrawl", + "login.subtitle": "SEO Spider Tool", + "login.username": "Benutzername", + "login.password": "Passwort", + "login.btn_login": "Anmelden", + "login.btn_continue": "Weiter", + "login.btn_logging_in": "Anmelden…", + "login.btn_continuing": "Weiter…", + "login.guest": "Als Gast fortfahren (3 Crawls/24h)", + "login.guest_entering": "Als Gast eintreten…", + "login.no_account": "Noch kein Konto?", + "login.register_here": "Hier registrieren", + "login.error_generic": "Ein Fehler ist aufgetreten. 
Bitte erneut versuchen.", + "login.guest_error": "Gast-Anmeldung fehlgeschlagen", + "login.registrations_disabled": "Registrierungen sind deaktiviert", + + "register.title": "Registrieren – LibreCrawl", + "register.subtitle": "Konto erstellen", + "register.username": "Benutzername", + "register.username_hint": "Mindestens 3 Zeichen", + "register.email": "E-Mail", + "register.password": "Passwort", + "register.password_hint": "Mindestens 8 Zeichen", + "register.confirm_password": "Passwort bestätigen", + "register.btn_register": "Registrieren", + "register.btn_registering": "Registrieren…", + "register.btn_disabled": "Registrierung deaktiviert", + "register.have_account": "Bereits ein Konto?", + "register.login_here": "Hier anmelden", + "register.error_passwords": "Passwörter stimmen nicht überein", + "register.error_password_length": "Passwort muss mindestens 8 Zeichen haben", + "register.error_username_length": "Benutzername muss mindestens 3 Zeichen haben", + "register.error_generic": "Ein Fehler ist aufgetreten. Bitte erneut versuchen.", + "register.disabled_msg": "Registrierungen sind derzeit deaktiviert. 
Bitte Administrator kontaktieren.", + + "app.btn_dashboard": "Dashboard", + "app.btn_settings": "Einstellungen", + "app.btn_export": "Exportieren", + "app.btn_save_crawl": "Crawl speichern", + "app.btn_load_crawl": "Crawl laden", + "app.btn_logout": "Abmelden", + + "app.url_label": "URL zum Crawlen", + "app.btn_start": "Start", + "app.btn_stop": "Stop", + "app.btn_clear": "Leeren", + "app.status_initializing": "Initialisierung…", + "app.status_ready": "Bereit", + + "app.sidebar_filters": "Filter", + "app.filter_internal": "Intern", + "app.filter_external": "Extern", + "app.filter_response_codes": "Antwortcodes", + "app.filter_2xx": "2xx Erfolg", + "app.filter_3xx": "3xx Weiterleitung", + "app.filter_4xx": "4xx Client-Fehler", + "app.filter_5xx": "5xx Server-Fehler", + "app.filter_no_response": "Keine Antwort", + "app.filter_no_response_title": "DNS-Fehler, Verbindung abgelehnt, Timeout, SSL-Fehler usw. – nützlich zum Finden abgelaufener Domains.", + "app.filter_content_type": "Inhaltstyp", + "app.filter_images": "Bilder", + + "app.sidebar_statistics": "Statistiken", + "app.stat_discovered": "URLs entdeckt", + "app.stat_crawled": "URLs gecrawlt", + "app.stat_depth": "Crawl-Tiefe", + "app.stat_speed": "Geschwindigkeit", + + "app.sidebar_datasize": "Datengröße", + "app.stat_crawl_data": "Crawl-Daten", + "app.stat_per_url": "Pro URL", + "app.stat_est_1m": "Schätzung für 1 Mio. 
URLs", + "app.stat_system": "System verfügbar", + + "app.tab_overview": "Übersicht", + "app.tab_internal": "Intern", + "app.tab_external": "Extern", + "app.tab_status_codes": "Statuscodes", + "app.tab_links": "Links", + "app.tab_issues": "Probleme", + "app.tab_pagespeed": "PageSpeed", + "app.tab_visualization": "Visualisierung", + + "app.th_address": "Adresse", + "app.th_status": "Status", + "app.th_title": "Titel", + "app.th_meta_desc": "Meta-Beschreibung", + "app.th_h1": "H1", + "app.th_words": "Wörter", + "app.th_response": "Antwortzeit (ms)", + "app.th_analytics": "Analytics", + "app.th_og_tags": "OG-Tags", + "app.th_json_ld": "JSON-LD", + "app.th_links_int_ext": "Links (Int/Ext)", + "app.th_images": "Bilder", + "app.th_js": "JS", + "app.th_details": "Details", + "app.th_content_type": "Inhaltstyp", + "app.th_size": "Größe", + "app.th_status_code": "Statuscode", + "app.th_count": "Anzahl", + "app.th_percentage": "Prozentsatz", + "app.th_source_url": "Quell-URL", + "app.th_target_url": "Ziel-URL", + "app.th_anchor_text": "Ankertext", + "app.th_placement": "Position", + "app.th_domain": "Domain", + "app.th_type": "Typ", + "app.th_category": "Kategorie", + "app.th_issue": "Problem", + + "app.links_internal": "🔗 Interne Links", + "app.links_external": "🌐 Externe Links", + "app.links_all_status": "Alle Statuscodes", + "app.links_search": "Suchen…", + + "app.issues_all": "Alle Probleme", + "app.issues_errors": "Fehler", + "app.issues_warnings": "Warnungen", + "app.issues_info": "Info", + "app.issues_empty_title": "Keine Probleme gefunden", + "app.issues_empty_desc": "Starte einen Crawl, um SEO-Probleme auf deiner Website zu entdecken.", + + "app.pagespeed_title": "PageSpeed Insights Ergebnisse", + "app.pagespeed_desc": "Leistungsanalyse für Startseite und Kategorieseiten (powered by Google PageSpeed Insights)", + "app.pagespeed_placeholder": "🚀 PageSpeed-Analyse erscheint hier nach Abschluss des Crawls", + "app.pagespeed_hint": "Aktiviere PageSpeed-Analyse in den 
Einstellungen für Core Web Vitals und Performance-Scores", + + "app.viz_title": "Seitenstruktur-Visualisierung", + "app.viz_layout_force": "Kraftgerichtet", + "app.viz_layout_hierarchical": "Hierarchisch", + "app.viz_layout_circle": "Kreis", + "app.viz_layout_concentric": "Konzentrisch", + "app.viz_layout_grid": "Raster", + "app.viz_filter_all": "Alle Seiten", + "app.viz_filter_html": "Nur HTML", + "app.viz_btn_reset": "Ansicht zurücksetzen", + "app.viz_btn_export": "PNG exportieren", + "app.viz_no_data": "Noch keine Daten", + "app.viz_no_data_desc": "Starte einen Crawl, um deine Seitenstruktur zu visualisieren", + "app.viz_legend_2xx": "2xx Erfolg", + "app.viz_legend_3xx": "3xx Weiterleitung", + "app.viz_legend_4xx": "4xx Fehler", + "app.viz_legend_5xx": "5xx Fehler", + "app.viz_legend_other": "Sonstige", + + "app.modal_crawl_history": "Crawl-Verlauf", + "app.modal_settings": "Einstellungen", + "app.modal_loading_crawls": "Crawls werden geladen…", + + "app.settings_tab_crawler": "Crawler", + "app.settings_tab_requests": "Anfragen", + "app.settings_tab_filters": "Filter", + "app.settings_tab_export": "Export", + "app.settings_tab_javascript": "JavaScript", + "app.settings_tab_issues": "Problem-Ausschluss", + "app.settings_tab_customcss": "Eigenes CSS", + "app.settings_tab_advanced": "Erweitert", + + "app.settings_crawler_title": "Crawler-Konfiguration", + "app.settings_max_depth": "Maximale Crawl-Tiefe", + "app.settings_max_depth_help": "Wie tief ab der Start-URL gecrawlt wird", + "app.settings_max_urls": "Maximale Anzahl URLs", + "app.settings_max_urls_help": "Crawl nach dieser Anzahl URLs beenden", + "app.settings_crawl_delay": "Crawl-Verzögerung (Sekunden)", + "app.settings_crawl_delay_help": "Verzögerung zwischen Anfragen für schonenden Crawl", + "app.settings_follow_redirects": "Weiterleitungen folgen", + "app.settings_follow_redirects_help": "3xx-Weiterleitungen automatisch folgen", + "app.settings_crawl_external": "Externe Links crawlen", + 
"app.settings_crawl_external_help": "Externe Domains in den Crawl einbeziehen", + + "app.settings_requests_title": "HTTP-Anfragen-Konfiguration", + "app.settings_user_agent": "User Agent", + "app.settings_user_agent_help": "User-Agent-String, der mit Anfragen gesendet wird", + "app.settings_timeout": "Anfrage-Timeout (Sekunden)", + "app.settings_timeout_help": "Wie lange auf eine Antwort gewartet wird", + "app.settings_retries": "Wiederholungsversuche", + "app.settings_retries_help": "Anzahl der Wiederholungen bei fehlgeschlagenen Anfragen", + "app.settings_accept_lang": "Akzeptierte Sprache", + "app.settings_accept_lang_help": "Bevorzugte Sprache für Antworten", + "app.settings_robots_txt": "robots.txt beachten", + "app.settings_robots_txt_help": "robots.txt vor dem Crawlen prüfen", + "app.settings_cookies": "Cookies erlauben", + "app.settings_cookies_help": "Cookies akzeptieren und senden", + "app.settings_sitemaps": "Sitemaps entdecken", + "app.settings_sitemaps_help": "sitemap.xml-Dateien automatisch finden und verarbeiten (einschließlich verschachtelter Sitemaps)", + "app.settings_pagespeed_enable": "PageSpeed-Analyse aktivieren", + "app.settings_pagespeed_enable_help": "Google PageSpeed Insights für Startseite und 2 Kategorieseiten nach dem Crawl ausführen", + "app.settings_pagespeed_key": "Google PageSpeed API-Schlüssel", + "app.settings_pagespeed_key_help": "API-Schlüssel für höhere Anfragelimits (25.000 Anfragen/Tag).", + "app.settings_pagespeed_key_link": "API-Schlüssel hier holen", + + "app.settings_filters_title": "Inhaltsfilter", + "app.settings_include_ext": "Dateierweiterungen einschließen", + "app.settings_include_ext_help": "Kommagetrennte Liste der zu crawlenden Erweiterungen", + "app.settings_exclude_ext": "Dateierweiterungen ausschließen", + "app.settings_exclude_ext_help": "Kommagetrennte Liste der zu überspringenden Erweiterungen", + "app.settings_include_patterns": "URL-Muster einschließen", + "app.settings_include_patterns_placeholder": "Ein 
Muster pro Zeile (Regex unterstützt)", + "app.settings_include_patterns_help": "Nur URLs crawlen, die diesen Mustern entsprechen", + "app.settings_exclude_patterns": "URL-Muster ausschließen", + "app.settings_exclude_patterns_placeholder": "Ein Muster pro Zeile (Regex unterstützt)", + "app.settings_exclude_patterns_help": "URLs überspringen, die diesen Mustern entsprechen", + "app.settings_max_filesize": "Maximale Dateigröße (MB)", + "app.settings_max_filesize_help": "Dateien überspringen, die größer als diese Größe sind", + "app.settings_duplication_title": "Duplikatserkennung", + "app.settings_duplication_enable": "Duplikatsprüfung aktivieren", + "app.settings_duplication_enable_help": "Doppelte Inhalte auf allen gecrawlten Seiten erkennen", + "app.settings_duplication_threshold": "Ähnlichkeitsschwelle für Duplikate", + "app.settings_duplication_threshold_help": "Inhaltliche Ähnlichkeitsschwelle (0,0–1,0). Höhere Werte = strengere Prüfung. Standard: 0,85 (85 % ähnlich)", + + "app.settings_export_title": "Export-Konfiguration", + "app.settings_export_format": "Standardformat für Export", + "app.settings_export_format_help": "Standardformat für Datenexporte", + "app.settings_export_fields": "Export-Felder", + "app.export_url": "URL", + "app.export_status_code": "Statuscode", + "app.export_title": "Titel", + "app.export_meta_desc": "Meta-Beschreibung", + "app.export_h1": "H1-Tags", + "app.export_word_count": "Wörteranzahl", + "app.export_content_type": "Inhaltstyp", + "app.export_response_time": "Antwortzeit", + "app.export_canonical": "Kanonische URL", + "app.export_og_tags": "OpenGraph-Tags", + "app.export_twitter": "Twitter Cards", + "app.export_json_ld": "JSON-LD-Daten", + "app.export_analytics": "Analytics-Tracking", + "app.export_internal_links": "Anzahl interne Links", + "app.export_external_links": "Anzahl externe Links", + "app.export_images": "Anzahl Bilder", + "app.export_links_detailed": "Detaillierte Link-Daten (separate Datei)", + "app.export_lang": 
"Sprache", + "app.export_charset": "Zeichensatz", + "app.export_viewport": "Viewport", + "app.export_robots": "Robots-Meta", + "app.export_issues": "Erkannte Probleme (separater Export)", + + "app.settings_js_title": "JavaScript-Rendering-Konfiguration", + "app.settings_js_enable": "JavaScript-Rendering aktivieren", + "app.settings_js_enable_help": "Seiten mit JavaScript rendern für dynamische Inhalte (langsamer, aber genauer)", + "app.settings_js_wait": "JavaScript-Wartezeit (Sekunden)", + "app.settings_js_wait_help": "Wartezeit für das JavaScript-Rendering nach dem Seitenaufruf", + "app.settings_js_page_timeout": "Seitenlade-Timeout (Sekunden)", + "app.settings_js_page_timeout_help": "Maximale Wartezeit für das Laden einer Seite", + "app.settings_js_browser": "Browser-Engine", + "app.settings_js_browser_help": "Browser-Engine für das JavaScript-Rendering", + "app.settings_js_chromium": "Chromium (Empfohlen)", + "app.settings_js_firefox": "Firefox", + "app.settings_js_webkit": "WebKit (Safari)", + "app.settings_js_headless": "Headless-Modus", + "app.settings_js_headless_help": "Browser ohne sichtbares Fenster ausführen (empfohlen für Performance)", + "app.settings_js_useragent": "JavaScript User Agent", + "app.settings_js_useragent_help": "User-Agent-String für JavaScript-gerenderte Seiten", + "app.settings_js_viewport": "Browser-Viewport-Größe", + "app.settings_js_viewport_help": "Browser-Fenstergröße für das Rendering (beeinflusst responsive Layouts)", + "app.settings_js_concurrency": "Max. gleichzeitige Browser-Seiten", + "app.settings_js_concurrency_help": "Anzahl der Browser-Seiten für paralleles JavaScript-Rendering (mehr = schneller, aber mehr Speicher)", + "app.settings_js_warning_title": "⚠️ Performance-Auswirkung", + "app.settings_js_warning_text": "JavaScript-Rendering ist deutlich langsamer als reines HTTP-Crawling. 
Nur für Seiten mit dynamischen Inhalten oder wenn statisches Crawling unvollständige Ergebnisse liefert.", + + "app.settings_issues_title": "Problem-Ausschlussmuster", + "app.settings_issues_desc": "URLs, die diesen Mustern entsprechen, werden von der Problemerkennung ausgeschlossen. Dies sind typischerweise Admin-Bereiche, Entwicklungsdateien und Framework-Interna, die nie von Suchmaschinen indexiert werden sollten.", + "app.settings_issues_label": "Ausschlussmuster (eines pro Zeile)", + "app.settings_issues_help": "* als Platzhalter verwenden. Beispiele: /admin/*, *.json, /test/*", + "app.settings_issues_reset": "Auf Standard zurücksetzen", + "app.settings_issues_reset_help": "Die Standard-Ausschlussmuster wiederherstellen", + + "app.settings_css_title": "Eigenes CSS", + "app.settings_css_desc": "Eigenes CSS schreiben, um das Aussehen der LibreCrawl-Oberfläche anzupassen. Änderungen werden nach dem Speichern sofort übernommen.", + "app.settings_css_label": "CSS-Code", + "app.settings_css_help": "Gültige CSS-Regeln eingeben, um die Oberfläche anzupassen", + + "app.settings_advanced_title": "Erweiterte Konfiguration", + "app.settings_concurrency": "Gleichzeitige Anfragen", + "app.settings_concurrency_help": "Anzahl simultaner Anfragen (mehr = schneller, aber ressourcenintensiver)", + "app.settings_memory": "Speicherlimit (MB)", + "app.settings_memory_help": "Maximale Speichernutzung für Crawl-Daten", + "app.settings_log_level": "Log-Level", + "app.settings_log_level_help": "Ausführlichkeit der Protokollierung", + "app.settings_save_session": "Sitzungsdaten speichern", + "app.settings_save_session_help": "Crawl-Daten automatisch zwischen Sitzungen speichern", + "app.settings_proxy_enable": "Proxy aktivieren", + "app.settings_proxy_enable_help": "Anfragen über einen Proxy-Server leiten", + "app.settings_proxy_url": "Proxy-URL", + "app.settings_proxy_url_help": "Proxy-Server-URL mit Port", + "app.settings_custom_headers": "Eigene HTTP-Header", + 
"app.settings_custom_headers_help": "Zusätzliche HTTP-Header senden (einer pro Zeile)", + + "app.btn_reset_defaults": "Auf Standard zurücksetzen", + "app.btn_cancel": "Abbrechen", + "app.btn_save_settings": "Einstellungen speichern" +} diff --git a/web/static/locales/en.json b/web/static/locales/en.json new file mode 100644 index 0000000..beaf312 --- /dev/null +++ b/web/static/locales/en.json @@ -0,0 +1,294 @@ +{ + "login.title": "Login - LibreCrawl", + "login.subtitle": "SEO Spider Tool", + "login.username": "Username", + "login.password": "Password", + "login.btn_login": "Login", + "login.btn_continue": "Continue", + "login.btn_logging_in": "Logging in...", + "login.btn_continuing": "Continuing...", + "login.guest": "Continue as Guest (3 crawls/24h)", + "login.guest_entering": "Entering as guest...", + "login.no_account": "Don't have an account?", + "login.register_here": "Register here", + "login.error_generic": "An error occurred. Please try again.", + "login.guest_error": "Failed to enter as guest", + "login.registrations_disabled": "Registrations are disabled", + + "register.title": "Register - LibreCrawl", + "register.subtitle": "Create your account", + "register.username": "Username", + "register.username_hint": "At least 3 characters", + "register.email": "Email", + "register.password": "Password", + "register.password_hint": "At least 8 characters", + "register.confirm_password": "Confirm Password", + "register.btn_register": "Register", + "register.btn_registering": "Registering...", + "register.btn_disabled": "Registration Disabled", + "register.have_account": "Already have an account?", + "register.login_here": "Login here", + "register.error_passwords": "Passwords do not match", + "register.error_password_length": "Password must be at least 8 characters", + "register.error_username_length": "Username must be at least 3 characters", + "register.error_generic": "An error occurred. 
Please try again.", + "register.disabled_msg": "Registrations are currently disabled. Please contact the administrator.", + + "app.btn_dashboard": "Dashboard", + "app.btn_settings": "Settings", + "app.btn_export": "Export", + "app.btn_save_crawl": "Save Crawl", + "app.btn_load_crawl": "Load Crawl", + "app.btn_logout": "Logout", + + "app.url_label": "URL to Crawl", + "app.btn_start": "Start", + "app.btn_stop": "Stop", + "app.btn_clear": "Clear", + "app.status_initializing": "Initializing...", + "app.status_ready": "Ready", + + "app.sidebar_filters": "Filters", + "app.filter_internal": "Internal", + "app.filter_external": "External", + "app.filter_response_codes": "Response Codes", + "app.filter_2xx": "2xx Success", + "app.filter_3xx": "3xx Redirect", + "app.filter_4xx": "4xx Client Error", + "app.filter_5xx": "5xx Server Error", + "app.filter_no_response": "No Response", + "app.filter_no_response_title": "DNS failure, connection refused, timeout, SSL error, etc. — useful for finding expired domains.", + "app.filter_content_type": "Content Type", + "app.filter_images": "Images", + + "app.sidebar_statistics": "Statistics", + "app.stat_discovered": "URLs Discovered", + "app.stat_crawled": "URLs Crawled", + "app.stat_depth": "Crawl Depth", + "app.stat_speed": "Speed", + + "app.sidebar_datasize": "Data Size", + "app.stat_crawl_data": "Crawl Data", + "app.stat_per_url": "Per URL", + "app.stat_est_1m": "Est. 
for 1M URLs", + "app.stat_system": "System Available", + + "app.tab_overview": "Overview", + "app.tab_internal": "Internal", + "app.tab_external": "External", + "app.tab_status_codes": "Status Codes", + "app.tab_links": "Links", + "app.tab_issues": "Issues", + "app.tab_pagespeed": "PageSpeed", + "app.tab_visualization": "Visualization", + + "app.th_address": "Address", + "app.th_status": "Status", + "app.th_title": "Title", + "app.th_meta_desc": "Meta Desc", + "app.th_h1": "H1", + "app.th_words": "Words", + "app.th_response": "Response (ms)", + "app.th_analytics": "Analytics", + "app.th_og_tags": "OG Tags", + "app.th_json_ld": "JSON-LD", + "app.th_links_int_ext": "Links (Int/Ext)", + "app.th_images": "Images", + "app.th_js": "JS", + "app.th_details": "Details", + "app.th_content_type": "Content Type", + "app.th_size": "Size", + "app.th_status_code": "Status Code", + "app.th_count": "Count", + "app.th_percentage": "Percentage", + "app.th_source_url": "Source URL", + "app.th_target_url": "Target URL", + "app.th_anchor_text": "Anchor Text", + "app.th_placement": "Placement", + "app.th_domain": "Domain", + "app.th_type": "Type", + "app.th_category": "Category", + "app.th_issue": "Issue", + + "app.links_internal": "🔗 Internal Links", + "app.links_external": "🌐 External Links", + "app.links_all_status": "All Status Codes", + "app.links_search": "Search...", + + "app.issues_all": "All Issues", + "app.issues_errors": "Errors", + "app.issues_warnings": "Warnings", + "app.issues_info": "Info", + "app.issues_empty_title": "No Issues Found", + "app.issues_empty_desc": "Start crawling to detect SEO issues and problems on your website.", + + "app.pagespeed_title": "PageSpeed Insights Results", + "app.pagespeed_desc": "Performance analysis for homepage and category pages (powered by Google PageSpeed Insights)", + "app.pagespeed_placeholder": "🚀 PageSpeed analysis will appear here after crawl completion", + "app.pagespeed_hint": "Enable PageSpeed Analysis in settings to get Core 
Web Vitals and performance scores", + + "app.viz_title": "Site Structure Visualization", + "app.viz_layout_force": "Force-Directed", + "app.viz_layout_hierarchical": "Hierarchical", + "app.viz_layout_circle": "Circle", + "app.viz_layout_concentric": "Concentric", + "app.viz_layout_grid": "Grid", + "app.viz_filter_all": "All Pages", + "app.viz_filter_html": "HTML Only", + "app.viz_btn_reset": "Reset View", + "app.viz_btn_export": "Export PNG", + "app.viz_no_data": "No Data Yet", + "app.viz_no_data_desc": "Start crawling to visualize your site structure", + "app.viz_legend_2xx": "2xx Success", + "app.viz_legend_3xx": "3xx Redirect", + "app.viz_legend_4xx": "4xx Error", + "app.viz_legend_5xx": "5xx Error", + "app.viz_legend_other": "Other", + + "app.modal_crawl_history": "Crawl History", + "app.modal_settings": "Settings", + "app.modal_loading_crawls": "Loading crawls...", + + "app.settings_tab_crawler": "Crawler", + "app.settings_tab_requests": "Requests", + "app.settings_tab_filters": "Filters", + "app.settings_tab_export": "Export", + "app.settings_tab_javascript": "JavaScript", + "app.settings_tab_issues": "Issue Exclusion", + "app.settings_tab_customcss": "Custom CSS", + "app.settings_tab_advanced": "Advanced", + + "app.settings_crawler_title": "Crawler Configuration", + "app.settings_max_depth": "Maximum Crawl Depth", + "app.settings_max_depth_help": "How deep to crawl from the starting URL", + "app.settings_max_urls": "Maximum URLs to Crawl", + "app.settings_max_urls_help": "Stop crawling after this many URLs", + "app.settings_crawl_delay": "Crawl Delay (seconds)", + "app.settings_crawl_delay_help": "Delay between requests to be respectful", + "app.settings_follow_redirects": "Follow Redirects", + "app.settings_follow_redirects_help": "Follow 3xx redirects automatically", + "app.settings_crawl_external": "Crawl External Links", + "app.settings_crawl_external_help": "Include external domains in crawl", + + "app.settings_requests_title": "HTTP Request 
Configuration", + "app.settings_user_agent": "User Agent", + "app.settings_user_agent_help": "User agent string sent with requests", + "app.settings_timeout": "Request Timeout (seconds)", + "app.settings_timeout_help": "How long to wait for a response", + "app.settings_retries": "Retry Attempts", + "app.settings_retries_help": "Number of retries for failed requests", + "app.settings_accept_lang": "Accept Language", + "app.settings_accept_lang_help": "Preferred language for responses", + "app.settings_robots_txt": "Respect robots.txt", + "app.settings_robots_txt_help": "Check robots.txt before crawling", + "app.settings_cookies": "Allow Cookies", + "app.settings_cookies_help": "Accept and send cookies", + "app.settings_sitemaps": "Discover Sitemaps", + "app.settings_sitemaps_help": "Automatically find and parse sitemap.xml files (including nested sitemaps)", + "app.settings_pagespeed_enable": "Enable PageSpeed Analysis", + "app.settings_pagespeed_enable_help": "Run Google PageSpeed Insights on homepage and 2 category pages after crawl completes", + "app.settings_pagespeed_key": "Google PageSpeed API Key", + "app.settings_pagespeed_key_help": "API key for higher rate limits (25k requests/day vs limited without key).", + "app.settings_pagespeed_key_link": "Get API key here", + + "app.settings_filters_title": "Content Filters", + "app.settings_include_ext": "Include File Extensions", + "app.settings_include_ext_help": "Comma-separated list of extensions to crawl", + "app.settings_exclude_ext": "Exclude File Extensions", + "app.settings_exclude_ext_help": "Comma-separated list of extensions to skip", + "app.settings_include_patterns": "Include URL Patterns", + "app.settings_include_patterns_placeholder": "One pattern per line (regex supported)", + "app.settings_include_patterns_help": "Only crawl URLs matching these patterns", + "app.settings_exclude_patterns": "Exclude URL Patterns", + "app.settings_exclude_patterns_placeholder": "One pattern per line (regex 
supported)", + "app.settings_exclude_patterns_help": "Skip URLs matching these patterns", + "app.settings_max_filesize": "Maximum File Size (MB)", + "app.settings_max_filesize_help": "Skip files larger than this size", + "app.settings_duplication_title": "Duplication Detection", + "app.settings_duplication_enable": "Enable Duplication Check", + "app.settings_duplication_enable_help": "Detect duplicate content across all crawled pages", + "app.settings_duplication_threshold": "Duplication Similarity Threshold", + "app.settings_duplication_threshold_help": "Content similarity threshold (0.0-1.0). Higher values = stricter matching. Default: 0.85 (85% similar)", + + "app.settings_export_title": "Export Configuration", + "app.settings_export_format": "Default Export Format", + "app.settings_export_format_help": "Default format for data exports", + "app.settings_export_fields": "Export Fields", + "app.export_url": "URL", + "app.export_status_code": "Status Code", + "app.export_title": "Title", + "app.export_meta_desc": "Meta Description", + "app.export_h1": "H1 Tags", + "app.export_word_count": "Word Count", + "app.export_content_type": "Content Type", + "app.export_response_time": "Response Time", + "app.export_canonical": "Canonical URL", + "app.export_og_tags": "OpenGraph Tags", + "app.export_twitter": "Twitter Cards", + "app.export_json_ld": "JSON-LD Data", + "app.export_analytics": "Analytics Tracking", + "app.export_internal_links": "Internal Links Count", + "app.export_external_links": "External Links Count", + "app.export_images": "Images Count", + "app.export_links_detailed": "Detailed Links Data (separate file)", + "app.export_lang": "Language", + "app.export_charset": "Charset", + "app.export_viewport": "Viewport", + "app.export_robots": "Robots Meta", + "app.export_issues": "Issues Detected (separate export)", + + "app.settings_js_title": "JavaScript Rendering Configuration", + "app.settings_js_enable": "Enable JavaScript Rendering", + 
"app.settings_js_enable_help": "Render pages with JavaScript for dynamic content (slower but more accurate)", + "app.settings_js_wait": "JavaScript Wait Time (seconds)", + "app.settings_js_wait_help": "Time to wait for JavaScript to render after page load", + "app.settings_js_page_timeout": "Page Load Timeout (seconds)", + "app.settings_js_page_timeout_help": "Maximum time to wait for page to load", + "app.settings_js_browser": "Browser Engine", + "app.settings_js_browser_help": "Browser engine for JavaScript rendering", + "app.settings_js_chromium": "Chromium (Recommended)", + "app.settings_js_firefox": "Firefox", + "app.settings_js_webkit": "WebKit (Safari)", + "app.settings_js_headless": "Headless Mode", + "app.settings_js_headless_help": "Run browser without visible window (recommended for performance)", + "app.settings_js_useragent": "JavaScript User Agent", + "app.settings_js_useragent_help": "User agent string for JavaScript-rendered pages", + "app.settings_js_viewport": "Browser Viewport Size", + "app.settings_js_viewport_help": "Browser window size for rendering (affects responsive layouts)", + "app.settings_js_concurrency": "Max Concurrent Browser Pages", + "app.settings_js_concurrency_help": "Number of browser pages for parallel JavaScript rendering (higher = faster but more memory)", + "app.settings_js_warning_title": "⚠️ Performance Impact", + "app.settings_js_warning_text": "JavaScript rendering is significantly slower than HTTP-only crawling. Use for sites with dynamic content or when static crawling produces incomplete results.", + + "app.settings_issues_title": "Issue Exclusion Patterns", + "app.settings_issues_desc": "URLs matching these patterns will be excluded from issue detection. These are typically admin areas, development files, and framework internals that should never be indexed by search engines.", + "app.settings_issues_label": "Exclusion Patterns (one per line)", + "app.settings_issues_help": "Use * for wildcards. 
Examples: /admin/*, *.json, /test/*", + "app.settings_issues_reset": "Reset to Defaults", + "app.settings_issues_reset_help": "Restore the default exclusion patterns", + + "app.settings_css_title": "Custom CSS Styling", + "app.settings_css_desc": "Write custom CSS to personalize the look and feel of the LibreCrawl interface. Changes apply immediately after saving.", + "app.settings_css_label": "Custom CSS Code", + "app.settings_css_help": "Enter valid CSS rules to customize the interface appearance", + + "app.settings_advanced_title": "Advanced Configuration", + "app.settings_concurrency": "Concurrent Requests", + "app.settings_concurrency_help": "Number of simultaneous requests (higher = faster but more resource intensive)", + "app.settings_memory": "Memory Limit (MB)", + "app.settings_memory_help": "Maximum memory usage for crawl data", + "app.settings_log_level": "Log Level", + "app.settings_log_level_help": "Logging verbosity level", + "app.settings_save_session": "Save Session Data", + "app.settings_save_session_help": "Automatically save crawl data between sessions", + "app.settings_proxy_enable": "Enable Proxy", + "app.settings_proxy_enable_help": "Route requests through a proxy server", + "app.settings_proxy_url": "Proxy URL", + "app.settings_proxy_url_help": "Proxy server URL with port", + "app.settings_custom_headers": "Custom Headers", + "app.settings_custom_headers_help": "Additional HTTP headers to send (one per line)", + + "app.btn_reset_defaults": "Reset to Defaults", + "app.btn_cancel": "Cancel", + "app.btn_save_settings": "Save Settings" +} diff --git a/web/templates/index.html b/web/templates/index.html index f54a3c9..973db63 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -1,1012 +1,1015 @@ - - - - - LibreCrawl - SEO Spider - - - - - -
- -
-
-

LibreCrawl

-
- - - - - - - -
-
-
- - -
-
-
- - -
-
- - - -
-
- -
- - -
- - - - -
-
-
- - - - - - - - -
- -
-
-
- - - - - - - - - - - - - - - - - - - - - - -
AddressStatusTitleMeta DescH1WordsResponse (ms)AnalyticsOG TagsJSON-LDLinks (Int/Ext)ImagesJSDetails
-
-
- -
-
- - - - - - - - - - - - - -
AddressStatusContent TypeSizeTitle
-
-
- -
-
- - - - - - - - - - - - - -
AddressStatusContent TypeSizeTitle
-
-
- -
-
- - - - - - - - - - - - -
Status CodeStatusCountPercentage
-
-
- - - -
- -
- - - - -
- -
- - - - - - - - - - - - - -
URLTypeCategoryIssueDetails
- -
-
- -
-
-
-

PageSpeed Insights Results

-

Performance analysis for homepage and category pages (powered by Google PageSpeed Insights)

-
-
-
-

🚀 PageSpeed analysis will appear here after crawl completion

-

Enable PageSpeed Analysis in settings to get Core Web Vitals and performance scores

-
-
-
-
- -
-
-
-

Site Structure Visualization

-
- - - - -
-
-
-
- - - - - - - - - - - -

No Data Yet

-

Start crawling to visualize your site structure

-
-
-
-
- - 2xx Success -
-
- - 3xx Redirect -
-
- - 4xx Error -
-
- - 5xx Error -
-
- - Other -
-
-
-
-
-
-
-
- - -
- Ready - 00:00 -
-
- - - - - - - - - - - - - - - - + + + + + LibreCrawl - SEO Spider + + + + + +
+ +
+
+

LibreCrawl

+
+ + + + + + + + +
+
+
+ + +
+
+
+ + +
+
+ + + +
+
+ +
+ + +
+ + + + +
+
+
+ + + + + + + + +
+ +
+
+
+ + + + + + + + + + + + + + + + + + + + + + +
AddressStatusTitleMeta DescH1WordsResponse (ms)AnalyticsOG TagsJSON-LDLinks (Int/Ext)ImagesJSDetails
+
+
+ +
+
+ + + + + + + + + + + + + +
AddressStatusContent TypeSizeTitle
+
+
+ +
+
+ + + + + + + + + + + + + +
AddressStatusContent TypeSizeTitle
+
+
+ +
+
+ + + + + + + + + + + + +
Status CodeStatusCountPercentage
+
+
+ + + +
+ +
+ + + + +
+ +
+ + + + + + + + + + + + + +
URLTypeCategoryIssueDetails
+ +
+
+ +
+
+
+

PageSpeed Insights Results

+

Performance analysis for homepage and category pages (powered by Google PageSpeed Insights)

+
+
+
+

🚀 PageSpeed analysis will appear here after crawl completion

+

Enable PageSpeed Analysis in settings to get Core Web Vitals and performance scores

+
+
+
+
+ +
+
+
+

Site Structure Visualization

+
+ + + + +
+
+
+
+ + + + + + + + + + + +

No Data Yet

+

Start crawling to visualize your site structure

+
+
+
+
+ + 2xx Success +
+
+ + 3xx Redirect +
+
+ + 4xx Error +
+
+ + 5xx Error +
+
+ + Other +
+
+
+
+
+
+
+
+ + +
+ Ready + 00:00 +
+
+ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/web/templates/login.html b/web/templates/login.html index 9890083..08d9eba 100644 --- a/web/templates/login.html +++ b/web/templates/login.html @@ -1,313 +1,332 @@ - - - - - - Login - LibreCrawl - - - - - - - - - + + + + + + Login - LibreCrawl + + + + + + + + + + diff --git a/web/templates/register.html b/web/templates/register.html index e13110f..fb8166f 100644 --- a/web/templates/register.html +++ b/web/templates/register.html @@ -1,297 +1,309 @@ - - - - - - Register - LibreCrawl - - - - -
- - -
- - {% if registration_disabled %} -
- Registrations are currently disabled. Please contact the administrator. -
- {% endif %} - -
-
- - -
At least 3 characters
-
- -
- - -
- -
- - -
At least 8 characters
-
- -
- - -
- - -
- - -
- - - - + + + + + + Register - LibreCrawl + + + + +
+ + + + +
+ + {% if registration_disabled %} +
+ Registrations are currently disabled. Please contact the administrator. +
+ {% endif %} + +
+
+ + +
At least 3 characters
+
+ +
+ + +
+ +
+ + +
At least 8 characters
+
+ +
+ + +
+ + +
+ + +
+ + + + + From 3bacdac93f4508353fd9d34dbf4797df197312a7 Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 19:30:29 +0200 Subject: [PATCH 10/11] fix(i18n): translate remaining settings help texts and info boxes - Duplication threshold help text - Issue exclusion description, Reset button, info box with bullet list - Custom CSS description and CSS Tips info box Uses data-i18n-html for blocks containing HTML markup. --- web/static/locales/de.json | 4 ++++ web/static/locales/en.json | 4 ++++ web/templates/index.html | 14 +++++++------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/web/static/locales/de.json b/web/static/locales/de.json index 9256479..782a912 100644 --- a/web/static/locales/de.json +++ b/web/static/locales/de.json @@ -288,6 +288,10 @@ "app.settings_custom_headers": "Eigene HTTP-Header", "app.settings_custom_headers_help": "Zusätzliche HTTP-Header senden (einer pro Zeile)", + "app.settings_duplication_threshold_help": "Inhaltliche Ähnlichkeitsschwelle (0,0–1,0). Höhere Werte = strengere Prüfung. Standard: 0,85 (85 % ähnlich)", + "app.settings_issues_defaults_html": "ℹ️ Standard-Ausschlüsse enthalten:
• WordPress-Admin-Bereiche (/wp-admin/*, /wp-content/plugins/*)
• Gängige Admin-Panels (/admin/*, /administrator/*, /cpanel/*)
• Entwicklungsdateien (/.git/*, /node_modules/*, /vendor/*)
• Build- und Cache-Verzeichnisse (/build/*, /cache/*, /tmp/*)
• Konfigurationsdateien (*.json, *.yaml, *.xml)
• Backup-Dateien (*.bak, *.backup, *.old)", + "app.settings_css_tips_html": "💡 CSS-Tipps:
• Bestimmte Elemente ansprechen: .header, .data-table, .sidebar
• Farben ändern: background-color, color, border-color
• Abstände anpassen: padding, margin, gap
• Schriften anpassen: font-family, font-size, font-weight
• Bei Bedarf !important verwenden, um Standard-Styles zu überschreiben
• Änderungen betreffen nur die Benutzeroberfläche", + "app.btn_reset_defaults": "Auf Standard zurücksetzen", "app.btn_cancel": "Abbrechen", "app.btn_save_settings": "Einstellungen speichern" diff --git a/web/static/locales/en.json b/web/static/locales/en.json index beaf312..faa9f65 100644 --- a/web/static/locales/en.json +++ b/web/static/locales/en.json @@ -288,6 +288,10 @@ "app.settings_custom_headers": "Custom Headers", "app.settings_custom_headers_help": "Additional HTTP headers to send (one per line)", + "app.settings_duplication_threshold_help": "Content similarity threshold (0.0-1.0). Higher values = stricter matching. Default: 0.85 (85% similar)", + "app.settings_issues_defaults_html": "ℹ️ Default Exclusions Include:
• WordPress admin areas (/wp-admin/*, /wp-content/plugins/*)
• Common admin panels (/admin/*, /administrator/*, /cpanel/*)
• Development files (/.git/*, /node_modules/*, /vendor/*)
• Build and cache directories (/build/*, /cache/*, /tmp/*)
• Configuration files (*.json, *.yaml, *.xml)
• Backup files (*.bak, *.backup, *.old)", + "app.settings_css_tips_html": "💡 CSS Tips:
• Target specific elements: .header, .data-table, .sidebar
• Change colors: background-color, color, border-color
• Modify spacing: padding, margin, gap
• Adjust fonts: font-family, font-size, font-weight
• Use !important to override default styles if needed
• Changes only affect the frontend UI", + "app.btn_reset_defaults": "Reset to Defaults", "app.btn_cancel": "Cancel", "app.btn_save_settings": "Save Settings" diff --git a/web/templates/index.html b/web/templates/index.html index 973db63..0a04746 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -678,7 +678,7 @@

Duplication Detection

- Content similarity threshold (0.0-1.0). Higher values = stricter matching. Default: 0.85 (85% similar) + Content similarity threshold (0.0-1.0). Higher values = stricter matching. Default: 0.85 (85% similar)
@@ -867,7 +867,7 @@

JavaScript Rendering Configuration

Issue Exclusion Patterns

-

URLs matching these patterns will be excluded from issue detection. These are typically admin areas, development files, and framework internals that should never be indexed by search engines.

+

URLs matching these patterns will be excluded from issue detection. These are typically admin areas, development files, and framework internals that should never be indexed by search engines.

@@ -876,15 +876,15 @@

Issue Exclusion Patterns

*.json"> Use * for wildcards. Examples: /admin/*, *.json, /test/*
- - Restore the default exclusion patterns + Restore the default exclusion patterns
-
+
ℹ️ Default Exclusions Include:
• WordPress admin areas (/wp-admin/*, /wp-content/plugins/*)
• Common admin panels (/admin/*, /administrator/*, /cpanel/*)
@@ -899,7 +899,7 @@

Issue Exclusion Patterns

Custom CSS Styling

-

Write custom CSS to personalize the look and feel of the LibreCrawl interface. Changes apply immediately after saving.

+

Write custom CSS to personalize the look and feel of the LibreCrawl interface. Changes apply immediately after saving.

@@ -912,7 +912,7 @@

Custom CSS Styling

-
+
💡 CSS Tips:
• Target specific elements: .header, .data-table, .sidebar
• Change colors: background-color, color, border-color
From 31c4023015f5562c249a524905a9355b4eda14a3 Mon Sep 17 00:00:00 2001 From: henosch Date: Wed, 27 May 2026 19:46:00 +0200 Subject: [PATCH 11/11] docs: add German README (README.de.md) --- README.de.md | 208 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) create mode 100644 README.de.md diff --git a/README.de.md b/README.de.md new file mode 100644 index 0000000..020e87b --- /dev/null +++ b/README.de.md @@ -0,0 +1,208 @@ +# LibreCrawl 🇩🇪 + +Ein webbasierter SEO-Crawler für Website-Analysen und technische SEO-Audits — kostenlos, quelloffen und ohne Abonnement. + +🌐 **Website**: [librecrawl.com](https://librecrawl.com) +📖 **API-Dokumentation**: [librecrawl.com/api/docs](https://librecrawl.com/api/docs/) + +> LibreCrawl wird ***immer*** kostenlos und quelloffen bleiben. +> Wenn es deine Screaming-Frog-Lizenz für 259 $/Jahr ersetzt, freut sich der Entwickler über einen [Kaffee ☕](https://www.paypal.com/donate/?business=7H9HFA3385JS8&no_recurring=0&item_name=Continue+the+development+of+LibreCrawl&currency_code=AUD). + +--- + +## Was LibreCrawl macht + +LibreCrawl crawlt Websites und liefert detaillierte Informationen zu Seiten, Links, SEO-Elementen und Performance. Es ist als Webanwendung auf Basis von Python Flask gebaut und unterstützt mehrere gleichzeitige Benutzer mit isolierten Sitzungen. + +## Features + +- 🚀 **Multi-Tenancy** – Mehrere Benutzer können gleichzeitig crawlen, vollständig isoliert +- 🌍 **Deutsche & englische Oberfläche** – Sprachumschalter (DE/EN) in der Kopfzeile +- 🎨 **Eigenes CSS** – Oberfläche mit eigenem CSS-Theme personalisieren +- 🔄 **JavaScript-Rendering** für dynamische Inhalte (React, Vue, Angular usw.) +- 📊 **SEO-Analyse** – Titel, Meta-Beschreibungen, Überschriften, Author, Canonical u.v.m. 
+- 🔗 **Link-Analyse** – Interne und externe Links mit detaillierter Beziehungskarte +- 📈 **PageSpeed-Insights-Integration** – Core Web Vitals analysieren +- 💾 **Mehrere Exportformate** – CSV, JSON oder XML +- 🔍 **Problemerkennung** – Automatische SEO-Fehler- und Warnungserkennung +- ⚡ **Echtzeit-Fortschritt** mit Live-Statistiken + +--- + +## Installation + +### Schnellstart (automatisch) + +**Windows:** +```batch +start-librecrawl.bat +``` + +**Linux/Mac:** +```bash +chmod +x start-librecrawl.sh +./start-librecrawl.sh +``` + +Das Skript prüft automatisch, ob Docker vorhanden ist, installiert Abhängigkeiten und öffnet LibreCrawl unter `http://localhost:5000`. + +--- + +### Manuelle Installation + +#### Option 1: Docker (empfohlen) + +**Voraussetzungen:** Docker und Docker Compose + +```bash +# Repository klonen +git clone https://github.com/PhialsBasement/LibreCrawl.git +cd LibreCrawl + +# Umgebungsdatei kopieren +cp .env.example .env + +# Starten +docker compose up -d + +# Browser öffnen: http://localhost:5000 +``` + +**Lokaler Betrieb** (Standard — kein Login erforderlich): +```bash +# .env +LOCAL_MODE=true +HOST_BINDING=127.0.0.1 +``` + +**Produktionsbetrieb** (mit Benutzeranmeldung): +```bash +# .env +LOCAL_MODE=false +HOST_BINDING=0.0.0.0 +``` + +Nach Änderungen den Container neu bauen: +```bash +docker compose up -d --build +``` + +--- + +#### Option 2: Python + +**Voraussetzungen:** Python 3.8+ + +```bash +# Abhängigkeiten installieren +pip install -r requirements.txt + +# Optional: JavaScript-Rendering +playwright install chromium + +# Starten (lokaler Modus, kein Login) +python main.py --local + +# Starten (Standard mit Authentifizierung) +python main.py +``` + +Browser öffnen: `http://localhost:5000` + +--- + +## Betriebsmodi + +| Modus | Beschreibung | +|---|---| +| **Lokalmodus** (`--local`) | Kein Login, alle Benutzer erhalten Admin-Rechte — ideal für Einzelnutzer | +| **Standardmodus** | Vollständiges Authentifizierungssystem mit Benutzerrollen und 
Ratelimits | + +--- + +## Einstellungen + +Über den Button **„Einstellungen“** konfigurierbar: + +| Bereich | Optionen | +|---|---| +| **Crawler** | Crawl-Tiefe (bis 5 Mio. URLs), Verzögerung, externe Links | +| **Anfragen** | User Agent, Timeouts, Proxy, robots.txt | +| **JavaScript** | Browser-Engine, Wartezeit, Viewport-Größe | +| **Filter** | Dateitypen und URL-Muster ein-/ausschließen | +| **Export** | Format und Felder auswählen | +| **Eigenes CSS** | Oberfläche mit eigenem Stylesheet anpassen | +| **Problem-Ausschluss** | Muster für die SEO-Problemerkennung ausschließen | + +Für PageSpeed-Analysen einen Google-API-Schlüssel unter **Einstellungen → Anfragen** eintragen (25.000 Anfragen/Tag statt limitiertem Kontingent). + +--- + +## Exportformate + +- **CSV** – Für Tabellenkalkulationen (Excel, Google Sheets) +- **JSON** – Strukturierte Daten mit allen Details +- **XML** – Für externe Tools und Weiterverarbeitung + +--- + +## Plugins + +Eigene Plugins als `.js`-Datei in `/web/static/plugins/` ablegen — sie erscheinen automatisch als neuer Tab in der Oberfläche. + +```javascript +LibreCrawlPlugin.register({ + id: 'mein-plugin', + name: 'Mein Plugin', + tab: { label: 'Mein Tab', icon: '🔥' }, + + onTabActivate(container, data) { + // data enthält: { urls, links, issues, stats } + container.innerHTML = ` +
+

Meine Analyse

+

${data.urls.length} URLs gefunden

+
+ `; + } +}); +``` + +Beispiel-Plugins: `_example-plugin.js` (Vorlage) und `e-e-a-t.js` (E-E-A-T-Analyse). + +--- + +## Bekannte Einschränkungen + +- PageSpeed API hat Ratelimits (mit API-Schlüssel deutlich besser) +- Große Websites benötigen mehr Zeit +- JavaScript-Rendering ist langsamer als reines HTTP-Crawling +- Einstellungen werden im Browser-LocalStorage gespeichert (gehen verloren, wenn Browser-Daten gelöscht werden) + +--- + +## Projektstruktur + +``` +main.py – Flask-Server und Haupt-Einstiegspunkt +src/crawler.py – Kern-Crawling-Engine +src/core/seo_extractor.py – SEO-Datenextraktion +src/crawl_db.py – Datenbankoperationen +web/templates/ – HTML-Templates +web/static/js/ – Frontend-JavaScript +web/static/locales/ – Übersetzungsdateien (de.json, en.json) +web/static/plugins/ – Eigene Plugins +``` + +--- + +## Lizenz + +MIT License — siehe [LICENSE](LICENSE). + +--- + +## Danksagung + +Herzlichen Dank an **[PhialsBasement](https://github.com/PhialsBasement)** für die Entwicklung von LibreCrawl und dafür, dass er es der Community als freies, quelloffenes Werkzeug zur Verfügung stellt. +Dieses Projekt wäre ohne seine Arbeit nicht möglich.