From fe593433c744da9774f84586c8e15c93ef062b06 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:13:22 -0400 Subject: [PATCH 01/17] Fix PyICU dependencies --- update_data.sh | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/update_data.sh b/update_data.sh index 42cde94..9c2e076 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,8 +16,9 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") -DATA_TYPES=("nouns" "verbs") +TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +# DATA_TYPES=("nouns" "verbs" "emoji_keywords") +DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' GREEN='\033[0;32m' @@ -62,7 +63,8 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." - git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 } @@ -100,6 +102,15 @@ if ! command -v pip &> /dev/null && ! command -v pip3 &> /dev/null; then success "pip installed successfully" fi +# MARK: System Dependencies + +log "๐Ÿ”ง Installing system dependencies for PyICU..." +sudo apt-get install -y libicu-dev pkg-config g++ python3-dev || { + error "Failed to install system dependencies" + exit 1 +} +success "System dependencies installed" + # MARK: Make Venv log "๐Ÿงช Setting up virtual environment..." @@ -130,6 +141,13 @@ pip install -e . 
|| { } success "Dependencies installed successfully" +log "๐Ÿ”ง Building PyICU from source against local ICU..." +pip install --force-reinstall --no-binary :all: PyICU || { + error "Failed to build PyICU from source" + exit 1 +} +success "PyICU built from source successfully" + # MARK: Download Wikidata Dump First DUMP_DIR="./scribe_data_wikidata_dumps_export" From a46c91a2e01f1e733f4937c3944a3271e3c9cfd6 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:14:01 -0400 Subject: [PATCH 02/17] Removed patched Scribe-Data fork url --- update_data.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/update_data.sh b/update_data.sh index 9c2e076..eee3771 100755 --- a/update_data.sh +++ b/update_data.sh @@ -63,7 +63,6 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." - #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 From 056e49420475f4b33160366557bd0545ab9bdeb6 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:14:37 -0400 Subject: [PATCH 03/17] Re added all languages --- update_data.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/update_data.sh b/update_data.sh index eee3771..110adf6 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,8 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. 
-TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") -# DATA_TYPES=("nouns" "verbs" "emoji_keywords") +TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' From d8a21d891cd6cda256c1da4ae0b4d5d531f070f1 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:19:13 -0400 Subject: [PATCH 04/17] added test url to test github actions flow --- update_data.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/update_data.sh b/update_data.sh index 110adf6..9c2e076 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,7 +16,8 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +# DATA_TYPES=("nouns" "verbs" "emoji_keywords") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' @@ -62,6 +63,7 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." 
+ #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 From e608231f9e7dd6e836c65c5a5fe4993b09512a86 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:23:34 -0400 Subject: [PATCH 05/17] correct python version --- .github/workflows/update_scribe_data.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index f1a59ed..36c326e 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -43,10 +43,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@v4 with: - python-version: "3.11" + python-version: "3.12" - name: Install system dependencies run: | From 1ef0dce1df8df1798de8e0345007bad5e3c94d67 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:26:17 -0400 Subject: [PATCH 06/17] Added workflow step to verify sqlite output --- .github/workflows/update_scribe_data.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 36c326e..2441d8e 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -60,6 +60,15 @@ jobs: id: run_script run: ./update_data.sh true + - name: Verify SQLite output + run: | + DB="./packs/sqlite/ENLanguageData.sqlite" + echo "Tables in $DB:" + sqlite3 "$DB" ".tables" + echo "" + echo "Sample row from emoji_keywords:" + sqlite3 "$DB" "SELECT * FROM emoji_keywords LIMIT 1;" + - name: Create deployment package run: | if [ ! 
-d "./packs/sqlite" ]; then From 8bf3d6004d13f5ef37a61845625768793da57db1 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Sat, 25 Apr 2026 17:37:59 -0400 Subject: [PATCH 07/17] removed testing environment links and re added all languages --- update_data.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/update_data.sh b/update_data.sh index 9c2e076..44978f2 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,8 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") -# DATA_TYPES=("nouns" "verbs" "emoji_keywords") +TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' @@ -63,8 +62,7 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." 
- #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { - git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 } From 39164196804973e1638c26012b5d56b6354bc7ea Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 20:36:58 -0400 Subject: [PATCH 08/17] test workflow with dependencies moved to github workflow --- .github/workflows/update_scribe_data.yml | 2 +- update_data.sh | 12 ++---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 2441d8e..1410924 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make + sudo apt-get install -y curl git sqlite3 make libicu-dev pkg-config g++ python3-dev - name: Make script executable run: chmod +x ./update_data.sh diff --git a/update_data.sh b/update_data.sh index 44978f2..b85e66f 100755 --- a/update_data.sh +++ b/update_data.sh @@ -62,7 +62,8 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." - git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 } @@ -100,15 +101,6 @@ if ! command -v pip &> /dev/null && ! 
command -v pip3 &> /dev/null; then success "pip installed successfully" fi -# MARK: System Dependencies - -log "๐Ÿ”ง Installing system dependencies for PyICU..." -sudo apt-get install -y libicu-dev pkg-config g++ python3-dev || { - error "Failed to install system dependencies" - exit 1 -} -success "System dependencies installed" - # MARK: Make Venv log "๐Ÿงช Setting up virtual environment..." From 2a0467fdde545c470fc725889f87f57fb3993e3c Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 20:54:49 -0400 Subject: [PATCH 09/17] english for testing --- update_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update_data.sh b/update_data.sh index b85e66f..b1862fa 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,7 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' From 0e7667b73b25262d4617e391a234842b33615ec6 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 21:05:07 -0400 Subject: [PATCH 10/17] test w/o deps --- .github/workflows/update_scribe_data.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 1410924..2441d8e 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make libicu-dev pkg-config g++ python3-dev + sudo apt-get install -y curl git sqlite3 make - name: Make script executable run: chmod +x ./update_data.sh From 2bf19cd03eb2d1226d1d5913957a56223d90d4a3 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith 
Date: Mon, 27 Apr 2026 21:16:02 -0400 Subject: [PATCH 11/17] test build local pyicu --- update_data.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/update_data.sh b/update_data.sh index b1862fa..601f996 100755 --- a/update_data.sh +++ b/update_data.sh @@ -131,12 +131,12 @@ pip install -e . || { } success "Dependencies installed successfully" -log "๐Ÿ”ง Building PyICU from source against local ICU..." -pip install --force-reinstall --no-binary :all: PyICU || { - error "Failed to build PyICU from source" - exit 1 -} -success "PyICU built from source successfully" +# log "๐Ÿ”ง Building PyICU from source against local ICU..." +# pip install --force-reinstall --no-binary :all: PyICU || { +# error "Failed to build PyICU from source" +# exit 1 +# } +# success "PyICU built from source successfully" # MARK: Download Wikidata Dump First From 35eecd93aa5eb3023273e513c7006def4a3e994e Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 21:24:42 -0400 Subject: [PATCH 12/17] test just deps no local pyicu --- .github/workflows/update_scribe_data.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 2441d8e..1410924 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make + sudo apt-get install -y curl git sqlite3 make libicu-dev pkg-config g++ python3-dev - name: Make script executable run: chmod +x ./update_data.sh From e385107da260e0decb6a654f532f81568e6411ac Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 21:36:01 -0400 Subject: [PATCH 13/17] pyicu from source, no deps --- .github/workflows/update_scribe_data.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/update_scribe_data.yml 
b/.github/workflows/update_scribe_data.yml index 1410924..2441d8e 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make libicu-dev pkg-config g++ python3-dev + sudo apt-get install -y curl git sqlite3 make - name: Make script executable run: chmod +x ./update_data.sh From 4bd84768148028b94c6cb55732af145fda75fd02 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Mon, 27 Apr 2026 21:46:20 -0400 Subject: [PATCH 14/17] no deps again, but commit the right file --- .github/workflows/update_scribe_data.yml | 2 +- update_data.sh | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 2441d8e..72d8aa5 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make + sudo apt-get install -y curl git sqlite3 make - name: Make script executable run: chmod +x ./update_data.sh diff --git a/update_data.sh b/update_data.sh index 601f996..b1862fa 100755 --- a/update_data.sh +++ b/update_data.sh @@ -131,12 +131,12 @@ pip install -e . || { } success "Dependencies installed successfully" -# log "๐Ÿ”ง Building PyICU from source against local ICU..." -# pip install --force-reinstall --no-binary :all: PyICU || { -# error "Failed to build PyICU from source" -# exit 1 -# } -# success "PyICU built from source successfully" +log "๐Ÿ”ง Building PyICU from source against local ICU..." 
+pip install --force-reinstall --no-binary :all: PyICU || { + error "Failed to build PyICU from source" + exit 1 +} +success "PyICU built from source successfully" # MARK: Download Wikidata Dump First From 8bcbbf45f77547dd5dbbf3e7625c70dcbc495337 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Tue, 28 Apr 2026 10:13:28 -0400 Subject: [PATCH 15/17] remove testing env --- .github/workflows/update_scribe_data.yml | 2 +- update_data.sh | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/update_scribe_data.yml b/.github/workflows/update_scribe_data.yml index 72d8aa5..1410924 100644 --- a/.github/workflows/update_scribe_data.yml +++ b/.github/workflows/update_scribe_data.yml @@ -51,7 +51,7 @@ jobs: - name: Install system dependencies run: | sudo apt-get update - sudo apt-get install -y curl git sqlite3 make + sudo apt-get install -y curl git sqlite3 make libicu-dev pkg-config g++ python3-dev - name: Make script executable run: chmod +x ./update_data.sh diff --git a/update_data.sh b/update_data.sh index b1862fa..53e11c3 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,7 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' @@ -62,8 +62,7 @@ log "Log file: $LOG_FILE" log "๐Ÿ“ฆ Setting up Scribe-Data repository..." if [ ! -d "$SCRIBE_DATA_DIR" ]; then log "Cloning Scribe-Data repository..." 
- #git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { - git clone --depth=1 --branch fix/emoji-keywords-sqlite-generation https://github.com/LJSigersmith/Scribe-Data.git "$SCRIBE_DATA_DIR" || { + git clone --depth=1 https://github.com/scribe-org/Scribe-Data.git "$SCRIBE_DATA_DIR" || { error "Failed to clone Scribe-Data repo" exit 1 } From 99577af7766dcce05845a4bc2ef000704493525c Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Tue, 28 Apr 2026 10:33:19 -0400 Subject: [PATCH 16/17] test english --- update_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update_data.sh b/update_data.sh index 53e11c3..2c3d24c 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,7 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m' From 3f19d3689a8a613657fdea53c3bae83ff27ceef6 Mon Sep 17 00:00:00 2001 From: Lance Sigersmith Date: Tue, 28 Apr 2026 10:44:07 -0400 Subject: [PATCH 17/17] all languages --- update_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/update_data.sh b/update_data.sh index 2c3d24c..53e11c3 100755 --- a/update_data.sh +++ b/update_data.sh @@ -16,7 +16,7 @@ SKIP_MIGRATION=${1:-false} PROJECT_ROOT=$(pwd) # Define target languages and data types. -TARGET_LANGUAGES=("english") #"french" "german" "italian" "spanish" "portuguese" "russian" "swedish") +TARGET_LANGUAGES=("english" "french" "german" "italian" "spanish" "portuguese" "russian" "swedish") DATA_TYPES=("nouns" "verbs" "emoji_keywords") RED='\033[0;31m'