From 713e752ddbd4f21bca440d6929bf99f0141be3e1 Mon Sep 17 00:00:00 2001 From: lucifermorningstar1305 Date: Sun, 22 Mar 2026 02:08:18 +1300 Subject: [PATCH] Added: error ignore for read_json_auto --- pyproject.toml | 2 +- src/pyalexs3/core.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6d2d463..85243c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyalexs3" -version = "0.1.6" +version = "0.1.7" description = "OpenAlex S3 processor" readme = "README.md" requires-python = ">=3.10" diff --git a/src/pyalexs3/core.py b/src/pyalexs3/core.py index 4662acf..77fce27 100644 --- a/src/pyalexs3/core.py +++ b/src/pyalexs3/core.py @@ -533,7 +533,7 @@ def load_table( t0 = time.time() table_exists = False - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" exists_cmd = self.__conn.execute( f"SELECT count(*) FROM duckdb_tables() WHERE table_name='{obj_type}'" @@ -648,7 +648,7 @@ def batch_load_table( start_date_sel = start_from.split("/")[0] t0 = time.time() - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" for file_ls in files_gen: @@ -789,7 +789,7 @@ def lazy_load( end_date=end_date_sel, ) - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" for fb in files_gen: os.makedirs(download_dir, exist_ok=True)