diff --git a/pyproject.toml b/pyproject.toml index 6d2d463..85243c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pyalexs3" -version = "0.1.6" +version = "0.1.7" description = "OpenAlex S3 processor" readme = "README.md" requires-python = ">=3.10" diff --git a/src/pyalexs3/core.py b/src/pyalexs3/core.py index 4662acf..77fce27 100644 --- a/src/pyalexs3/core.py +++ b/src/pyalexs3/core.py @@ -533,7 +533,7 @@ def load_table( t0 = time.time() table_exists = False - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" exists_cmd = self.__conn.execute( f"SELECT count(*) FROM duckdb_tables() WHERE table_name='{obj_type}'" @@ -648,7 +648,7 @@ def batch_load_table( start_date_sel = start_from.split("/")[0] t0 = time.time() - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" for file_ls in files_gen: @@ -789,7 +789,7 @@ def lazy_load( end_date=end_date_sel, ) - select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*'){where_sel}{limit_sel}" + select_clause = f"SELECT {cols_sel} FROM read_json_auto('{download_dir}/*', ignore_errors=true){where_sel}{limit_sel}" for fb in files_gen: os.makedirs(download_dir, exist_ok=True)