Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,19 +122,6 @@ omit = ["*/tests/*", "*/__init__.py"]
[tool.ty.src]
include = ["src"]

[tool.ty.rules]
unresolved-import = "warn"
invalid-parameter-default = "warn"
invalid-return-type = "warn"
invalid-argument-type = "warn"
invalid-assignment = "warn"
unsupported-operator = "warn"
unresolved-attribute = "warn"
missing-argument = "warn"
unknown-argument = "warn"
not-subscriptable = "warn"
possibly-missing-submodule = "warn"

[tool.numpydoc_validation]
checks = [
"all", # report on all checks, except the below
Expand Down
16 changes: 8 additions & 8 deletions src/scribe_data/check/check_missing_forms/check_missing_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ def execute_sparql_query(query: str, max_retries: int = 3) -> Optional[list]:
query : str
The SPARQL query to execute.

max_retries : int, optional
Maximum number of retry attempts (default: 3).
max_retries : int, optional, default=3
Maximum number of retry attempts.

Returns
-------
Expand Down Expand Up @@ -183,8 +183,8 @@ def get_forms_from_sparql_service(
Minimum frequency threshold for including form combinations.
Default is 0 (include all combinations).

max_results : int, optional
Maximum number of results to return (default: 1000).
max_results : int, optional, default=1000
Maximum number of results to return.
Helps prevent timeout for very large datasets.

language_name : str, optional
Expand Down Expand Up @@ -288,8 +288,8 @@ def get_forms_from_sparql_service_all_languages(
Minimum frequency threshold for including form combinations.
Default is 0 (include all combinations).

max_results : int, optional
Maximum results per query to prevent timeouts (default: 1000).
max_results : int, optional, default=1000
Maximum results per query to prevent timeouts.

Returns
-------
Expand Down Expand Up @@ -384,8 +384,8 @@ def get_features_from_sparql_service(
Minimum frequency threshold for including form combinations.
Default is 0 (include all combinations).

max_results : int, optional
Maximum results per query to prevent timeouts (default: 1000).
max_results : int, optional, default=1000
Maximum results per query to prevent timeouts.

Returns
-------
Expand Down
12 changes: 7 additions & 5 deletions src/scribe_data/check/check_missing_forms/normalize_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
Order QID from a missing_unique_forms based on lexeme_form_metadata.yaml.
"""

from typing import List

from scribe_data.utils import lexeme_form_metadata


# Precompute QID positions mapping only once when the module is imported.
def sort_qids_in_list(qids_lists: list[list[str]]) -> list[list[str]]:
def sort_qids_in_list(qids_lists: List[List[str]]) -> List[List[str]]:
"""
Sort QIDs within each sublist based on their predefined positions.

Expand All @@ -16,7 +18,7 @@ def sort_qids_in_list(qids_lists: list[list[str]]) -> list[list[str]]:

Parameters
----------
qids_lists : list[list[str]]
qids_lists : List[List[str]]
A list of lists, where each sublist contains QIDs.

Returns
Expand All @@ -37,17 +39,17 @@ def sort_qids_in_list(qids_lists: list[list[str]]) -> list[list[str]]:
]


def sort_qids_by_position(nested_qids: list[list[str]]) -> list[list[str]]:
def sort_qids_by_position(nested_qids: List[List[str]]) -> List[List[str]]:
"""
Sort lists of QIDs based on their predefined positions and sublist length.

This function sorts the sublists within `nested_qids` based on two criteria:
This function sorts the sub-lists within `nested_qids` based on two criteria:
1. The length of the sublist (shorter lists come first).
2. The positions of the QIDs within each sublist, as defined in `lexeme_form_metadata`.

Parameters
----------
nested_qids : list[list[str]]
nested_qids : List[List[str]]
A list of lists, where each sublist contains QIDs.

Returns
Expand Down
9 changes: 5 additions & 4 deletions src/scribe_data/check/check_project_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import difflib
import sys
from typing import List

from scribe_data.utils import (
WIKIDATA_QUERIES_ALL_DATA_DIR,
Expand All @@ -19,13 +20,13 @@
all_data_types = tuple(data_type_metadata.keys())


def get_available_languages() -> dict[str, list[str]]:
def get_available_languages() -> dict[str, List[str]]:
"""
Get available languages from the data extraction folder.

Returns
-------
dict[str, list[str]]
dict[str, List[str]]
A dictionary with the language name as the key and a list of its sub-languages (if available).
"""
available_languages = {}
Expand Down Expand Up @@ -64,7 +65,7 @@ def get_available_languages() -> dict[str, list[str]]:

def get_missing_languages(
reference_languages: dict, target_languages: dict
) -> list[str]:
) -> List[str]:
"""
Compare two language dictionaries and return the languages and sub-languages missing from the reference.

Expand All @@ -78,7 +79,7 @@ def get_missing_languages(

Returns
-------
list[str]
List[str]
A list of languages and sub-languages that are in target_languages but not in reference_languages.
"""
missing_languages = []
Expand Down
6 changes: 3 additions & 3 deletions src/scribe_data/check/check_pyicu.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import subprocess
import sys
from pathlib import Path
from typing import Optional
from typing import List, Optional

import questionary
import requests
Expand Down Expand Up @@ -55,7 +55,7 @@ def get_python_version_and_architecture() -> tuple[str, str]:
return python_version, architecture


def fetch_wheel_releases() -> tuple[list[tuple[str, str]], float]:
def fetch_wheel_releases() -> tuple[List[tuple[str, str]], float]:
"""
Fetch the release data for PyICU from GitHub with error handling for rate limits.

Expand Down Expand Up @@ -114,7 +114,7 @@ def download_wheel_file(wheel_url: str, output_dir: Path) -> str:


def find_matching_wheel(
wheels: list[tuple[str, str]], python_version: str, architecture: str
wheels: List[tuple[str, str]], python_version: str, architecture: str
) -> Optional[str]:
"""
Find the matching wheel file based on Python version and architecture.
Expand Down
12 changes: 6 additions & 6 deletions src/scribe_data/check/check_query_forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def decompose_label_features(label: str) -> list:
# MARK: Extract QIDs


def extract_form_qids(form_text: str) -> Optional[list[str]]:
def extract_form_qids(form_text: str) -> Optional[List[str]]:
"""
Extract all QIDs from an optional query form.

Expand All @@ -146,7 +146,7 @@ def extract_form_qids(form_text: str) -> Optional[list[str]]:

Returns
-------
list[str]
Optional[List[str]]
All QIDS that make up the form.
"""
qids_pattern = r"wikibase:grammaticalFeature .+ \."
Expand Down Expand Up @@ -224,13 +224,13 @@ def check_query_formatting(form_text: str) -> bool:
# MARK: Correct Label


def return_correct_form_label(qids: list[str]) -> str:
def return_correct_form_label(qids: List[str]) -> str:
"""
Return the correct label for a lexeme form representation given the QIDs that compose it.

Parameters
----------
qids : list[str]
qids : List[str]
All QIDS that make up the form.

Returns
Expand Down Expand Up @@ -447,12 +447,12 @@ def compare_key(components: List[str]) -> List[str | int | float]:

Parameters
----------
components : list[str]
components : List[str]
The components that can make up the form identifier.

Returns
-------
list[str]
List[str | int | float]
The list of component parts to compare against.
"""
return [order_map.get(c, float("inf")) for c in components]
Expand Down
6 changes: 3 additions & 3 deletions src/scribe_data/cli/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,13 +397,13 @@ def convert_wrapper(
output_type : str
The desired output format. Can be 'json', 'csv', 'tsv', or 'sqlite'.

overwrite : bool, optional (default=False)
overwrite : bool, optional, default=False
Whether to overwrite existing output files.

identifier_case : str, optional (default='camel')
identifier_case : str, optional, default='camel'
The case format for identifiers.

all : bool, optional (default=False)
all : bool, optional, default=False
Convert all languages and data types.

Returns
Expand Down
6 changes: 3 additions & 3 deletions src/scribe_data/cli/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from collections.abc import Callable
from datetime import date, datetime
from pathlib import Path
from typing import Optional
from typing import List, Optional

import questionary
import requests
Expand Down Expand Up @@ -298,7 +298,7 @@ def wd_lexeme_dump_download_wrapper(

def download_wiktionary_dumps(
output_dir: Path = DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR,
language_isos: list[str] = ["en"],
language_isos: List[str] = ["en"],
dump_snapshot: Optional[str] = "latest",
) -> Optional[Path]:
"""
Expand All @@ -309,7 +309,7 @@ def download_wiktionary_dumps(
output_dir : Path, optional, default=DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR
Directory to save the dump. Defaults to DEFAULT_WIKTIONARY_DUMP_EXPORT_DIR.

language_isos : list[str], optional, default=['en']
language_isos : List[str], optional, default=['en']
A list of ISO-2 codes for desired Wiktionary dumps.

dump_snapshot : str, optional, default='latest'
Expand Down
8 changes: 4 additions & 4 deletions src/scribe_data/cli/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ def get_data(

Parameters
----------
languages : list[str]
languages : List[str]
The language(s) to get.

data_types : list[str]
data_types : List[str]
The data type(s) to get.

output_type : str
Expand All @@ -60,7 +60,7 @@ def get_data(
output_dir : Path
The output directory path for results.

overwrite : bool (default: False)
overwrite : bool, default=False
Whether to overwrite existing files.

outputs_per_entry : str
Expand All @@ -69,7 +69,7 @@ def get_data(
all_bool : bool
Get all languages and data types.

interactive : bool (default: False)
interactive : bool, default=False
Whether it's running in interactive mode.

identifier_case : str
Expand Down
2 changes: 1 addition & 1 deletion src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def create_word_completer(

Parameters
----------
options : list[str]
options : List[str]
The options that could complete the current input.

include_all : bool
Expand Down
2 changes: 1 addition & 1 deletion src/scribe_data/cli/total.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def get_datatype_list(language: str) -> list | dict:

Returns
-------
list[str]
list | dict
A list of the corresponding data types.
"""
language_key = language.strip().lower() # normalize input
Expand Down
28 changes: 14 additions & 14 deletions src/scribe_data/load/data_to_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@


def create_table(
cursor: sqlite3.Cursor, identifier_case: str, data_type: str, cols: list[str]
cursor: sqlite3.Cursor, identifier_case: str, data_type: str, cols: List[str]
) -> None:
"""
Create a table in the language database.
Expand Down Expand Up @@ -102,10 +102,10 @@ def translations_to_sqlite(
A list of current languages.
identifier_case : str, optional
The identifier case. Default is "snake".
input_file : str, optional
The input JSON export directory (default=DEFAULT_JSON_EXPORT_DIR).
output_file : str, optional
The output SQLite export directory (default=DEFAULT_SQLITE_EXPORT_DIR).
input_file : str, optional, default=DEFAULT_JSON_EXPORT_DIR
The input JSON export directory.
output_file : str, optional, default=DEFAULT_SQLITE_EXPORT_DIR
The output SQLite export directory.
overwrite : bool, optional
If True, existing SQLite files will be overwritten without prompting.
"""
Expand Down Expand Up @@ -195,11 +195,11 @@ def wiktionary_translations_to_sqlite(
identifier_case : str, optional
Either "camel" or "snake" to determine column naming. Default is "snake".

input_file : str, optional
The input JSON export directory (default=DEFAULT_JSON_EXPORT_DIR).
input_file : str, optional, default=DEFAULT_JSON_EXPORT_DIR
The input JSON export directory.

output_file : str, optional
The output SQLite export directory (default=DEFAULT_SQLITE_EXPORT_DIR).
output_file : str, optional, default=DEFAULT_SQLITE_EXPORT_DIR
The output SQLite export directory.

overwrite : bool, optional
If True, existing SQLite files will be overwritten without prompting.
Expand Down Expand Up @@ -296,14 +296,14 @@ def data_to_sqlite(
specific_tables : Optional[Union[str, List[str]]]
The specific tables to process. If None, process all tables.

identifier_case : str, optional (default='camel')
identifier_case : str, optional, default='camel'
Format of the identifiers ("camel" or "snake"). Defaults to "camel".

input_file : str, optional
The input JSON export directory (default=DEFAULT_JSON_EXPORT_DIR).
input_file : str, optional, default=DEFAULT_JSON_EXPORT_DIR
The input JSON export directory.

output_file : str, optional
The output SQLite export directory (default=DEFAULT_SQLITE_EXPORT_DIR).
output_file : str, optional, default=DEFAULT_SQLITE_EXPORT_DIR
The output SQLite export directory.

overwrite : bool, optional
If set to True, existing SQLite files will be overwritten without prompting.
Expand Down
4 changes: 2 additions & 2 deletions src/scribe_data/unicode/process_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ def gen_emoji_lexicon(

Parameters
----------
language : str (default=None)
language : str
The language keywords are being generated for.

emojis_per_keyword : int (default=None)
emojis_per_keyword : int
The limit for number of emoji keywords that should be generated per keyword.

Returns
Expand Down
Loading
Loading