diff --git a/CHANGELOG.md b/CHANGELOG.md index 36f558cd..7a4bc2d5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,10 @@ Previously, loading a schema whose version existed only in the prerelease cache - Removed the `check_prerelease` parameter from `load_schema_version()`, `load_schema()`, `from_string()`, and `from_dataframes()` in `hed_schema_io.py`. - Removed the parameter from `SchemaLoader` (base class) and all subclasses (`SchemaLoaderXML`, `SchemaLoaderWiki`, `SchemaLoaderJSON`, `SchemaLoaderDF`). - `get_hed_version_path()` in `hed_cache.py` now always searches both regular and prerelease directories (regular first). -- `get_hed_versions()` in `hed_cache.py` now defaults to `check_prerelease=True`. **This is a silent API change**: external callers that omitted `check_prerelease` will now receive prerelease versions. Internal callers that need released-only versions (compliance checker, `deprecatedFrom` validation, hedId comparison) explicitly pass `check_prerelease=False`. +- `get_hed_versions()` in `hed_cache.py` retains the default `check_prerelease=False` (no public API change). Internal callers that need prerelease inclusion (`get_hed_version_path`, error messages) now pass `check_prerelease=True` explicitly. - Default schema version is now resolved dynamically from the cache (highest released version) instead of being hardcoded, so new schema releases no longer require a code change. +- `get_hed_version_path()` now automatically downloads schemas from GitHub when a requested version is not found in the local cache (default cache directory only). +- `_load_schema_version_sub()` now raises `BAD_PARAMETERS` (was `FILE_NOT_FOUND`) when no version is specified and the cache is empty, since the problem is a missing argument rather than a missing file. - `check_schema_loading.py` simplified — removed `_is_prerelease_partner()` helper. - `run_loading_check()` now raises `ValueError` immediately for mutually exclusive flag combinations (`prerelease_only` + `exclude_prereleases`, or `library_filter` + `standard_only`), consistent with the existing CLI-level validation. diff --git a/hed/schema/hed_cache.py b/hed/schema/hed_cache.py index 125a0950..4a9d2924 100644 --- a/hed/schema/hed_cache.py +++ b/hed/schema/hed_cache.py @@ -79,7 +79,7 @@ def get_cache_directory(cache_folder=None) -> str: return HED_CACHE_DIRECTORY -def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=True) -> Union[list, dict]: +def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=False) -> Union[list, dict]: """Get the HED versions in the HED directory. Parameters: @@ -88,7 +88,7 @@ def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelea None retrieves the standard schema only. Pass "all" to retrieve all standard and library schemas as a dict. check_prerelease (bool): If True, results can include prerelease schemas. - Pass False to get only released versions (used by compliance checks). + Default is False, returning only released versions. Returns: Union[list, dict]: List of version numbers or dictionary {library_name: [versions]}. @@ -136,21 +136,51 @@ def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelea def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None) -> Union[str, None]: - """Get HED XML file path in a directory. Only returns filenames that exist. + """Get the HED XML file path for a given version. + + Searches the local cache first. If the version is not found and local_hed_directory + is the default HED cache, the cache is refreshed from GitHub before a second lookup. + No network call is made for custom directories. Parameters: - xml_version (str): Returns this version if it exists - library_name (str or None): Optional the schema library name. - local_hed_directory (str): Path to local HED directory. Defaults to HED_CACHE_DIRECTORY + xml_version (str): The version string to look up. + library_name (str or None): Optional schema library name. + local_hed_directory (str or None): Path to local HED directory. Defaults to HED_CACHE_DIRECTORY. + Passing a custom path disables the automatic GitHub refresh. Returns: - Union[str, None]: The path to the requested HED version the HED directory. + Union[str, None]: The path to the requested HED XML file, or None. """ if not local_hed_directory: local_hed_directory = HED_CACHE_DIRECTORY - hed_versions = get_hed_versions(local_hed_directory, library_name) + result = _find_hed_version_path(xml_version, library_name, local_hed_directory) + if result: + return result + + # Version not found locally — try refreshing cache from GitHub (default cache only). + # cache_xml_versions() returns -1 on failure (network error, lock contention, rate limit). + # In that case the second lookup will return None, which the caller treats as "version not found". + if not xml_version or local_hed_directory != HED_CACHE_DIRECTORY: + return None + + cache_xml_versions() + return _find_hed_version_path(xml_version, library_name, local_hed_directory) + + +def _find_hed_version_path(xml_version, library_name, local_hed_directory): + """Look up a HED version path in the given directory without downloading. + + Parameters: + xml_version (str): The version to find. + library_name (str or None): Optional schema library name. + local_hed_directory (str): Directory to search. + + Returns: + Union[str, None]: The path if found, None otherwise. + """ + hed_versions = get_hed_versions(local_hed_directory, library_name, check_prerelease=True) if not hed_versions or not xml_version: return None if xml_version in hed_versions: diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index fe508cb2..d0a37e35 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -362,8 +362,9 @@ def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, xml_version = versions[0] else: raise HedFileError( - HedExceptions.FILE_NOT_FOUND, - "No HED standard schema versions found in cache. Ensure schemas are installed or cached.", + HedExceptions.BAD_PARAMETERS, + "No version specified and no HED standard schema versions found in cache. " + "Run hed.schema.cache_xml_versions() or install hedtools to populate the cache.", "", ) @@ -392,7 +393,9 @@ def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None, hed_schema = load_schema(hed_file_path, schema_namespace=schema_namespace, schema=schema, name=name) else: library_string = f"for library '{library_name}'" if library_name else "" - known_versions = hed_cache.get_hed_versions(xml_folder, library_name=library_name if library_name else "all") + known_versions = hed_cache.get_hed_versions( + xml_folder, library_name=library_name if library_name else "all", check_prerelease=True + ) raise HedFileError( HedExceptions.FILE_NOT_FOUND, f"HED version {library_string}: '{version_to_validate}' not found. Check {hed_cache.get_cache_directory(xml_folder)} for cache or https://github.com/hed-standard/hed-schemas/tree/main/library_schemas. " diff --git a/spec_tests/test_hed_cache.py b/spec_tests/test_hed_cache.py index 236aaaf0..9734b796 100644 --- a/spec_tests/test_hed_cache.py +++ b/spec_tests/test_hed_cache.py @@ -84,10 +84,11 @@ def test_get_hed_versions_library_prerelease(self): os.makedirs(prerelease_dir, exist_ok=True) fake_prerelease = os.path.join(prerelease_dir, "HED_score_0.0.1-alpha.1.xml") try: + # Empty file is fine — get_hed_versions only parses filenames, never reads contents with open(fake_prerelease, "w") as f: f.write("") - all_versions = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score") - released_only = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score", check_prerelease=False) + all_versions = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score", check_prerelease=True) + released_only = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score") self.assertIsInstance(all_versions, list) self.assertIn("0.0.1-alpha.1", all_versions) self.assertNotIn("0.0.1-alpha.1", released_only) @@ -110,6 +111,34 @@ def test_find_hed_expression(self): final_version = f"HED{version}.xml" self.assertFalse(hed_cache.version_pattern.match(final_version)) + def test_get_hed_version_path_no_auto_refresh_for_custom_directory(self): + """get_hed_version_path returns None for a nonexistent version in a custom directory without downloading.""" + empty_dir = os.path.join(self.hed_cache_dir, "empty_subdir") + os.makedirs(empty_dir, exist_ok=True) + try: + result = hed_cache.get_hed_version_path("99.99.99", local_hed_directory=empty_dir) + self.assertIsNone(result) + finally: + shutil.rmtree(empty_dir) + + def test_get_hed_version_path_auto_refresh_downloads_missing_version(self): + """get_hed_version_path automatically downloads from GitHub when a version is not cached locally.""" + # Use a fresh cache directory so the version is definitely not present + fresh_cache = os.path.join(os.path.dirname(self.hed_cache_dir), "schema_cache_auto_refresh/") + if os.path.exists(fresh_cache): + shutil.rmtree(fresh_cache) + os.makedirs(fresh_cache) + saved = hed_cache.HED_CACHE_DIRECTORY + try: + hed_cache.HED_CACHE_DIRECTORY = fresh_cache + # 8.0.0 is a released version that should be downloadable from GitHub + result = hed_cache.get_hed_version_path("8.0.0") + self.assertIsNotNone(result, "Auto-refresh should download 8.0.0 from GitHub") + self.assertTrue(os.path.exists(result)) + finally: + hed_cache.HED_CACHE_DIRECTORY = saved + shutil.rmtree(fresh_cache, ignore_errors=True) + class TestLocal(unittest.TestCase): @classmethod diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 0fcde29d..334bbc45 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -108,7 +108,8 @@ def test_load_schema_version_default_no_standard_raises(self): with self.assertRaises(HedFileError) as context: load_schema_version("", xml_folder=tmp_dir) - self.assertIn("No HED standard schema", str(context.exception)) + self.assertEqual(context.exception.args[0], "BAD_PARAMETERS") + self.assertIn("No version specified", str(context.exception)) def test_load_and_verify_tags(self): # Load 'testlib' by itself