Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ Previously, loading a schema whose version existed only in the prerelease cache
- Removed the `check_prerelease` parameter from `load_schema_version()`, `load_schema()`, `from_string()`, and `from_dataframes()` in `hed_schema_io.py`.
- Removed the parameter from `SchemaLoader` (base class) and all subclasses (`SchemaLoaderXML`, `SchemaLoaderWiki`, `SchemaLoaderJSON`, `SchemaLoaderDF`).
- `get_hed_version_path()` in `hed_cache.py` now always searches both regular and prerelease directories (regular first).
- `get_hed_versions()` in `hed_cache.py` now defaults to `check_prerelease=True`. **This is a silent API change**: external callers that omitted `check_prerelease` will now receive prerelease versions. Internal callers that need released-only versions (compliance checker, `deprecatedFrom` validation, hedId comparison) explicitly pass `check_prerelease=False`.
- `get_hed_versions()` in `hed_cache.py` retains the default `check_prerelease=False` (no public API change). Internal callers that need prerelease inclusion (`get_hed_version_path`, error messages) now pass `check_prerelease=True` explicitly.
Comment thread
VisLab marked this conversation as resolved.
- Default schema version is now resolved dynamically from the cache (highest released version) instead of being hardcoded, so new schema releases no longer require a code change.
Comment thread
VisLab marked this conversation as resolved.
- `get_hed_version_path()` now automatically downloads schemas from GitHub when a requested version is not found in the local cache (default cache directory only).
- `_load_schema_version_sub()` now raises `BAD_PARAMETERS` (was `FILE_NOT_FOUND`) when no version is specified and the cache is empty, since the problem is a missing argument rather than a missing file.
- `check_schema_loading.py` simplified — removed `_is_prerelease_partner()` helper.
- `run_loading_check()` now raises `ValueError` immediately for mutually exclusive flag combinations (`prerelease_only` + `exclude_prereleases`, or `library_filter` + `standard_only`), consistent with the existing CLI-level validation.

Expand Down
46 changes: 38 additions & 8 deletions hed/schema/hed_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def get_cache_directory(cache_folder=None) -> str:
return HED_CACHE_DIRECTORY


def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=True) -> Union[list, dict]:
def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelease=False) -> Union[list, dict]:
Comment thread
VisLab marked this conversation as resolved.
"""Get the HED versions in the HED directory.

Parameters:
Expand All @@ -88,7 +88,7 @@ def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelea
None retrieves the standard schema only.
Pass "all" to retrieve all standard and library schemas as a dict.
check_prerelease (bool): If True, results can include prerelease schemas.
Pass False to get only released versions (used by compliance checks).
Default is False, returning only released versions.

Returns:
Union[list, dict]: List of version numbers or dictionary {library_name: [versions]}.
Expand Down Expand Up @@ -136,21 +136,51 @@ def get_hed_versions(local_hed_directory=None, library_name=None, check_prerelea


def get_hed_version_path(xml_version, library_name=None, local_hed_directory=None) -> Union[str, None]:
"""Get HED XML file path in a directory. Only returns filenames that exist.
"""Get the HED XML file path for a given version.

Searches the local cache first. If the version is not found and local_hed_directory
is the default HED cache, the cache is refreshed from GitHub before a second lookup.
No network call is made for custom directories.

Parameters:
xml_version (str): Returns this version if it exists
library_name (str or None): Optional the schema library name.
local_hed_directory (str): Path to local HED directory. Defaults to HED_CACHE_DIRECTORY
xml_version (str): The version string to look up.
library_name (str or None): Optional schema library name.
local_hed_directory (str or None): Path to local HED directory. Defaults to HED_CACHE_DIRECTORY.
Passing a custom path disables the automatic GitHub refresh.

Returns:
Union[str, None]: The path to the requested HED version the HED directory.
Union[str, None]: The path to the requested HED XML file, or None.

"""
if not local_hed_directory:
local_hed_directory = HED_CACHE_DIRECTORY

hed_versions = get_hed_versions(local_hed_directory, library_name)
result = _find_hed_version_path(xml_version, library_name, local_hed_directory)
Comment thread
VisLab marked this conversation as resolved.
if result:
return result

# Version not found locally — try refreshing cache from GitHub (default cache only).
# cache_xml_versions() returns -1 on failure (network error, lock contention, rate limit).
# In that case the second lookup will return None, which the caller treats as "version not found".
if not xml_version or local_hed_directory != HED_CACHE_DIRECTORY:
return None

cache_xml_versions()
Comment thread
VisLab marked this conversation as resolved.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return value of `cache_xml_versions()` is silently discarded. It returns -1 on network errors, on lock contention, and when the cache was refreshed too recently (rate limiting). The comment above documents this correctly, but a log line on failure would make the silent skip observable:

Suggested change
cache_xml_versions()
result = cache_xml_versions()
if result == -1:
import logging
logging.getLogger(__name__).debug(
"cache_xml_versions() could not refresh (network error, lock contention, or cached recently); "
"version %r may not be found.", xml_version
)

Without logging, a transient network failure is completely invisible — callers just get `None`, with no way to distinguish "version does not exist" from "download silently failed".

return _find_hed_version_path(xml_version, library_name, local_hed_directory)


def _find_hed_version_path(xml_version, library_name, local_hed_directory):
"""Look up a HED version path in the given directory without downloading.

Parameters:
xml_version (str): The version to find.
library_name (str or None): Optional schema library name.
local_hed_directory (str): Directory to search.

Returns:
Union[str, None]: The path if found, None otherwise.
"""
hed_versions = get_hed_versions(local_hed_directory, library_name, check_prerelease=True)
if not hed_versions or not xml_version:
return None
if xml_version in hed_versions:
Expand Down
9 changes: 6 additions & 3 deletions hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,9 @@ def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None,
xml_version = versions[0]
else:
raise HedFileError(
HedExceptions.FILE_NOT_FOUND,
"No HED standard schema versions found in cache. Ensure schemas are installed or cached.",
HedExceptions.BAD_PARAMETERS,
"No version specified and no HED standard schema versions found in cache. "
Comment thread
VisLab marked this conversation as resolved.
"Run hed.schema.cache_xml_versions() or install hedtools to populate the cache.",
"",
)

Expand Down Expand Up @@ -392,7 +393,9 @@ def _load_schema_version_sub(xml_version, schema_namespace="", xml_folder=None,
hed_schema = load_schema(hed_file_path, schema_namespace=schema_namespace, schema=schema, name=name)
else:
library_string = f"for library '{library_name}'" if library_name else ""
known_versions = hed_cache.get_hed_versions(xml_folder, library_name=library_name if library_name else "all")
known_versions = hed_cache.get_hed_versions(
xml_folder, library_name=library_name if library_name else "all", check_prerelease=True
)
raise HedFileError(
HedExceptions.FILE_NOT_FOUND,
f"HED version {library_string}: '{version_to_validate}' not found. Check {hed_cache.get_cache_directory(xml_folder)} for cache or https://github.com/hed-standard/hed-schemas/tree/main/library_schemas. "
Expand Down
33 changes: 31 additions & 2 deletions spec_tests/test_hed_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@ def test_get_hed_versions_library_prerelease(self):
os.makedirs(prerelease_dir, exist_ok=True)
fake_prerelease = os.path.join(prerelease_dir, "HED_score_0.0.1-alpha.1.xml")
try:
# Empty file is fine — get_hed_versions only parses filenames, never reads contents
with open(fake_prerelease, "w") as f:
f.write("")
all_versions = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score")
released_only = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score", check_prerelease=False)
all_versions = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score", check_prerelease=True)
released_only = hed_cache.get_hed_versions(self.hed_cache_dir, library_name="score")
self.assertIsInstance(all_versions, list)
self.assertIn("0.0.1-alpha.1", all_versions)
self.assertNotIn("0.0.1-alpha.1", released_only)
Expand All @@ -110,6 +111,34 @@ def test_find_hed_expression(self):
final_version = f"HED{version}.xml"
self.assertFalse(hed_cache.version_pattern.match(final_version))

def test_get_hed_version_path_no_auto_refresh_for_custom_directory(self):
"""get_hed_version_path returns None for a nonexistent version in a custom directory without downloading."""
empty_dir = os.path.join(self.hed_cache_dir, "empty_subdir")
os.makedirs(empty_dir, exist_ok=True)
try:
result = hed_cache.get_hed_version_path("99.99.99", local_hed_directory=empty_dir)
self.assertIsNone(result)
finally:
shutil.rmtree(empty_dir)
Comment thread
VisLab marked this conversation as resolved.

def test_get_hed_version_path_auto_refresh_downloads_missing_version(self):
"""get_hed_version_path automatically downloads from GitHub when a version is not cached locally."""
# Use a fresh cache directory so the version is definitely not present
fresh_cache = os.path.join(os.path.dirname(self.hed_cache_dir), "schema_cache_auto_refresh/")
if os.path.exists(fresh_cache):
shutil.rmtree(fresh_cache)
os.makedirs(fresh_cache)
saved = hed_cache.HED_CACHE_DIRECTORY
try:
hed_cache.HED_CACHE_DIRECTORY = fresh_cache
# 8.0.0 is a released version that should be downloadable from GitHub
result = hed_cache.get_hed_version_path("8.0.0")
self.assertIsNotNone(result, "Auto-refresh should download 8.0.0 from GitHub")
self.assertTrue(os.path.exists(result))
finally:
hed_cache.HED_CACHE_DIRECTORY = saved
shutil.rmtree(fresh_cache, ignore_errors=True)


class TestLocal(unittest.TestCase):
@classmethod
Expand Down
3 changes: 2 additions & 1 deletion tests/schema/test_hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ def test_load_schema_version_default_no_standard_raises(self):

with self.assertRaises(HedFileError) as context:
load_schema_version("", xml_folder=tmp_dir)
self.assertIn("No HED standard schema", str(context.exception))
self.assertEqual(context.exception.args[0], "BAD_PARAMETERS")
self.assertIn("No version specified", str(context.exception))

def test_load_and_verify_tags(self):
# Load 'testlib' by itself
Expand Down
Loading