Skip to content

Commit 28f7eba

Browse files
committed
Addressed recommendations in hed-standard#1240
1 parent d3ca678 commit 28f7eba

13 files changed

Lines changed: 229 additions & 27 deletions

hed/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from hed.models.hed_string import HedString
22
from hed.models.hed_tag import HedTag
33
from hed.errors.error_reporter import get_printable_issue_string
4-
from hed.errors.exceptions import HedFileError, HedExceptions
4+
from hed.errors.exceptions import HedFileError, HedExceptions, HedQueryError
55

66
from hed.models.base_input import BaseInput
77
from hed.models.spreadsheet_input import SpreadsheetInput

hed/errors/exceptions.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,12 @@ def __init__(self, code, message, filename, issues=None):
6666
self.issues = issues
6767
if self.issues is None:
6868
self.issues = []
69+
70+
71+
class HedQueryError(ValueError):
72+
"""Exception raised when a HED query string cannot be parsed.
73+
74+
Inherits from :class:`ValueError` so that existing ``except ValueError`` handlers
75+
continue to work, while allowing callers that need finer-grained control to
76+
catch only query parse errors with ``except HedQueryError``.
77+
"""

hed/models/__init__.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,51 @@
1-
"""Data structures for HED tag handling."""
1+
"""HED data models: strings, tags, groups, inputs, queries, and definitions.
2+
3+
This module provides the core data structures used to represent, validate, and
4+
transform HED-annotated data. A loaded :class:`~hed.schema.HedSchema` (from
5+
``hed.schema``) is typically passed in when constructing these objects.
6+
7+
Typical usage
8+
-------------
9+
Parse and validate a raw HED string::
10+
11+
from hed.schema import load_schema_version
12+
from hed.models import HedString
13+
14+
schema = load_schema_version("8.3.0")
15+
hs = HedString("Sensory-event, (Action, Move/Flexion)", schema)
16+
issues = hs.validate(schema)
17+
18+
Load a BIDS events file with a sidecar::
19+
20+
from hed.models import TabularInput, Sidecar
21+
22+
sidecar = Sidecar("task-rest_events.json", name="MySidecar")
23+
events = TabularInput("sub-01_task-rest_events.tsv", sidecar=sidecar)
24+
issues = events.validate(schema)
25+
26+
Search HED annotations with a query::
27+
28+
from hed.models import QueryHandler
29+
30+
query = QueryHandler("Sensory-event && Action")
31+
matches = query.search(hs)
32+
33+
Key exports
34+
-----------
35+
- :class:`HedString` — a parsed HED annotation string (root of the parse tree).
36+
- :class:`HedTag` — a single HED tag with schema linkage and canonical form.
37+
- :class:`HedGroup` — a parenthesised group of tags and nested groups.
38+
- :class:`TabularInput` — a BIDS-style TSV events file with optional sidecar.
39+
- :class:`Sidecar` — a BIDS JSON sidecar mapping column values to HED strings.
40+
- :class:`SpreadsheetInput` — an Excel / TSV spreadsheet with HED columns.
41+
- :class:`TimeseriesInput` — a continuous time-series file with HED annotations.
42+
- :class:`DefinitionDict` — a collection of resolved HED Def/Def-expand definitions.
43+
- :class:`QueryHandler` — compile and execute queries against HED strings.
44+
- :func:`get_query_handlers` / :func:`search_hed_objs` — convenience helpers for
45+
batch querying.
46+
- :func:`convert_to_form`, :func:`shrink_defs`, :func:`expand_defs`,
47+
:func:`process_def_expands` — DataFrame-level HED transformation utilities.
48+
"""
249

350
from .base_input import BaseInput
451
from .column_mapper import ColumnMapper

hed/models/column_mapper.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import copy
1111
from collections import Counter
12+
from functools import partial
1213

1314
PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: "
1415
NO_WARN_COLUMNS = ["onset", "duration"]
@@ -110,20 +111,14 @@ def get_transformers(self):
110111
if isinstance(assign_to_column, int):
111112
if self._column_map:
112113
assign_to_column = self._column_map[assign_to_column]
113-
else:
114-
assign_to_column = assign_to_column
115114
if column.column_type == ColumnType.Ignore:
116115
continue
117116
elif column.column_type == ColumnType.Value:
118117
value_str = column.hed_dict
119-
from functools import partial
120-
121118
final_transformers[assign_to_column] = partial(self._value_handler, value_str)
122119
elif column.column_type == ColumnType.Categorical:
123120
need_categorical.append(column.column_name)
124121
category_values = column.hed_dict
125-
from functools import partial
126-
127122
final_transformers[assign_to_column] = partial(self._category_handler, category_values)
128123
else:
129124
final_transformers[assign_to_column] = lambda x: x

hed/models/def_expand_gather.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@ def add_def(self, def_tag, def_expand_group):
3232
def_extension = def_tag.extension.split("/")
3333
existing_contents = self.actual_contents.get(def_extension[1], None)
3434
if existing_contents and existing_contents != orig_group:
35-
raise ValueError("Invalid Definition")
35+
raise ValueError(
36+
f"Definition '{def_extension[0]}' has conflicting contents for value '{def_extension[1]}': "
37+
f"existing={existing_contents} vs new={orig_group}"
38+
)
3639
elif existing_contents:
3740
return
3841
self.actual_contents[def_extension[1]] = orig_group.copy()
@@ -42,14 +45,20 @@ def add_def(self, def_tag, def_expand_group):
4245
tag for tag in orig_group.get_all_tags() if tag.extension == def_extension[1] and tag.is_takes_value_tag()
4346
]
4447
if len(matching_tags) == 0:
45-
raise ValueError("Invalid Definition")
48+
raise ValueError(
49+
f"Definition '{def_extension[0]}': no takes-value tag with extension '{def_extension[1]}' "
50+
f"found in group {orig_group}"
51+
)
4652
matching_names = {tag.short_base_tag for tag in matching_tags}
4753
if self.matching_names is not None:
4854
self.matching_names = self.matching_names & matching_names
4955
else:
5056
self.matching_names = matching_names
5157
if len(self.matching_names) == 0:
52-
raise ValueError("Invalid Definition")
58+
raise ValueError(
59+
f"Definition '{def_extension[0]}': no tag name is consistently the takes-value tag across "
60+
f"all observed values — candidate names were {matching_names}"
61+
)
5362

5463
def resolve_definition(self):
5564
"""Try to resolve the definition based on the information available.
@@ -84,7 +93,10 @@ def resolve_definition(self):
8493
self.resolved_definition = candidate_contents
8594
return True
8695
if len(candidate_tags) == 0 or (1 < len(candidate_tags) < len(tuple_list)):
87-
raise ValueError("Invalid Definition")
96+
raise ValueError(
97+
f"Definition '{self.def_tag_name}': could not resolve a unique takes-value tag — "
98+
f"found {len(candidate_tags)} candidate(s) across {len(tuple_list)} value(s)"
99+
)
88100
return False
89101

90102
def get_definition_string(self):

hed/models/hed_group.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,10 @@ def remove(self, items_to_remove: Iterable[Union[HedTag, "HedGroup"]]):
133133
item._parent = None
134134

135135
def __copy__(self):
136-
raise ValueError("Cannot make shallow copies of HedGroups")
136+
raise copy.Error(
137+
"Shallow copy of HedGroup is not supported: _parent pointers would alias the original. "
138+
"Use .copy() for a deep copy."
139+
)
137140

138141
def copy(self) -> "HedGroup":
139142
"""Return a deep copy of this group.

hed/models/hed_string.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ def __init__(self, hed_string, hed_schema, def_dict=None, _contents=None):
3434
try:
3535
contents = self.split_into_groups(hed_string, hed_schema, def_dict)
3636
except ValueError:
37+
# ValueError is raised by split_into_groups for structurally malformed
38+
# strings (mismatched or misordered parentheses). Rather than raising
39+
# here, we fall back to an empty parse tree so that the object can be
40+
# passed to the validator, which will independently detect and report
41+
# the structural error through check_count_tag_group_parentheses /
42+
# check_delimiter_issues_in_hed_string on the raw string.
43+
#
44+
# Callers that construct HedString without running it through
45+
# HedValidator will receive an empty children list with no error
46+
# indication. Always validate after construction if correctness is
47+
# required.
3748
contents = []
3849
super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string))
3950
self._schema = hed_schema

hed/models/query_handler.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
ExpressionExactMatch,
1313
)
1414
from hed.models.query_util import Token
15+
from hed.errors.exceptions import HedQueryError
1516

1617

1718
class QueryHandler:
@@ -76,7 +77,7 @@ def _get_next_token(self):
7677
"""Returns the current token and advances the counter"""
7778
self.at_token += 1
7879
if self.at_token >= len(self.tokens):
79-
raise ValueError("Parse error in get next token")
80+
raise HedQueryError("Parse error in get next token")
8081
return self.tokens[self.at_token]
8182

8283
def _next_token_is(self, kinds):
@@ -94,7 +95,7 @@ def _parse(self, expression_string):
9495
expr = self._handle_or_op()
9596

9697
if self.at_token + 1 != len(self.tokens):
97-
raise ValueError("Parse error in search string")
98+
raise HedQueryError("Parse error in search string")
9899

99100
return expr
100101

@@ -137,7 +138,7 @@ def _handle_negation(self):
137138
if next_token == Token.LogicalNegation:
138139
interior = self._handle_grouping_op()
139140
if "?" in str(interior):
140-
raise ValueError(
141+
raise HedQueryError(
141142
"Cannot negate wildcards, or expressions that contain wildcards. "
142143
"Use {required_expression : optional_expression}."
143144
)
@@ -152,13 +153,13 @@ def _handle_grouping_op(self):
152153
expr = self._handle_or_op()
153154
next_token = self._next_token_is([Token.LogicalGroupEnd])
154155
if next_token != Token.LogicalGroupEnd:
155-
raise ValueError("Parse error: Missing closing paren")
156+
raise HedQueryError("Parse error: Missing closing paren")
156157
elif next_token == Token.DescendantGroup:
157158
interior = self._handle_or_op()
158159
expr = ExpressionDescendantGroup(next_token, right=interior)
159160
next_token = self._next_token_is([Token.DescendantGroupEnd])
160161
if next_token != Token.DescendantGroupEnd:
161-
raise ValueError("Parse error: Missing closing square bracket")
162+
raise HedQueryError("Parse error: Missing closing square bracket")
162163
elif next_token == Token.ExactMatch:
163164
interior = self._handle_or_op()
164165
expr = ExpressionExactMatch(next_token, right=interior)
@@ -172,14 +173,14 @@ def _handle_grouping_op(self):
172173
expr.left = optional_portion
173174
next_token = self._next_token_is([Token.ExactMatchEnd])
174175
if "~" in str(expr):
175-
raise ValueError(
176+
raise HedQueryError(
176177
"Cannot use negation in exact matching groups,"
177178
" as it's not clear what is being matched.\n"
178179
"{thing and ~(expression)} is allowed."
179180
)
180181

181182
if next_token is None:
182-
raise ValueError("Parse error: Missing closing curly bracket")
183+
raise HedQueryError("Parse error: Missing closing curly bracket")
183184
else:
184185
next_token = self._get_next_token()
185186
if next_token and next_token.kind == Token.Wildcard:

hed/schema/__init__.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,43 @@
1-
"""Data structures for handling the HED schema."""
1+
"""HED schema loading, caching, and introspection.
2+
3+
This module exposes everything needed to load and inspect HED schemas — the
4+
vocabularies that define valid HED tags.
5+
6+
Typical usage
7+
-------------
8+
Load a released schema by version number (auto-downloaded and cached)::
9+
10+
from hed.schema import load_schema_version
11+
schema = load_schema_version("8.3.0")
12+
13+
Load a schema from a local file or URL::
14+
15+
from hed.schema import load_schema
16+
schema = load_schema("/path/to/HED8.3.0.xml")
17+
18+
Load a library schema alongside a standard schema::
19+
20+
schema = load_schema_version(["8.3.0", "sc:score_1.0.0"])
21+
22+
Key exports
23+
-----------
24+
- :class:`HedSchema` — a single loaded schema; use it to validate tags.
25+
- :class:`HedSchemaGroup` — two or more schemas used together (base + libraries).
26+
- :func:`load_schema` — load from a file path or URL.
27+
- :func:`load_schema_version` — load by version string(s), with caching.
28+
- :func:`from_string` — parse a schema from an in-memory string.
29+
- :func:`from_dataframes` — reconstruct a schema from TSV DataFrames.
30+
- :class:`HedKey` / :class:`HedSectionKey` — enumerations of schema attribute and
31+
section names used when querying schema entries.
32+
- :func:`get_hed_versions` — list versions available in the local cache.
33+
- :func:`cache_xml_versions` — pre-populate the local cache from the HED GitHub
34+
releases.
35+
36+
See also
37+
--------
38+
``hed.models`` for data structures that *use* a loaded schema (HedString, HedTag,
39+
TabularInput, etc.).
40+
"""
241

342
from .hed_schema import HedSchema
443
from .hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry

hed/schema/hed_schema_entry.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,9 @@ def _finalize_inherited_attributes(self):
405405
# Replace the list with a copy we can modify.
406406
self.inherited_attributes = self.attributes.copy()
407407
for attribute in self._section.inheritable_attributes:
408-
if self._check_inherited_attribute(attribute):
409-
self.inherited_attributes[attribute] = self._check_inherited_attribute(attribute, True)
408+
value = self._check_inherited_attribute(attribute, return_value=True)
409+
if value is not None:
410+
self.inherited_attributes[attribute] = value
410411

411412
def finalize_entry(self, schema):
412413
"""Called once after schema loading to set state.

0 commit comments

Comments
 (0)