Skip to content

Commit f46ed41

Browse files
committed
Addressed copilot suggestions
1 parent 356a44b commit f46ed41

8 files changed

Lines changed: 318 additions & 63 deletions

File tree

docs/_templates/base.html

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,17 @@
33
{%- block scripts %}
44
{{ super() }}
55

6-
<!-- OSA Chat Widget (HED Assistant) -->
6+
<!-- OSA Chat Widget (HED Assistant)
7+
Privacy Note: This widget sends page content to an external AI service when
8+
allowPageContext is enabled. The widget provides contextual help for HEDTools documentation.
9+
Data sent: Page text content, user questions
10+
Service: OSA Chat Widget (osa-demo.pages.dev)
11+
-->
712
<script src="https://osa-demo.pages.dev/osa-chat-widget.js"
8-
crossorigin="anonymous"></script>
9-
<script>
13+
crossorigin="anonymous"
14+
defer></script>
15+
<script defer>
16+
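// Initialize the widget once the external script is available. NOTE(review): `defer` is ignored on inline scripts (no src), so this block may run before the deferred widget script above has loaded - confirm.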
1017
if (window.OSAChatWidget) {
1118
OSAChatWidget.setConfig({
1219
communityId: 'hed',

docs/user_guide.md

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,15 @@ HED (Hierarchical Event Descriptors) is a framework for systematically describin
1818
## Table of contents
1919

2020
01. [What is HED?](#what-is-hed)
21-
02. [What are Python HEDTools?](#what-are-python-hedtools)
22-
03. [Getting started](#getting-started)
23-
04. [Working with HED schemas](#working-with-hed-schemas)
24-
05. [Validating HED strings](#validating-hed-strings)
25-
06. [Working with BIDS datasets](#working-with-bids-datasets)
26-
07. [Working with sidecars](#working-with-sidecars)
27-
08. [Jupyter notebooks](#jupyter-notebooks)
28-
09. [Command-line tools](#command-line-tools)
29-
10. [Best practices](#best-practices)
30-
11. [Troubleshooting](#troubleshooting)
21+
02. [Getting started](#getting-started)
22+
03. [Working with HED schemas](#working-with-hed-schemas)
23+
04. [Validating HED strings](#validating-hed-strings)
24+
05. [Working with BIDS datasets](#working-with-bids-datasets)
25+
06. [Working with sidecars](#working-with-sidecars)
26+
07. [Jupyter notebooks](#jupyter-notebooks)
27+
08. [Command-line tools](#command-line-tools)
28+
09. [Best practices](#best-practices)
29+
10. [Troubleshooting](#troubleshooting)
3130

3231
## Getting started
3332

hed/models/query_expressions.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def merge_and_groups(groups1, groups2):
113113
dont_add = False
114114
# NOTE: this duplicate check is slow - it linearly scans return_list for every merged result
115115
for finalized_value in return_list:
116-
if merged_result.has_same_tags(finalized_value):
116+
if merged_result.has_same_children(finalized_value):
117117
dont_add = True
118118
break
119119
if dont_add:
@@ -171,7 +171,7 @@ def handle_expr(self, hed_group, exact=False):
171171
duplicates = []
172172
for group in groups1:
173173
for other_group in groups2:
174-
if group.has_same_tags(other_group):
174+
if group.has_same_children(other_group):
175175
duplicates.append(group)
176176

177177
groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)]

hed/models/query_util.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,28 @@
22

33

44
class SearchResult:
5-
"""Holder for and manipulation of search results."""
5+
"""Holder for and manipulation of search results.
6+
7+
Represents a query match result consisting of:
8+
- group: The containing HedGroup where matches were found
9+
- children: The specific matched elements (tags/groups) within that group
10+
(NOT all children of the group - only those that satisfied the query)
11+
12+
Example: When searching for "Red" in the HED string "(Red, Blue, Green)":
13+
- group = the containing group (Red, Blue, Green)
14+
- children = [Red] (only the matched tag)
15+
"""
616

717
def __init__(self, group, children):
818
"""Initialize a search result.
919
1020
Parameters:
1121
group (HedGroup): The group where the children were found.
12-
children (HedTag, HedGroup, or list): The matched child elements (tags or groups).
22+
children (HedTag, HedGroup, or list): The matched child elements (tags or groups)
23+
that satisfied the query condition. Can be:
24+
- Single tag/group that matched
25+
- List of tags/groups that matched
26+
- Empty list (for negation or when group matched but no specific children)
1327
"""
1428
self.group = group
1529
if not isinstance(children, list):
@@ -41,7 +55,7 @@ def merge_and_result(self, other):
4155
raise ValueError("Internal error")
4256
return SearchResult(self.group, new_children)
4357

44-
def has_same_tags(self, other):
58+
def has_same_children(self, other):
4559
"""Checks if these two results have the same children by identity (not equality).
4660
4761
Parameters:
@@ -58,6 +72,9 @@ def has_same_tags(self, other):
5872

5973
return all(child is child2 for child, child2 in zip(self.children, other.children, strict=False))
6074

75+
# Backward compatibility alias
76+
has_same_tags = has_same_children
77+
6178
def __str__(self):
6279
return str(self.group) + " Children: " + "---".join([str(child) for child in self.children])
6380

hed/scripts/hed_convert_schema.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,20 +29,21 @@ def convert_and_update(filenames, set_ids):
2929

3030
updated = []
3131
# If we are here, the schemas have been validated (and, if more than one format of a schema changed, the formats are known to be the same)
32-
for basename, extensions in schema_files.items():
32+
for basename, extension_paths in schema_files.items():
3333
# Skip any with multiple extensions or not in pre-release
3434
if "prerelease" not in basename:
3535
print(f"Skipping updates on {basename}, not in a prerelease folder.")
3636
continue
37-
source_filename = add_extension(basename, list(extensions)[0]) # Load any changed schema version, they're all the same
38-
39-
# todo: more properly decide how we want to handle non lowercase extensions.
40-
tsv_extension = ".tsv"
41-
for extension in extensions:
42-
if extension.lower() == ".tsv":
43-
tsv_extension = extension
44-
45-
source_df_filename = add_extension(basename, tsv_extension)
37+
# Use the actual file paths to preserve case on case-sensitive filesystems
38+
source_filename = list(extension_paths.values())[0] # Load any changed schema version, they're all the same
39+
40+
# Find TSV file path if it exists
41+
tsv_path = extension_paths.get(".tsv")
42+
if tsv_path:
43+
source_df_filename = tsv_path
44+
else:
45+
# Construct TSV path if not found (unlikely in normal workflow)
46+
source_df_filename = add_extension(basename, ".tsv")
4647
schema = load_schema(source_filename)
4748
print(f"Trying to convert/update file {source_filename}")
4849
source_dataframes = load_dataframes(source_df_filename)

hed/scripts/hed_script_util.py

Lines changed: 38 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,15 @@ def add_extension(basename, extension):
9595
TSV files are placed in a 'hedtsv' subdirectory, while other formats
9696
simply append the extension to the basename.
9797
98+
Note: This function preserves the case of the extension to maintain
99+
compatibility with case-sensitive filesystems. Extensions should only
100+
be normalized (lowercased) for comparison purposes, not for file path
101+
construction.
102+
98103
Parameters:
99104
basename (str): The base path/name of the schema file without extension.
100105
extension (str): The file extension including the dot (e.g., '.xml', '.tsv').
106+
Case is preserved as-is.
101107
102108
Returns:
103109
str: The complete file path with extension applied.
@@ -107,8 +113,9 @@ def add_extension(basename, extension):
107113
"""
108114
if not isinstance(extension, str):
109115
raise TypeError(f"extension must be a string, got {type(extension).__name__}")
110-
extension = extension.lower()
111-
if extension == ".tsv":
116+
# Normalize only for comparison, not for path construction
117+
extension_lower = extension.lower()
118+
if extension_lower == ".tsv":
112119
parent_path, basename = os.path.split(basename)
113120
return os.path.join(parent_path, "hedtsv", basename)
114121
return basename + extension
@@ -120,13 +127,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
120127
Groups schema files by their base name, tracking which formats (extensions)
121128
have been modified. Handles special TSV directory structure (hedtsv subfolder).
122129
130+
Returns a nested dict that maps basename -> normalized_extension -> actual_filepath.
131+
This preserves the original file casing for case-sensitive filesystems while
132+
still allowing normalized extension comparisons.
133+
123134
Example input:
124-
["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.xml"]
135+
["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.XML"]
125136
126137
Example output:
127138
{
128-
"test_schema": {".mediawiki", ".tsv"},
129-
"other_schema": {".xml"}
139+
"test_schema": {".mediawiki": "test_schema.mediawiki", ".tsv": "hedtsv/.../test_schema_Tag.tsv"},
140+
"other_schema": {".xml": "other_schema.XML"}
130141
}
131142
132143
Parameters:
@@ -135,20 +146,21 @@ def sort_base_schemas(filenames, add_all_extensions=False):
135146
Default is False.
136147
137148
Returns:
138-
dict: A dictionary where keys are the basename (str), and values are sets of
139-
extensions modified. Can include .tsv, .mediawiki, .xml, and .json.
149+
dict: A nested dictionary where keys are basenames (str), values are dicts mapping
150+
normalized extensions (str, lowercase) to actual file paths (str, preserving case).
151+
Can include .tsv, .mediawiki, .xml, and .json as keys.
140152
"""
141-
schema_files = defaultdict(set)
153+
schema_files = defaultdict(dict)
142154
for file_path in filenames:
143155
if not os.path.exists(file_path):
144156
print(f"Ignoring deleted file {file_path}.")
145157
continue
146158
basename, extension = os.path.splitext(file_path)
147-
extension = extension.lower()
148-
if extension == ".xml" or extension == ".mediawiki":
149-
schema_files[basename].add(extension)
159+
extension_lower = extension.lower() # Normalize for comparison only
160+
if extension_lower == ".xml" or extension_lower == ".mediawiki":
161+
schema_files[basename][extension_lower] = file_path
150162
continue
151-
elif extension == ".tsv":
163+
elif extension_lower == ".tsv":
152164
tsv_basename = basename.rpartition("_")[0]
153165
full_parent_path, real_basename = os.path.split(tsv_basename)
154166
full_parent_path, real_basename2 = os.path.split(full_parent_path)
@@ -160,14 +172,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
160172
print(f"Ignoring file {file_path}. .tsv files must be in a subfolder with the same name.")
161173
continue
162174
real_name = os.path.join(real_parent_path, real_basename)
163-
schema_files[real_name].add(extension)
175+
schema_files[real_name][extension_lower] = file_path
164176
else:
165177
print(f"Ignoring file {file_path}")
166178

167179
if add_all_extensions:
168180
for schema_name in schema_files:
169181
for extension in all_extensions:
170-
schema_files[schema_name].add(extension)
182+
# Only add if not already present - don't overwrite actual paths
183+
if extension not in schema_files[schema_name]:
184+
# Construct path for missing extensions - use the add_extension logic
185+
schema_files[schema_name][extension] = add_extension(schema_name, extension)
171186

172187
return schema_files
173188

@@ -209,24 +224,25 @@ def validate_all_schemas(schema_files):
209224
for a prerelease schema, ensures all formats exist and are identical.
210225
211226
Parameters:
212-
schema_files (dict): Dictionary mapping basenames (str) to sets of extensions (str)
213-
representing all files changed.
227+
schema_files (dict): Dictionary mapping basenames (str) to dicts of
228+
{normalized_extension (str) -> actual_filepath (str)} representing
229+
all files changed.
214230
215231
Returns:
216232
list: A list of all validation issues found across all schemas.
217233
"""
218234
all_issues = []
219-
for basename, extensions in schema_files.items():
235+
for basename, extension_paths in schema_files.items():
220236
single_schema_issues = []
221-
for extension in extensions:
222-
full_path = add_extension(basename, extension)
223-
single_schema_issues += validate_schema(full_path)
237+
for _extension, file_path in extension_paths.items():
238+
# Use the actual file path to preserve case on case-sensitive filesystems
239+
single_schema_issues += validate_schema(file_path)
224240

225-
if len(extensions) > 1 and not single_schema_issues and "prerelease" in basename:
241+
if len(extension_paths) > 1 and not single_schema_issues and "prerelease" in basename:
226242
single_schema_issues += validate_all_schema_formats(basename)
227243

228244
print(f"Validating: {basename}...")
229-
print(f"Extensions: {extensions}")
245+
print(f"Extensions: {set(extension_paths.keys())}")
230246
if single_schema_issues:
231247
for issue in single_schema_issues:
232248
print(issue)

0 commit comments

Comments
 (0)