Addressed Copilot review suggestions

VisLab · VisLab · commit f46ed41e60f5 · 2026-02-10T11:47:00.000-06:00
diff --git a/docs/_templates/base.html b/docs/_templates/base.html
@@ -3,10 +3,17 @@
 {%- block scripts %}
   {{ super() }}
   
-  <!-- OSA Chat Widget (HED Assistant) -->
+  <!-- OSA Chat Widget (HED Assistant)
+       Privacy Note: This widget sends page content to an external AI service when
+       allowPageContext is enabled. The widget provides contextual help for HEDTools documentation.
+       Data sent: Page text content, user questions
+       Service: OSA Chat Widget (osa-demo.pages.dev)
+  -->
   <script src="https://osa-demo.pages.dev/osa-chat-widget.js"
-          crossorigin="anonymous"></script>
-  <script>
+          crossorigin="anonymous"
+          defer></script>
+  <script defer>
+    // NOTE(review): `defer` is ignored on inline scripts, so this runs before the deferred widget script loads; initialize from a DOMContentLoaded handler instead
     if (window.OSAChatWidget) {
       OSAChatWidget.setConfig({
         communityId: 'hed',
diff --git a/docs/user_guide.md b/docs/user_guide.md
@@ -18,16 +18,15 @@ HED (Hierarchical Event Descriptors) is a framework for systematically describin
 ## Table of contents
 
 01. [What is HED?](#what-is-hed)
-02. [What are Python HEDTools?](#what-are-python-hedtools)
-03. [Getting started](#getting-started)
-04. [Working with HED schemas](#working-with-hed-schemas)
-05. [Validating HED strings](#validating-hed-strings)
-06. [Working with BIDS datasets](#working-with-bids-datasets)
-07. [Working with sidecars](#working-with-sidecars)
-08. [Jupyter notebooks](#jupyter-notebooks)
-09. [Command-line tools](#command-line-tools)
-10. [Best practices](#best-practices)
-11. [Troubleshooting](#troubleshooting)
+02. [Getting started](#getting-started)
+03. [Working with HED schemas](#working-with-hed-schemas)
+04. [Validating HED strings](#validating-hed-strings)
+05. [Working with BIDS datasets](#working-with-bids-datasets)
+06. [Working with sidecars](#working-with-sidecars)
+07. [Jupyter notebooks](#jupyter-notebooks)
+08. [Command-line tools](#command-line-tools)
+09. [Best practices](#best-practices)
+10. [Troubleshooting](#troubleshooting)
 
 ## Getting started
 
diff --git a/hed/models/query_expressions.py b/hed/models/query_expressions.py
@@ -113,7 +113,7 @@ def merge_and_groups(groups1, groups2):
                     dont_add = False
                     # This is trash and slow
                     for finalized_value in return_list:
-                        if merged_result.has_same_tags(finalized_value):
+                        if merged_result.has_same_children(finalized_value):
                             dont_add = True
                             break
                     if dont_add:
@@ -171,7 +171,7 @@ def handle_expr(self, hed_group, exact=False):
         duplicates = []
         for group in groups1:
             for other_group in groups2:
-                if group.has_same_tags(other_group):
+                if group.has_same_children(other_group):
                     duplicates.append(group)
 
         groups1 = [group for group in groups1 if not any(other_group is group for other_group in duplicates)]
diff --git a/hed/models/query_util.py b/hed/models/query_util.py
@@ -2,14 +2,28 @@
 
 
 class SearchResult:
-    """Holder for and manipulation of search results."""
+    """Holder for and manipulation of search results.
+
+    Represents a query match result consisting of:
+    - group: The containing HedGroup where matches were found
+    - children: The specific matched elements (tags/groups) within that group
+                (NOT all children of the group - only those that satisfied the query)
+
+    Example: When searching for "Red" in the HED string "(Red, Blue, Green)":
+        - group = the containing group (Red, Blue, Green)
+        - children = [Red] (only the matched tag)
+    """
 
     def __init__(self, group, children):
         """Initialize a search result.
 
         Parameters:
             group (HedGroup): The group where the children were found.
-            children (HedTag, HedGroup, or list): The matched child elements (tags or groups).
+            children (HedTag, HedGroup, or list): The matched child elements (tags or groups)
+                that satisfied the query condition. Can be:
+                - Single tag/group that matched
+                - List of tags/groups that matched
+                - Empty list (for negation or when group matched but no specific children)
         """
         self.group = group
         if not isinstance(children, list):
@@ -41,7 +55,7 @@ def merge_and_result(self, other):
             raise ValueError("Internal error")
         return SearchResult(self.group, new_children)
 
-    def has_same_tags(self, other):
+    def has_same_children(self, other):
         """Checks if these two results have the same children by identity (not equality).
 
         Parameters:
@@ -58,6 +72,9 @@ def has_same_tags(self, other):
 
         return all(child is child2 for child, child2 in zip(self.children, other.children, strict=False))
 
+    # Backward compatibility alias
+    has_same_tags = has_same_children
+
     def __str__(self):
         return str(self.group) + " Children: " + "---".join([str(child) for child in self.children])
 
diff --git a/hed/scripts/hed_convert_schema.py b/hed/scripts/hed_convert_schema.py
@@ -29,20 +29,21 @@ def convert_and_update(filenames, set_ids):
 
     updated = []
     # If we are here, we have validated the schemas(and if there's more than one version changed, that they're the same)
-    for basename, extensions in schema_files.items():
+    for basename, extension_paths in schema_files.items():
         # Skip any with multiple extensions or not in pre-release
         if "prerelease" not in basename:
             print(f"Skipping updates on {basename}, not in a prerelease folder.")
             continue
-        source_filename = add_extension(basename, list(extensions)[0])  # Load any changed schema version, they're all the same
-
-        # todo: more properly decide how we want to handle non lowercase extensions.
-        tsv_extension = ".tsv"
-        for extension in extensions:
-            if extension.lower() == ".tsv":
-                tsv_extension = extension
-
-        source_df_filename = add_extension(basename, tsv_extension)
+        # Use the actual file paths to preserve case on case-sensitive filesystems
+        source_filename = list(extension_paths.values())[0]  # Load any changed schema version, they're all the same
+
+        # Find TSV file path if it exists
+        tsv_path = extension_paths.get(".tsv")
+        if tsv_path:
+            source_df_filename = tsv_path
+        else:
+            # Construct TSV path if not found (unlikely in normal workflow)
+            source_df_filename = add_extension(basename, ".tsv")
         schema = load_schema(source_filename)
         print(f"Trying to convert/update file {source_filename}")
         source_dataframes = load_dataframes(source_df_filename)
diff --git a/hed/scripts/hed_script_util.py b/hed/scripts/hed_script_util.py
@@ -95,9 +95,15 @@ def add_extension(basename, extension):
     TSV files are placed in a 'hedtsv' subdirectory, while other formats
     simply append the extension to the basename.
 
+    Note: This function preserves the case of the extension to maintain
+    compatibility with case-sensitive filesystems. Extensions should only
+    be normalized (lowercased) for comparison purposes, not for file path
+    construction.
+
     Parameters:
         basename (str): The base path/name of the schema file without extension.
         extension (str): The file extension including the dot (e.g., '.xml', '.tsv').
+            Case is preserved as-is.
 
     Returns:
         str: The complete file path with extension applied.
@@ -107,8 +113,9 @@ def add_extension(basename, extension):
     """
     if not isinstance(extension, str):
         raise TypeError(f"extension must be a string, got {type(extension).__name__}")
-    extension = extension.lower()
-    if extension == ".tsv":
+    # Normalize only for comparison, not for path construction
+    extension_lower = extension.lower()
+    if extension_lower == ".tsv":
         parent_path, basename = os.path.split(basename)
         return os.path.join(parent_path, "hedtsv", basename)
     return basename + extension
@@ -120,13 +127,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
     Groups schema files by their base name, tracking which formats (extensions)
     have been modified. Handles special TSV directory structure (hedtsv subfolder).
 
+    Returns a nested dict that maps basename -> normalized (lowercase) extension -> actual_filepath,
+    preserving the original file casing for case-sensitive filesystems. A schema's .tsv files
+    share one ".tsv" key, so only the last .tsv path seen for that basename is kept.
+
     Example input:
-        ["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.xml"]
+        ["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.XML"]
 
     Example output:
         {
-            "test_schema": {".mediawiki", ".tsv"},
-            "other_schema": {".xml"}
+            "test_schema": {".mediawiki": "test_schema.mediawiki", ".tsv": "hedtsv/.../test_schema_Tag.tsv"},
+            "other_schema": {".xml": "other_schema.XML"}
         }
 
     Parameters:
@@ -135,20 +146,21 @@ def sort_base_schemas(filenames, add_all_extensions=False):
             Default is False.
 
     Returns:
-        dict: A dictionary where keys are the basename (str), and values are sets of
-            extensions modified. Can include .tsv, .mediawiki, .xml, and .json.
+        dict: A nested dictionary where keys are basenames (str), values are dicts mapping
+            normalized extensions (str, lowercase) to actual file paths (str, preserving case).
+            Can include .tsv, .mediawiki, .xml, and .json as keys.
     """
-    schema_files = defaultdict(set)
+    schema_files = defaultdict(dict)
     for file_path in filenames:
         if not os.path.exists(file_path):
             print(f"Ignoring deleted file {file_path}.")
             continue
         basename, extension = os.path.splitext(file_path)
-        extension = extension.lower()
-        if extension == ".xml" or extension == ".mediawiki":
-            schema_files[basename].add(extension)
+        extension_lower = extension.lower()  # Normalize for comparison only
+        if extension_lower == ".xml" or extension_lower == ".mediawiki":
+            schema_files[basename][extension_lower] = file_path
             continue
-        elif extension == ".tsv":
+        elif extension_lower == ".tsv":
             tsv_basename = basename.rpartition("_")[0]
             full_parent_path, real_basename = os.path.split(tsv_basename)
             full_parent_path, real_basename2 = os.path.split(full_parent_path)
@@ -160,14 +172,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
                 print(f"Ignoring file {file_path}.  .tsv files must be in a subfolder with the same name.")
                 continue
             real_name = os.path.join(real_parent_path, real_basename)
-            schema_files[real_name].add(extension)
+            schema_files[real_name][extension_lower] = file_path
         else:
             print(f"Ignoring file {file_path}")
 
     if add_all_extensions:
         for schema_name in schema_files:
             for extension in all_extensions:
-                schema_files[schema_name].add(extension)
+                # Only add if not already present - don't overwrite actual paths
+                if extension not in schema_files[schema_name]:
+                    # Construct path for missing extensions - use the add_extension logic
+                    schema_files[schema_name][extension] = add_extension(schema_name, extension)
 
     return schema_files
 
@@ -209,24 +224,25 @@ def validate_all_schemas(schema_files):
     for a prerelease schema, ensures all formats exist and are identical.
 
     Parameters:
-        schema_files (dict): Dictionary mapping basenames (str) to sets of extensions (str)
-            representing all files changed.
+        schema_files (dict): Dictionary mapping basenames (str) to dicts of
+            {normalized_extension (str) -> actual_filepath (str)} representing
+            all files changed.
 
     Returns:
         list: A list of all validation issues found across all schemas.
     """
     all_issues = []
-    for basename, extensions in schema_files.items():
+    for basename, extension_paths in schema_files.items():
         single_schema_issues = []
-        for extension in extensions:
-            full_path = add_extension(basename, extension)
-            single_schema_issues += validate_schema(full_path)
+        for _extension, file_path in extension_paths.items():
+            # Use the actual file path to preserve case on case-sensitive filesystems
+            single_schema_issues += validate_schema(file_path)
 
-        if len(extensions) > 1 and not single_schema_issues and "prerelease" in basename:
+        if len(extension_paths) > 1 and not single_schema_issues and "prerelease" in basename:
             single_schema_issues += validate_all_schema_formats(basename)
 
         print(f"Validating: {basename}...")
-        print(f"Extensions: {extensions}")
+        print(f"Extensions: {set(extension_paths.keys())}")
         if single_schema_issues:
             for issue in single_schema_issues:
                 print(issue)
diff --git a/tests/models/test_query_util.py b/tests/models/test_query_util.py
diff --git a/tests/scripts/test_script_util.py b/tests/scripts/test_script_util.py