Address Copilot review suggestions: extras fallback, per-element inLibrary parsing, in-repo test fixture

VisLab · VisLab · commit f547feb4ca0f · 2026-02-17T11:29:58.000-06:00
diff --git a/hed/schema/schema_io/schema2xml.py b/hed/schema/schema_io/schema2xml.py
@@ -48,15 +48,13 @@ def _output_sources(self, hed_schema):
         if sources is None or sources.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries
+        # Unmerged library schemas: output only library entries when in_library tracking is available
         if not self._save_merged and hed_schema.library and hed_schema.with_standard:
             if df_constants.in_library in sources.columns:
                 sources = sources[sources[df_constants.in_library].notna()].copy()
                 if sources.empty:
                     return
-            else:
-                # No in_library tracking, skip output for safety
-                return
+            # No in_library column: fall back to writing all rows (assume all are library entries)
 
         sources_node = SubElement(self.hed_node, xml_constants.SCHEMA_SOURCE_SECTION_ELEMENT)
         for _, row in sources.iterrows():
@@ -81,15 +79,13 @@ def _output_prefixes(self, hed_schema):
         if prefixes is None or prefixes.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries
+        # Unmerged library schemas: output only library entries when in_library tracking is available
         if not self._save_merged and hed_schema.library and hed_schema.with_standard:
             if df_constants.in_library in prefixes.columns:
                 prefixes = prefixes[prefixes[df_constants.in_library].notna()].copy()
                 if prefixes.empty:
                     return
-            else:
-                # No in_library tracking, skip output for safety
-                return
+            # No in_library column: fall back to writing all rows (assume all are library entries)
 
         prefixes_node = SubElement(self.hed_node, xml_constants.SCHEMA_PREFIX_SECTION_ELEMENT)
         for _, row in prefixes.iterrows():
@@ -113,15 +109,13 @@ def _output_external_annotations(self, hed_schema):
         if externals is None or externals.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries
+        # Unmerged library schemas: output only library entries when in_library tracking is available
         if not self._save_merged and hed_schema.library and hed_schema.with_standard:
             if df_constants.in_library in externals.columns:
                 externals = externals[externals[df_constants.in_library].notna()].copy()
                 if externals.empty:
                     return
-            else:
-                # No in_library tracking, skip output for safety
-                return
+            # No in_library column: fall back to writing all rows (assume all are library entries)
 
         externals_node = SubElement(self.hed_node, xml_constants.SCHEMA_EXTERNAL_SECTION_ELEMENT)
         for _, row in externals.iterrows():
diff --git a/hed/schema/schema_io/xml2schema.py b/hed/schema/schema_io/xml2schema.py
@@ -94,6 +94,23 @@ def _read_epilogue(self):
             return epilogue_elements[0].text
         return ""
 
+    def _get_in_library_attribute(self, element):
+        """Parse inLibrary attribute from an extras element if present.
+
+        Parameters:
+            element: XML element to parse
+
+        Returns:
+            str or None: Value text of the first inLibrary attribute that has a value element, None otherwise
+        """
+        for attr_element in element.findall(xml_constants.ATTRIBUTE_ELEMENT):
+            name_elem = attr_element.find(xml_constants.NAME_ELEMENT)
+            if name_elem is not None and name_elem.text == HedKey.InLibrary:
+                value_elem = attr_element.find(xml_constants.VALUE_ELEMENT)
+                if value_elem is not None:
+                    return value_elem.text
+        return None
+
     def _read_extras(self):
         self._schema.extras = {}
         self._read_sources()
@@ -107,22 +124,27 @@ def _read_sources(self):
             source_name = self._get_element_value(source_element, xml_constants.NAME_ELEMENT)
             source_link = self._get_element_value(source_element, xml_constants.LINK_ELEMENT)
             description = self._get_element_value(source_element, xml_constants.DESCRIPTION_ELEMENT)
+
+            # Parse inLibrary attribute from element if present (for merged XML)
+            in_library_value = self._get_in_library_attribute(source_element)
+            # If not found in XML but this is an unmerged library schema, use self.library
+            if in_library_value is None and self.library and not self._loading_merged:
+                in_library_value = self.library
+
             data.append(
-                {df_constants.source: source_name, df_constants.link: source_link, df_constants.description: description}
+                {
+                    df_constants.source: source_name,
+                    df_constants.link: source_link,
+                    df_constants.description: description,
+                    df_constants.in_library: in_library_value,
+                }
             )
-        library_df = pd.DataFrame(data, columns=df_constants.source_columns)
-
-        # Add in_library column if this is a library schema
-        if self.library:
-            library_df[df_constants.in_library] = self.library
+        library_df = pd.DataFrame(data)
 
-        # Merge with standard schema extras if applicable
-        if self.appending_to_schema:
-            standard_df = self._schema.extras.get(df_constants.SOURCES_KEY, None)
-            if standard_df is not None and not standard_df.empty:
-                self._schema.extras[df_constants.SOURCES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
-            else:
-                self._schema.extras[df_constants.SOURCES_KEY] = library_df
+        # Merge with existing schema extras if present (from withStandard base schema)
+        standard_df = self._schema.extras.get(df_constants.SOURCES_KEY, None)
+        if standard_df is not None and not standard_df.empty:
+            self._schema.extras[df_constants.SOURCES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
         else:
             self._schema.extras[df_constants.SOURCES_KEY] = library_df
 
@@ -133,26 +155,27 @@ def _read_prefixes(self):
             prefix_name = self._get_element_value(prefix_element, xml_constants.NAME_ELEMENT)
             prefix_namespace = self._get_element_value(prefix_element, xml_constants.NAMESPACE_ELEMENT)
             prefix_description = self._get_element_value(prefix_element, xml_constants.DESCRIPTION_ELEMENT)
+
+            # Parse inLibrary attribute from element if present (for merged XML)
+            in_library_value = self._get_in_library_attribute(prefix_element)
+            # If not found in XML but this is an unmerged library schema, use self.library
+            if in_library_value is None and self.library and not self._loading_merged:
+                in_library_value = self.library
+
             data.append(
                 {
                     df_constants.prefix: prefix_name,
                     df_constants.namespace: prefix_namespace,
                     df_constants.description: prefix_description,
+                    df_constants.in_library: in_library_value,
                 }
             )
-        library_df = pd.DataFrame(data, columns=df_constants.prefix_columns)
+        library_df = pd.DataFrame(data)
 
-        # Add in_library column if this is a library schema
-        if self.library:
-            library_df[df_constants.in_library] = self.library
-
-        # Merge with standard schema extras if applicable
-        if self.appending_to_schema:
-            standard_df = self._schema.extras.get(df_constants.PREFIXES_KEY, None)
-            if standard_df is not None and not standard_df.empty:
-                self._schema.extras[df_constants.PREFIXES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
-            else:
-                self._schema.extras[df_constants.PREFIXES_KEY] = library_df
+        # Merge with existing schema extras if present (from withStandard base schema)
+        standard_df = self._schema.extras.get(df_constants.PREFIXES_KEY, None)
+        if standard_df is not None and not standard_df.empty:
+            self._schema.extras[df_constants.PREFIXES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
         else:
             self._schema.extras[df_constants.PREFIXES_KEY] = library_df
 
@@ -164,29 +187,28 @@ def _read_external_annotations(self):
             external_id = self._get_element_value(external_element, xml_constants.ID_ELEMENT)
             external_iri = self._get_element_value(external_element, xml_constants.IRI_ELEMENT)
             external_description = self._get_element_value(external_element, xml_constants.DESCRIPTION_ELEMENT)
+
+            # Parse inLibrary attribute from element if present (for merged XML)
+            in_library_value = self._get_in_library_attribute(external_element)
+            # If not found in XML but this is an unmerged library schema, use self.library
+            if in_library_value is None and self.library and not self._loading_merged:
+                in_library_value = self.library
+
             data.append(
                 {
                     df_constants.prefix: external_name,
                     df_constants.id: external_id,
                     df_constants.iri: external_iri,
                     df_constants.description: external_description,
+                    df_constants.in_library: in_library_value,
                 }
             )
-        library_df = pd.DataFrame(data, columns=df_constants.external_annotation_columns)
-
-        # Add in_library column if this is a library schema
-        if self.library:
-            library_df[df_constants.in_library] = self.library
-
-        # Merge with standard schema extras if applicable
-        if self.appending_to_schema:
-            standard_df = self._schema.extras.get(df_constants.EXTERNAL_ANNOTATION_KEY, None)
-            if standard_df is not None and not standard_df.empty:
-                self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = pd.concat(
-                    [standard_df, library_df], ignore_index=True
-                )
-            else:
-                self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = library_df
+        library_df = pd.DataFrame(data)
+
+        # Merge with existing schema extras if present (from withStandard base schema)
+        standard_df = self._schema.extras.get(df_constants.EXTERNAL_ANNOTATION_KEY, None)
+        if standard_df is not None and not standard_df.empty:
+            self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
         else:
             self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = library_df
 
diff --git a/tests/data/schema_tests/test_merge/HED_testlib_4.0.0.xml b/tests/data/schema_tests/test_merge/HED_testlib_4.0.0.xml
@@ -0,0 +1,78 @@
+<?xml version="1.0" ?>
+<HED version="4.0.0" library="testlib" withStandard="8.4.0" unmerged="True">
+   <prologue>This schema is designed to be lazy partnered with prerelease 8.4.0.</prologue>
+   <schema>
+      <node>
+         <name>Test-some</name>
+         <description>Unknown stuff.</description>
+         <node>
+            <name>Unknown1</name>
+            <description>Unknown1 stuff</description>
+         </node>
+      </node>
+      <node>
+         <name>Fruit</name>
+         <description>Fruit stuff.</description>
+         <attribute>
+            <name>rooted</name>
+            <value>Plant</value>
+         </attribute>
+         <node>
+            <name>Apple</name>
+            <description>Apple stuff</description>
+            <attribute>
+               <name>annotation</name>
+               <value>foodonto:has_botanical_name</value>
+            </attribute>
+            <node>
+               <name>Honey-crisp</name>
+               <description>Type of apple</description>
+            </node>
+         </node>
+      </node>
+      <node>
+         <name>Vegetable</name>
+         <description>Vegetable stuff.</description>
+         <attribute>
+            <name>rooted</name>
+            <value>Plant</value>
+         </attribute>
+         <node>
+            <name>Carrot</name>
+            <description>Carrot stuff</description>
+            <attribute>
+               <name>annotation</name>
+               <value>foodonto:has_botanical_name</value>
+            </attribute>
+         </node>
+      </node>
+   </schema>
+   <unitClassDefinitions/>
+   <unitModifierDefinitions/>
+   <valueClassDefinitions/>
+   <schemaAttributeDefinitions/>
+   <propertyDefinitions/>
+   <epilogue>A final section.</epilogue>
+   <schemaSources>
+      <schemaSource>
+         <name>FoodDB</name>
+         <link>https://fooddb.example.org</link>
+         <description>Botanical and nutritional database for fruits and vegetables.</description>
+      </schemaSource>
+   </schemaSources>
+   <schemaPrefixes>
+      <schemaPrefix>
+         <name>foodonto:</name>
+         <namespace>http://purl.obolibrary.org/obo/foodon.owl</namespace>
+         <description>Food Ontology (FOODON)</description>
+      </schemaPrefix>
+   </schemaPrefixes>
+   <externalAnnotations>
+      <externalAnnotation>
+         <name>foodonto:</name>
+         <id>has_botanical_name</id>
+         <iri>http://purl.obolibrary.org/obo/FOODON_00001234</iri>
+         <description>The botanical or scientific name of a food item.</description>
+      </externalAnnotation>
+   </externalAnnotations>
+</HED>
diff --git a/tests/schema/test_schema_extras_xml_roundtrip.py b/tests/schema/test_schema_extras_xml_roundtrip.py
@@ -25,9 +25,7 @@ def setUpClass(cls):
         cls.temp_dir = tempfile.mkdtemp(prefix="hed_extras_test_")
 
         # Path to testlib 4.0.0 which has all three extras sections
-        cls.testlib_4_path = os.path.join(
-            os.path.dirname(__file__), "../../.status/HED_testlib_4.0.0_converted/HED_testlib_4.0.0.xml"
-        )
+        cls.testlib_4_path = os.path.join(os.path.dirname(__file__), "../data/schema_tests/test_merge/HED_testlib_4.0.0.xml")
 
         # Normalize path
         cls.testlib_4_path = os.path.normpath(cls.testlib_4_path)
@@ -40,9 +38,6 @@ def tearDownClass(cls):
 
     def test_read_unmerged_library_extras_has_in_library_column(self):
         """Test that reading unmerged library schema adds in_library column to extras."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
-
         schema = load_schema(self.testlib_4_path)
 
         # Verify schema properties
@@ -81,10 +76,10 @@ def test_read_unmerged_library_extras_has_in_library_column(self):
         )
 
     def test_read_merged_schema_has_mixed_in_library(self):
-        """Test that merged library schema has entries from both standard and library with proper in_library tracking."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
+        """Test that merged library schema properly tracks library entries with in_library column.
 
+        Note: Standard schema 8.4.0 may not have extras sections, so we only verify library entries exist.
+        """
         # Load as merged (this happens automatically because withStandard is set)
         schema = load_schema(self.testlib_4_path)
 
@@ -115,9 +110,6 @@ def test_read_merged_schema_has_mixed_in_library(self):
 
     def test_write_unmerged_only_outputs_library_extras(self):
         """Test that saving unmerged only outputs extras with in_library column (merged schema saved as unmerged)."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
-
         # Load schema - it will auto-merge with standard 8.4.0
         merged_schema = load_schema(self.testlib_4_path)
 
@@ -157,8 +149,6 @@ def test_write_unmerged_only_outputs_library_extras(self):
 
     def test_write_merged_outputs_all_extras(self):
         """Test that saving merged outputs all extras (library and standard)."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
         # Load schema - auto-merges with standard 8.4.0
         merged_schema = load_schema(self.testlib_4_path)
 
@@ -227,9 +217,6 @@ def test_write_merged_outputs_all_extras(self):
 
     def test_roundtrip_unmerged_preserves_library_extras(self):
         """Test round-trip with unmerged: read merged -> save unmerged -> read -> save unmerged -> verify identical."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
-
         # Load original (auto-merges with standard)
         schema1 = load_schema(self.testlib_4_path)
 
@@ -270,9 +257,6 @@ def test_roundtrip_unmerged_preserves_library_extras(self):
 
     def test_roundtrip_merged_preserves_all_extras(self):
         """Test round-trip with merged: read -> save merged -> read -> save merged -> verify identical."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
-
         # Load original (auto-merges)
         schema1 = load_schema(self.testlib_4_path)
 
@@ -323,9 +307,6 @@ def test_roundtrip_merged_preserves_all_extras(self):
 
     def test_in_library_column_not_in_xml_output(self):
         """Test that in_library column is not serialized to XML output, but inLibrary attributes are correctly written."""
-        if not os.path.exists(self.testlib_4_path):
-            self.skipTest(f"Test file not found: {self.testlib_4_path}")
-
         schema = load_schema(self.testlib_4_path)
 
         # Check that extras have the in_library column