Skip to content

Commit f547feb

Browse files
committed
Addressed the copilot suggestions
1 parent 28b1c38 commit f547feb

4 files changed

Lines changed: 150 additions & 75 deletions

File tree

hed/schema/schema_io/schema2xml.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,13 @@ def _output_sources(self, hed_schema):
4848
if sources is None or sources.empty:
4949
return
5050

51-
# Filter for unmerged library schemas - only output library entries
51+
# Filter for unmerged library schemas - only output library entries if tracking is available
5252
if not self._save_merged and hed_schema.library and hed_schema.with_standard:
5353
if df_constants.in_library in sources.columns:
5454
sources = sources[sources[df_constants.in_library].notna()].copy()
5555
if sources.empty:
5656
return
57-
else:
58-
# No in_library tracking, skip output for safety
59-
return
57+
# Otherwise fall back to writing all rows (assume all are library entries)
6058

6159
sources_node = SubElement(self.hed_node, xml_constants.SCHEMA_SOURCE_SECTION_ELEMENT)
6260
for _, row in sources.iterrows():
@@ -81,15 +79,13 @@ def _output_prefixes(self, hed_schema):
8179
if prefixes is None or prefixes.empty:
8280
return
8381

84-
# Filter for unmerged library schemas - only output library entries
82+
# Filter for unmerged library schemas - only output library entries if tracking is available
8583
if not self._save_merged and hed_schema.library and hed_schema.with_standard:
8684
if df_constants.in_library in prefixes.columns:
8785
prefixes = prefixes[prefixes[df_constants.in_library].notna()].copy()
8886
if prefixes.empty:
8987
return
90-
else:
91-
# No in_library tracking, skip output for safety
92-
return
88+
# Otherwise fall back to writing all rows (assume all are library entries)
9389

9490
prefixes_node = SubElement(self.hed_node, xml_constants.SCHEMA_PREFIX_SECTION_ELEMENT)
9591
for _, row in prefixes.iterrows():
@@ -113,15 +109,13 @@ def _output_external_annotations(self, hed_schema):
113109
if externals is None or externals.empty:
114110
return
115111

116-
# Filter for unmerged library schemas - only output library entries
112+
# Filter for unmerged library schemas - only output library entries if tracking is available
117113
if not self._save_merged and hed_schema.library and hed_schema.with_standard:
118114
if df_constants.in_library in externals.columns:
119115
externals = externals[externals[df_constants.in_library].notna()].copy()
120116
if externals.empty:
121117
return
122-
else:
123-
# No in_library tracking, skip output for safety
124-
return
118+
# Otherwise fall back to writing all rows (assume all are library entries)
125119

126120
externals_node = SubElement(self.hed_node, xml_constants.SCHEMA_EXTERNAL_SECTION_ELEMENT)
127121
for _, row in externals.iterrows():

hed/schema/schema_io/xml2schema.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,23 @@ def _read_epilogue(self):
9494
return epilogue_elements[0].text
9595
return ""
9696

97+
def _get_in_library_attribute(self, element):
98+
"""Parse inLibrary attribute from an extras element if present.
99+
100+
Parameters:
101+
element: XML element to parse
102+
103+
Returns:
104+
str or None: Library name if inLibrary attribute found, None otherwise
105+
"""
106+
for attr_element in element.findall(xml_constants.ATTRIBUTE_ELEMENT):
107+
name_elem = attr_element.find(xml_constants.NAME_ELEMENT)
108+
if name_elem is not None and name_elem.text == HedKey.InLibrary:
109+
value_elem = attr_element.find(xml_constants.VALUE_ELEMENT)
110+
if value_elem is not None:
111+
return value_elem.text
112+
return None
113+
97114
def _read_extras(self):
98115
self._schema.extras = {}
99116
self._read_sources()
@@ -107,22 +124,27 @@ def _read_sources(self):
107124
source_name = self._get_element_value(source_element, xml_constants.NAME_ELEMENT)
108125
source_link = self._get_element_value(source_element, xml_constants.LINK_ELEMENT)
109126
description = self._get_element_value(source_element, xml_constants.DESCRIPTION_ELEMENT)
127+
128+
# Parse inLibrary attribute from element if present (for merged XML)
129+
in_library_value = self._get_in_library_attribute(source_element)
130+
# If not found in XML but this is an unmerged library schema, use self.library
131+
if in_library_value is None and self.library and not self._loading_merged:
132+
in_library_value = self.library
133+
110134
data.append(
111-
{df_constants.source: source_name, df_constants.link: source_link, df_constants.description: description}
135+
{
136+
df_constants.source: source_name,
137+
df_constants.link: source_link,
138+
df_constants.description: description,
139+
df_constants.in_library: in_library_value,
140+
}
112141
)
113-
library_df = pd.DataFrame(data, columns=df_constants.source_columns)
114-
115-
# Add in_library column if this is a library schema
116-
if self.library:
117-
library_df[df_constants.in_library] = self.library
142+
library_df = pd.DataFrame(data)
118143

119-
# Merge with standard schema extras if applicable
120-
if self.appending_to_schema:
121-
standard_df = self._schema.extras.get(df_constants.SOURCES_KEY, None)
122-
if standard_df is not None and not standard_df.empty:
123-
self._schema.extras[df_constants.SOURCES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
124-
else:
125-
self._schema.extras[df_constants.SOURCES_KEY] = library_df
144+
# Merge with existing schema extras if present (from withStandard base schema)
145+
standard_df = self._schema.extras.get(df_constants.SOURCES_KEY, None)
146+
if standard_df is not None and not standard_df.empty:
147+
self._schema.extras[df_constants.SOURCES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
126148
else:
127149
self._schema.extras[df_constants.SOURCES_KEY] = library_df
128150

@@ -133,26 +155,27 @@ def _read_prefixes(self):
133155
prefix_name = self._get_element_value(prefix_element, xml_constants.NAME_ELEMENT)
134156
prefix_namespace = self._get_element_value(prefix_element, xml_constants.NAMESPACE_ELEMENT)
135157
prefix_description = self._get_element_value(prefix_element, xml_constants.DESCRIPTION_ELEMENT)
158+
159+
# Parse inLibrary attribute from element if present (for merged XML)
160+
in_library_value = self._get_in_library_attribute(prefix_element)
161+
# If not found in XML but this is an unmerged library schema, use self.library
162+
if in_library_value is None and self.library and not self._loading_merged:
163+
in_library_value = self.library
164+
136165
data.append(
137166
{
138167
df_constants.prefix: prefix_name,
139168
df_constants.namespace: prefix_namespace,
140169
df_constants.description: prefix_description,
170+
df_constants.in_library: in_library_value,
141171
}
142172
)
143-
library_df = pd.DataFrame(data, columns=df_constants.prefix_columns)
173+
library_df = pd.DataFrame(data)
144174

145-
# Add in_library column if this is a library schema
146-
if self.library:
147-
library_df[df_constants.in_library] = self.library
148-
149-
# Merge with standard schema extras if applicable
150-
if self.appending_to_schema:
151-
standard_df = self._schema.extras.get(df_constants.PREFIXES_KEY, None)
152-
if standard_df is not None and not standard_df.empty:
153-
self._schema.extras[df_constants.PREFIXES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
154-
else:
155-
self._schema.extras[df_constants.PREFIXES_KEY] = library_df
175+
# Merge with existing schema extras if present (from withStandard base schema)
176+
standard_df = self._schema.extras.get(df_constants.PREFIXES_KEY, None)
177+
if standard_df is not None and not standard_df.empty:
178+
self._schema.extras[df_constants.PREFIXES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
156179
else:
157180
self._schema.extras[df_constants.PREFIXES_KEY] = library_df
158181

@@ -164,29 +187,28 @@ def _read_external_annotations(self):
164187
external_id = self._get_element_value(external_element, xml_constants.ID_ELEMENT)
165188
external_iri = self._get_element_value(external_element, xml_constants.IRI_ELEMENT)
166189
external_description = self._get_element_value(external_element, xml_constants.DESCRIPTION_ELEMENT)
190+
191+
# Parse inLibrary attribute from element if present (for merged XML)
192+
in_library_value = self._get_in_library_attribute(external_element)
193+
# If not found in XML but this is an unmerged library schema, use self.library
194+
if in_library_value is None and self.library and not self._loading_merged:
195+
in_library_value = self.library
196+
167197
data.append(
168198
{
169199
df_constants.prefix: external_name,
170200
df_constants.id: external_id,
171201
df_constants.iri: external_iri,
172202
df_constants.description: external_description,
203+
df_constants.in_library: in_library_value,
173204
}
174205
)
175-
library_df = pd.DataFrame(data, columns=df_constants.external_annotation_columns)
176-
177-
# Add in_library column if this is a library schema
178-
if self.library:
179-
library_df[df_constants.in_library] = self.library
180-
181-
# Merge with standard schema extras if applicable
182-
if self.appending_to_schema:
183-
standard_df = self._schema.extras.get(df_constants.EXTERNAL_ANNOTATION_KEY, None)
184-
if standard_df is not None and not standard_df.empty:
185-
self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = pd.concat(
186-
[standard_df, library_df], ignore_index=True
187-
)
188-
else:
189-
self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = library_df
206+
library_df = pd.DataFrame(data)
207+
208+
# Merge with existing schema extras if present (from withStandard base schema)
209+
standard_df = self._schema.extras.get(df_constants.EXTERNAL_ANNOTATION_KEY, None)
210+
if standard_df is not None and not standard_df.empty:
211+
self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
190212
else:
191213
self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = library_df
192214

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
<?xml version="1.0" ?>
2+
<HED version="4.0.0" library="testlib" withStandard="8.4.0" unmerged="True">
3+
<prologue>This schema is designed to be lazy partnered with prerelease 8.4.0.</prologue>
4+
<schema>
5+
<node>
6+
<name>Test-some</name>
7+
<description>Unknown stuff.</description>
8+
<node>
9+
<name>Unknown1</name>
10+
<description>Unknown1 stuff</description>
11+
</node>
12+
</node>
13+
<node>
14+
<name>Fruit</name>
15+
<description>Fruit stuff.</description>
16+
<attribute>
17+
<name>rooted</name>
18+
<value>Plant</value>
19+
</attribute>
20+
<node>
21+
<name>Apple</name>
22+
<description>Apple stuff</description>
23+
<attribute>
24+
<name>annotation</name>
25+
<value>foodonto:has_botanical_name</value>
26+
</attribute>
27+
<node>
28+
<name>Honey-crisp</name>
29+
<description>Type of apple</description>
30+
</node>
31+
</node>
32+
</node>
33+
<node>
34+
<name>Vegetable</name>
35+
<description>Vegetable stuff.</description>
36+
<attribute>
37+
<name>rooted</name>
38+
<value>Plant</value>
39+
</attribute>
40+
<node>
41+
<name>Carrot</name>
42+
<description>Carrot stuff</description>
43+
<attribute>
44+
<name>annotation</name>
45+
<value>foodonto:has_botanical_name</value>
46+
</attribute>
47+
</node>
48+
</node>
49+
</schema>
50+
<unitClassDefinitions/>
51+
<unitModifierDefinitions/>
52+
<valueClassDefinitions/>
53+
<schemaAttributeDefinitions/>
54+
<propertyDefinitions/>
55+
<epilogue>A final section.</epilogue>
56+
<schemaSources>
57+
<schemaSource>
58+
<name>FoodDB</name>
59+
<link>https://fooddb.example.org</link>
60+
<description>Botanical and nutritional database for fruits and vegetables.</description>
61+
</schemaSource>
62+
</schemaSources>
63+
<schemaPrefixes>
64+
<schemaPrefix>
65+
<name>foodonto:</name>
66+
<namespace>http://purl.obolibrary.org/obo/foodon.owl</namespace>
67+
<description>Food Ontology (FOODON)</description>
68+
</schemaPrefix>
69+
</schemaPrefixes>
70+
<externalAnnotations>
71+
<externalAnnotation>
72+
<name>foodonto:</name>
73+
<id>has_botanical_name</id>
74+
<iri>http://purl.obolibrary.org/obo/FOODON_00001234</iri>
75+
<description>The botanical or scientific name of a food item.</description>
76+
</externalAnnotation>
77+
</externalAnnotations>
78+
</HED>

tests/schema/test_schema_extras_xml_roundtrip.py

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@ def setUpClass(cls):
2525
cls.temp_dir = tempfile.mkdtemp(prefix="hed_extras_test_")
2626

2727
# Path to testlib 4.0.0 which has all three extras sections
28-
cls.testlib_4_path = os.path.join(
29-
os.path.dirname(__file__), "../../.status/HED_testlib_4.0.0_converted/HED_testlib_4.0.0.xml"
30-
)
28+
cls.testlib_4_path = os.path.join(os.path.dirname(__file__), "../data/schema_tests/test_merge/HED_testlib_4.0.0.xml")
3129

3230
# Normalize path
3331
cls.testlib_4_path = os.path.normpath(cls.testlib_4_path)
@@ -40,9 +38,6 @@ def tearDownClass(cls):
4038

4139
def test_read_unmerged_library_extras_has_in_library_column(self):
4240
"""Test that reading unmerged library schema adds in_library column to extras."""
43-
if not os.path.exists(self.testlib_4_path):
44-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
45-
4641
schema = load_schema(self.testlib_4_path)
4742

4843
# Verify schema properties
@@ -81,10 +76,10 @@ def test_read_unmerged_library_extras_has_in_library_column(self):
8176
)
8277

8378
def test_read_merged_schema_has_mixed_in_library(self):
84-
"""Test that merged library schema has entries from both standard and library with proper in_library tracking."""
85-
if not os.path.exists(self.testlib_4_path):
86-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
79+
"""Test that merged library schema properly tracks library entries with in_library column.
8780
81+
Note: Standard schema 8.4.0 may not have extras sections, so we only verify library entries exist.
82+
"""
8883
# Load as merged (this happens automatically because withStandard is set)
8984
schema = load_schema(self.testlib_4_path)
9085

@@ -115,9 +110,6 @@ def test_read_merged_schema_has_mixed_in_library(self):
115110

116111
def test_write_unmerged_only_outputs_library_extras(self):
117112
"""Test that saving unmerged only outputs extras with in_library column (merged schema saved as unmerged)."""
118-
if not os.path.exists(self.testlib_4_path):
119-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
120-
121113
# Load schema - it will auto-merge with standard 8.4.0
122114
merged_schema = load_schema(self.testlib_4_path)
123115

@@ -157,8 +149,6 @@ def test_write_unmerged_only_outputs_library_extras(self):
157149

158150
def test_write_merged_outputs_all_extras(self):
159151
"""Test that saving merged outputs all extras (library and standard)."""
160-
if not os.path.exists(self.testlib_4_path):
161-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
162152
# Load schema - auto-merges with standard 8.4.0
163153
merged_schema = load_schema(self.testlib_4_path)
164154

@@ -227,9 +217,6 @@ def test_write_merged_outputs_all_extras(self):
227217

228218
def test_roundtrip_unmerged_preserves_library_extras(self):
229219
"""Test round-trip with unmerged: read merged -> save unmerged -> read -> save unmerged -> verify identical."""
230-
if not os.path.exists(self.testlib_4_path):
231-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
232-
233220
# Load original (auto-merges with standard)
234221
schema1 = load_schema(self.testlib_4_path)
235222

@@ -270,9 +257,6 @@ def test_roundtrip_unmerged_preserves_library_extras(self):
270257

271258
def test_roundtrip_merged_preserves_all_extras(self):
272259
"""Test round-trip with merged: read -> save merged -> read -> save merged -> verify identical."""
273-
if not os.path.exists(self.testlib_4_path):
274-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
275-
276260
# Load original (auto-merges)
277261
schema1 = load_schema(self.testlib_4_path)
278262

@@ -323,9 +307,6 @@ def test_roundtrip_merged_preserves_all_extras(self):
323307

324308
def test_in_library_column_not_in_xml_output(self):
325309
"""Test that in_library column is not serialized to XML output, but inLibrary attributes are correctly written."""
326-
if not os.path.exists(self.testlib_4_path):
327-
self.skipTest(f"Test file not found: {self.testlib_4_path}")
328-
329310
schema = load_schema(self.testlib_4_path)
330311

331312
# Check that extras have the in_library column

0 commit comments

Comments
 (0)