Skip to content

Commit 6a8f0da

Browse files
committed
Fixed writing of schema df when empty
1 parent a1bc7e4 commit 6a8f0da

4 files changed

Lines changed: 201 additions & 180 deletions

File tree

-4.61 KB
Binary file not shown.

hed/schema/schema_io/df_constants.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
from hed.schema.hed_schema_constants import HedSectionKey
2-
from hed.schema import hed_schema_constants
32

43
KEY_COLUMN_NAME = "rdfs.label"
54

@@ -71,7 +70,6 @@
7170
subclass_of = "omn:SubClassOf"
7271
attributes = "Attributes"
7372
dcdescription = "dc:description"
74-
equivalent_to = "omn:EquivalentTo"
7573
has_unit_class = "hasUnitClass"
7674
prefix = "prefix" # for the prefixes section, this is the column name in the prefixes dataframe
7775
namespace = "namespace" # for the prefixes section, this is the column name in the prefixes dataframe
@@ -105,6 +103,21 @@
105103
property_type = "Type"
106104
properties = "Properties"
107105

106+
# Map of column names (suffix keys) to valid attribute names
107+
attribute_key_names = {
108+
TAG_KEY: tag_columns,
109+
VALUE_CLASS_KEY: other_columns,
110+
UNIT_CLASS_KEY: other_columns,
111+
UNIT_KEY: unit_columns,
112+
UNIT_MODIFIER_KEY: other_columns,
113+
ANNOTATION_KEY: attribute_columns,
114+
DATA_KEY: attribute_columns,
115+
OBJECT_KEY: attribute_columns,
116+
ATTRIBUTE_PROPERTY_KEY: property_columns,
117+
PREFIXES_KEY: prefix_columns,
118+
EXTERNAL_ANNOTATION_KEY: external_annotation_columns,
119+
SOURCES_KEY: source_columns,
120+
}
108121

109122
# HED_00X__YY where X is the library starting index, and Y is the entity number below.
110123
struct_base_ids = {
@@ -122,14 +135,6 @@
122135
"HedEpilogue": 12,
123136
}
124137

125-
# todo: this should be retrieved directly from the appropriate spreadsheet
126-
valid_omn_attributes = {
127-
hed_schema_constants.VERSION_ATTRIBUTE: "HED_0000300",
128-
hed_schema_constants.LIBRARY_ATTRIBUTE: "HED_0000301",
129-
hed_schema_constants.WITH_STANDARD_ATTRIBUTE: "HED_0000302",
130-
hed_schema_constants.UNMERGED_ATTRIBUTE: "HED_0000303",
131-
}
132-
133138
# Extra spreadsheet columns
134139
EXTRAS_CONVERSIONS = {
135140
"Prefix": "prefix",

hed/schema/schema_io/schema2df.py

Lines changed: 7 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44
from hed.schema.schema_io.df_util import (
55
create_empty_dataframes,
66
get_library_name_and_id,
7-
calculate_attribute_type,
87
)
98
from hed.schema.schema_io.schema2base import Schema2Base
10-
from hed.schema.schema_io import text_util
119
import pandas as pd
1210
import hed.schema.schema_io.df_constants as constants
1311
from hed.schema.hed_schema_entry import HedTagEntry
@@ -24,15 +22,9 @@
2422

2523

2624
class Schema2DF(Schema2Base):
27-
def __init__(self, get_as_ids=False):
28-
"""Constructor for schema to dataframe converter
29-
30-
Parameters:
31-
get_as_ids(bool): If true, return the hedId rather than name in most places
32-
This is mostly relevant for creating an ontology.
33-
"""
25+
def __init__(self):
26+
"""Constructor for schema to dataframe converter"""
3427
super().__init__()
35-
self._get_as_ids = get_as_ids
3628
self._suffix_rows = {v: [] for v in constants.DF_SUFFIXES}
3729

3830
def _get_object_name_and_id(self, object_name, include_prefix=False):
@@ -73,7 +65,6 @@ def _create_and_add_object_row(self, base_object, attributes="", description="")
7365
constants.attributes: attributes,
7466
constants.subclass_of: base_object,
7567
constants.dcdescription: description.replace("\n", "\\n"),
76-
# constants.equivalent_to: self._get_header_equivalent_to(attributes, base_object)
7768
}
7869
self.output[constants.STRUCT_KEY].loc[len(self.output[constants.STRUCT_KEY])] = new_row
7970

@@ -113,7 +104,8 @@ def _start_section(self, key_class):
113104
pass
114105

115106
def _end_tag_section(self):
116-
self.output[constants.TAG_KEY] = pd.DataFrame(self._suffix_rows[constants.TAG_KEY], dtype=str)
107+
if self._suffix_rows[constants.TAG_KEY]:
108+
self.output[constants.TAG_KEY] = pd.DataFrame(self._suffix_rows[constants.TAG_KEY], dtype=str)
117109

118110
def _end_units_section(self):
119111
if self._suffix_rows[constants.UNIT_KEY]:
@@ -142,11 +134,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0):
142134
constants.attributes: self._format_tag_attributes(tag_entry.attributes),
143135
constants.dcdescription: tag_entry.description,
144136
}
145-
if self._get_as_ids:
146-
new_row[constants.equivalent_to] = self._get_tag_equivalent_to(tag_entry)
147137

148-
# constants.equivalent_to: self._get_tag_equivalent_to(tag_entry),
149-
# Todo: do other sections like this as well for efficiency
150138
self._suffix_rows[constants.TAG_KEY].append(new_row)
151139

152140
def _write_entry(self, entry, parent_node, include_props=True):
@@ -177,16 +165,12 @@ def _write_entry(self, entry, parent_node, include_props=True):
177165
constants.attributes: self._format_tag_attributes(entry.attributes),
178166
constants.dcdescription: entry.description,
179167
}
180-
if self._get_as_ids:
181-
new_row[constants.equivalent_to] = self._get_tag_equivalent_to(entry)
168+
182169
# Handle the special case of units, which have the extra unit class
183170
if hasattr(entry, "unit_class_entry"):
184171
class_entry_name = entry.unit_class_entry.name
185-
if self._get_as_ids:
186-
class_entry_name = f"{entry.unit_class_entry.attributes.get(constants.hed_id)}"
187172
new_row[constants.has_unit_class] = class_entry_name
188173
self._suffix_rows[df_key].append(new_row)
189-
pass
190174

191175
def _write_attribute_entry(self, entry, include_props):
192176
df_key = constants.OBJECT_KEY
@@ -230,170 +214,23 @@ def _write_property_entry(self, entry):
230214
constants.dcdescription: entry.description,
231215
}
232216
self._suffix_rows[constants.ATTRIBUTE_PROPERTY_KEY].append(new_row)
233-
pass
234-
# df.loc[len(df)] = new_row
235217

236218
def _attribute_disallowed(self, attribute):
237219
if super()._attribute_disallowed(attribute):
238220
return True
239221
# Also strip out the HedID and AnnotationProperty attributes in dataframe format
240222
return attribute in [HedKey.HedID, HedKey.AnnotationProperty]
241223

242-
def _get_header_equivalent_to(self, attributes_string, subclass_of):
243-
attribute_strings = []
244-
245-
attributes, _ = text_util._parse_header_attributes_line(attributes_string)
246-
schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True)
247-
248-
if self._get_as_ids:
249-
attribute_strings.append(f"(hed:HED_0000102 some {schema_id})")
250-
else:
251-
attribute_strings.append(f"(inHedSchema some {schema_name})")
252-
253-
for attribute, value in attributes.items():
254-
if attribute not in constants.valid_omn_attributes:
255-
continue
256-
257-
if self._get_as_ids:
258-
attribute = f"hed:{constants.valid_omn_attributes[attribute]}"
259-
attribute_strings.append(f'({attribute} value "{value}")')
260-
261-
if self._get_as_ids:
262-
# we just want the ID for normal HED objects, not schema specific
263-
subclass_of = self._get_object_id(subclass_of, base_id=0, include_prefix=True)
264-
265-
# If they match, we want to leave equivalent_to blank
266-
final_out = " and ".join([subclass_of] + attribute_strings)
267-
if final_out == subclass_of:
268-
return ""
269-
return final_out
270-
271-
def _get_tag_equivalent_to(self, tag_entry):
272-
subclass = self._get_subclass_of(tag_entry)
273-
attribute_strings = []
274-
275-
attribute_strings.extend(self._process_attributes(tag_entry))
276-
attribute_strings.extend(self._process_unit_class_entry(tag_entry))
277-
attribute_strings.extend(self._process_schema_parent(tag_entry))
278-
279-
final_out = " and ".join([subclass] + attribute_strings)
280-
if final_out == subclass:
281-
return ""
282-
return final_out
283-
284-
def _process_attributes(self, tag_entry):
285-
attribute_strings = []
286-
attribute_types = {"object": "some", "data": "value"}
287-
range_types = {
288-
HedKey.TagRange: HedSectionKey.Tags,
289-
HedKey.UnitRange: HedSectionKey.Units,
290-
HedKey.UnitClassRange: HedSectionKey.UnitClasses,
291-
HedKey.ValueClassRange: HedSectionKey.ValueClasses,
292-
HedKey.NumericRange: HedKey.NumericRange,
293-
}
294-
295-
for attribute, value in tag_entry.attributes.items():
296-
attribute_entry = self._schema.attributes.get(attribute)
297-
attribute_type = calculate_attribute_type(attribute_entry)
298-
299-
if self._attribute_disallowed(attribute) or attribute_type == "annotation":
300-
continue
301-
302-
values = self._prepare_values(attribute_entry, value, range_types)
303-
304-
for v in values:
305-
if self._get_as_ids:
306-
attribute = f"hed:{attribute_entry.attributes[HedKey.HedID]}"
307-
attribute_strings.append(f"({attribute} {attribute_types[attribute_type]} {v})")
308-
309-
return attribute_strings
310-
311-
def _prepare_values(self, attribute_entry, value, range_types):
312-
if isinstance(value, str):
313-
values = value.split(",")
314-
values = [v.strip() for v in values]
315-
316-
found_range = self._find_range(attribute_entry, range_types)
317-
if self._get_as_ids and found_range and found_range != HedKey.NumericRange:
318-
section = self._schema[found_range]
319-
if any(section.get(v) is None for v in values):
320-
raise ValueError(f"Cannot find schema entry for {values}")
321-
for v in values:
322-
test_id = section.get(v).attributes.get(HedKey.HedID)
323-
if not test_id:
324-
raise ValueError(f"Schema entry {v} has no hedId.")
325-
values = [f"hed:{section.get(v).attributes[HedKey.HedID]}" for v in values]
326-
elif not found_range:
327-
values = [f'"{v}"' for v in values]
328-
else:
329-
if value is True:
330-
value = "true"
331-
values = [value]
332-
333-
return values
334-
335-
def _find_range(self, attribute_entry, range_types):
336-
for range_type in range_types:
337-
if range_type in attribute_entry.attributes:
338-
return range_types[range_type]
339-
return None
340-
341-
def _process_unit_class_entry(self, tag_entry):
342-
"""Extract a list of unit class equivalent_to strings from a unit class entry.
343-
344-
Parameters:
345-
tag_entry (HedUnitClassEntry): The unit class entry to process.
346-
347-
Returns:
348-
list: A list of strings representing the equivalent_to for the unit class.
349-
"""
350-
attribute_strings = []
351-
352-
if hasattr(tag_entry, "unit_class_entry"):
353-
class_entry_name = tag_entry.unit_class_entry.name
354-
if self._get_as_ids:
355-
class_entry_name = f"hed:{tag_entry.unit_class_entry.attributes.get(constants.hed_id)}"
356-
357-
if self._get_as_ids:
358-
attribute_strings.append(f"(hed:HED_0000103 some {class_entry_name})")
359-
else:
360-
attribute_strings.append(f"({constants.has_unit_class} some {class_entry_name})")
361-
362-
return attribute_strings
363-
364-
def _process_schema_parent(self, tag_entry):
365-
attribute_strings = []
366-
367-
if hasattr(tag_entry, "parent") and not tag_entry.parent:
368-
schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True)
369-
if self._get_as_ids:
370-
attribute_strings.append(f"(hed:HED_0000102 some {schema_id})")
371-
else:
372-
attribute_strings.append(f"(inHedSchema some {schema_name})")
373-
374-
return attribute_strings
375-
376224
def _get_subclass_of(self, tag_entry):
377225
# Special case for HedTag
378226
if isinstance(tag_entry, HedTagEntry):
379-
if self._get_as_ids:
380-
parent_entry = tag_entry.parent
381-
if parent_entry:
382-
return f"hed:{parent_entry.attributes[HedKey.HedID]}"
383-
384-
# HedTag always returns as base object
385-
return "hed:HED_0000005"
386-
else:
387-
return tag_entry.parent.short_tag_name if tag_entry.parent else "HedTag"
227+
return tag_entry.parent.short_tag_name if tag_entry.parent else "HedTag"
388228

389229
base_objects = {
390230
HedSectionKey.Units: "HedUnit",
391231
HedSectionKey.UnitClasses: "HedUnitClass",
392232
HedSectionKey.UnitModifiers: "HedUnitModifier",
393233
HedSectionKey.ValueClasses: "HedValueClass",
394234
}
395-
name, obj_id = self._get_object_name_and_id(base_objects[tag_entry.section_key], include_prefix=True)
396-
397-
if self._get_as_ids:
398-
return obj_id
235+
name, obj_id = self._get_object_name_and_id(base_objects[tag_entry.section_key], include_prefix=False)
399236
return name

0 commit comments

Comments
 (0)