|
4 | 4 | from hed.schema.schema_io.df_util import ( |
5 | 5 | create_empty_dataframes, |
6 | 6 | get_library_name_and_id, |
7 | | - calculate_attribute_type, |
8 | 7 | ) |
9 | 8 | from hed.schema.schema_io.schema2base import Schema2Base |
10 | | -from hed.schema.schema_io import text_util |
11 | 9 | import pandas as pd |
12 | 10 | import hed.schema.schema_io.df_constants as constants |
13 | 11 | from hed.schema.hed_schema_entry import HedTagEntry |
|
24 | 22 |
|
25 | 23 |
|
26 | 24 | class Schema2DF(Schema2Base): |
def __init__(self):
    """Create a schema-to-dataframe converter.

    Prepares one empty row buffer for every suffix listed in
    ``constants.DF_SUFFIXES``.
    """
    super().__init__()
    # Every suffix needs its own list object: rows are appended to these
    # buffers independently while the sections are written.
    row_buffers = {}
    for suffix in constants.DF_SUFFIXES:
        row_buffers[suffix] = []
    self._suffix_rows = row_buffers
37 | 29 |
|
38 | 30 | def _get_object_name_and_id(self, object_name, include_prefix=False): |
@@ -73,7 +65,6 @@ def _create_and_add_object_row(self, base_object, attributes="", description="") |
73 | 65 | constants.attributes: attributes, |
74 | 66 | constants.subclass_of: base_object, |
75 | 67 | constants.dcdescription: description.replace("\n", "\\n"), |
76 | | - # constants.equivalent_to: self._get_header_equivalent_to(attributes, base_object) |
77 | 68 | } |
78 | 69 | self.output[constants.STRUCT_KEY].loc[len(self.output[constants.STRUCT_KEY])] = new_row |
79 | 70 |
|
@@ -113,7 +104,8 @@ def _start_section(self, key_class): |
113 | 104 | pass |
114 | 105 |
|
def _end_tag_section(self):
    """Turn the buffered tag rows into the tag dataframe.

    When no tag rows were buffered, ``self.output[constants.TAG_KEY]`` is
    left exactly as it was.
    """
    tag_rows = self._suffix_rows[constants.TAG_KEY]
    if not tag_rows:
        # Nothing collected: do not overwrite the existing output entry.
        return
    self.output[constants.TAG_KEY] = pd.DataFrame(tag_rows, dtype=str)
117 | 109 |
|
118 | 110 | def _end_units_section(self): |
119 | 111 | if self._suffix_rows[constants.UNIT_KEY]: |
@@ -142,11 +134,7 @@ def _write_tag_entry(self, tag_entry, parent_node=None, level=0): |
142 | 134 | constants.attributes: self._format_tag_attributes(tag_entry.attributes), |
143 | 135 | constants.dcdescription: tag_entry.description, |
144 | 136 | } |
145 | | - if self._get_as_ids: |
146 | | - new_row[constants.equivalent_to] = self._get_tag_equivalent_to(tag_entry) |
147 | 137 |
|
148 | | - # constants.equivalent_to: self._get_tag_equivalent_to(tag_entry), |
149 | | - # Todo: do other sections like this as well for efficiency |
150 | 138 | self._suffix_rows[constants.TAG_KEY].append(new_row) |
151 | 139 |
|
152 | 140 | def _write_entry(self, entry, parent_node, include_props=True): |
@@ -177,16 +165,12 @@ def _write_entry(self, entry, parent_node, include_props=True): |
177 | 165 | constants.attributes: self._format_tag_attributes(entry.attributes), |
178 | 166 | constants.dcdescription: entry.description, |
179 | 167 | } |
180 | | - if self._get_as_ids: |
181 | | - new_row[constants.equivalent_to] = self._get_tag_equivalent_to(entry) |
| 168 | + |
182 | 169 | # Handle the special case of units, which have the extra unit class |
183 | 170 | if hasattr(entry, "unit_class_entry"): |
184 | 171 | class_entry_name = entry.unit_class_entry.name |
185 | | - if self._get_as_ids: |
186 | | - class_entry_name = f"{entry.unit_class_entry.attributes.get(constants.hed_id)}" |
187 | 172 | new_row[constants.has_unit_class] = class_entry_name |
188 | 173 | self._suffix_rows[df_key].append(new_row) |
189 | | - pass |
190 | 174 |
|
191 | 175 | def _write_attribute_entry(self, entry, include_props): |
192 | 176 | df_key = constants.OBJECT_KEY |
@@ -230,170 +214,23 @@ def _write_property_entry(self, entry): |
230 | 214 | constants.dcdescription: entry.description, |
231 | 215 | } |
232 | 216 | self._suffix_rows[constants.ATTRIBUTE_PROPERTY_KEY].append(new_row) |
233 | | - pass |
234 | | - # df.loc[len(df)] = new_row |
235 | 217 |
|
def _attribute_disallowed(self, attribute):
    """Report whether ``attribute`` must be left out of the dataframe output.

    Parameters:
        attribute: The attribute key to check.

    Returns:
        bool: True if the base class disallows the attribute, or if it is
              ``HedKey.HedID`` or ``HedKey.AnnotationProperty``.
    """
    rejected_by_base = super()._attribute_disallowed(attribute)
    if not rejected_by_base:
        # strip out hedID (and the annotation property marker) in dataframe format
        return attribute in [HedKey.HedID, HedKey.AnnotationProperty]
    return True
241 | 223 |
|
242 | | - def _get_header_equivalent_to(self, attributes_string, subclass_of): |
243 | | - attribute_strings = [] |
244 | | - |
245 | | - attributes, _ = text_util._parse_header_attributes_line(attributes_string) |
246 | | - schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True) |
247 | | - |
248 | | - if self._get_as_ids: |
249 | | - attribute_strings.append(f"(hed:HED_0000102 some {schema_id})") |
250 | | - else: |
251 | | - attribute_strings.append(f"(inHedSchema some {schema_name})") |
252 | | - |
253 | | - for attribute, value in attributes.items(): |
254 | | - if attribute not in constants.valid_omn_attributes: |
255 | | - continue |
256 | | - |
257 | | - if self._get_as_ids: |
258 | | - attribute = f"hed:{constants.valid_omn_attributes[attribute]}" |
259 | | - attribute_strings.append(f'({attribute} value "{value}")') |
260 | | - |
261 | | - if self._get_as_ids: |
262 | | - # we just want the ID for normal HED objects, not schema specific |
263 | | - subclass_of = self._get_object_id(subclass_of, base_id=0, include_prefix=True) |
264 | | - |
265 | | - # If they match, we want to leave equivalent_to blank |
266 | | - final_out = " and ".join([subclass_of] + attribute_strings) |
267 | | - if final_out == subclass_of: |
268 | | - return "" |
269 | | - return final_out |
270 | | - |
271 | | - def _get_tag_equivalent_to(self, tag_entry): |
272 | | - subclass = self._get_subclass_of(tag_entry) |
273 | | - attribute_strings = [] |
274 | | - |
275 | | - attribute_strings.extend(self._process_attributes(tag_entry)) |
276 | | - attribute_strings.extend(self._process_unit_class_entry(tag_entry)) |
277 | | - attribute_strings.extend(self._process_schema_parent(tag_entry)) |
278 | | - |
279 | | - final_out = " and ".join([subclass] + attribute_strings) |
280 | | - if final_out == subclass: |
281 | | - return "" |
282 | | - return final_out |
283 | | - |
284 | | - def _process_attributes(self, tag_entry): |
285 | | - attribute_strings = [] |
286 | | - attribute_types = {"object": "some", "data": "value"} |
287 | | - range_types = { |
288 | | - HedKey.TagRange: HedSectionKey.Tags, |
289 | | - HedKey.UnitRange: HedSectionKey.Units, |
290 | | - HedKey.UnitClassRange: HedSectionKey.UnitClasses, |
291 | | - HedKey.ValueClassRange: HedSectionKey.ValueClasses, |
292 | | - HedKey.NumericRange: HedKey.NumericRange, |
293 | | - } |
294 | | - |
295 | | - for attribute, value in tag_entry.attributes.items(): |
296 | | - attribute_entry = self._schema.attributes.get(attribute) |
297 | | - attribute_type = calculate_attribute_type(attribute_entry) |
298 | | - |
299 | | - if self._attribute_disallowed(attribute) or attribute_type == "annotation": |
300 | | - continue |
301 | | - |
302 | | - values = self._prepare_values(attribute_entry, value, range_types) |
303 | | - |
304 | | - for v in values: |
305 | | - if self._get_as_ids: |
306 | | - attribute = f"hed:{attribute_entry.attributes[HedKey.HedID]}" |
307 | | - attribute_strings.append(f"({attribute} {attribute_types[attribute_type]} {v})") |
308 | | - |
309 | | - return attribute_strings |
310 | | - |
311 | | - def _prepare_values(self, attribute_entry, value, range_types): |
312 | | - if isinstance(value, str): |
313 | | - values = value.split(",") |
314 | | - values = [v.strip() for v in values] |
315 | | - |
316 | | - found_range = self._find_range(attribute_entry, range_types) |
317 | | - if self._get_as_ids and found_range and found_range != HedKey.NumericRange: |
318 | | - section = self._schema[found_range] |
319 | | - if any(section.get(v) is None for v in values): |
320 | | - raise ValueError(f"Cannot find schema entry for {values}") |
321 | | - for v in values: |
322 | | - test_id = section.get(v).attributes.get(HedKey.HedID) |
323 | | - if not test_id: |
324 | | - raise ValueError(f"Schema entry {v} has no hedId.") |
325 | | - values = [f"hed:{section.get(v).attributes[HedKey.HedID]}" for v in values] |
326 | | - elif not found_range: |
327 | | - values = [f'"{v}"' for v in values] |
328 | | - else: |
329 | | - if value is True: |
330 | | - value = "true" |
331 | | - values = [value] |
332 | | - |
333 | | - return values |
334 | | - |
335 | | - def _find_range(self, attribute_entry, range_types): |
336 | | - for range_type in range_types: |
337 | | - if range_type in attribute_entry.attributes: |
338 | | - return range_types[range_type] |
339 | | - return None |
340 | | - |
341 | | - def _process_unit_class_entry(self, tag_entry): |
342 | | - """Extract a list of unit class equivalent_to strings from a unit class entry. |
343 | | -
|
344 | | - Parameters: |
345 | | - tag_entry (HedUnitClassEntry): The unit class entry to process. |
346 | | -
|
347 | | - Returns: |
348 | | - list: A list of strings representing the equivalent_to for the unit class. |
349 | | - """ |
350 | | - attribute_strings = [] |
351 | | - |
352 | | - if hasattr(tag_entry, "unit_class_entry"): |
353 | | - class_entry_name = tag_entry.unit_class_entry.name |
354 | | - if self._get_as_ids: |
355 | | - class_entry_name = f"hed:{tag_entry.unit_class_entry.attributes.get(constants.hed_id)}" |
356 | | - |
357 | | - if self._get_as_ids: |
358 | | - attribute_strings.append(f"(hed:HED_0000103 some {class_entry_name})") |
359 | | - else: |
360 | | - attribute_strings.append(f"({constants.has_unit_class} some {class_entry_name})") |
361 | | - |
362 | | - return attribute_strings |
363 | | - |
364 | | - def _process_schema_parent(self, tag_entry): |
365 | | - attribute_strings = [] |
366 | | - |
367 | | - if hasattr(tag_entry, "parent") and not tag_entry.parent: |
368 | | - schema_name, schema_id = self._get_object_name_and_id("HedSchema", include_prefix=True) |
369 | | - if self._get_as_ids: |
370 | | - attribute_strings.append(f"(hed:HED_0000102 some {schema_id})") |
371 | | - else: |
372 | | - attribute_strings.append(f"(inHedSchema some {schema_name})") |
373 | | - |
374 | | - return attribute_strings |
375 | | - |
def _get_subclass_of(self, tag_entry):
    """Return the name of the object that ``tag_entry`` is a subclass of.

    Parameters:
        tag_entry: The schema entry being written.

    Returns:
        str: For a ``HedTagEntry``, the parent's short tag name, or "HedTag"
             when the tag has no parent. For any other entry, the name that
             ``_get_object_name_and_id`` reports for the base object of the
             entry's section.

    Raises:
        KeyError: If the entry's ``section_key`` has no base object listed below.
    """
    # Special case for HedTag: a tag subclasses its parent tag, and only
    # parentless tags fall back to the base object.
    if isinstance(tag_entry, HedTagEntry):
        parent = tag_entry.parent
        return parent.short_tag_name if parent else "HedTag"

    base_objects = {
        HedSectionKey.Units: "HedUnit",
        HedSectionKey.UnitClasses: "HedUnitClass",
        HedSectionKey.UnitModifiers: "HedUnitModifier",
        HedSectionKey.ValueClasses: "HedValueClass",
    }
    # Only the name half of the (name, id) pair is needed here.
    name, _ = self._get_object_name_and_id(base_objects[tag_entry.section_key], include_prefix=False)
    return name
0 commit comments