hed-standard · VisLab · Feb 20, 2026 · Feb 20, 2026
diff --git a/hed/schema/schema_io/df2schema.py b/hed/schema/schema_io/df2schema.py
@@ -90,7 +90,7 @@ def _parse_data(self):
             )
         extras = {key: self.input_data[key] for key in constants.DF_EXTRAS if key in self.input_data}
         for key, _item in extras.items():
-            self._schema.extras[key] = df_util.merge_dataframes(extras[key], self._schema.extras.get(key, None), key)
+            self._schema.extras[key] = df_util.merge_extras_dataframes(extras[key], self._schema.extras.get(key, None))
 
     def _get_prologue_epilogue(self, file_data):
         prologue, epilogue = "", ""

diff --git a/hed/schema/schema_io/df_util.py b/hed/schema/schema_io/df_util.py
@@ -56,6 +56,32 @@ def merge_dataframes(df1, df2, key):
     return combined
 
 
+def merge_extras_dataframes(library_df, standard_df):
+    """Merge library and standard extras DataFrames by combining and deduplicating.
+
+    Rows of standard_df are placed before rows of library_df prior to sorting; None or
+    empty inputs are skipped. If neither input has rows, an empty frame keeping the
+    library columns (or else the standard columns) is returned so the layout is not lost.
+
+    Parameters:
+        library_df (pd.DataFrame or None): DataFrame from library schema extras section
+        standard_df (pd.DataFrame or None): DataFrame from standard schema extras section
+
+    Returns:
+        pd.DataFrame: Combined DataFrame, exact duplicate rows removed, sorted by all columns
+    """
+    frames = [df for df in (standard_df, library_df) if df is not None and not df.empty]
+    if not frames:
+        # Keep the declared columns of an empty input instead of a column-less frame.
+        for df in (library_df, standard_df):
+            if df is not None:
+                return df.iloc[0:0].reset_index(drop=True)
+        return pd.DataFrame()
+
+    combined = frames[0] if len(frames) == 1 else pd.concat(frames, ignore_index=True)
+    return combined.drop_duplicates().sort_values(by=list(combined.columns)).reset_index(drop=True)
+
+
 def merge_dataframe_dicts(df_dict1, df_dict2, key_column=constants.KEY_COLUMN_NAME):
     """Create a new dictionary of DataFrames where dict2 is merged into dict1.
 

diff --git a/hed/schema/schema_io/json2schema.py b/hed/schema/schema_io/json2schema.py
@@ -5,7 +5,7 @@
 import json
 from hed.errors.exceptions import HedFileError, HedExceptions
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
-from hed.schema.schema_io import json_constants
+from hed.schema.schema_io import json_constants, df_util
 from hed.schema.schema_io.base2schema import SchemaLoader
 
 
@@ -498,103 +498,64 @@ def _load_extras(self):
         if json_constants.SOURCES_KEY in self._json_data:
             sources_data = self._json_data[json_constants.SOURCES_KEY]
             for source_data in sources_data:
-                # Parse inLibrary attribute from JSON if present (for merged JSON)
-                in_library_value = source_data.get(HedKey.InLibrary, None)
-                # If not found in JSON but this is a library schema, use self.library
-                if in_library_value is None and self.library:
-                    in_library_value = self.library
-
                 sources_list.append(
                     {
                         df_constants.source: source_data.get("name", ""),
                         df_constants.link: source_data.get("link", ""),
                         df_constants.description: source_data.get(json_constants.DESCRIPTION_KEY, ""),
-                        df_constants.in_library: in_library_value,
                     }
                 )
         # Create DataFrame - if empty, use column specification to match XML/MEDIAWIKI behavior
         if sources_list:
             library_df = pd.DataFrame(sources_list).fillna("").astype(str)
         else:
             library_df = pd.DataFrame([], columns=df_constants.source_columns)
-        # Convert in_library None values to empty strings for consistency
-        if df_constants.in_library in library_df.columns:
-            library_df[df_constants.in_library] = library_df[df_constants.in_library].replace("None", "")
 
         # Merge with existing schema extras if present (from withStandard base schema)
         standard_df = self._schema.extras.get(df_constants.SOURCES_KEY, None)
-        if standard_df is not None and not standard_df.empty:
-            self._schema.extras[df_constants.SOURCES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
-        else:
-            self._schema.extras[df_constants.SOURCES_KEY] = library_df
+        self._schema.extras[df_constants.SOURCES_KEY] = df_util.merge_extras_dataframes(library_df, standard_df)
 
         # Load prefixes - always create DataFrame even if empty
         prefixes_list = []
         if json_constants.PREFIXES_KEY in self._json_data:
             prefixes_data = self._json_data[json_constants.PREFIXES_KEY]
             for prefix_data in prefixes_data:
-                # Parse inLibrary attribute from JSON if present (for merged JSON)
-                in_library_value = prefix_data.get(HedKey.InLibrary, None)
-                # If not found in JSON but this is a library schema, use self.library
-                if in_library_value is None and self.library:
-                    in_library_value = self.library
-
                 prefixes_list.append(
                     {
                         df_constants.prefix: prefix_data.get("name", ""),
                         df_constants.namespace: prefix_data.get("namespace", ""),
                         df_constants.description: prefix_data.get(json_constants.DESCRIPTION_KEY, ""),
-                        df_constants.in_library: in_library_value,
                     }
                 )
         # Create DataFrame - if empty, use column specification to match XML/MEDIAWIKI behavior
         if prefixes_list:
             library_df = pd.DataFrame(prefixes_list).fillna("").astype(str)
         else:
             library_df = pd.DataFrame([], columns=df_constants.prefix_columns)
-        # Convert in_library None values to empty strings for consistency
-        if df_constants.in_library in library_df.columns:
-            library_df[df_constants.in_library] = library_df[df_constants.in_library].replace("None", "")
 
         # Merge with existing schema extras if present (from withStandard base schema)
         standard_df = self._schema.extras.get(df_constants.PREFIXES_KEY, None)
-        if standard_df is not None and not standard_df.empty:
-            self._schema.extras[df_constants.PREFIXES_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
-        else:
-            self._schema.extras[df_constants.PREFIXES_KEY] = library_df
+        self._schema.extras[df_constants.PREFIXES_KEY] = df_util.merge_extras_dataframes(library_df, standard_df)
 
         # Load external annotations - always create DataFrame even if empty
         externals_list = []
         if json_constants.EXTERNAL_ANNOTATIONS_KEY in self._json_data:
             externals_data = self._json_data[json_constants.EXTERNAL_ANNOTATIONS_KEY]
             for external_data in externals_data:
-                # Parse inLibrary attribute from JSON if present (for merged JSON)
-                in_library_value = external_data.get(HedKey.InLibrary, None)
-                # If not found in JSON but this is a library schema, use self.library
-                if in_library_value is None and self.library:
-                    in_library_value = self.library
-
                 externals_list.append(
                     {
                         df_constants.prefix: external_data.get("name", ""),
                         df_constants.id: external_data.get("id", ""),
                         df_constants.iri: external_data.get("iri", ""),
                         df_constants.description: external_data.get(json_constants.DESCRIPTION_KEY, ""),
-                        df_constants.in_library: in_library_value,
                     }
                 )
         # Create DataFrame - if empty, use column specification to match XML/MEDIAWIKI behavior
         if externals_list:
             library_df = pd.DataFrame(externals_list).fillna("").astype(str)
         else:
             library_df = pd.DataFrame([], columns=df_constants.external_annotation_columns)
-        # Convert in_library None values to empty strings for consistency
-        if df_constants.in_library in library_df.columns:
-            library_df[df_constants.in_library] = library_df[df_constants.in_library].replace("None", "")
 
         # Merge with existing schema extras if present (from withStandard base schema)
         standard_df = self._schema.extras.get(df_constants.EXTERNAL_ANNOTATION_KEY, None)
-        if standard_df is not None and not standard_df.empty:
-            self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = pd.concat([standard_df, library_df], ignore_index=True)
-        else:
-            self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = library_df
+        self._schema.extras[df_constants.EXTERNAL_ANNOTATION_KEY] = df_util.merge_extras_dataframes(library_df, standard_df)
diff --git a/hed/schema/schema_io/schema2json.py b/hed/schema/schema_io/schema2json.py
@@ -1,7 +1,6 @@
 """Allows output of HedSchema objects as .json format"""
 
 import json
-import pandas as pd
 from hed.schema.hed_schema_constants import HedSectionKey, HedKey
 from hed.schema.schema_io import json_constants, df_constants
 from hed.schema.schema_io.schema2base import Schema2Base
@@ -64,29 +63,13 @@ def _output_sources(self, hed_schema):
         if sources is None or sources.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries if tracking is available
-        if not self._save_merged and hed_schema.library and hed_schema.with_standard:
-            if df_constants.in_library in sources.columns:
-                sources = sources[sources[df_constants.in_library].notna() & (sources[df_constants.in_library] != "")].copy()
-                if sources.empty:
-                    return
-            # Otherwise fall back to writing all rows (assume all are library entries)
-
         sources_list = []
         for _, row in sources.iterrows():
             source_dict = {
                 "name": row[df_constants.source],
                 "link": row[df_constants.link],
                 json_constants.DESCRIPTION_KEY: row[df_constants.description],
             }
-            # Add inLibrary attribute in merged saves if present
-            if (
-                self._save_merged
-                and df_constants.in_library in row.index
-                and pd.notna(row[df_constants.in_library])
-                and row[df_constants.in_library] != ""
-            ):
-                source_dict[HedKey.InLibrary] = row[df_constants.in_library]
             sources_list.append(source_dict)
 
         self.output[json_constants.SOURCES_KEY] = sources_list
@@ -101,31 +84,13 @@ def _output_prefixes(self, hed_schema):
         if prefixes is None or prefixes.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries if tracking is available
-        if not self._save_merged and hed_schema.library and hed_schema.with_standard:
-            if df_constants.in_library in prefixes.columns:
-                prefixes = prefixes[
-                    prefixes[df_constants.in_library].notna() & (prefixes[df_constants.in_library] != "")
-                ].copy()
-                if prefixes.empty:
-                    return
-            # Otherwise fall back to writing all rows (assume all are library entries)
-
         prefixes_list = []
         for _, row in prefixes.iterrows():
             prefix_dict = {
                 "name": row[df_constants.prefix],
                 "namespace": row[df_constants.namespace],
                 json_constants.DESCRIPTION_KEY: row[df_constants.description],
             }
-            # Add inLibrary attribute in merged saves if present
-            if (
-                self._save_merged
-                and df_constants.in_library in row.index
-                and pd.notna(row[df_constants.in_library])
-                and row[df_constants.in_library] != ""
-            ):
-                prefix_dict[HedKey.InLibrary] = row[df_constants.in_library]
             prefixes_list.append(prefix_dict)
 
         self.output[json_constants.PREFIXES_KEY] = prefixes_list
@@ -140,16 +105,6 @@ def _output_external_annotations(self, hed_schema):
         if externals is None or externals.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries if tracking is available
-        if not self._save_merged and hed_schema.library and hed_schema.with_standard:
-            if df_constants.in_library in externals.columns:
-                externals = externals[
-                    externals[df_constants.in_library].notna() & (externals[df_constants.in_library] != "")
-                ].copy()
-                if externals.empty:
-                    return
-            # Otherwise fall back to writing all rows (assume all are library entries)
-
         externals_list = []
         for _, row in externals.iterrows():
             external_dict = {
@@ -158,14 +113,6 @@ def _output_external_annotations(self, hed_schema):
                 "iri": row[df_constants.iri],
                 json_constants.DESCRIPTION_KEY: row[df_constants.description],
             }
-            # Add inLibrary attribute in merged saves if present
-            if (
-                self._save_merged
-                and df_constants.in_library in row.index
-                and pd.notna(row[df_constants.in_library])
-                and row[df_constants.in_library] != ""
-            ):
-                external_dict[HedKey.InLibrary] = row[df_constants.in_library]
             externals_list.append(external_dict)
 
         self.output[json_constants.EXTERNAL_ANNOTATIONS_KEY] = externals_list

diff --git a/hed/schema/schema_io/schema2wiki.py b/hed/schema/schema_io/schema2wiki.py
@@ -1,7 +1,7 @@
 """Allows output of HedSchema objects as MEDIAWIKI format"""
 
 import pandas as pd
-from hed.schema.hed_schema_constants import HedSectionKey, HedKey
+from hed.schema.hed_schema_constants import HedSectionKey
 from hed.schema.schema_io import wiki_constants, df_constants
 from hed.schema.schema_io.schema2base import Schema2Base
 
@@ -59,33 +59,19 @@ def _output_extra(self, hed_schema, section_key, wiki_key):
             wiki_key (string): The key in the wiki constants for the section.
 
         """
-        # In the base class, we do nothing, but subclasses can override this method.
         extra = hed_schema.get_extras(section_key)
         if extra is None or extra.empty:
             return
 
-        # Filter for unmerged library schemas - only output library entries if tracking is available
-        if not self._save_merged and hed_schema.library and hed_schema.with_standard:
-            if df_constants.in_library in extra.columns:
-                extra = extra[extra[df_constants.in_library].notna() & (extra[df_constants.in_library] != "")].copy()
-                if extra.empty:
-                    return
-            # Otherwise fall back to writing all rows (assume all are library entries)
-
         self._add_blank_line()
         self.current_tag_string = wiki_key
         self._flush_current_tag()
         for _, row in extra.iterrows():
             self.current_tag_string += "*"
-            # Build column string, excluding in_library column for output
+            # Build column string from the columns that have a non-empty value
             column_strings = []
             for col in extra.columns:
-                if col == df_constants.in_library:
-                    # For merged saves, include inLibrary in the output
-                    if self._save_merged and pd.notna(row[col]) and row[col] != "":
-                        column_strings.append(f"{HedKey.InLibrary}={row[col]}")
-                    # For unmerged saves, skip writing in_library
-                else:
+                if pd.notna(row[col]) and row[col] != "":
                     column_strings.append(f"{col}={row[col]}")
             self.current_tag_extra = ",".join(column_strings)
             self._flush_current_tag()