@@ -97,6 +97,24 @@ def test_not_int(self):
9797 errors = _verify_hedid_matches (self .schema_82 .tags , df , hed_id_util ._get_hedid_range ("" , constants .TAG_KEY ))
9898 self .assertEqual (len (errors ), 1 )
9999
def test_verify_unknown_library_skips_range_check(self):
    """Check that an empty ID range produces no out-of-range errors.

    The empty set is passed directly as the allowed range; it stands in
    for what an unregistered library is presumed to yield (the range
    lookup itself is not exercised here).
    """
    no_ids = set()
    records = [{"rdfs:label": "Event", "hedId": "HED_0012001"}]
    frame = pd.DataFrame(records)

    found = _verify_hedid_matches(self.schema_82.tags, frame, no_ids)

    self.assertEqual(
        len(found),
        0,
        "Unknown-library empty range should not trigger range errors",
    )
107+
def test_empty_unused_ids_no_crash(self):
    """Check that verification completes when the ID range is empty.

    Two rows with very different IDs are verified against an empty set.
    The call is expected to return normally (the original author's note
    mentions a ValueError from min()/max() as the failure being guarded
    against) and to report no errors.
    """
    no_ids = set()
    records = [
        {"rdfs:label": "Event", "hedId": "HED_0099999"},
        {"rdfs:label": "Age-#", "hedId": "HED_0000001"},
    ]
    frame = pd.DataFrame(records)

    # Any exception raised here fails the test on its own.
    found = _verify_hedid_matches(self.schema_82.tags, frame, no_ids)

    self.assertEqual(len(found), 0)
117+
100118 def test_get_all_ids_exists (self ):
101119 # Test when hedId column exists and has proper prefixed IDs
102120 df = pd .DataFrame ({"hedId" : ["HED_0000001" , "HED_0000002" , "HED_0000003" ]})
@@ -156,29 +174,53 @@ def test_assign_hed_ids_section(self):
156174
157175 self .assertTrue (df .equals (expected_result ))
158176
def test_assign_actually_mutates_df(self):
    """Check that assign_hed_ids_section writes IDs into the DataFrame it is given.

    Every row starts with an empty hedId; after the call each row must
    hold a string beginning with "HED_".
    """
    df = pd.DataFrame({"hedId": ["", "", ""], "label": ["A", "B", "C"]})
    assign_hed_ids_section(df, {1, 2, 3})

    # na=False makes a missing or non-string cell count as a failure.
    # The builtin all() would treat the NaN that .str.startswith yields
    # for such cells as truthy and let the test pass silently.
    has_prefix = df["hedId"].str.startswith("HED_", na=False)
    self.assertTrue(has_prefix.all(), "IDs were not written into the DataFrame")
183+
def test_assign_preserves_existing_ids(self):
    """Check that rows which already carry a hedId keep it unchanged.

    Rows 0 and 2 start with IDs and must be untouched; row 1 starts empty
    and must end up with a "HED_"-prefixed value.
    """
    initial = {
        "hedId": ["HED_0000005", "", "HED_0000010"],
        "label": ["A", "B", "C"],
    }
    frame = pd.DataFrame(initial)

    assign_hed_ids_section(frame, {1, 2, 3, 4, 5, 10})

    self.assertEqual(frame.loc[0, "hedId"], "HED_0000005")
    self.assertEqual(frame.loc[2, "hedId"], "HED_0000010")
    filled = frame.loc[1, "hedId"]
    self.assertTrue(filled.startswith("HED_"))
191+
159192
class TestUpdateDataframes(unittest.TestCase):
    """Tests for update_dataframes_from_schema."""

    def test_update_dataframes_from_schema(self):
        """Check that extra columns survive an update against the matching schema."""
        # The dataframes are derived from the same schema they are updated
        # against, so the ID verification step has nothing to object to.
        schema = load_schema_version("8.4.0")
        frames = schema.get_as_dataframes()

        # Add a marker column to every frame; it must still be there afterwards.
        marker = "test_column_value"
        for frame in frames.values():
            frame["test_column"] = marker

        updated = update_dataframes_from_schema(frames, schema)

        for key, frame in updated.items():
            if key in constants.DF_EXTRAS:
                continue
            self.assertTrue((frame["test_column"] == marker).all())

    def test_conflict_detected(self):
        """Check that a hedId mismatching the schema raises HedFileError with issues."""
        schema = load_schema_version("8.4.0")
        frames = schema.get_as_dataframes()

        # Overwrite the first populated hedId in the tag frame so that it
        # no longer agrees with the schema the frames were generated from.
        tag_frame = frames[constants.TAG_KEY]
        populated = tag_frame["hedId"].str.startswith("HED_", na=False)
        target = tag_frame[populated].index[0]
        tag_frame.loc[target, "hedId"] = "HED_0000000"

        with self.assertRaises(HedFileError) as caught:
            update_dataframes_from_schema(frames, schema)

        # Checked after the with-block: inside it, this line would never run.
        self.assertGreater(len(caught.exception.issues), 0)
182224
183225
184226if __name__ == "__main__" :
0 commit comments