@@ -351,6 +351,325 @@ def test_unit_columns_with_has_unit_class(self):
351351 for value in has_unit_class_values :
352352 self .assertFalse (value .startswith ("hed:HED_" ), f"hasUnitClass should contain names, not IDs: { value } " )
353353
def test_tsv_output_uses_lf_line_endings(self):
    """Verify that the Tag TSV produced by save_as_dataframes has LF-only line endings.

    The file is inspected as raw bytes so the check sees the line endings
    exactly as they were written, i.e. LF (\\n) and never CRLF (\\r\\n).
    """
    import tempfile
    from tests.schema.util_create_schemas import load_schema1

    source_schema = load_schema1()

    with tempfile.TemporaryDirectory() as scratch_dir:
        base_name = os.path.join(scratch_dir, "test_schema.tsv")
        source_schema.save_as_dataframes(base_name)

        # The Tag sheet is expected next to the base name with "_Tag" inserted.
        tag_tsv = base_name.replace(".tsv", "_Tag.tsv")
        self.assertTrue(os.path.exists(tag_tsv), "Tag TSV file should exist")

        # Binary mode: no newline translation between disk and the check.
        with open(tag_tsv, "rb") as stream:
            raw = stream.read()

    # The bytes are already in memory, so the directory may be gone by now.
    self.assertNotIn(b"\r\n", raw, "File should not contain CRLF line endings")
    self.assertIn(b"\n", raw, "File should contain LF line endings")
377+
def test_tsv_reading_handles_both_line_endings(self):
    """Test that TSV files can be read correctly with either LF or CRLF line endings.

    The schema is saved once under the ``lf_schema`` base name, every
    generated ``lf_schema_*.tsv`` file is rewritten with CRLF endings under
    the ``crlf_schema`` base name, and both sets are loaded and compared.
    """
    import tempfile
    from hed.schema import load_schema
    from tests.schema.util_create_schemas import load_schema1

    schema = load_schema1()

    with tempfile.TemporaryDirectory() as tmpdir:
        lf_path = os.path.join(tmpdir, "lf_schema.tsv")
        crlf_path = os.path.join(tmpdir, "crlf_schema.tsv")

        # Save the schema with LF endings (our standard)
        schema.save_as_dataframes(lf_path)

        # Discover what was actually written instead of relying on a fixed
        # suffix list, so the CRLF copy is complete even if the writer emits
        # additional sheets.  The listing is taken before any copy is written.
        lf_prefix = "lf_schema_"
        lf_files = sorted(
            name for name in os.listdir(tmpdir) if name.startswith(lf_prefix) and name.endswith(".tsv")
        )
        self.assertIn("lf_schema_Tag.tsv", lf_files, "Tag TSV file should exist")

        for name in lf_files:
            with open(os.path.join(tmpdir, name), "rb") as f:
                content = f.read()

            # Normalize first so input that already holds CRLF does not turn
            # into "\r\r\n" after the conversion.
            crlf_content = content.replace(b"\r\n", b"\n").replace(b"\n", b"\r\n")

            crlf_name = "crlf_schema_" + name[len(lf_prefix):]
            with open(os.path.join(tmpdir, crlf_name), "wb") as f:
                f.write(crlf_content)

        # Both should load successfully
        lf_schema = load_schema(lf_path)
        crlf_schema = load_schema(crlf_path)

    # And they should be equivalent
    self.assertEqual(lf_schema, crlf_schema, "Schemas with different line endings should be equivalent")
433+
def test_xml_output_uses_lf_line_endings(self):
    """Verify that ``self.schema`` saved as XML contains LF but no CRLF line endings."""
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        target = os.path.join(scratch_dir, "test_schema.xml")
        self.schema.save_as_xml(target)

        # Open in binary mode so the bytes are compared exactly as stored.
        with open(target, "rb") as stream:
            raw = stream.read()

    # CRLF (b"\r\n") must be absent, plain LF (b"\n") must be present.
    self.assertNotIn(b"\r\n", raw, "XML file should not contain CRLF line endings")
    self.assertIn(b"\n", raw, "XML file should contain LF line endings")
452+
def test_mediawiki_output_uses_lf_line_endings(self):
    """Verify that ``self.schema`` saved as MediaWiki contains LF but no CRLF line endings."""
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        target = os.path.join(scratch_dir, "test_schema.mediawiki")
        self.schema.save_as_mediawiki(target)

        # Raw bytes are needed; text mode could translate the newlines.
        with open(target, "rb") as stream:
            payload = stream.read()

    # No CRLF pair anywhere, but at least one bare LF.
    self.assertNotIn(b"\r\n", payload, "MediaWiki file should not contain CRLF line endings")
    self.assertIn(b"\n", payload, "MediaWiki file should contain LF line endings")
471+
def test_json_output_uses_lf_line_endings(self):
    """Verify that ``self.schema`` saved as JSON contains LF but no CRLF line endings."""
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        target = os.path.join(scratch_dir, "test_schema.json")
        self.schema.save_as_json(target)

        # Inspect the file byte-for-byte rather than as decoded text.
        with open(target, "rb") as stream:
            data = stream.read()

    # First rule out CRLF, then confirm that LF is actually used.
    self.assertNotIn(b"\r\n", data, "JSON file should not contain CRLF line endings")
    self.assertIn(b"\n", data, "JSON file should contain LF line endings")
490+
def test_xml_library_schema_uses_lf(self):
    """Verify LF-only line endings for the testlib_3.0.0 library schema saved as XML.

    Both the merged and the unmerged save variants are written and inspected.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        library = load_schema_version("testlib_3.0.0")

        for merged in (True, False):
            target = os.path.join(scratch_dir, f"testlib_merged_{merged}.xml")
            library.save_as_xml(target, save_merged=merged)

            # Binary read keeps the on-disk line endings intact.
            with open(target, "rb") as stream:
                raw = stream.read()

            crlf_message = f"XML library schema (save_merged={merged}) should not contain CRLF"
            self.assertNotIn(b"\r\n", raw, crlf_message)
            self.assertIn(b"\n", raw, "XML file should contain LF line endings")
513+
def test_mediawiki_library_schema_uses_lf(self):
    """Verify LF-only line endings for the testlib_3.0.0 library schema saved as MediaWiki.

    The schema is written once merged and once unmerged; each file is checked.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        library = load_schema_version("testlib_3.0.0")

        for merged in (True, False):
            target = os.path.join(scratch_dir, f"testlib_merged_{merged}.mediawiki")
            library.save_as_mediawiki(target, save_merged=merged)

            # Read the bytes exactly as written.
            with open(target, "rb") as stream:
                payload = stream.read()

            crlf_message = f"MediaWiki library schema (save_merged={merged}) should not contain CRLF"
            self.assertNotIn(b"\r\n", payload, crlf_message)
            self.assertIn(b"\n", payload, "MediaWiki file should contain LF line endings")
536+
def test_json_library_schema_uses_lf(self):
    """Verify LF-only line endings for the testlib_3.0.0 library schema saved as JSON.

    Covers both values of ``save_merged``.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        library = load_schema_version("testlib_3.0.0")

        for merged in (True, False):
            target = os.path.join(scratch_dir, f"testlib_merged_{merged}.json")
            library.save_as_json(target, save_merged=merged)

            # Byte-level read so the stored line endings are what gets checked.
            with open(target, "rb") as stream:
                data = stream.read()

            crlf_message = f"JSON library schema (save_merged={merged}) should not contain CRLF"
            self.assertNotIn(b"\r\n", data, crlf_message)
            self.assertIn(b"\n", data, "JSON file should contain LF line endings")
555+
def test_tsv_library_schema_uses_lf(self):
    """Test that library schemas saved as TSV use LF line endings.

    The testlib_3.0.0 library schema is saved as dataframes in both merged
    and unmerged form, and the resulting Tag TSV file is checked as raw bytes.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        # Load a library schema
        lib_schema = load_schema_version("testlib_3.0.0")

        # Test both merged and unmerged saves
        for save_merged in (True, False):
            tsv_path = os.path.join(tmpdir, f"testlib_merged_{save_merged}.tsv")
            lib_schema.save_as_dataframes(tsv_path, save_merged=save_merged)

            tag_path = tsv_path.replace(".tsv", "_Tag.tsv")

            # Fail loudly when the file is missing: silently skipping would
            # let this test pass without checking a single byte.
            self.assertTrue(
                os.path.exists(tag_path),
                f"TSV library schema Tag file (save_merged={save_merged}) should exist",
            )

            # Read file in binary mode to check actual line endings
            with open(tag_path, "rb") as f:
                content = f.read()

            self.assertNotIn(
                b"\r\n",
                content,
                f"TSV library schema Tag file (save_merged={save_merged}) should not contain CRLF",
            )
            self.assertIn(b"\n", content, "TSV file should contain LF line endings")
581+
def test_all_formats_roundtrip_with_lf(self):
    """Test that all formats can be saved and reloaded with LF line endings preserved.

    Schema version 8.3.0 is saved as XML, MediaWiki, JSON and TSV.  Each saved
    form is reloaded and compared with the original, and every written file
    is checked as raw bytes for the absence of CRLF.
    """
    import tempfile

    # Imported locally, as test_tsv_reading_handles_both_line_endings does,
    # so this test does not depend on a module-level import of load_schema.
    from hed.schema import load_schema

    with tempfile.TemporaryDirectory() as tmpdir:
        schema = load_schema_version("8.3.0")

        # Single-file formats share the same save / reload / inspect steps.
        single_file_formats = (
            ("XML", "test.xml", schema.save_as_xml),
            ("MediaWiki", "test.mediawiki", schema.save_as_mediawiki),
            ("JSON", "test.json", schema.save_as_json),
        )
        for label, filename, save in single_file_formats:
            path = os.path.join(tmpdir, filename)
            save(path)
            self.assertEqual(schema, load_schema(path), f"{label} schema should round-trip correctly")

            # Verify LF in saved file
            with open(path, "rb") as f:
                self.assertNotIn(b"\r\n", f.read(), f"Saved {label} should use LF")

        # Test TSV
        tsv_path = os.path.join(tmpdir, "test.tsv")
        schema.save_as_dataframes(tsv_path)
        self.assertEqual(schema, load_schema(tsv_path), "TSV schema should round-trip correctly")

        # Verify LF in every TSV file that was actually written, rather than
        # in a fixed subset of suffixes that is skipped when a file is absent.
        tsv_prefix, tsv_ext = "test_", ".tsv"
        tsv_files = sorted(
            name for name in os.listdir(tmpdir) if name.startswith(tsv_prefix) and name.endswith(tsv_ext)
        )
        self.assertTrue(tsv_files, "save_as_dataframes should have written at least one TSV file")

        for name in tsv_files:
            suffix = name[len(tsv_prefix):-len(tsv_ext)]
            with open(os.path.join(tmpdir, name), "rb") as f:
                self.assertNotIn(b"\r\n", f.read(), f"Saved TSV {suffix} should use LF")
631+
def test_no_carriage_return_anywhere_in_output(self):
    """Verify that no output format of schema 8.4.0 contains a single carriage return byte.

    This is stricter than a CRLF check: any lone b"\\r" also fails the test.
    """
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        schema = load_schema_version("8.4.0")

        # Single-file formats: file extension paired with the matching save method.
        writers = (
            ("xml", schema.save_as_xml),
            ("mediawiki", schema.save_as_mediawiki),
            ("json", schema.save_as_json),
        )
        for ext, write in writers:
            target = os.path.join(scratch_dir, f"test.{ext}")
            write(target)

            with open(target, "rb") as stream:
                cr_count = stream.read().count(b"\r")

            self.assertEqual(cr_count, 0, f"Format {ext} should have ZERO carriage return characters, found {cr_count}")

        # TSV output is spread over several files next to the base name.
        schema.save_as_dataframes(os.path.join(scratch_dir, "test.tsv"))

        for file in os.listdir(scratch_dir):
            if not (file.startswith("test") and file.endswith(".tsv")):
                continue

            with open(os.path.join(scratch_dir, file), "rb") as stream:
                cr_count = stream.read().count(b"\r")

            self.assertEqual(
                cr_count, 0, f"TSV file {file} should have ZERO carriage return characters, found {cr_count}"
            )
672+
354673
355674if __name__ == "__main__" :
356675 unittest .main ()
0 commit comments