@@ -95,9 +95,15 @@ def add_extension(basename, extension):
9595 TSV files are placed in a 'hedtsv' subdirectory, while other formats
9696 simply append the extension to the basename.
9797
98+ Note: For non-TSV formats the extension is appended with its case preserved,
99+ to stay compatible with case-sensitive filesystems. The extension is
100+ lowercased only for the '.tsv' comparison, never for file path
101+ construction.
102+
98103 Parameters:
99104 basename (str): The base path/name of the schema file without extension.
100105 extension (str): The file extension including the dot (e.g., '.xml', '.tsv').
106+ Case is preserved as-is.
101107
102108 Returns:
103109 str: The complete file path with extension applied.
@@ -107,8 +113,9 @@ def add_extension(basename, extension):
107113 """
108114 if not isinstance (extension , str ):
109115 raise TypeError (f"extension must be a string, got { type (extension ).__name__ } " )
110- extension = extension .lower ()
111- if extension == ".tsv" :
116+ # Normalize only for comparison, not for path construction
117+ extension_lower = extension .lower ()
118+ if extension_lower == ".tsv" :
112119 parent_path , basename = os .path .split (basename )
113120 return os .path .join (parent_path , "hedtsv" , basename )
114121 return basename + extension
@@ -120,13 +127,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
120127 Groups schema files by their base name, tracking which formats (extensions)
121128 have been modified. Handles special TSV directory structure (hedtsv subfolder).
122129
130+ Returns a nested dict that maps basename -> normalized_extension -> actual_filepath.
131+ This preserves the original file casing for case-sensitive filesystems while
132+ still allowing normalized extension comparisons.
133+
123134 Example input:
124- ["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.xml "]
135+ ["test_schema.mediawiki", "hedtsv/test_schema/test_schema_Tag.tsv", "other_schema.XML"]
125136
126137 Example output:
127138 {
128- "test_schema": {".mediawiki", ".tsv"},
129- "other_schema": {".xml"}
139+ "test_schema": {".mediawiki": "test_schema.mediawiki", ".tsv": "hedtsv/.../test_schema_Tag.tsv"},
140+ "other_schema": {".xml": "other_schema.XML"}
130141 }
131142
132143 Parameters:
@@ -135,20 +146,21 @@ def sort_base_schemas(filenames, add_all_extensions=False):
135146 Default is False.
136147
137148 Returns:
138- dict: A dictionary where keys are the basename (str), and values are sets of
139- extensions modified. Can include .tsv, .mediawiki, .xml, and .json.
149+ dict: A nested dictionary where keys are basenames (str), values are dicts mapping
150+ normalized extensions (str, lowercase) to actual file paths (str, preserving case).
151+ Can include .tsv, .mediawiki, .xml, and .json as keys.
140152 """
141- schema_files = defaultdict (set )
153+ schema_files = defaultdict (dict )
142154 for file_path in filenames :
143155 if not os .path .exists (file_path ):
144156 print (f"Ignoring deleted file { file_path } ." )
145157 continue
146158 basename , extension = os .path .splitext (file_path )
147- extension = extension .lower ()
148- if extension == ".xml" or extension == ".mediawiki" :
149- schema_files [basename ]. add ( extension )
159+ extension_lower = extension .lower () # Normalize for comparison only
160+ if extension_lower == ".xml" or extension_lower == ".mediawiki" :
161+ schema_files [basename ][ extension_lower ] = file_path
150162 continue
151- elif extension == ".tsv" :
163+ elif extension_lower == ".tsv" :
152164 tsv_basename = basename .rpartition ("_" )[0 ]
153165 full_parent_path , real_basename = os .path .split (tsv_basename )
154166 full_parent_path , real_basename2 = os .path .split (full_parent_path )
@@ -160,14 +172,17 @@ def sort_base_schemas(filenames, add_all_extensions=False):
160172 print (f"Ignoring file { file_path } . .tsv files must be in a subfolder with the same name." )
161173 continue
162174 real_name = os .path .join (real_parent_path , real_basename )
163- schema_files [real_name ]. add ( extension )
175+ schema_files [real_name ][ extension_lower ] = file_path
164176 else :
165177 print (f"Ignoring file { file_path } " )
166178
167179 if add_all_extensions :
168180 for schema_name in schema_files :
169181 for extension in all_extensions :
170- schema_files [schema_name ].add (extension )
182+ # Only add if not already present - don't overwrite actual paths
183+ if extension not in schema_files [schema_name ]:
184+ # Construct path for missing extensions - use the add_extension logic
185+ schema_files [schema_name ][extension ] = add_extension (schema_name , extension )
171186
172187 return schema_files
173188
@@ -209,24 +224,25 @@ def validate_all_schemas(schema_files):
209224 for a prerelease schema, ensures all formats exist and are identical.
210225
211226 Parameters:
212- schema_files (dict): Dictionary mapping basenames (str) to sets of extensions (str)
213- representing all files changed.
227+ schema_files (dict): Dictionary mapping basenames (str) to dicts of
228+ {normalized_extension (str) -> actual_filepath (str)} representing
229+ all files changed.
214230
215231 Returns:
216232 list: A list of all validation issues found across all schemas.
217233 """
218234 all_issues = []
219- for basename , extensions in schema_files .items ():
235+ for basename , extension_paths in schema_files .items ():
220236 single_schema_issues = []
221- for extension in extensions :
222- full_path = add_extension ( basename , extension )
223- single_schema_issues += validate_schema (full_path )
237+ for _extension , file_path in extension_paths . items () :
238+ # Use the actual file path to preserve case on case-sensitive filesystems
239+ single_schema_issues += validate_schema (file_path )
224240
225- if len (extensions ) > 1 and not single_schema_issues and "prerelease" in basename :
241+ if len (extension_paths ) > 1 and not single_schema_issues and "prerelease" in basename :
226242 single_schema_issues += validate_all_schema_formats (basename )
227243
228244 print (f"Validating: { basename } ..." )
229- print (f"Extensions: { extensions } " )
245+ print (f"Extensions: { set ( extension_paths . keys ()) } " )
230246 if single_schema_issues :
231247 for issue in single_schema_issues :
232248 print (issue )
0 commit comments