From 5b4b877e0e3663fd7c5a1aa3901467d546c84950 Mon Sep 17 00:00:00 2001 From: Jash Rana Date: Wed, 17 Jun 2026 11:01:01 +0000 Subject: [PATCH 1/2] Refactor empty filter handling in GribSplitterV2 --- weather_sp/splitter_pipeline/file_splitters.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/weather_sp/splitter_pipeline/file_splitters.py b/weather_sp/splitter_pipeline/file_splitters.py index f3d18b46..f625c2b3 100644 --- a/weather_sp/splitter_pipeline/file_splitters.py +++ b/weather_sp/splitter_pipeline/file_splitters.py @@ -207,7 +207,7 @@ def split_data(self) -> None: grib_copy_cmd = shutil.which('grib_copy') grib_get_cmd = shutil.which('grib_get') uniq_cmd = shutil.which('uniq') - for cmd, name in [(grib_get_cmd, 'grib_copy'), (grib_get_cmd, 'grib_get'), (uniq_cmd, 'uniq')]: + for cmd, name in [(grib_copy_cmd, 'grib_copy'), (grib_get_cmd, 'grib_get'), (uniq_cmd, 'uniq')]: if not cmd: raise EnvironmentError(f'binary {name!r} is not available in the current environment!') @@ -235,9 +235,22 @@ def split_data(self) -> None: grib_get_args = [grib_get_cmd, '-p', split_dims_arg, local_file.name] grib_get_process = subprocess.Popen(grib_get_args, stdout=subprocess.PIPE) uniq_output = subprocess.check_output((uniq_cmd,), stdin=grib_get_process.stdout) + + # Filter out empty strings that might result from trailing newlines. + decoded_text = uniq_output.decode('utf-8').strip() + if not decoded_text: + self.logger.warning( + 'No GRIB messages in %r matched the filter expression: %r. Skipping...', + self.input_path, self.grib_filter_expression + ) + metrics.Metrics.counter('file_splitters', 'skipped_empty_filter').inc() + return + + decoded_uniq = decoded_text.splitlines() + output_paths = [] skipped_paths = [] - for line in uniq_output.decode('utf-8').rstrip('\n').split('\n'): + for line in decoded_uniq: splits = dict(zip(split_dims, line.split(' '))) output_path = self.output_info.formatted_output_path(splits) if self.should_skip_file(output_path): From 4f644616587edccce8335730e8eee9bbb757a9d2 Mon Sep 17 00:00:00 2001 From: Jash Rana Date: Tue, 23 Jun 2026 09:56:20 +0000 Subject: [PATCH 2/2] Added 'total_skipped' metric counter to track all the skipped input files. --- weather_sp/splitter_pipeline/file_splitters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/weather_sp/splitter_pipeline/file_splitters.py b/weather_sp/splitter_pipeline/file_splitters.py index f625c2b3..db968b26 100644 --- a/weather_sp/splitter_pipeline/file_splitters.py +++ b/weather_sp/splitter_pipeline/file_splitters.py @@ -244,6 +244,7 @@ def split_data(self) -> None: self.input_path, self.grib_filter_expression ) metrics.Metrics.counter('file_splitters', 'skipped_empty_filter').inc() + metrics.Metrics.counter('file_splitters', 'total_skipped').inc() return decoded_uniq = decoded_text.splitlines() @@ -258,7 +259,7 @@ def split_data(self) -> None: continue output_paths.append(output_path) if not output_paths: - metrics.Metrics.counter('file_splitters', 'skipped').inc() + metrics.Metrics.counter('file_splitters', 'total_skipped').inc() self.logger.info('Skipping %s, file already split into: %s', repr(self.input_path), ', '.join(skipped_paths)) return