Skip to content

Commit 70d545b

Browse files
authored
Merge pull request hed-standard#1145 from VisLab/json_schema
Added extract sidecar script
2 parents 4ea7ce1 + dbf79f1 commit 70d545b

3 files changed

Lines changed: 427 additions & 0 deletions

File tree

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Command-line script for extracting sidecar templates from BIDS datasets.
4+
5+
Logging Options:
6+
- Default: WARNING level logs go to stderr (quiet unless there are issues)
7+
- --verbose or --log-level INFO: Show informational messages about progress
8+
- --log-level DEBUG: Show detailed debugging information
9+
- --log-file FILE: Save logs to a file instead of/in addition to stderr
10+
- --log-quiet: When using --log-file, suppress stderr output (file only)
11+
12+
Examples:
13+
extract_bids_sidecar /path/to/dataset --suffix events
14+
extract_bids_sidecar /path/to/dataset --suffix events --verbose
15+
extract_bids_sidecar /path/to/dataset --suffix events --log-file log.txt --log-quiet
16+
"""
17+
18+
import argparse
19+
import json
20+
import logging
21+
import sys
22+
from hed import _version as vr
23+
from hed.tools import BidsDataset
24+
from hed.tools.analysis.tabular_summary import TabularSummary
25+
26+
27+
def get_parser():
    """Create the argument parser for extract_bids_sidecar.

    Returns:
        argparse.ArgumentParser: Parser with one positional argument (data_path), the
            required --suffix option, and optional column, logging, output, and
            directory-exclusion options.
    """
    parser = argparse.ArgumentParser(description="Extract sidecar template from a BIDS dataset.")
    parser.add_argument("data_path", help="Full path of BIDS dataset root directory.")
    parser.add_argument(
        "-s",
        "--suffix",
        dest="suffix",
        required=True,
        help="Suffix (without underscore) of tsv files to process (e.g., 'events', 'participants').",
    )
    parser.add_argument(
        "-vc",
        "--value-columns",
        dest="value_columns",
        nargs="*",
        default=None,
        help="List of column names to treat as value columns.",
    )
    parser.add_argument(
        "-sc",
        "--skip-columns",
        dest="skip_columns",
        nargs="*",
        default=["onset", "duration", "sample"],
        help="List of column names to skip in the extraction.",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        # argparse applies `type` before validating against `choices`, so upper-casing
        # here makes the option case insensitive, as the help text promises.
        type=str.upper,
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="WARNING",
        help="Log level (case insensitive). Default: WARNING",
    )
    parser.add_argument(
        "-lf",
        "--log-file",
        dest="log_file",
        default=None,
        help="Full path to save log output to file. If not specified, logs go to stderr.",
    )
    parser.add_argument(
        "-lq",
        "--log-quiet",
        action="store_true",
        dest="log_quiet",
        help="If present, suppress log output to stderr (only applies if --log-file is used).",
    )
    parser.add_argument(
        "-o",
        "--output_file",
        dest="output_file",
        default="",
        help="Full path of output file for the sidecar template -- otherwise output written to standard out.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).",
    )
    parser.add_argument(
        "-x",
        "--exclude-dirs",
        nargs="*",
        default=["sourcedata", "derivatives", "code", "stimuli"],
        dest="exclude_dirs",
        help="Directories name to exclude in search for files to process.",
    )
    return parser
97+
98+
99+
def extract_template(args):
    """Extract a sidecar template from the BIDS dataset described by the arguments.

    Parameters:
        args: Parsed command line arguments. The attributes read here are data_path,
            suffix, exclude_dirs, value_columns, and skip_columns.

    Returns:
        dict: Sidecar template dictionary, or an empty dictionary when the dataset has
            no file group for the requested suffix.

    Raises:
        Exception: Any exception raised while building the dataset or the summary is
            logged and then re-raised unchanged.
    """
    logger = logging.getLogger("extract_bids_sidecar")
    logger.info(f"Data directory: {args.data_path}")
    logger.info(f"HED tools version: {str(vr.get_versions())}")
    logger.debug(f"Exclude directories: {args.exclude_dirs}")
    logger.debug(f"File suffix: {args.suffix}")
    logger.debug(f"Value columns: {args.value_columns}")
    logger.debug(f"Skip columns: {args.skip_columns}")

    try:
        logger.info("Creating BIDS dataset object...")
        bids = BidsDataset(args.data_path, suffixes=[args.suffix], exclude_dirs=args.exclude_dirs)
        logger.info("BIDS dataset created")
        logger.info(f"Found file groups: {list(bids.file_groups.keys())}")

        # Get the file group for the specified suffix
        file_group = bids.get_file_group(args.suffix)
        if not file_group:
            logger.warning(f"No file group found for suffix '{args.suffix}'")
            return {}

        logger.debug(f"File group '{args.suffix}' has {len(file_group.datafile_dict)} data files")

        # The timing/indexing columns are always skipped. The command-line default for
        # skip_columns repeats the same names, so merge through dict.fromkeys to keep
        # first-seen order while dropping the duplicates.
        always_skipped = ["onset", "duration", "sample"]
        requested = list(args.skip_columns) if args.skip_columns else []
        skip_cols = list(dict.fromkeys(always_skipped + requested))

        logger.debug(f"Skip columns: {skip_cols}")

        # Create TabularSummary using the summarize method of BidsFileGroup
        logger.info("Creating tabular summary...")
        summary = file_group.summarize(value_cols=args.value_columns, skip_cols=skip_cols)

        logger.info(f"Processed {summary.total_files} files")
        logger.info(f"Total events: {summary.total_events}")

        # Extract the sidecar template
        logger.info("Extracting sidecar template...")
        template = summary.extract_sidecar_template()
        logger.info(f"Template extracted with {len(template)} columns")

        return template

    except Exception as e:
        # Log a short message at ERROR and the traceback only at DEBUG, then let the
        # caller decide how to handle the failure.
        logger.error(f"Error during template extraction: {e}")
        logger.debug("Full exception details:", exc_info=True)
        raise
157+
158+
159+
def format_output(template, args):
    """Serialize the sidecar template, together with the HED tools version, as JSON.

    Parameters:
        template (dict): The sidecar template dictionary
        args: Parsed command line arguments (not used by this function)

    Returns:
        str: JSON text, indented by 4 spaces, with the keys "sidecar_template" and
            "hedtools_version"
    """
    payload = {
        "sidecar_template": template,
        "hedtools_version": str(vr.get_versions()),
    }
    serialized = json.dumps(payload, indent=4)
    return serialized
171+
172+
173+
def main(arg_list=None):
    """Main entry point for the script.

    Parses the command line, configures logging on the root logger, extracts the
    sidecar template, and writes it as JSON to the output file or to standard out.

    Parameters:
        arg_list (list or None): Arguments to parse. When None, argparse reads sys.argv.

    Returns:
        int: 0 when the extraction completes.

    Raises:
        Exception: Any exception from the extraction is logged and re-raised.
    """
    args = get_parser().parse_args(arg_list)

    # Resolve the effective log level. --verbose only ever raises verbosity to INFO,
    # so it never downgrades an explicit --log-level DEBUG.
    log_level = args.log_level.upper() if args.log_level else "WARNING"
    if args.verbose and getattr(logging, log_level) > logging.INFO:
        log_level = "INFO"
    numeric_level = getattr(logging, log_level)

    # Configure logging format
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    formatter = logging.Formatter(log_format, datefmt=date_format)

    # Clear any existing handlers from root logger
    root_logger = logging.getLogger()
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)

    # Set the root logger level - this is crucial for filtering
    root_logger.setLevel(numeric_level)

    # File handler if log file specified
    file_handler = None
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file, mode="w", encoding="utf-8")
        file_handler.setLevel(numeric_level)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    try:
        # Console handler (stderr) unless explicitly quieted and file logging is used
        if not (args.log_quiet and args.log_file):
            console_handler = logging.StreamHandler(sys.stderr)
            console_handler.setLevel(numeric_level)
            console_handler.setFormatter(formatter)
            root_logger.addHandler(console_handler)

        logger = logging.getLogger("extract_bids_sidecar")
        logger.info(f"Starting BIDS sidecar extraction with log level: {log_level}")
        if args.log_file:
            logger.info(f"Log output will be saved to: {args.log_file}")

        try:
            template = extract_template(args)
        except Exception as e:
            logger.error(f"Extraction failed with exception: {e}")
            raise

        # Format output as JSON
        output = format_output(template, args)

        # Write to file or print to stdout
        if args.output_file:
            logger.info(f"Writing output to: {args.output_file}")
            with open(args.output_file, "w", encoding="utf-8") as fp:
                fp.write(output)
        else:
            print(output)

        logger.info("Extraction completed successfully")
        return 0
    finally:
        # This function opened the log file, so it also flushes and closes it; otherwise
        # every programmatic call with --log-file would leave an open file handle behind.
        if file_handler is not None:
            root_logger.removeHandler(file_handler)
            file_handler.close()
239+
240+
241+
# Run the command-line entry point only when this file is executed as a script,
# and hand its return value to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)

hed/tools/analysis/tabular_summary.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ def _update_dataframe(self, data, name):
198198
self.total_files = self.total_files + 1
199199
self.total_events = self.total_events + len(df.index)
200200
for col_name, col_values in df.items():
201+
# Always skip HED columns
202+
if col_name in ["HED", "HED_assembled"]:
203+
continue
201204
if self.skip_cols and col_name in self.skip_cols:
202205
continue
203206
if col_name in self.value_info.keys():

0 commit comments

Comments
 (0)