|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Command-line script for extracting sidecar templates from BIDS datasets. |
| 4 | +
|
| 5 | +Logging Options: |
| 6 | +- Default: WARNING level logs go to stderr (quiet unless there are issues) |
| 7 | +- --verbose or --log-level INFO: Show informational messages about progress |
| 8 | +- --log-level DEBUG: Show detailed debugging information |
| 9 | +- --log-file FILE: Save logs to a file instead of/in addition to stderr |
| 10 | +- --log-quiet: When using --log-file, suppress stderr output (file only) |
| 11 | +
|
| 12 | +Examples: |
| 13 | + extract_bids_sidecar /path/to/dataset --suffix events |
| 14 | + extract_bids_sidecar /path/to/dataset --suffix events --verbose |
| 15 | + extract_bids_sidecar /path/to/dataset --suffix events --log-file log.txt --log-quiet |
| 16 | +""" |
| 17 | + |
| 18 | +import argparse |
| 19 | +import json |
| 20 | +import logging |
| 21 | +import sys |
| 22 | +from hed import _version as vr |
| 23 | +from hed.tools import BidsDataset |
| 24 | +from hed.tools.analysis.tabular_summary import TabularSummary |
| 25 | + |
| 26 | + |
def get_parser():
    """Create the argument parser for extract_bids_sidecar.

    Returns:
        argparse.ArgumentParser: Parser with one positional argument (the dataset
            root) plus options for the file suffix, value/skip columns, logging,
            output file, and excluded directories.
    """
    parser = argparse.ArgumentParser(description="Extract sidecar template from a BIDS dataset.")
    parser.add_argument("data_path", help="Full path of BIDS dataset root directory.")
    parser.add_argument(
        "-s",
        "--suffix",
        dest="suffix",
        required=True,
        help="Suffix (without underscore) of tsv files to process (e.g., 'events', 'participants').",
    )
    parser.add_argument(
        "-vc",
        "--value-columns",
        dest="value_columns",
        nargs="*",
        default=None,
        help="List of column names to treat as value columns.",
    )
    parser.add_argument(
        "-sc",
        "--skip-columns",
        dest="skip_columns",
        nargs="*",
        default=["onset", "duration", "sample"],
        help="List of column names to skip in the extraction.",
    )
    parser.add_argument(
        "-l",
        "--log-level",
        # argparse applies `type` before checking `choices`, so upper-casing here is
        # what makes the documented case-insensitivity true ("info" -> "INFO").
        type=str.upper,
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        default="WARNING",
        help="Log level (case insensitive). Default: WARNING",
    )
    parser.add_argument(
        "-lf",
        "--log-file",
        dest="log_file",
        default=None,
        help="Full path to save log output to file. If not specified, logs go to stderr.",
    )
    parser.add_argument(
        "-lq",
        "--log-quiet",
        action="store_true",
        dest="log_quiet",
        help="If present, suppress log output to stderr (only applies if --log-file is used).",
    )
    parser.add_argument(
        "-o",
        "--output_file",
        dest="output_file",
        default="",
        help="Full path of output file for the sidecar template -- otherwise output written to standard out.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        help="If present, output informative messages as computation progresses (equivalent to --log-level INFO).",
    )
    parser.add_argument(
        "-x",
        "--exclude-dirs",
        nargs="*",
        default=["sourcedata", "derivatives", "code", "stimuli"],
        dest="exclude_dirs",
        help="Directories name to exclude in search for files to process.",
    )
    return parser
| 97 | + |
| 98 | + |
def extract_template(args):
    """Extract sidecar template from the BIDS dataset.

    Builds a BidsDataset restricted to ``args.suffix``, summarizes the matching
    file group, and extracts a sidecar template from that summary.

    Parameters:
        args: Parsed command line arguments. Reads ``data_path``, ``suffix``,
            ``exclude_dirs``, ``value_columns`` and ``skip_columns``.

    Returns:
        dict: Sidecar template dictionary, or an empty dict when the dataset has
            no file group for the requested suffix.

    Raises:
        Exception: Any exception raised while building the dataset or the summary
            is logged and then re-raised unchanged.
    """
    logger = logging.getLogger("extract_bids_sidecar")
    logger.info(f"Data directory: {args.data_path}")
    logger.info(f"HED tools version: {str(vr.get_versions())}")
    logger.debug(f"Exclude directories: {args.exclude_dirs}")
    logger.debug(f"File suffix: {args.suffix}")
    logger.debug(f"Value columns: {args.value_columns}")
    logger.debug(f"Skip columns: {args.skip_columns}")

    try:
        logger.info("Creating BIDS dataset object...")
        bids = BidsDataset(args.data_path, suffixes=[args.suffix], exclude_dirs=args.exclude_dirs)
        logger.info("BIDS dataset created")
        logger.info(f"Found file groups: {list(bids.file_groups.keys())}")

        # Get the file group for the specified suffix
        file_group = bids.get_file_group(args.suffix)
        if not file_group:
            logger.warning(f"No file group found for suffix '{args.suffix}'")
            return {}

        logger.debug(f"File group '{args.suffix}' has {len(file_group.datafile_dict)} data files")

        # The timing/indexing columns are always skipped; user-specified columns are
        # appended to them. dict.fromkeys drops repeats while keeping first-seen order:
        # the parser's own default for --skip-columns is this same list, so without
        # de-duplication a default run would pass every column name twice.
        default_skip = ["onset", "duration", "sample"]
        skip_cols = list(dict.fromkeys(default_skip + list(args.skip_columns or [])))

        logger.debug(f"Skip columns: {skip_cols}")

        # Create TabularSummary using the summarize method of BidsFileGroup
        logger.info("Creating tabular summary...")
        summary = file_group.summarize(value_cols=args.value_columns, skip_cols=skip_cols)

        logger.info(f"Processed {summary.total_files} files")
        logger.info(f"Total events: {summary.total_events}")

        # Extract the sidecar template
        logger.info("Extracting sidecar template...")
        template = summary.extract_sidecar_template()
        logger.info(f"Template extracted with {len(template)} columns")

        return template

    except Exception as e:
        # Short message at ERROR level; the traceback is only emitted at DEBUG level.
        logger.error(f"Error during template extraction: {e}")
        logger.debug("Full exception details:", exc_info=True)
        raise
| 157 | + |
| 158 | + |
def format_output(template, args):
    """Serialize the sidecar template together with the HED tools version.

    Parameters:
        template (dict): The sidecar template dictionary.
        args: Parsed command line arguments (accepted but not consulted here).

    Returns:
        str: JSON text, indented by 4 spaces, with the template under
            "sidecar_template" and the version string under "hedtools_version".
    """
    payload = {"sidecar_template": template}
    payload["hedtools_version"] = str(vr.get_versions())
    return json.dumps(payload, indent=4)
| 171 | + |
| 172 | + |
def _configure_logging(args):
    """Install root-logger handlers per the parsed logging options; return the level name used."""
    log_level = args.log_level.upper() if args.log_level else "WARNING"
    if args.verbose:
        log_level = "INFO"
    level = getattr(logging, log_level)

    # Replace whatever handlers are already on the root logger. Each removed handler
    # is also closed (as logging.basicConfig(force=True) does) so that a FileHandler
    # left over from an earlier main() call in the same process releases its file.
    root_logger = logging.getLogger()
    for handler in root_logger.handlers[:]:
        root_logger.removeHandler(handler)
        handler.close()

    # Set the root logger level - this is crucial for filtering
    root_logger.setLevel(level)

    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # File handler if log file specified
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file, mode="w", encoding="utf-8")
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        root_logger.addHandler(file_handler)

    # Console handler (stderr) unless explicitly quieted and file logging is used
    if not args.log_quiet or not args.log_file:
        console_handler = logging.StreamHandler(sys.stderr)
        console_handler.setLevel(level)
        console_handler.setFormatter(formatter)
        root_logger.addHandler(console_handler)

    return log_level


def main(arg_list=None):
    """Main entry point for the script.

    Parses the command line, configures logging, extracts the sidecar template,
    and writes it as JSON to ``--output_file`` or to standard output.

    Parameters:
        arg_list (list or None): Argument strings to parse; None lets argparse
            read them from ``sys.argv``.

    Returns:
        int: 0 when the extraction and output complete.

    Raises:
        Exception: Any exception from extract_template is logged and re-raised.
    """
    parser = get_parser()
    args = parser.parse_args(arg_list)

    log_level = _configure_logging(args)

    logger = logging.getLogger("extract_bids_sidecar")
    logger.info(f"Starting BIDS sidecar extraction with log level: {log_level}")
    if args.log_file:
        logger.info(f"Log output will be saved to: {args.log_file}")

    try:
        template = extract_template(args)
    except Exception as e:
        logger.error(f"Extraction failed with exception: {e}")
        raise

    # Format output as JSON
    output = format_output(template, args)

    # Write to file or print to stdout
    if args.output_file:
        logger.info(f"Writing output to: {args.output_file}")
        with open(args.output_file, "w", encoding="utf-8") as fp:
            fp.write(output)
    else:
        print(output)

    logger.info("Extraction completed successfully")
    return 0
| 239 | + |
| 240 | + |
if __name__ == "__main__":
    # Run the CLI and use main()'s return value as the process exit status.
    raise SystemExit(main())
0 commit comments