Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion pyprophet/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ class RunnerConfig:
ipf_max_peakgroup_pep (float): Max PEP for peak group consideration in IPF.
ipf_max_transition_isotope_overlap (float): Max isotope overlap for transition selection in IPF.
ipf_min_transition_sn (float): Min log S/N for transition selection in IPF.
transition_training_require_unique_mapping (bool): Whether to restrict transition semi-supervised target training peaks to uniquely mapped transitions.
transition_training_require_phospho_loss (bool): Whether to restrict transition semi-supervised target training peaks to phospho-loss transitions.

glyco (bool): Whether glycopeptide-specific scoring is enabled.
density_estimator (str): Score density estimation method ('kde' or 'gmm').
Expand All @@ -124,6 +126,8 @@ class RunnerConfig:
threads (int): Number of CPU threads to use; -1 means all CPUs.
test (bool): Whether to enable test mode with deterministic behavior.
color_palette (str): Color palette used in PDF report rendering.
report_mode (str): PDF report scope: 'full', 'main', or 'none'.
apply_weights_run_batch_size (int): Number of runs to score together per streamed OSW apply batch. `0` means auto.
"""

# Scoring / classifier options
Expand Down Expand Up @@ -160,6 +164,8 @@ class RunnerConfig:
ipf_max_peakgroup_pep: float = 0.7
ipf_max_transition_isotope_overlap: float = 0.5
ipf_min_transition_sn: float = 0.0
transition_training_require_unique_mapping: bool = False
transition_training_require_phospho_loss: bool = False

# Glyco options
glyco: bool = False
Expand All @@ -172,6 +178,8 @@ class RunnerConfig:
threads: int = 1
test: bool = False
color_palette: str = "normal"
report_mode: Literal["full", "main", "none"] = "full"
apply_weights_run_batch_size: int = 0

def __post_init__(self):
# Check for auto main score selection
Expand Down Expand Up @@ -215,6 +223,8 @@ def __str__(self):
f" ipf_max_peakgroup_pep={self.ipf_max_peakgroup_pep}",
f" ipf_max_transition_isotope_overlap={self.ipf_max_transition_isotope_overlap}",
f" ipf_min_transition_sn={self.ipf_min_transition_sn}",
f" transition_training_require_unique_mapping={self.transition_training_require_unique_mapping}",
f" transition_training_require_phospho_loss={self.transition_training_require_phospho_loss}",
]
)

Expand All @@ -235,6 +245,8 @@ def __str__(self):
f" threads={self.threads}",
f" test={self.test}",
f" color_palette='{self.color_palette}'",
f" report_mode='{self.report_mode}'",
f" apply_weights_run_batch_size={self.apply_weights_run_batch_size}",
")",
]
)
Expand All @@ -247,7 +259,11 @@ def __repr__(self):
f"ss_main_score='{self.ss_main_score}', xeval_fraction={self.xeval_fraction}, "
f"xeval_num_iter={self.xeval_num_iter}, ss_initial_fdr={self.ss_initial_fdr}, "
f"ss_iteration_fdr={self.ss_iteration_fdr}, ss_num_iter={self.ss_num_iter}, "
f"group_id='{self.group_id}', glyco={self.glyco}, threads={self.threads})"
f"group_id='{self.group_id}', glyco={self.glyco}, threads={self.threads}, "
f"transition_training_require_unique_mapping={self.transition_training_require_unique_mapping}, "
f"transition_training_require_phospho_loss={self.transition_training_require_phospho_loss}, "
f"report_mode='{self.report_mode}', "
f"apply_weights_run_batch_size={self.apply_weights_run_batch_size})"
)


Expand All @@ -267,6 +283,7 @@ class RunnerIOConfig(BaseIOConfig):
"""

runner: RunnerConfig
run_id_filter: Optional[Union[int, List[int], tuple]] = None
extra_writes: dict = field(init=False)

def __post_init__(self):
Expand Down Expand Up @@ -294,6 +311,7 @@ def to_kwargs(self) -> Dict[str, Any]:
"subsample_ratio": self.subsample_ratio,
"level": self.level,
"prefix": self.prefix,
"run_id_filter": self.run_id_filter,
**vars(self.runner),
}

Expand Down Expand Up @@ -331,6 +349,8 @@ def from_cli_args(
ipf_max_peakgroup_pep,
ipf_max_transition_isotope_overlap,
ipf_min_transition_sn,
transition_training_require_unique_mapping,
transition_training_require_phospho_loss,
add_alignment_features,
glyco,
density_estimator,
Expand All @@ -340,6 +360,8 @@ def from_cli_args(
test,
color_palette,
main_score_selection_report,
report_mode,
apply_weights_run_batch_size,
):
"""
Creates a configuration object from command-line arguments.
Expand Down Expand Up @@ -399,6 +421,8 @@ def from_cli_args(
ipf_max_peakgroup_pep=ipf_max_peakgroup_pep,
ipf_max_transition_isotope_overlap=ipf_max_transition_isotope_overlap,
ipf_min_transition_sn=ipf_min_transition_sn,
transition_training_require_unique_mapping=transition_training_require_unique_mapping,
transition_training_require_phospho_loss=transition_training_require_phospho_loss,
add_alignment_features=add_alignment_features,
glyco=glyco,
density_estimator=density_estimator,
Expand All @@ -407,6 +431,8 @@ def from_cli_args(
threads=threads,
test=test,
color_palette=color_palette,
report_mode=report_mode,
apply_weights_run_batch_size=apply_weights_run_batch_size,
)

return cls(
Expand Down
54 changes: 53 additions & 1 deletion pyprophet/cli/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# Defer import of runner to avoid premature sklearn import before OMP_NUM_THREADS is set
# from ..scoring.runner import PyProphetLearner, PyProphetWeightApplier

# Run-count cutoff for `--report_mode auto`: when the input has strictly more runs than
# this, `score` resolves the report mode to 'main'; otherwise it resolves to 'full'.
LARGE_RUN_MAIN_REPORT_THRESHOLD = 50


# PyProphet semi-supervised learning and scoring
@click.command(name="score", cls=AdvancedHelpCommand)
Expand Down Expand Up @@ -176,6 +178,18 @@
help="Minimum log signal-to-noise level to consider transitions in IPF. Set -1 to disable this filter.",
hidden=True,
)
@click.option(
"--transition_training_require_unique_mapping/--no-transition_training_require_unique_mapping",
default=False,
show_default=True,
help="Experimental: when learning transition scores, restrict target training peaks to uniquely mapped transitions.",
)
@click.option(
"--transition_training_require_phospho_loss/--no-transition_training_require_phospho_loss",
default=False,
show_default=True,
help="Experimental: when learning transition scores, restrict target training peaks to phospho-loss transitions.",
)
# Glyco/GproDIA Options
@click.option(
"--glyco/--no-glyco",
Expand Down Expand Up @@ -224,6 +238,20 @@
help="Generate a report for main score selection process.",
hidden=True,
)
@click.option(
"--report_mode",
default="auto",
show_default=True,
type=click.Choice(["auto", "full", "main", "none"]),
help="PDF report scope: 'full' writes all report pages, 'main' writes only the core score diagnostics, and 'none' disables report generation.",
)
@click.option(
"--apply_weights_run_batch_size",
default=0,
show_default=True,
type=int,
help="When streamed OSW weight application is used, score this many runs per batch. Use 0 for automatic batching and 1 to force one run at a time.",
)
# Processing
@click.option(
"--threads",
Expand Down Expand Up @@ -283,12 +311,16 @@ def score(
ipf_max_peakgroup_pep,
ipf_max_transition_isotope_overlap,
ipf_min_transition_sn,
transition_training_require_unique_mapping,
transition_training_require_phospho_loss,
glyco,
density_estimator,
grid_size,
tric_chromprob,
color_palette,
main_score_selection_report,
report_mode,
apply_weights_run_batch_size,
threads,
test,
profile, # NOQA: F841 unused variable, but used in decorator
Expand Down Expand Up @@ -357,6 +389,8 @@ def score(
ipf_max_peakgroup_pep,
ipf_max_transition_isotope_overlap,
ipf_min_transition_sn,
transition_training_require_unique_mapping,
transition_training_require_phospho_loss,
add_alignment_features,
glyco,
density_estimator,
Expand All @@ -366,6 +400,8 @@ def score(
test,
color_palette,
main_score_selection_report,
report_mode,
apply_weights_run_batch_size,
)

write_logfile(
Expand All @@ -374,10 +410,13 @@ def score(
ctx.obj["LOG_HEADER"],
)

num_runs = None
if subsample_ratio == 1.0 or report_mode == "auto":
num_runs = get_num_runs(infile, config.file_type)

# Auto-subsample based on number of runs if applicable
if subsample_ratio == 1.0:
# Check if we should auto-subsample
num_runs = get_num_runs(infile, config.file_type)
if num_runs > 20:
config.subsample_ratio = 1.0 / num_runs
logger.info(
Expand All @@ -393,6 +432,19 @@ def score(
"Using full dataset for semi-supervised learning."
)

if report_mode == "auto":
if num_runs and num_runs > LARGE_RUN_MAIN_REPORT_THRESHOLD:
config.runner.report_mode = "main"
logger.info(
f"Large experiment detected ({num_runs} runs). "
"Switching report_mode to 'main' to skip expensive identification/quantification report pages. "
"Use --report_mode full to force the complete report."
)
else:
config.runner.report_mode = "full"
else:
config.runner.report_mode = report_mode

# Validate file type and subsample ratio. OSW, parquet, parquet_split, and parquet_split_multi all support subsampling
if (
config.file_type not in ["osw", "parquet", "parquet_split", "parquet_split_multi"]
Expand Down
14 changes: 14 additions & 0 deletions pyprophet/io/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,14 @@ def save_weights(self, weights):
f"Classifier {self.classifier} not supported for saving weights."
)

def save_scorer(self, scorer):
    """
    Hook for storing a scorer object on backends that are able to do so.

    This default version performs no work and hands back ``None``.

    Args:
        scorer: The scorer object to persist; it is not used here.

    Returns:
        None
    """
    return

def _prepare_score_dataframe(
self, df: pd.DataFrame, level: str, prefix: str
) -> pd.DataFrame:
Expand Down Expand Up @@ -446,6 +454,11 @@ def _write_pdf_report(self, result, pi0):
Write a PDF report if the scoring results contain final statistics.
"""

report_mode = getattr(self.config.runner, "report_mode", "full")
if report_mode == "none":
logger.info("Skipping PDF report generation (report_mode=none).")
return

if result.final_statistics is None:
return

Expand Down Expand Up @@ -503,6 +516,7 @@ def _write_pdf_report(self, result, pi0):
self.config.runner.color_palette,
self.level,
df=df,
report_mode=report_mode,
)
logger.success(f"{pdf_path} written.")

Expand Down
12 changes: 6 additions & 6 deletions pyprophet/io/export/osw.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,9 +781,9 @@ def _add_transition_data(self, data, con, cfg):
if check_sqlite_table(con, "SCORE_TRANSITION"):
transition_query = f"""
SELECT FEATURE_TRANSITION.FEATURE_ID AS id,
GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area,
GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex,
GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation
GROUP_CONCAT(AREA_INTENSITY,';' ORDER BY TRANSITION.ID) AS aggr_Peak_Area,
GROUP_CONCAT(APEX_INTENSITY,';' ORDER BY TRANSITION.ID) AS aggr_Peak_Apex,
GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';' ORDER BY TRANSITION.ID) AS aggr_Fragment_Annotation
FROM FEATURE_TRANSITION
INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID
INNER JOIN SCORE_TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = SCORE_TRANSITION.TRANSITION_ID AND FEATURE_TRANSITION.FEATURE_ID = SCORE_TRANSITION.FEATURE_ID
Expand All @@ -793,9 +793,9 @@ def _add_transition_data(self, data, con, cfg):
else:
transition_query = """
SELECT FEATURE_ID AS id,
GROUP_CONCAT(AREA_INTENSITY,';') AS aggr_Peak_Area,
GROUP_CONCAT(APEX_INTENSITY,';') AS aggr_Peak_Apex,
GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';') AS aggr_Fragment_Annotation
GROUP_CONCAT(AREA_INTENSITY,';' ORDER BY TRANSITION.ID) AS aggr_Peak_Area,
GROUP_CONCAT(APEX_INTENSITY,';' ORDER BY TRANSITION.ID) AS aggr_Peak_Apex,
GROUP_CONCAT(TRANSITION.ID || "_" || TRANSITION.TYPE || TRANSITION.ORDINAL || "_" || TRANSITION.CHARGE,';' ORDER BY TRANSITION.ID) AS aggr_Fragment_Annotation
FROM FEATURE_TRANSITION
INNER JOIN TRANSITION ON FEATURE_TRANSITION.TRANSITION_ID = TRANSITION.ID
GROUP BY FEATURE_ID
Expand Down
Loading
Loading