From 529090c38a1fccff706f01efda79ee8ecea8293e Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sun, 24 May 2026 11:37:45 -0500 Subject: [PATCH 1/6] Update colocboost.ipynb Changed `colocboost_analysis_pipeline` to `colocboost_pipeline`. See PR/494 in StatFunGen/pecotmr (https://github.com/StatFunGen/pecotmr/pull/494) for rationale and detailed changes. --- .../mnm_analysis/mnm_methods/colocboost.ipynb | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/colocboost.ipynb b/code/mnm_analysis/mnm_methods/colocboost.ipynb index 4e769c005..c8e61d129 100644 --- a/code/mnm_analysis/mnm_methods/colocboost.ipynb +++ b/code/mnm_analysis/mnm_methods/colocboost.ipynb @@ -291,7 +291,9 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ "# Multi-ancestry: per-study LD reference specified in gwas_meta_multi.tsv\n", "sos run pipeline/colocboost.ipynb colocboost \\\n", @@ -306,9 +308,7 @@ " --separate-gwas --xqtl-coloc \\\n", " --region-name ENSG00000239945 \\\n", " --phenotype-names trait_A\n" - ], - "outputs": [], - "execution_count": null + ] }, { "cell_type": "markdown", @@ -1182,7 +1182,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "kernel": "SoS", "tags": [] @@ -1350,21 +1350,21 @@ " \n", " # - analysis parameters\n", " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", - " res <- colocboost_analysis_pipeline(region_data = fdat, \n", - " event_filters = event_filters,\n", - " # - analysis\n", - " xqtl_coloc = xqtl_coloc,\n", - " joint_gwas = joint_gwas,\n", - " separate_gwas = separate_gwas,\n", - " # - individual QC\n", - " maf_cutoff = maf_cutoff, \n", - " pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,\n", - " # - sumstat QC\n", - " pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,\n", - " keep_indel = keep_indel,\n", - " qc_method = qc_method,\n", - " impute = impute, \n", - " impute_opts = impute_opts)\n", + " res <- colocboost_pipeline(region_data = fdat, \n", + " event_filters = event_filters,\n", + " # - analysis\n", + " xqtl_coloc = xqtl_coloc,\n", + " joint_gwas = joint_gwas,\n", + " separate_gwas = separate_gwas,\n", + " # - individual QC\n", + " maf_cutoff = maf_cutoff, \n", + " pip_cutoff_to_skip_ind = pip_cutoff_to_skip_ind,\n", + " # - sumstat QC\n", + " pip_cutoff_to_skip_sumstat = pip_cutoff_to_skip_sumstat,\n", + " keep_indel = keep_indel,\n", + " qc_method = qc_method,\n", + " impute = impute, \n", + " impute_opts = impute_opts)\n", "\n", " # Reorganize outputs\n", " computing_time <- c(computing_time, res$computing_time)\n", From 6ecfcd1362a01fb6f511603509aa7d277707818a Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 30 May 2026 03:07:23 -0500 Subject: [PATCH 2/6] Update mnm_regression.ipynb --- code/mnm_analysis/mnm_methods/mnm_regression.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb index c6a5b1d73..91bc5b366 100644 --- a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -979,7 +979,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples),\n", + " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = \"${_meta_info[2]}\"),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = NULL,\n", @@ -1014,7 +1014,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples),\n", + " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = \"${_meta_info[2]}\"),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = ${min_twas_maf},\n", From 1e59f74cc87968af9af5d8a64fcf2002848d0e11 Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 30 May 2026 03:20:39 -0500 Subject: [PATCH 3/6] Update mnm_regression.ipynb --- code/mnm_analysis/mnm_methods/mnm_regression.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb index 91bc5b366..84f6bee5f 100644 --- a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -979,7 +979,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = \"${_meta_info[2]}\"),\n", + " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = NULL,\n", @@ -1014,7 +1014,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = \"${_meta_info[2]}\"),\n", + " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = ${min_twas_maf},\n", From 69849cee1a31526af424c253ba0506e14c463c2d Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 30 May 2026 10:00:44 -0500 Subject: [PATCH 4/6] Update mnm_regression.ipynb --- code/mnm_analysis/mnm_methods/mnm_regression.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb index 84f6bee5f..ac2893b0c 100644 --- a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -1414,14 +1414,14 @@ " gene_id <- names(combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]])[i]\n", " \n", " # Extract top_loci variants\n", - " variants1 <- unique(susie_result_1[[\"top_loci\"]][[\"variant_id\"]])\n", + " variants1 <- unique(susie_result_1[[\"top_loci\"]][[\"variant\"]])\n", " if (length(variants1) > 0) {\n", " different_in_all_genes <- TRUE\n", " \n", " for (j in 1:length(combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]])) {\n", " if (i != j) {\n", " susie_result_2 <- combined_list[[\"extracted_regional_window_combined_susie_result\"]][[condition_name]][[j]]\n", - " variants2 <- unique(susie_result_2[[\"top_loci\"]][[\"variant_id\"]])\n", + " variants2 <- unique(susie_result_2[[\"top_loci\"]][[\"variant\"]])\n", " \n", " # Compare variants\n", " if (length(variants2) > 0) {\n", @@ -1827,4 +1827,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file From 3761ad0db393a064bdf49096b905ab9a73a37a5d Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sat, 30 May 2026 14:48:04 -0500 Subject: [PATCH 5/6] Update mnm_regression.ipynb remove region from other quantities and add region as an input parameter to univariate analysis pipeline in pecotmr --- code/mnm_analysis/mnm_methods/mnm_regression.ipynb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb index ac2893b0c..eb04d4a41 100644 --- a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -979,7 +979,8 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = names(fdat$residual_Y)[r]),\n", + " region = \"${_meta_info[1]}\",\n", + " other_quantities = list(dropped_samples = dropped_samples, condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = NULL,\n", @@ -1014,7 +1015,8 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " other_quantities = list(dropped_samples = dropped_samples, region = \"${_meta_info[1]}\", condition_id = names(fdat$residual_Y)[r]),\n", + " region = \"${_meta_info[1]}\",\n", + " other_quantities = list(dropped_samples = dropped_samples, condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", " maf_cutoff = ${min_twas_maf},\n", From 5ae771f8e93337acce159a771dd35f34888f121c Mon Sep 17 00:00:00 2001 From: xuewei cao <36172337+xueweic@users.noreply.github.com> Date: Sun, 31 May 2026 00:38:27 -0400 Subject: [PATCH 6/6] Update mnm_regression.ipynb --- .../mnm_methods/mnm_regression.ipynb | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb index eb04d4a41..d6c6e1197 100644 --- a/code/mnm_analysis/mnm_methods/mnm_regression.ipynb +++ b/code/mnm_analysis/mnm_methods/mnm_regression.ipynb @@ -944,7 +944,7 @@ " region_name = \"${_meta_info[2]}\"\n", " }\n", "\n", - " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", + " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), analysis_region = \"${_meta_info[1]}\", grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", "\n", " finemapping_result = list()\n", " preset_variants_result = list()\n", @@ -979,7 +979,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " region = \"${_meta_info[1]}\",\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = dropped_samples, condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", @@ -1015,7 +1015,7 @@ " X_scalar=fdat$residual_X_scalar[[r]], \n", " Y_scalar=if (fdat$residual_Y_scalar[[r]] == 1) 1 else fdat$residual_Y_scalar[[r]][,i,drop=FALSE],\n", " X_variance=fdat$X_variance[[r]],\n", - " region = \"${_meta_info[1]}\",\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = dropped_samples, condition_id = names(fdat$residual_Y)[r]),\n", " # filters\n", " imiss_cutoff = ${imiss},\n", @@ -1234,7 +1234,7 @@ "\n", " context_regions <- list(${\",\".join([(\"c('\"+x+\"')\") if isinstance(x, str) else (\"c\"+ str(x)) for x in _meta_info[3]])})\n", " names(context_regions) <- colnames(fdat$residual_Y)\n", - " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", + " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), analysis_region = \"${_meta_info[1]}\", grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", "\n", " dd_prior <- if (${mixture_prior:r} == '.' || ${mixture_prior:r} == '') NULL else readRDS(${mixture_prior:r})\n", " dd_prior_cv <- if (${mixture_prior_cv:r} == '.' || ${mixture_prior_cv:r} == '') NULL else readRDS(${mixture_prior_cv:r})\n", @@ -1246,6 +1246,7 @@ " Y = fdat$residual_Y,\n", " maf = fdat$maf,\n", " X_variance = fdat$X_variance,\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = fdat$dropped_samples),\n", " # filters\n", " imiss_cutoff = ${imiss}, \n", @@ -1517,7 +1518,7 @@ " region_name = \"${_meta_info[2]}\"\n", " }\n", "\n", - " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", + " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), analysis_region = \"${_meta_info[1]}\", grange=parse_region(\"${_meta_info[1]}\"), region_name=region_name)\n", " \n", " dd_prior <- NULL\n", " if (${\"TRUE\" if data_driven_prior else \"FALSE\"}) {\n", @@ -1534,6 +1535,7 @@ " Y=fdat$residual_Y[,filtered_event_names],\n", " maf=fdat$maf,\n", " X_variance = fdat$X_variance,\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = fdat$dropped_samples),\n", " # filters\n", " imiss_cutoff = ${imiss}, \n", @@ -1666,6 +1668,7 @@ " }\n", " \n", " fitted = setNames(replicate(length(fdat$residual_Y), list(), simplify = FALSE), names(fdat$residual_Y))\n", + " region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), analysis_region = \"${_meta_info[1]}\", grange=parse_region(\"${_meta_info[1]}\"), region_name=\"${_meta_info[2]}\")\n", " for (r in 1:length(fitted)) {\n", " st = proc.time()\n", " fitted[[r]] = list()\n", @@ -1686,6 +1689,7 @@ " data_x = fdat$residual_X[[r]], data_y = top_pc_data[,i],\n", " X_scalar = fdat$residual_X_scalar[[r]], y_scalar = 1, maf = fdat$maf[[r]],\n", " coverage = ${coverage[0]}, secondary_coverage = c(${\",\".join([str(x) for x in coverage[1:]])}), signal_cutoff = ${pip_cutoff},\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = list(X=fdat$dropped_sample$dropped_samples_X[[r]], \n", " y=fdat$dropped_sample$dropped_samples_Y[[r]], \n", " covar=fdat$dropped_sample$dropped_samples_covar[[r]]))),\n", @@ -1724,11 +1728,12 @@ " data_x = fdat$residual_X[[r]], data_y = NULL,\n", " X_scalar = fdat$residual_X_scalar[[r]], y_scalar = 1, maf = fdat$maf[[r]],\n", " coverage = ${coverage[0]}, secondary_coverage = c(${\",\".join([str(x) for x in coverage[1:]])}), signal_cutoff = ${pip_cutoff},\n", + " region = region_info$analysis_region,\n", " other_quantities = list(dropped_samples = list(X=fdat$dropped_sample$dropped_samples_X[[r]], y=fdat$dropped_sample$dropped_samples_Y[[r]], \n", " covar=fdat$dropped_sample$dropped_samples_covar[[r]]))),\n", " primary_method = \"fsusie\")\n", " fitted[[r]]$total_time_elapsed = proc.time() - st\n", - " fitted[[r]]$region_info = list(region_coord=parse_region(\"${_meta_info[0]}\"), grange=parse_region(\"${_meta_info[1]}\"), region_name=\"${_meta_info[2]}\")\n", + " fitted[[r]]$region_info = region_info\n", " # original data no longer relevant, set to NA to release memory\n", " fdat$residual_X[[r]] <- NA\n", " fdat$residual_Y[[r]] <- NA\n",