diff --git a/DESCRIPTION b/DESCRIPTION index d00f17dd..62adc0ee 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,7 @@ Imports: readr, rlang, stringr, - susieR, + susieR (>= 0.16.2), tibble, tictoc, tidyr, diff --git a/NAMESPACE b/NAMESPACE index 225f15c6..5d0c6e38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -373,6 +373,7 @@ importFrom(susieR,mr.ash.rss) importFrom(susieR,susie) importFrom(susieR,susie_get_cs) importFrom(susieR,susie_rss) +importFrom(susieR,susie_ser) importFrom(susieR,univariate_regression) importFrom(tibble,as_tibble) importFrom(tibble,tibble) diff --git a/R/file_utils.R b/R/file_utils.R index dc6c7f1b..24807af8 100644 --- a/R/file_utils.R +++ b/R/file_utils.R @@ -1396,9 +1396,15 @@ load_rss_data <- function(sumstat_path, column_file_path = NULL, n_sample = 0, n sumstats <- load_tsv_region(file_path = sumstat_path, region = region, extract_region_name = extract_region_name, region_name_col = region_name_col) # To keep a log message - n_variants <- nrow(sumstats) + n_variants <- if (is.null(sumstats)) 0L else nrow(sumstats) + if (length(n_variants) == 0 || is.na(n_variants)) { + n_variants <- 0L + } if (n_variants == 0) { message(paste0("No variants in region ", region, ".")) + if (is.null(sumstats)) { + sumstats <- data.frame() + } return(list(sumstats = sumstats, n = NULL, var_y = NULL)) } else { message(paste0("Region ", region, " include ", n_variants, " in input sumstats.")) diff --git a/R/sumstats_qc.R b/R/sumstats_qc.R index 16e59b95..d0f13b6a 100644 --- a/R/sumstats_qc.R +++ b/R/sumstats_qc.R @@ -210,8 +210,8 @@ ld_mismatch_qc <- function(zScore, R = NULL, X = NULL, nSample = NULL, #' #' When the combined path receives genotype-backed reference data #' (\code{X_ref}), basic harmonization avoids constructing an LD matrix. PIP -#' screening uses an LD-independent single-effect screen with -#' \code{susie_rss(R = diag(p), L = 1, max_iter = 1)}, and LD-mismatch QC +#' screening uses the LD-independent single-effect summary-statistic model +#' \code{susie_ser(coverage = NULL)}, and LD-mismatch QC #' computes only the filtered local correlation matrix required by #' SLALOM/DENTIST. RAISS imputation temporarily centers/scales #' genotype-backed \code{X_ref} before using the whole-region genotype/SVD @@ -228,6 +228,7 @@ ld_mismatch_qc <- function(zScore, R = NULL, X = NULL, nSample = NULL, #' qc_method = "none") #' #' @importFrom dplyr mutate row_number filter pull +#' @importFrom susieR susie_ser #' @export summary_stats_qc <- function(sumstats, LD_data, n = NULL, method = c("slalom", "dentist"), @@ -517,9 +518,7 @@ summary_stats_qc <- function(sumstats, LD_data, n = NULL, if (cutoff < 0) cutoff <- 3 / nrow(sumstats) message("QC track: running LD-independent single-effect initial screen for summary-stat study ", study, ".") - pip_args <- list(z = sumstats$z, R = diag(nrow(sumstats)), - L = 1, max_iter = 1, n = n) - pip <- do.call(susie_rss, pip_args)$pip + pip <- susie_ser(z = sumstats$z, n = n, coverage = NULL)$pip if (!any(pip > cutoff)) { message("Skipping follow-up analysis: No signals above PIP threshold ", cutoff) message("QC track: skipping summary-stat study ", study, diff --git a/man/summary_stats_qc.Rd b/man/summary_stats_qc.Rd index 541310e0..bf267c9c 100644 --- a/man/summary_stats_qc.Rd +++ b/man/summary_stats_qc.Rd @@ -77,8 +77,8 @@ This function applies the specified quality control method to the When the combined path receives genotype-backed reference data (\code{X_ref}), basic harmonization avoids constructing an LD matrix. PIP - screening uses an LD-independent single-effect screen with - \code{susie_rss(R = diag(p), L = 1, max_iter = 1)}, and LD-mismatch QC + screening uses the LD-independent single-effect summary-statistic model + \code{susie_ser(coverage = NULL)}, and LD-mismatch QC computes only the filtered local correlation matrix required by SLALOM/DENTIST. RAISS imputation temporarily centers/scales genotype-backed \code{X_ref} before using the whole-region genotype/SVD diff --git a/tests/testthat/test_file_utils.R b/tests/testthat/test_file_utils.R index c3c985dc..0dfd8749 100644 --- a/tests/testthat/test_file_utils.R +++ b/tests/testthat/test_file_utils.R @@ -1435,6 +1435,25 @@ test_that("load_rss_data returns empty sumstats message for zero-row region", { file.remove(tmp_sumstat, tmp_col) }) +test_that("load_rss_data handles tabix regions with no records", { + tmp_sumstat <- tempfile(fileext = ".tsv") + writeLines("chrom\tpos\tb\ts", tmp_sumstat) + tmp_col <- tempfile(fileext = ".txt") + writeLines(c("beta:b", "se:s"), tmp_col) + + local_mocked_bindings( + load_tsv_region = function(...) NULL + ) + expect_message( + result <- suppressWarnings(load_rss_data(tmp_sumstat, tmp_col, region = "chr21:1-2")), + "No variants in region chr21:1-2." + ) + expect_true(is.data.frame(result$sumstats)) + expect_equal(nrow(result$sumstats), 0) + expect_null(result$n) + file.remove(tmp_sumstat, tmp_col) +}) + test_that("load_rss_data extracts n from n_sample column in sumstats", { tmp_sumstat <- tempfile(fileext = ".tsv") df <- data.frame( diff --git a/tests/testthat/test_sumstats_qc.R b/tests/testthat/test_sumstats_qc.R index a7152f42..68322daa 100644 --- a/tests/testthat/test_sumstats_qc.R +++ b/tests/testthat/test_sumstats_qc.R @@ -435,9 +435,9 @@ test_that("summary_stats_qc PIP screening uses LD-independent SER", { local_mocked_bindings( compute_LD = function(...) stop("compute_LD should not be called"), - susie_rss = function(z, R = NULL, X = NULL, ...) { - expect_null(X) - expect_equal(R, diag(length(z))) + susie_ser = function(z, n = NULL, coverage = 0.95, ...) { + expect_equal(n, rss_input$n) + expect_null(coverage) list(pip = rep(1, length(z))) } ) diff --git a/tests/testthat/test_univariate_pipeline.R b/tests/testthat/test_univariate_pipeline.R index 20eaa7f1..762a22d3 100644 --- a/tests/testthat/test_univariate_pipeline.R +++ b/tests/testthat/test_univariate_pipeline.R @@ -947,7 +947,7 @@ test_that("rss: pip_cutoff_to_skip > 0, no signal => early return", { local_mocked_bindings( load_rss_data = function(...) list(sumstats = ss, n = 1000, var_y = 1), rss_basic_qc = function(...) list(sumstats = ss, LD_mat = ld_mat), - susie_rss = function(...) list(pip = rep(0.01, 5)), + susie_ser = function(...) list(pip = rep(0.01, 5)), ) result <- expect_message( @@ -1004,7 +1004,7 @@ test_that("rss: negative pip_cutoff_to_skip auto-computes threshold", { local_mocked_bindings( load_rss_data = function(...) list(sumstats = ss, n = 1000, var_y = 1), rss_basic_qc = function(...) list(sumstats = ss, LD_mat = ld_mat), - susie_rss = function(...) list(pip = rep(0.01, 5)), + susie_ser = function(...) list(pip = rep(0.01, 5)), ) # auto threshold = 3 * 1/5 = 0.6, all PIPs are 0.01 so skip