diff --git a/R/file_utils.R b/R/file_utils.R index 58aa82e6..945878de 100644 --- a/R/file_utils.R +++ b/R/file_utils.R @@ -1317,18 +1317,15 @@ standardise_sumstats_columns <- function(sumstats, column_file_path = NULL, comm ) # Make a copy to avoid in-place modification by MungeSumstats sumstats_copy <- data.frame(sumstats, check.names = FALSE) - # Use MungeSumstats for comprehensive column standardization - sumstats_copy <- standardise_header( - sumstats_copy, return_list = FALSE, uppercase_unmapped = FALSE - ) - # Rename MungeSumstats standard names to pecotmr conventions - for (ms_name in names(ms_to_pecotmr)) { - idx <- which(colnames(sumstats_copy) == ms_name) - if (length(idx) > 0) { - colnames(sumstats_copy)[idx] <- ms_to_pecotmr[ms_name] - } - } - # Apply additional custom column mapping if provided + + # Read the explicit user column mapping first. User declarations are + # AUTHORITATIVE: a column the user mapped (e.g. `af:effect_allele_frequency`) + # must not be silently overridden by MungeSumstats (which would otherwise + # absorb `effect_allele_frequency` into `FRQ` -> `maf` before the custom map + # could run). We therefore shield each declared source column behind a unique + # placeholder, let MungeSumstats standardize everything else, then restore the + # declared columns to their requested standard names last. + placeholders <- character(0) if (!is.null(column_file_path)) { if (!file.exists(column_file_path)) { stop("Column mapping file not found: ", column_file_path) @@ -1342,10 +1339,32 @@ standardise_sumstats_columns <- function(sumstats, column_file_path = NULL, comm for (i in seq_len(nrow(column_data))) { idx <- which(colnames(sumstats_copy) == column_data$original[i]) if (length(idx) > 0) { - colnames(sumstats_copy)[idx] <- column_data$standard[i] + ph <- paste0(".pecotmr_decl_", i) + colnames(sumstats_copy)[idx] <- ph + placeholders[[ph]] <- column_data$standard[i] } } } + + # Use MungeSumstats for comprehensive column standardization (shielded + # declared columns pass through untouched as unmapped placeholders). + sumstats_copy <- standardise_header( + sumstats_copy, return_list = FALSE, uppercase_unmapped = FALSE + ) + # Rename MungeSumstats standard names to pecotmr conventions + for (ms_name in names(ms_to_pecotmr)) { + idx <- which(colnames(sumstats_copy) == ms_name) + if (length(idx) > 0) { + colnames(sumstats_copy)[idx] <- ms_to_pecotmr[ms_name] + } + } + # Restore user-declared columns to their requested standard names (last word). + for (ph in names(placeholders)) { + idx <- which(colnames(sumstats_copy) == ph) + if (length(idx) > 0) { + colnames(sumstats_copy)[idx] <- placeholders[[ph]] + } + } as.data.frame(sumstats_copy) } @@ -1419,6 +1438,41 @@ load_rss_data <- function(sumstat_path, column_file_path = NULL, n_sample = 0, n # Standardize column names via MungeSumstats + optional custom mapping sumstats <- standardise_sumstats_columns(sumstats, column_file_path, comment_string) + + # ---- Effect-allele frequency (af) propagation ------------------------------- + # `af` is the frequency of the effect allele / A1 and is exported (after + # harmonization) as top_loci$af. It becomes available ONLY through an explicit + # column-file mapping to the standard name `af` (MungeSumstats never emits + # `af`; it maps FRQ -> `maf`, which stays an internal QC quantity). Ambiguous + # frequency headers therefore never silently become `af`. The effect allele + # must also be resolvable (an A1 column or an allele-bearing variant id), or + # the declared frequency cannot be tied to a direction and is not exported. + af_declared <- "af" %in% colnames(sumstats) + has_effect_allele <- "A1" %in% colnames(sumstats) || + any(c("variant_id", "variant") %in% colnames(sumstats)) + if (af_declared && has_effect_allele) { + sumstats$af <- suppressWarnings(as.numeric(sumstats$af)) + if (all(is.na(sumstats$af))) { + warning("Effect-allele frequency column 'af' was declared but its values ", + "are missing/unusable; top_loci$af will be NA and MAF filtering ", + "will be skipped.") + } + } else { + if (af_declared && !has_effect_allele) { + warning("Effect-allele frequency 'af' was declared but no effect allele ", + "(A1 / allele-bearing variant id) is available to tie it to a ", + "direction; it will not be exported. top_loci$af will be NA and ", + "MAF filtering will be skipped.") + } else { + warning("Effect-allele frequency (af) was not declared in the column ", + "file; top_loci$af will be NA and MAF filtering will be skipped. ", + "Generic frequency headers (FRQ/AF/allele_frequency) are kept as ", + "internal MAF only and are never exported as af.") + } + sumstats$af <- NA_real_ + } + # ---------------------------------------------------------------------------- + has_observed_beta_se <- all(c("beta", "se") %in% colnames(sumstats)) if (binary_trait_model == "ols" && !has_observed_beta_se) { stop("binary_trait_model = 'ols' requires observed beta and se columns ", diff --git a/R/misc.R b/R/misc.R index d9505296..13e3420a 100644 --- a/R/misc.R +++ b/R/misc.R @@ -31,6 +31,20 @@ compute_maf <- function(geno) { return(min(f, 1 - f)) } +#' Derive minor-allele frequency from effect-allele frequency +#' +#' MAF is an internal QC/filtering quantity only; it is never exported. Use this +#' helper wherever a MAF is needed from a (directional) effect-allele frequency +#' \code{af}, instead of carrying a separate \code{maf} column. NA in -> NA out. +#' +#' @param af Numeric vector of effect-allele frequencies in \code{[0, 1]}. +#' @return Numeric vector \code{pmin(af, 1 - af)}, preserving NA. +#' @noRd +maf_from_af <- function(af) { + af <- as.numeric(af) + pmin(af, 1 - af) +} + compute_missing <- function(geno) { miss <- sum(is.na(geno)) / length(geno) return(miss) diff --git a/R/susie_wrapper.R b/R/susie_wrapper.R index 1de8f671..f2070703 100644 --- a/R/susie_wrapper.R +++ b/R/susie_wrapper.R @@ -215,7 +215,8 @@ fit_susie_inf_then_susie_rss <- function(z, R, n, args = list(), #' @param data_y Phenotype vector/matrix or summary statistics. Default NULL. #' @param X_scalar Scaling factor for genotype effects. Default 1. #' @param y_scalar Scaling factor for phenotype effects. Default 1. -#' @param maf Minor allele frequencies. Default NULL. +#' @param af Effect-allele frequencies (exported as the \code{af} column; never +#' MAF). Default NULL. #' @param coverage Primary credible-set coverage. #' @param secondary_coverage Additional credible-set coverages. #' @param signal_cutoff PIP cutoff for including non-CS variants in top loci. @@ -230,7 +231,7 @@ fit_susie_inf_then_susie_rss <- function(z, R, n, args = list(), #' @export postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL, X_scalar = 1, y_scalar = 1, - maf = NULL, coverage = NULL, + af = NULL, coverage = NULL, secondary_coverage = c(0.7, 0.5), signal_cutoff = 0.1, other_quantities = NULL, @@ -251,7 +252,7 @@ postprocess_finemapping_fits <- function(fits, data_x, data_y = NULL, fit <- .set_finemapping_fit_class(fits[[method]], method) postprocess_finemapping_fit( fit, method = method, data_x = data_x, data_y = data_y, - X_scalar = X_scalar, y_scalar = y_scalar, maf = maf, + X_scalar = X_scalar, y_scalar = y_scalar, af = af, coverage = coverage, secondary_coverage = secondary_coverage, signal_cutoff = signal_cutoff, other_quantities = other_quantities, region = region, @@ -316,7 +317,7 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", cs_input = .postprocess_finemapping_fit_common <- function(fit, method, data_x, data_y = NULL, X_scalar = 1, y_scalar = 1, - maf = NULL, coverage = NULL, + af = NULL, coverage = NULL, secondary_coverage = c(0.7, 0.5), signal_cutoff = 0.1, other_quantities = NULL, @@ -335,7 +336,7 @@ postprocess_finemapping_fit.susiF <- function(fit, method = "fsusie", cs_input = ) top_loci <- build_top_loci( fit, cs_tables, variant_names = variant_names, sumstats = sumstats, - maf = maf, method = method, signal_cutoff = signal_cutoff, + af = af, method = method, signal_cutoff = signal_cutoff, data_x = data_x, data_y = data_y, other_quantities = other_quantities, region = region ) @@ -489,7 +490,7 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", " #' single \code{top_loci} returned by \code{format_finemapping_output()}. #' #' Output columns, in order: \code{#chr}, \code{start}, \code{end}, \code{a1}, -#' \code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{maf}, +#' \code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{af}, #' \code{beta}, \code{se}, \code{pip}, \code{posterior_effect_mean}, #' \code{posterior_effect_se}, \code{cs_95}, \code{cs_70}, \code{cs_50}, #' \code{cs_95_purity}, \code{method}, \code{grange_start}, \code{grange_end}. @@ -513,7 +514,11 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", " #' (\code{chr:pos:A2:A1}). #' @param sumstats Optional marginal-association summary (\code{betahat}, #' \code{sebetahat}) filling \code{beta} / \code{se}. -#' @param maf Optional numeric vector of minor-allele frequencies. +#' @param af Optional numeric vector of effect-allele frequencies (frequency of +#' the final effect allele / \code{a1} after allele harmonization against the +#' LD/reference variants). Exported directly as the \code{af} column. MAF is +#' never exported; derive it from \code{af} at filter time. Default NULL -> +#' \code{af = NA_real_}. #' @param method Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Required. #' @param signal_cutoff PIP cutoff for retaining PIP-only (non-CS) variants. #' @param data_x Optional regional genotype matrix. @@ -525,7 +530,7 @@ compute_cs_table <- function(fit, data_x, coverage, cs_input = c("X", "Xcorr", " #' or an empty data frame if nothing is retained. #' @export build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL, - maf = NULL, method, signal_cutoff = 0.1, + af = NULL, method, signal_cutoff = 0.1, data_x = NULL, data_y = NULL, other_quantities = NULL, region = NULL) { @@ -631,7 +636,7 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL, gene = rep(fit_gene, n_keys), event = rep(fit_event, n_keys), n = rep(fit_n, n_keys), - maf = pick(maf), + af = pick(af), beta = pick(sumstats$betahat), se = pick(sumstats$sebetahat), pip = as.numeric(fit$pip[v_idx]), @@ -679,7 +684,7 @@ build_top_loci <- function(fit, cs_tables, variant_names, sumstats = NULL, gene = character(), event = character(), n = integer(), - maf = numeric(), + af = numeric(), beta = numeric(), se = numeric(), pip = numeric(), @@ -1094,10 +1099,16 @@ susie_rss_pipeline <- function(sumstats, LD_mat = NULL, X_mat = NULL, n = NULL, pp_cs_input <- "X" } + # Effect-allele frequency for top_loci$af. Carried only when the harmonized + # sumstats declares it (effect-allele AF); aligned to the z / variant order. + # MAF is never exported here; it is an internal QC quantity derived from af. + af <- if (!is.null(sumstats$af)) as.numeric(sumstats$af) else NULL + post <- postprocess_finemapping_fits( fits = fitted_models, data_x = data_x, data_y = list(z = z), + af = af, coverage = coverage, secondary_coverage = secondary_coverage, signal_cutoff = signal_cutoff, diff --git a/R/univariate_pipeline.R b/R/univariate_pipeline.R index 28eefd14..9ba9c95f 100644 --- a/R/univariate_pipeline.R +++ b/R/univariate_pipeline.R @@ -8,7 +8,12 @@ #' @param Y A vector of phenotype measurements. #' @param X_scalar A scalar or vector to rescale X to its original scale. #' @param Y_scalar A scalar to rescale Y to its original scale. -#' @param maf A vector of minor allele frequencies for each variant in X. +#' @param maf A vector of minor allele frequencies for each variant in X. Used +#' only for \code{maf_cutoff} filtering; never exported. +#' @param af Optional vector of directional effect-allele frequencies (frequency +#' of \code{a1}) aligned to the columns of X. When supplied it is exported as +#' the \code{top_loci$af} column; when NULL, \code{af} is \code{NA_real_}. +#' Default NULL. #' @param X_variance Optional variance of X. Default is NULL. #' @param other_quantities A list of other quantities to be carried into fine-mapping post-processing. Default is an empty list. #' @param region Optional \code{"chr:start-end"} string for the analysis region. Default is NULL. @@ -60,6 +65,7 @@ univariate_analysis_pipeline <- function( X, Y, maf, + af = NULL, X_scalar = 1, Y_scalar = 1, X_variance = NULL, @@ -95,6 +101,13 @@ univariate_analysis_pipeline <- function( if (nrow(X) != length(Y)) stop("X and Y must have the same number of rows/length") if (!is.numeric(maf) || length(maf) != ncol(X)) stop("maf must be a numeric vector with length equal to the number of columns in X") if (any(maf < 0 | maf > 1)) stop("maf values must be between 0 and 1") + # af (directional effect-allele frequency) is optional. When supplied it must + # align with X columns; it is exported as top_loci$af. maf stays directionless + # and is used only for maf_cutoff filtering. + if (!is.null(af)) { + if (!is.numeric(af) || length(af) != ncol(X)) stop("af must be NULL or a numeric vector with length equal to the number of columns in X") + if (any(af < 0 | af > 1, na.rm = TRUE)) stop("af values must be between 0 and 1") + } if (!is.numeric(X_scalar) || (length(X_scalar) != 1 && length(X_scalar) != ncol(X))) stop("X_scalar must be a numeric scalar or vector with length equal to the number of columns in X") if (!is.numeric(Y_scalar) || length(Y_scalar) != 1) stop("Y_scalar must be a numeric scalar") if (!is.numeric(L) || L <= 0) stop("L must be a positive integer") @@ -153,6 +166,7 @@ univariate_analysis_pipeline <- function( variants_kept <- filter_variants_by_ld_reference(colnames(X), ld_reference_meta_file) X <- X[, variants_kept$data, drop = FALSE] maf <- maf[variants_kept$idx] + if (!is.null(af)) af <- af[variants_kept$idx] if (length(X_scalar) > 1) X_scalar <- X_scalar[variants_kept$idx] } @@ -161,6 +175,7 @@ univariate_analysis_pipeline <- function( X_filtered <- filter_X(X, imiss_cutoff, maf_cutoff, var_thresh = xvar_cutoff, maf = maf, X_variance = X_variance) kept_indices <- match(colnames(X_filtered), colnames(X)) maf <- maf[kept_indices] + if (!is.null(af)) af <- af[kept_indices] if (length(X_scalar) > 1) X_scalar <- X_scalar[kept_indices] X <- X_filtered } @@ -241,7 +256,7 @@ univariate_analysis_pipeline <- function( data_y = Y, X_scalar = X_scalar, y_scalar = Y_scalar, - maf = maf, + af = af, coverage = coverage[1], secondary_coverage = if (length(coverage) > 1) coverage[-1] else NULL, signal_cutoff = signal_cutoff, diff --git a/man/build_top_loci.Rd b/man/build_top_loci.Rd index f44ae5a3..b80b2b3b 100644 --- a/man/build_top_loci.Rd +++ b/man/build_top_loci.Rd @@ -9,7 +9,7 @@ build_top_loci( cs_tables, variant_names, sumstats = NULL, - maf = NULL, + af = NULL, method, signal_cutoff = 0.1, data_x = NULL, @@ -31,7 +31,11 @@ build_top_loci( \item{sumstats}{Optional marginal-association summary (\code{betahat}, \code{sebetahat}) filling \code{beta} / \code{se}.} -\item{maf}{Optional numeric vector of minor-allele frequencies.} +\item{af}{Optional numeric vector of effect-allele frequencies (frequency of +the final effect allele / \code{a1} after allele harmonization against the +LD/reference variants). Exported directly as the \code{af} column. MAF is +never exported; derive it from \code{af} at filter time. Default NULL -> +\code{af = NA_real_}.} \item{method}{Method name (e.g. \code{"susie"}, \code{"susie_inf"}). Required.} @@ -58,7 +62,7 @@ single \code{top_loci} returned by \code{format_finemapping_output()}. } \details{ Output columns, in order: \code{#chr}, \code{start}, \code{end}, \code{a1}, -\code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{maf}, +\code{a2}, \code{variant}, \code{gene}, \code{event}, \code{n}, \code{af}, \code{beta}, \code{se}, \code{pip}, \code{posterior_effect_mean}, \code{posterior_effect_se}, \code{cs_95}, \code{cs_70}, \code{cs_50}, \code{cs_95_purity}, \code{method}, \code{grange_start}, \code{grange_end}. diff --git a/man/postprocess_finemapping_fits.Rd b/man/postprocess_finemapping_fits.Rd index 80120792..5a9ff56c 100644 --- a/man/postprocess_finemapping_fits.Rd +++ b/man/postprocess_finemapping_fits.Rd @@ -10,7 +10,7 @@ postprocess_finemapping_fits( data_y = NULL, X_scalar = 1, y_scalar = 1, - maf = NULL, + af = NULL, coverage = NULL, secondary_coverage = c(0.7, 0.5), signal_cutoff = 0.1, @@ -35,7 +35,8 @@ input used for credible-set purity and correlations.} \item{y_scalar}{Scaling factor for phenotype effects. Default 1.} -\item{maf}{Minor allele frequencies. Default NULL.} +\item{af}{Effect-allele frequencies (exported as the \code{af} column; never +MAF). Default NULL.} \item{coverage}{Primary credible-set coverage.} diff --git a/man/univariate_analysis_pipeline.Rd b/man/univariate_analysis_pipeline.Rd index 3f82d2d9..95b3943e 100644 --- a/man/univariate_analysis_pipeline.Rd +++ b/man/univariate_analysis_pipeline.Rd @@ -8,6 +8,7 @@ univariate_analysis_pipeline( X, Y, maf, + af = NULL, X_scalar = 1, Y_scalar = 1, X_variance = NULL, @@ -40,7 +41,13 @@ univariate_analysis_pipeline( \item{Y}{A vector of phenotype measurements.} -\item{maf}{A vector of minor allele frequencies for each variant in X.} +\item{maf}{A vector of minor allele frequencies for each variant in X. Used +only for \code{maf_cutoff} filtering; never exported.} + +\item{af}{Optional vector of directional effect-allele frequencies (frequency +of \code{a1}) aligned to the columns of X. When supplied it is exported as +the \code{top_loci$af} column; when NULL, \code{af} is \code{NA_real_}. +Default NULL.} \item{X_scalar}{A scalar or vector to rescale X to its original scale.} diff --git a/tests/testthat/test_file_utils.R b/tests/testthat/test_file_utils.R index af5350ea..46761f93 100644 --- a/tests/testthat/test_file_utils.R +++ b/tests/testthat/test_file_utils.R @@ -1384,6 +1384,110 @@ test_that("load_rss_data creates beta from z when beta is missing", { file.remove(tmp_sumstat, tmp_col) }) +# ---- rss-top-loci-af: effect-allele frequency (af) propagation -------------- + +.write_rss_inputs <- function(df, col_lines) { + tmp_sumstat <- tempfile(fileext = ".tsv") + readr::write_tsv(df, tmp_sumstat) + tmp_col <- tempfile(fileext = ".txt") + writeLines(col_lines, tmp_col) + list(sumstat = tmp_sumstat, col = tmp_col) +} + +test_that("load_rss_data exports declared effect-allele af", { + # `eaf_col` is a custom (MungeSumstats-unrecognized) header so the column-file + # mapping `af:eaf_col` is what declares effect-allele frequency. + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + A1 = c("G", "T"), A2 = c("A", "C"), + effect = c(0.5, -0.3), stderr = c(0.1, 0.15), n = c(1000, 1000), + eaf_col = c(0.21, 0.78), stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("beta:effect", "se:stderr", "n_sample:n", "af:eaf_col")) + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true("af" %in% colnames(result$sumstats)) + expect_equal(result$sumstats$af, c(0.21, 0.78), tolerance = 1e-10) + expect_false(any(is.na(result$sumstats$af))) + file.remove(f$sumstat, f$col) +}) + +test_that("load_rss_data sets af = NA and warns once when af is not declared", { + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + A1 = c("G", "T"), A2 = c("A", "C"), + effect = c(0.5, -0.3), stderr = c(0.1, 0.15), n = c(1000, 1000), + stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("beta:effect", "se:stderr", "n_sample:n")) + expect_warning(load_rss_data(f$sumstat, f$col), "not declared") + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true("af" %in% colnames(result$sumstats)) + expect_true(all(is.na(result$sumstats$af))) + file.remove(f$sumstat, f$col) +}) + +test_that("load_rss_data emits a distinct warning when af is declared but missing", { + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + A1 = c("G", "T"), A2 = c("A", "C"), + effect = c(0.5, -0.3), stderr = c(0.1, 0.15), n = c(1000, 1000), + eaf_col = c(NA_real_, NA_real_), stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("beta:effect", "se:stderr", "n_sample:n", "af:eaf_col")) + expect_warning(load_rss_data(f$sumstat, f$col), "declared but its values") + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true(all(is.na(result$sumstats$af))) + file.remove(f$sumstat, f$col) +}) + +test_that("load_rss_data never exports an ambiguous frequency column as af", { + # FRQ is mapped by MungeSumstats to the internal `maf`, never to `af`. + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + A1 = c("G", "T"), A2 = c("A", "C"), + effect = c(0.5, -0.3), stderr = c(0.1, 0.15), n = c(1000, 1000), + FRQ = c(0.3, 0.6), stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("beta:effect", "se:stderr", "n_sample:n")) + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true(all(is.na(result$sumstats$af))) + # the ambiguous values did not leak into af + expect_false(isTRUE(all.equal(result$sumstats$af, c(0.3, 0.6)))) + file.remove(f$sumstat, f$col) +}) + +test_that("explicit af declaration wins over MungeSumstats (recognized freq header)", { + # `effect_allele_frequency` is a header MungeSumstats recognizes and would + # otherwise absorb into the internal `maf`. An explicit `af:...` mapping must + # win, exporting the effect-allele frequency as `af` (not `maf`). + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + A1 = c("G", "T"), A2 = c("A", "C"), + effect_allele_frequency = c(0.12, 0.44), + beta = c(0.5, -0.3), se = c(0.1, 0.15), n = c(1000, 1000), + stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("n_sample:n", "af:effect_allele_frequency")) + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true("af" %in% colnames(result$sumstats)) + expect_false("maf" %in% colnames(result$sumstats)) + expect_equal(result$sumstats$af, c(0.12, 0.44), tolerance = 1e-10) + file.remove(f$sumstat, f$col) +}) + +test_that("load_rss_data does not export af when no effect allele is resolvable", { + # af declared via column file, but no A1 / variant id to tie it to a direction. + df <- data.frame( + chrom = c("chr1", "chr1"), pos = c(100, 200), + effect = c(0.5, -0.3), stderr = c(0.1, 0.15), n = c(1000, 1000), + eaf_col = c(0.21, 0.78), stringsAsFactors = FALSE + ) + f <- .write_rss_inputs(df, c("beta:effect", "se:stderr", "n_sample:n", "af:eaf_col")) + result <- suppressWarnings(load_rss_data(f$sumstat, f$col)) + expect_true(all(is.na(result$sumstats$af))) + file.remove(f$sumstat, f$col) +}) + test_that("load_rss_data errors when both n_sample and n_case+n_control are provided", { tmp_sumstat <- tempfile(fileext = ".tsv") df <- data.frame( diff --git a/tests/testthat/test_susie_wrapper.R b/tests/testthat/test_susie_wrapper.R index 72b967a6..7115379e 100644 --- a/tests/testthat/test_susie_wrapper.R +++ b/tests/testthat/test_susie_wrapper.R @@ -923,7 +923,7 @@ if (!exists(".make_univariate_data", inherits = FALSE)) { .UNIFIED_TOP_LOCI_COLS <- c( "#chr", "start", "end", "a1", "a2", "variant", "gene", "event", - "n", "maf", "beta", "se", + "n", "af", "beta", "se", "pip", "posterior_effect_mean", "posterior_effect_se", "cs_95", "cs_70", "cs_50", "cs_95_purity", "method", "grange_start", "grange_end" @@ -990,13 +990,13 @@ if (!exists(".make_univariate_data", inherits = FALSE)) { } .run_build_top_loci <- function(inp, method = "susie", signal_cutoff = 0.05, - sumstats = NULL, maf = NULL, + sumstats = NULL, af = NULL, other_quantities = NULL, region = NULL) { build_top_loci( fit = inp$fit, cs_tables = inp$cs_tables, variant_names = inp$variant_names, - sumstats = sumstats, maf = maf, + sumstats = sumstats, af = af, method = method, signal_cutoff = signal_cutoff, data_x = inp$data_x, data_y = inp$data_y, other_quantities = other_quantities, @@ -1023,7 +1023,8 @@ test_that("build_top_loci returns the exact 22-column schema in order with stabl expect_true(is.character(out$gene)) expect_true(is.character(out$event)) expect_true(is.integer(out$n)) - expect_true(is.numeric(out$maf)) + expect_true(is.numeric(out$af)) + expect_false("maf" %in% names(out)) expect_true(is.numeric(out$pip)) expect_true(is.numeric(out$posterior_effect_mean)) expect_true(is.numeric(out$posterior_effect_se)) @@ -1047,7 +1048,7 @@ test_that("build_top_loci emits 22 columns in the fixed order on a non-empty fit out <- .run_build_top_loci(inp, method = "susie", sumstats = list(betahat = c(0.2, -0.1), sebetahat = c(0.05, 0.04)), - maf = c(0.10, 0.25), + af = c(0.10, 0.25), other_quantities = other_q, region = "chr10:10823338-14348298") expect_equal(names(out), .UNIFIED_TOP_LOCI_COLS)