Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ export(hasGenotypes)
export(invertMinmaxScaling)
export(isBinarySldscAnnot)
export(isSkipped)
export(krigingOutlierQc)
export(l0learnRssWeights)
export(l0learnWeights)
export(lassoWeights)
Expand Down
23 changes: 22 additions & 1 deletion R/allele_qc.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ NULL
#' or a vector of strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1". Can be automatically converted to a data frame if a vector.
#' @param refVariants A data frame with columns "chrom", "pos", "A2", "A1" or strings in the format of "chr:pos:A2:A1"/"chr:pos_A2_A1".
#' @param colToFlip The name of the column in targetData where flips are to be applied.
#' On an allele swap these columns are sign-flipped (multiplied by -1), the
#' correct operation for signed quantities like \code{beta} and \code{z}.
#' @param colToComplement Names of columns in targetData to complement
#' (\code{1 - x}) on an allele swap, the correct operation for an
#' effect-allele frequency like \code{af}. Default \code{character()} does no
#' complementing, so non-RSS callers are unchanged. Distinct from
#' \code{colToFlip}: frequencies are complemented, signed effects are
#' sign-flipped.
#' @param matchMinProp Minimum proportion of variants in the smallest data
#' to be matched, otherwise stops with an error. Default is 20%.
#' @param removeDups Whether to remove duplicates, default is TRUE.
Expand All @@ -33,7 +41,8 @@ NULL
matchRefPanel <- function(targetData, refVariants, colToFlip = NULL,
matchMinProp = 0.2, removeDups = TRUE,
removeIndels = FALSE, removeStrandAmbiguous = TRUE,
flipStrand = FALSE, removeUnmatched = TRUE, ...) {
flipStrand = FALSE, removeUnmatched = TRUE,
colToComplement = character(), ...) {
strandFlip <- function(ref) {
chartr("ATCG", "TAGC", ref)
}
Expand Down Expand Up @@ -139,6 +148,18 @@ matchRefPanel <- function(targetData, refVariants, colToFlip = NULL,
}
matchResult[matchResult$sign_flip, colToFlip] <- -1 * matchResult[matchResult$sign_flip, colToFlip]
}
# Complement (1 - x) colToComplement for the same swapped variants. A
# frequency tracks the effect allele, so an allele swap takes af -> 1 - af
# (not a sign flip). Kept independent of colToFlip so signed columns are
# untouched here.
if (length(colToComplement) > 0) {
missing <- setdiff(colToComplement, colnames(matchResult))
if (length(missing) > 0) {
stop("Column(s) '", paste(missing, collapse = "', '"), "' not found in targetData.")
}
matchResult[matchResult$sign_flip, colToComplement] <-
1 - matchResult[matchResult$sign_flip, colToComplement]
}
# flip the strands if there is a strand flip
if (flipStrand) {
strandFlippedIndices <- which(matchResult$strand_flip)
Expand Down
38 changes: 19 additions & 19 deletions R/colocboost_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,11 @@ regionDataToColocboostInput <- function(regionData) {
#'
#' Individual-level QC is only attempted when at least one individual QC control
#' is non-\code{NULL} and named \code{X} and \code{Y} inputs are available in
#' \code{...}. Summary-statistic QC is only attempted when \code{qc_method},
#' \code{...}. Summary-statistic QC is only attempted when \code{zMismatchQc},
#' \code{pip_cutoff_to_skip_sumstat}, \code{impute = TRUE}, or
#' \code{LD_reference_info} is supplied and named \code{sumstat} plus either
#' \code{LD}, \code{X_ref}, or \code{LD_reference_info} are available.
#' \code{qc_method = "none"} means run basic allele/variant harmonization
#' \code{zMismatchQc = "none"} means run basic allele/variant harmonization
#' only; it does not run SLALOM/DENTIST
#' LD-mismatch QC. RAISS imputation is controlled separately by
#' \code{impute = TRUE}.
Expand All @@ -146,8 +146,8 @@ regionDataToColocboostInput <- function(regionData) {
#' @param missingRateThresh,mafCutoff,xvarCutoff,ldReferenceMetaFile,pipCutoffToSkipInd
#' Individual-level QC controls. If all are \code{NULL}, individual-level QC
#' is not run.
#' @param keepIndel,pipCutoffToSkipSumstat,qcMethod,impute,imputeOpts
#' Summary-statistic QC controls. \code{qcMethod = "none"} runs
#' @param keepIndel,pipCutoffToSkipSumstat,zMismatchQc,impute,imputeOpts
#' Summary-statistic QC controls. \code{zMismatchQc = "none"} runs
#' basic allele harmonization without
#' LD-mismatch outlier detection. Imputation is only run when
#' \code{impute = TRUE}.
Expand All @@ -170,17 +170,17 @@ regionDataToColocboostInput <- function(regionData) {
#'
#' # Summary-statistic input with basic allele/variant harmonization only.
#' fit <- colocboostAnalysis(sumstat = sumstat, LD = LD,
#' qcMethod = "none", M = 500)
#' zMismatchQc = "none", M = 500)
#'
#' # Summary-statistic input with LD-mismatch QC and RAISS imputation.
#' fit <- colocboostAnalysis(sumstat = sumstat, LD = LD,
#' qcMethod = "slalom", impute = TRUE)
#' zMismatchQc = "slalom", impute = TRUE)
#'
#' # Use richer LD metadata from load_LD_matrix() for QC, while still passing
#' # ColocBoost's native LD input.
#' ldData <- load_LD_matrix(ldMetaFile, region)
#' fit <- colocboostAnalysis(sumstat = sumstat, LD = getCorrelation(ldData),
#' ldReferenceInfo = ldData, qcMethod = "none")
#' ldReferenceInfo = ldData, zMismatchQc = "none")
#'
#' # Individual-level input with explicit genotype QC thresholds.
#' fit <- colocboostAnalysis(X = X, Y = Y,
Expand All @@ -198,7 +198,7 @@ colocboostAnalysis <- function(...,
# sumstat QC
keepIndel = TRUE,
pipCutoffToSkipSumstat = NULL,
qcMethod = NULL,
zMismatchQc = NULL,
impute = FALSE,
imputeOpts = list(rcond = 0.01, R2_threshold = 0.6,
minimum_ld = 5, lamb = 0.01),
Expand All @@ -208,12 +208,12 @@ colocboostAnalysis <- function(...,
directArgs <- list(...)
preQcDataOutcomes <- .cbColocboostOutcomeNames(directArgs, preferSupplied = FALSE)
preQcDisplayOutcomes <- .cbColocboostOutcomeNames(directArgs, preferSupplied = TRUE)
if (!is.null(qcMethod)) qcMethod <- .resolveSummaryQcMethod(qcMethod)
if (!is.null(zMismatchQc)) zMismatchQc <- .resolveZMismatchQc(zMismatchQc)

individualQcRequested <- !is.null(missingRateThresh) ||
!is.null(mafCutoff) || !is.null(xvarCutoff) ||
!is.null(ldReferenceMetaFile) || !is.null(pipCutoffToSkipInd)
sumstatQcRequested <- !is.null(qcMethod) || isTRUE(impute) ||
sumstatQcRequested <- !is.null(zMismatchQc) || isTRUE(impute) ||
!is.null(pipCutoffToSkipSumstat) || !is.null(ldReferenceInfo)
qcRequested <- individualQcRequested || sumstatQcRequested
if (!qcRequested) {
Expand Down Expand Up @@ -303,7 +303,7 @@ colocboostAnalysis <- function(...,
ldData = sumstatQcInput$LD_data,
keepIndel = keepIndel,
pipCutoffToSkip = .cbDefault(pipCutoffToSkipSumstat, 0),
qcMethod = if (is.null(qcMethod)) "none" else qcMethod,
zMismatchQc = if (is.null(zMismatchQc)) "none" else zMismatchQc,
impute = impute,
imputeOpts = imputeOpts
)
Expand Down Expand Up @@ -352,7 +352,7 @@ colocboostAnalysis <- function(...,
#' @param mafCutoff A scalar to remove variants with maf < mafCutoff, dafault is 0.005.
#' @param pipCutoffToSkipInd A vector of cutoff values for skipping analysis based on PIP values for each context. Default is 0.
#' @param pipCutoffToSkipSumstat A vector of cutoff values for skipping analysis based on PIP values for each sumstat Default is 0.
#' @param qcMethod Quality control method to use. Options are "none",
#' @param zMismatchQc Quality control method to use. Options are "none",
#' "slalom", or "dentist". \code{NULL} is treated as \code{"none"} for
#' basic-only summary-stat preprocessing.
#' @param impute Logical; if TRUE, performs imputation for outliers identified in the analysis (default: TRUE).
Expand Down Expand Up @@ -388,7 +388,7 @@ colocboostPipeline <- function(
# - sumstat QC
keepIndel = TRUE,
pipCutoffToSkipSumstat = 0,
qcMethod = NULL,
zMismatchQc = NULL,
impute = TRUE,
imputeOpts = list(
rcond = 0.01, R2_threshold = 0.6,
Expand Down Expand Up @@ -512,7 +512,7 @@ colocboostPipeline <- function(
}

####### ========= resolve defaults ======== #######
qcMethod <- .resolveSummaryQcMethod(qcMethod)
zMismatchQc <- .resolveZMismatchQc(zMismatchQc)

####### ========= initial output results before QC ======== #######
analysisResults <- list("xqtl_coloc" = NULL, "joint_gwas" = NULL, "separate_gwas" = NULL)
Expand Down Expand Up @@ -589,7 +589,7 @@ colocboostPipeline <- function(
pipCutoffToSkipInd = pipCutoffToSkipInd,
keepIndel = keepIndel,
pipCutoffToSkipSumstat = pipCutoffToSkipSumstat,
qcMethod = qcMethod,
zMismatchQc = zMismatchQc,
impute = impute,
imputeOpts = imputeOpts
)
Expand Down Expand Up @@ -681,7 +681,7 @@ colocboostPipeline <- function(
#' @param keepIndel Logical; if \code{FALSE}, remove indel variants during
#' summary-statistic allele harmonization.
#' @param pipCutoffToSkipSumstat A vector of cutoff values for skipping summary-stat studies.
#' @param qcMethod Quality control method to use. Options are "none",
#' @param zMismatchQc Quality control method to use. Options are "none",
#' "slalom", or "dentist". \code{NULL} is treated as \code{"none"} for
#' basic-only summary-stat preprocessing.
#' @param impute Logical; if TRUE, performs imputation when required metadata are available.
Expand All @@ -695,10 +695,10 @@ qcRegionalData <- function(regionData,
# - sumstat
keepIndel = TRUE,
pipCutoffToSkipSumstat = 0,
qcMethod = NULL,
zMismatchQc = NULL,
impute = FALSE,
imputeOpts = list(rcond = 0.01, R2_threshold = 0.6, minimum_ld = 5, lamb = 0.01)) {
qcMethod <- .resolveSummaryQcMethod(qcMethod)
zMismatchQc <- .resolveZMismatchQc(zMismatchQc)
qcedIndividualToRegionData <- function(indQc) {
if (is.null(indQc) || length(indQc) == 0) return(NULL)
list(
Expand Down Expand Up @@ -770,7 +770,7 @@ qcRegionalData <- function(regionData,
ldData = rssInput$LD_data,
keepIndel = keepIndel,
pipCutoffToSkip = pipCutoffToSkipSumstat,
qcMethod = qcMethod,
zMismatchQc = zMismatchQc,
impute = impute,
imputeOpts = imputeOpts
)
Expand Down
8 changes: 5 additions & 3 deletions R/encoloc.R
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,9 @@ processColocResults <- function(colocResult, ldMetaFilePath, analysisRegion, pph
#' @param nCase Number of cases for binary traits.
#' @param nControl Number of controls for binary traits.
#' @param region Genomic region string (e.g., "chr1:1000-2000").
#' @param qcMethod QC method: "slalom", "dentist", or "none". Default "slalom".
#' @param zMismatchQc Z-score / LD-mismatch QC selector forwarded to
#' \code{\link{rssAnalysisPipeline}}: "slalom", "dentist", or "none".
#' Default "slalom". (Hard rename of the former \code{zMismatchQc}; no alias.)
#' @param finemappingMethod Fine-mapping method. Default "susie_rss".
#' @param finemappingOpts List of fine-mapping options passed to
#' \code{\link{rssAnalysisPipeline}}.
Expand Down Expand Up @@ -372,7 +374,7 @@ colocWrapper <- function(xqtlFile, gwasFiles = NULL,
ldData = NULL,
nSample = 0, nCase = 0, nControl = 0,
region = NULL,
qcMethod = "slalom",
zMismatchQc = "slalom",
finemappingMethod = "susie_rss",
finemappingOpts = list(
L = 20, L_greedy = 5,
Expand Down Expand Up @@ -408,7 +410,7 @@ colocWrapper <- function(xqtlFile, gwasFiles = NULL,
ldData = ldData,
nSample = nSample, nCase = nCase, nControl = nControl,
region = region,
qcMethod = qcMethod, finemappingMethod = finemappingMethod,
zMismatchQc = zMismatchQc, finemappingMethod = finemappingMethod,
finemappingOpts = finemappingOpts,
impute = impute, imputeOpts = imputeOpts
)
Expand Down
Loading