Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
34091aa
Bump codecov/codecov-action to v7
remlapmot Jun 9, 2026
080bf5e
Fix copy-paste bug in ldsc_rg()
remlapmot Jun 10, 2026
febe9ea
Bump version
remlapmot Jun 10, 2026
ab8b1af
Qualify bare aes() call as ggplot2::aes()
remlapmot Jun 10, 2026
5c1a796
Use isTRUE(all.equal()) instead of all.equal() == TRUE inside ifelse()
remlapmot Jun 10, 2026
5e088be
Fix extract_split() chunking
remlapmot Jun 10, 2026
ddd2a15
Fix penalised mode in mr_mode() for vector-valued phi
remlapmot Jun 10, 2026
f4a52c1
Fix row order restoration in add_metadata()
remlapmot Jun 10, 2026
816ee1d
Collapse duplicated OR condition in extract_outcome_data_internal()
remlapmot Jun 10, 2026
fe7e6b3
Remove dead TAUsq accumulation in PM() in rucker.R
remlapmot Jun 10, 2026
0c771c8
Simplify group Index assignment in forest_plot_1_to_many() using Lett…
remlapmot Jun 10, 2026
0ee2d45
Fix error message typo in combine_data() ("or of type" to "are of typ…
remlapmot Jun 10, 2026
49aed50
Replace superseded tidyr::gather() with tidyr::pivot_longer()
remlapmot Jun 10, 2026
45f5fb2
jarl check . --fix
remlapmot Jun 10, 2026
ff7b9f9
Use || instead of | in scalar if condition in mr_scatter_plot()
remlapmot Jun 10, 2026
ed5a583
Use inherits() instead of class()
remlapmot Jun 10, 2026
4dec4e0
Hoist assignments out of expect_no_error() calls in test_plots.R to r…
remlapmot Jun 10, 2026
7c37bcf
Drop unused p9 assignment inside capture.output()
remlapmot Jun 10, 2026
edb5ae8
Fix data.table subsetting broken by jarl --fix
remlapmot Jun 10, 2026
f93ddfb
Update NEWS.md
remlapmot Jun 10, 2026
83e850f
Fix two alignment bugs introduced by jarl --fix
remlapmot Jun 10, 2026
4c6cbca
Harden proxies validation in extract_outcome_data_internal()
remlapmot Jun 10, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
covr::to_cobertura(cov)
shell: Rscript {0}

- uses: codecov/codecov-action@v6
- uses: codecov/codecov-action@v7
with:
# Fail if error if not on PR, or if on PR and token is given
fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: TwoSampleMR
Title: Two Sample MR Functions and Interface to MRC Integrative
Epidemiology Unit OpenGWAS Database
Version: 0.7.7
Version: 0.7.8
Authors@R: c(
person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
comment = c(ORCID = "0000-0003-0920-1055")),
Expand Down
17 changes: 17 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# TwoSampleMR v0.7.8

(Release date 2026-06-10)

* Fixed a copy-paste bug in `ldsc_rg()` which passed `d1$l2` instead of `d2$l2` as the weight vector to the trait-2 `ldsc_h2_internal()` call
* Fixed `extract_split()` chunking which gave `nsplit = 0` for SNP lists smaller than half the `splitsize`; now uses `ceiling()` with contiguous chunk ids
* Fixed the penalised mode in `mr_mode()` for vector-valued `phi`: each SNP is now penalised against the weighted-mode estimate for the same `phi` rather than recycling the mode vector across SNPs (results unchanged for the default scalar `phi = 1`)
* Fixed row order restoration in `add_metadata()` to use `dat[order(dat[[order_col]]), ]`
* Qualified a bare `aes()` call as `ggplot2::aes()` in `test_r_from_pn()`
* Used `isTRUE(all.equal())` instead of `all.equal() == TRUE` inside `ifelse()` for null-line detection in the forest plot functions, which could otherwise return a multi-element vector
* Simplified the group `Index` assignment in `forest_plot_1_to_many()` using `Letters[match()]`, which is also robust to non-contiguous group rows
* Collapsed a duplicated `||` condition in `extract_outcome_data_internal()`
* Removed dead `TAUsq` accumulation in `PM()` in `rucker.R`
* Fixed an error message typo in `combine_data()` and switched its `sapply()` to `vapply()`
* Replaced superseded `tidyr::gather()` with `tidyr::pivot_longer()` in `test_r_from_pn()` and dropped a redundant `requireNamespace("tidyr")` guard
* Lint package with Jarl

# TwoSampleMR v0.7.7

(Release date 2026-06-07)
Expand Down
2 changes: 1 addition & 1 deletion R/add_metadata.r
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit
names(dat)[names(dat) == "unit.exposure"] <- "units.exposure"
names(dat)[names(dat) == "unit.outcome"] <- "units.outcome"

dat <- dat[dat[[order_col]], ]
dat <- dat[order(dat[[order_col]]), ]
dat <- dat[, !names(dat) %in% order_col]
# dat <- fix_ukb_d(dat)
return(dat)
Expand Down
18 changes: 8 additions & 10 deletions R/add_rsq.r
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ add_rsq_one <- function(dat, what = "exposure") {
message("Try adding metadata with add_metadata()")
}
} else if (
all(grepl("SD", dat[[paste0("units.", what)]])) && all(!is.na(dat[[paste0("eaf.", what)]]))
all(grepl("SD", dat[[paste0("units.", what)]])) && !anyNA(dat[[paste0("eaf.", what)]])
) {
dat[[paste0("rsq.", what)]] <- NA
dat[[paste0("rsq.", what)]] <- 2 *
Expand Down Expand Up @@ -115,13 +115,6 @@ get_r_from_pn_less_accurate <- function(p, n) {
}

test_r_from_pn <- function() {
if (!requireNamespace("tidyr", quietly = TRUE)) {
stop(
"Package \"tidyr\" must be installed to use this function.",
call. = FALSE
)
}

param <- expand.grid(
n = c(10, 100, 1000, 10000, 100000),
rsq = 10^seq(-4, -0.5, length.out = 30)
Expand All @@ -137,11 +130,16 @@ test_r_from_pn <- function() {
param$rsq2[i] <- (get_r_from_pn(param$pval[i], param$n[i]))^2
}

param <- tidyr::gather(param, key = out, value = value, rsq1, rsq2)
param <- tidyr::pivot_longer(
param,
cols = c("rsq1", "rsq2"),
names_to = "out",
values_to = "value"
)

p <- ggplot2::ggplot(param, ggplot2::aes(x = rsq_emp, value)) +
ggplot2::geom_abline(slope = 1, linetype = "dotted") +
ggplot2::geom_line(aes(colour = out)) +
ggplot2::geom_line(ggplot2::aes(colour = out)) +
ggplot2::facet_grid(. ~ n) +
ggplot2::scale_x_log10() +
ggplot2::scale_y_log10()
Expand Down
4 changes: 2 additions & 2 deletions R/forest_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ mr_forest_plot_grouped <-
}

## act on debug flag
if (debug == FALSE) {
if (!debug) {
options(warn = -1)
}

Expand Down Expand Up @@ -372,7 +372,7 @@ mr_forest_plot_grouped <-
## draw and export the annotated forest plot
grid.newpage()
grid.draw(group)
if (returnRobj == FALSE) {
if (!returnRobj) {
grDevices::pdf(outfile_Name, width = 23.4, height = 16.5)
grid.draw(group)
grDevices::dev.off()
Expand Down
4 changes: 2 additions & 2 deletions R/forest_plot2.R
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ forest_plot_basic <- function(
# OR or log(OR)?
# If CI are symmetric then log(OR)
# Use this to guess where to put the null line
null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1

# Change lab
if (!is.null(xlim)) {
Expand Down Expand Up @@ -435,7 +435,7 @@ forest_plot_names <- function(dat, section = NULL, bottom = TRUE) {
# OR or log(OR)?
# If CI are symmetric then log(OR)
# Use this to guess where to put the null line
null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1

# up <- max(dat$up_ci, na.rm=TRUE)
# lo <- min(dat$lo_ci, na.rm=TRUE)
Expand Down
12 changes: 6 additions & 6 deletions R/forest_plot_1-to-many.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,9 @@ sort_1_to_many <- function(
)
Letters <- sort(c(paste0("A", Letters), paste0("B", Letters), paste0("C", Letters)))
groups <- unique(mr_res[, group])
mr_res$Index <- unlist(lapply(seq_along(unique(mr_res[, group])), FUN = function(x) {
rep(Letters[Letters == Letters[x]], length(which(mr_res[, group] == groups[x])))
}))
# Assign each row the letter code for its group; match() makes this robust
# to groups whose rows are not contiguous in mr_res.
mr_res$Index <- Letters[match(mr_res[, group], groups)]
mr_res <- mr_res[order(mr_res[, b], decreasing = TRUE), ]
mr_res$Index2 <- Letters[seq_len(nrow(mr_res))]
mr_res$Index3 <- paste(mr_res$Index, mr_res$Index2, sep = "")
Expand Down Expand Up @@ -329,7 +329,7 @@ forest_plot_basic2 <- function(
# OR or log(OR)?
# If CI are symmetric then log(OR)
# Use this to guess where to put the null line
null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1

# Change lab
if (!is.null(xlim)) {
Expand Down Expand Up @@ -565,7 +565,7 @@ forest_plot_names2 <- function(
# OR or log(OR)?
# If CI are symmetric then log(OR)
# Use this to guess where to put the null line
null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1

# up <- max(dat$up_ci, na.rm=TRUE)
# lo <- min(dat$lo_ci, na.rm=TRUE)
Expand Down Expand Up @@ -683,7 +683,7 @@ forest_plot_addcol <- function(
# OR or log(OR)?
# If CI are symmetric then log(OR)
# Use this to guess where to put the null line
null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1

lo <- 0
up <- 1
Expand Down
22 changes: 13 additions & 9 deletions R/format_mr_results2.R
Original file line number Diff line number Diff line change
Expand Up @@ -306,26 +306,30 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
if (is.null(ncase)) {
ncase <- NA
}
if (any(is.na(ncase))) {
if (anyNA(ncase)) {
ncase <- dat1$samplesize.outcome
if (dist.outcome == "binary") {
warning(paste(
"dist.outcome set to binary but case sample size is missing. Will use total sample size instead but power pruning may be less accurate"
))
}
}
if (any(is.na(ncase))) {
if (anyNA(ncase)) {
stop("sample size missing for at least 1 summary set")
}
dat1 <- dat1[order(ncase, decreasing = TRUE), ]
# Permute dat1 and ncase identically; sort() would drop NA values and
# misalign ncase from dat1's rows.
ncase_ord <- order(ncase, decreasing = TRUE)
dat1 <- dat1[ncase_ord, ]
# id.expout<-paste(split_exposure(dat)$exposure,split_outcome(dat)$outcome)
ncase <- ncase[order(ncase, decreasing = TRUE)]
ncase <- ncase[ncase_ord]
# dat1$power.prune.ncase<-"drop"
# dat1$power.prune.ncase[ncase==ncase[1]]<-"keep"
dat1 <- dat1[ncase == ncase[1], ]
nexp <- dat1$samplesize.exposure
dat1 <- dat1[order(nexp, decreasing = TRUE), ]
nexp <- nexp[order(nexp, decreasing = TRUE)]
nexp_ord <- order(nexp, decreasing = TRUE)
dat1 <- dat1[nexp_ord, ]
nexp <- nexp[nexp_ord]
# dat1$power.prune.nexp<-"drop"
# dat1$power.prune.nexp[nexp==nexp[1]]<-"keep"
# dat1$power.prune<-"drop"
Expand Down Expand Up @@ -365,14 +369,14 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
z <- dat2$beta.exposure / dat2$se.exposure
n <- dat2$samplesize.exposure
b <- z / sqrt(2 * p * (1 - p) * (n + z^2))
if (any(is.na(dat2$ncase.outcome))) {
if (anyNA(dat2$ncase.outcome)) {
stop(paste("number of cases missing for summary set: ", id.subset.unique[j], sep = ""))
}
n.cas <- dat2$ncase.outcome
n.con <- dat2$ncontrol.outcome
var <- 1 # variance of risk factor assumed to be 1
r2 <- 2 * b^2 * p * (1 - p) / var
if (any(is.na(r2))) {
if (anyNA(r2)) {
warning(
"beta or allele frequency missing for some SNPs, which could affect accuracy of power pruning"
)
Expand All @@ -386,7 +390,7 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
iv.se <- 1 / sqrt(mean(dat2$samplesize.outcome, na.rm = TRUE) * r2sum) #standard error of the IV should be proportional to this
}
if (dist.outcome == "binary") {
if (any(is.na(n.cas)) || any(is.na(n.con))) {
if (anyNA(n.cas) || anyNA(n.con)) {
warning(
"dist.outcome set to binary but number of cases or controls is missing. Will try using total sample size instead but power pruning will be less accurate"
)
Expand Down
7 changes: 4 additions & 3 deletions R/ldsc.r
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ ldsc_rg <- function(id1, id2, ancestry = "infer", snpinfo = NULL, splitsize = 20
dplyr::inner_join(., d2, by = "rsid")

h1 <- ldsc_h2_internal(d1$z1, d1$l2, d1$n1, d1$l2)
h2 <- ldsc_h2_internal(d2$z2, d2$l2, d2$n2, d1$l2)
h2 <- ldsc_h2_internal(d2$z2, d2$l2, d2$n2, d2$l2)

dat <- dplyr::inner_join(d1, d2, by = "rsid") %>%
dplyr::mutate(
Expand Down Expand Up @@ -222,8 +222,9 @@ ldsc_rg <- function(id1, id2, ancestry = "infer", snpinfo = NULL, splitsize = 20


extract_split <- function(snplist, id, splitsize = 20000) {
nsplit <- round(length(snplist) / splitsize)
split(snplist, 1:nsplit) %>%
n <- length(snplist)
chunk_id <- rep(seq_len(ceiling(n / splitsize)), each = splitsize)[seq_len(n)]
split(snplist, chunk_id) %>%
pbapply::pblapply(., function(x) {
ieugwasr::associations(x, id, proxies = FALSE, x_api_source = x_api_source_header())
}) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/leaveoneout.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
exp_id <- combos$id.exposure[i]
out_id <- combos$id.outcome[i]
X <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
x <- X[mr_keep == TRUE]
x <- X[X$mr_keep]
nsnp <- nrow(x)
if (nsnp == 0) {
x <- X[1, ]
Expand Down
9 changes: 6 additions & 3 deletions R/mr.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ mr <- function(
exp_id <- combos$id.exposure[i]
out_id <- combos$id.outcome[i]
x1 <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
x <- x1[mr_keep == TRUE]
x <- x1[x1$mr_keep]

if (nrow(x) == 0) {
message(
Expand Down Expand Up @@ -818,8 +818,11 @@ mr_simple_median <- function(b_exp, b_out, se_exp, se_out, parameters = default_
#' @export
#' @return MR estimate
weighted_median <- function(b_iv, weights) {
betaIV.order <- b_iv[order(b_iv)]
weights.order <- weights[order(b_iv)]
# Order b_iv and weights by the same permutation so they stay aligned; sort()
# would drop NA b_iv values and misalign the two vectors.
ord <- order(b_iv)
betaIV.order <- b_iv[ord]
weights.order <- weights[ord]
weights.sum <- cumsum(weights.order) - 0.5 * weights.order
weights.sum <- weights.sum / sum(weights.order)
below <- max(which(weights.sum < 0.5))
Expand Down
44 changes: 29 additions & 15 deletions R/mr_mode.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,20 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
phi = phi
)
#Penalised mode, not assuming NOME
penalty <- stats::pchisq(
weights * (BetaIV.boot - beta.boot[i, (nphi + 1):(2 * nphi)])^2,
df = 1,
lower.tail = FALSE
)
pen.weights <- weights * pmin(1, penalty * parameters$penk)

beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- beta(
BetaIV.in = BetaIV.boot,
seBetaIV.in = sqrt(1 / pen.weights),
phi = phi
#Penalise each SNP against the weighted-mode estimate for the same phi
#value, so the per-SNP penalty is not recycled across phi when nphi > 1.
beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- vapply(
seq_len(nphi),
function(j) {
penalty <- stats::pchisq(
weights * (BetaIV.boot - beta.boot[i, nphi + j])^2,
df = 1,
lower.tail = FALSE
)
pen.weights <- weights * pmin(1, penalty * parameters$penk)
beta(BetaIV.in = BetaIV.boot, seBetaIV.in = sqrt(1 / pen.weights), phi = phi[j])
},
numeric(1)
)

#Simple mode, assuming NOME
Expand Down Expand Up @@ -144,10 +147,21 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
#Point causal effect estimate using the weighted mode (not asusming NOME)
beta_WeightedMode <- beta(BetaIV.in = BetaIV, seBetaIV.in = seBetaIV[, 1], phi = phi)
weights <- 1 / seBetaIV[, 1]^2
penalty <- stats::pchisq(weights * (BetaIV - beta_WeightedMode)^2, df = 1, lower.tail = FALSE)
pen.weights <- weights * pmin(1, penalty * parameters$penk) # penalized

beta_PenalisedMode <- beta(BetaIV.in = BetaIV, seBetaIV.in = sqrt(1 / pen.weights), phi = phi)
# Penalise each SNP against the weighted-mode estimate for the same phi value,
# so the per-SNP penalty is not recycled across phi when length(phi) > 1.
beta_PenalisedMode <- vapply(
seq_along(phi),
function(j) {
penalty <- stats::pchisq(
weights * (BetaIV - beta_WeightedMode[j])^2,
df = 1,
lower.tail = FALSE
)
pen.weights <- weights * pmin(1, penalty * parameters$penk) # penalized
beta(BetaIV.in = BetaIV, seBetaIV.in = sqrt(1 / pen.weights), phi = phi[j])
},
numeric(1)
)

#Point causal effect estimate using the weighted mode (asusming NOME)
beta_WeightedMode_NOME <- beta(BetaIV.in = BetaIV, seBetaIV.in = seBetaIV[, 2], phi = phi)
Expand Down
4 changes: 2 additions & 2 deletions R/other_formats.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ harmonise_ld_dat <- function(x, ld) {
return(NULL)
}

if (any(!snpnames$keep)) {
if (!all(snpnames$keep)) {
message(
" - the following SNPs could not be aligned to the LD reference panel: \n- ",
paste(subset(snpnames, !keep)$SNP, collapse = "\n - ")
Expand All @@ -123,7 +123,7 @@ harmonise_ld_dat <- function(x, ld) {
rownames(ld) <- snpnames$SNP
colnames(ld) <- snpnames$SNP

if (any(!snpnames$keep)) {
if (!all(snpnames$keep)) {
message("Removing ", sum(!snpnames$keep), " variants due to harmonisation issues")
ld <- ld[snpnames$keep, snpnames$keep]
x <- x[snpnames$keep, ]
Expand Down
13 changes: 5 additions & 8 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,18 +115,15 @@ extract_outcome_data_internal <- function(
)
outcomes <- unique(outcomes)

if (proxies == FALSE) {
if (proxies %in% c(FALSE, 0)) {
proxies <- 0
} else if (proxies == TRUE) {
} else if (proxies %in% c(TRUE, 1)) {
proxies <- 1
} else {
stop("'proxies' argument should be TRUE or FALSE")
}

if (
(length(snps) < splitsize && length(outcomes) < splitsize) ||
(length(outcomes) < splitsize && length(snps) < splitsize)
) {
if (length(snps) < splitsize && length(outcomes) < splitsize) {
d <- ieugwasr::associations(
variants = snps,
id = outcomes,
Expand Down Expand Up @@ -318,14 +315,14 @@ format_d <- function(d) {

mrcols <- c("beta.outcome", "se.outcome", "effect_allele.outcome")
d$mr_keep.outcome <- complete.cases(d[, mrcols])
if (any(!d$mr_keep.outcome)) {
if (!all(d$mr_keep.outcome)) {
missinginfosnps <- paste(subset(d, !mr_keep.outcome)$SNP, collapse = " ")
warning(
"The following SNP(s) are missing required information for the MR tests and will be excluded: ",
missinginfosnps
)
}
if (all(!d$mr_keep.outcome)) {
if (!any(d$mr_keep.outcome)) {
warning(
"None of the provided SNPs can be used for MR analysis, they are missing required information."
)
Expand Down
Loading