MRCIEU · remlapmot · Jun 10, 2026 · Jun 9, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -49,7 +49,7 @@ jobs:
           covr::to_cobertura(cov)
         shell: Rscript {0}
 
-      - uses: codecov/codecov-action@v6
+      - uses: codecov/codecov-action@v7
         with:
           # Fail if error if not on PR, or if on PR and token is given
           fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: TwoSampleMR
 Title: Two Sample MR Functions and Interface to MRC Integrative
     Epidemiology Unit OpenGWAS Database
-Version: 0.7.7
+Version: 0.7.8
 Authors@R: c(
     person("Gibran", "Hemani", , "g.hemani@bristol.ac.uk", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0003-0920-1055")),

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,20 @@
+# TwoSampleMR v0.7.8
+
+(Release date 2026-06-10)
+
+* Fixed a copy-paste bug in `ldsc_rg()` which passed `d1$l2` instead of `d2$l2` as the weight vector to the trait-2 `ldsc_h2_internal()` call
+* Fixed `extract_split()` chunking which gave `nsplit = 0` for SNP lists smaller than half the `splitsize`; now uses `ceiling()` with contiguous chunk ids
+* Fixed the penalised mode in `mr_mode()` for vector-valued `phi`: each SNP is now penalised against the weighted-mode estimate for the same `phi` rather than recycling the mode vector across SNPs (results unchanged for the default scalar `phi = 1`)
+* Fixed row order restoration in `add_metadata()` to use `dat[order(dat[[order_col]]), ]`
+* Qualified a bare `aes()` call as `ggplot2::aes()` in `test_r_from_pn()`
+* Used `isTRUE(all.equal())` instead of `all.equal() == TRUE` inside `ifelse()` for null-line detection in the forest plot functions, which could otherwise return a multi-element vector
+* Simplified the group `Index` assignment in `forest_plot_1_to_many()` using `Letters[match()]`, which is also robust to non-contiguous group rows
+* Collapsed a duplicated `||` condition in `extract_outcome_data_internal()`
+* Removed dead `TAUsq` accumulation in `PM()` in `rucker.R`
+* Fixed an error message typo in `combine_data()` and switched its `sapply()` to `vapply()`
+* Replaced superseded `tidyr::gather()` with `tidyr::pivot_longer()` in `test_r_from_pn()` and dropped a redundant `requireNamespace("tidyr")` guard
+* Lint package with Jarl
+
 # TwoSampleMR v0.7.7
 
 (Release date 2026-06-07)

diff --git a/R/add_metadata.r b/R/add_metadata.r
@@ -75,7 +75,7 @@ add_metadata <- function(dat, cols = c("sample_size", "ncase", "ncontrol", "unit
   names(dat)[names(dat) == "unit.exposure"] <- "units.exposure"
   names(dat)[names(dat) == "unit.outcome"] <- "units.outcome"
 
-  dat <- dat[dat[[order_col]], ]
+  dat <- dat[order(dat[[order_col]]), ]
   dat <- dat[, !names(dat) %in% order_col]
   # dat <- fix_ukb_d(dat)
   return(dat)

diff --git a/R/add_rsq.r b/R/add_rsq.r
@@ -73,7 +73,7 @@ add_rsq_one <- function(dat, what = "exposure") {
         message("Try adding metadata with add_metadata()")
       }
     } else if (
-      all(grepl("SD", dat[[paste0("units.", what)]])) && all(!is.na(dat[[paste0("eaf.", what)]]))
+      all(grepl("SD", dat[[paste0("units.", what)]])) && !anyNA(dat[[paste0("eaf.", what)]])
     ) {
       dat[[paste0("rsq.", what)]] <- NA
       dat[[paste0("rsq.", what)]] <- 2 *
@@ -115,13 +115,6 @@ get_r_from_pn_less_accurate <- function(p, n) {
 }
 
 test_r_from_pn <- function() {
-  if (!requireNamespace("tidyr", quietly = TRUE)) {
-    stop(
-      "Package \"tidyr\" must be installed to use this function.",
-      call. = FALSE
-    )
-  }
-
   param <- expand.grid(
     n = c(10, 100, 1000, 10000, 100000),
     rsq = 10^seq(-4, -0.5, length.out = 30)
@@ -137,11 +130,16 @@ test_r_from_pn <- function() {
     param$rsq2[i] <- (get_r_from_pn(param$pval[i], param$n[i]))^2
   }
 
-  param <- tidyr::gather(param, key = out, value = value, rsq1, rsq2)
+  param <- tidyr::pivot_longer(
+    param,
+    cols = c("rsq1", "rsq2"),
+    names_to = "out",
+    values_to = "value"
+  )
 
   p <- ggplot2::ggplot(param, ggplot2::aes(x = rsq_emp, value)) +
     ggplot2::geom_abline(slope = 1, linetype = "dotted") +
-    ggplot2::geom_line(aes(colour = out)) +
+    ggplot2::geom_line(ggplot2::aes(colour = out)) +
     ggplot2::facet_grid(. ~ n) +
     ggplot2::scale_x_log10() +
     ggplot2::scale_y_log10()

diff --git a/R/forest_plot.R b/R/forest_plot.R
@@ -323,7 +323,7 @@ mr_forest_plot_grouped <-
     }
 
     ## act on debug flag
-    if (debug == FALSE) {
+    if (!debug) {
       options(warn = -1)
     }
 
@@ -372,7 +372,7 @@ mr_forest_plot_grouped <-
     ## draw and export the annotated forest plot
     grid.newpage()
     grid.draw(group)
-    if (returnRobj == FALSE) {
+    if (!returnRobj) {
       grDevices::pdf(outfile_Name, width = 23.4, height = 16.5)
       grid.draw(group)
       grDevices::dev.off()

diff --git a/R/forest_plot2.R b/R/forest_plot2.R
@@ -242,7 +242,7 @@ forest_plot_basic <- function(
   # OR or log(OR)?
   # If CI are symmetric then log(OR)
   # Use this to guess where to put the null line
-  null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
+  null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1
 
   # Change lab
   if (!is.null(xlim)) {
@@ -435,7 +435,7 @@ forest_plot_names <- function(dat, section = NULL, bottom = TRUE) {
   # OR or log(OR)?
   # If CI are symmetric then log(OR)
   # Use this to guess where to put the null line
-  null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
+  null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1
 
   # up <- max(dat$up_ci, na.rm=TRUE)
   # lo <- min(dat$lo_ci, na.rm=TRUE)

diff --git a/R/forest_plot_1-to-many.R b/R/forest_plot_1-to-many.R
@@ -213,9 +213,9 @@ sort_1_to_many <- function(
     )
     Letters <- sort(c(paste0("A", Letters), paste0("B", Letters), paste0("C", Letters)))
     groups <- unique(mr_res[, group])
-    mr_res$Index <- unlist(lapply(seq_along(unique(mr_res[, group])), FUN = function(x) {
-      rep(Letters[Letters == Letters[x]], length(which(mr_res[, group] == groups[x])))
-    }))
+    # Assign each row the letter code for its group; match() makes this robust
+    # to groups whose rows are not contiguous in mr_res.
+    mr_res$Index <- Letters[match(mr_res[, group], groups)]
     mr_res <- mr_res[order(mr_res[, b], decreasing = TRUE), ]
     mr_res$Index2 <- Letters[seq_len(nrow(mr_res))]
     mr_res$Index3 <- paste(mr_res$Index, mr_res$Index2, sep = "")
@@ -329,7 +329,7 @@ forest_plot_basic2 <- function(
   # OR or log(OR)?
   # If CI are symmetric then log(OR)
   # Use this to guess where to put the null line
-  null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
+  null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1
 
   # Change lab
   if (!is.null(xlim)) {
@@ -565,7 +565,7 @@ forest_plot_names2 <- function(
   # OR or log(OR)?
   # If CI are symmetric then log(OR)
   # Use this to guess where to put the null line
-  null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
+  null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1
 
   # up <- max(dat$up_ci, na.rm=TRUE)
   # lo <- min(dat$lo_ci, na.rm=TRUE)
@@ -683,7 +683,7 @@ forest_plot_addcol <- function(
   # OR or log(OR)?
   # If CI are symmetric then log(OR)
   # Use this to guess where to put the null line
-  null_line <- ifelse(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect) == TRUE, 0, 1)
+  null_line <- if (isTRUE(all.equal(dat$effect - dat$lo_ci, dat$up_ci - dat$effect))) 0 else 1
 
   lo <- 0
   up <- 1

diff --git a/R/format_mr_results2.R b/R/format_mr_results2.R
@@ -306,26 +306,30 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
       if (is.null(ncase)) {
         ncase <- NA
       }
-      if (any(is.na(ncase))) {
+      if (anyNA(ncase)) {
         ncase <- dat1$samplesize.outcome
         if (dist.outcome == "binary") {
           warning(paste(
             "dist.outcome set to binary but case sample size is missing. Will use total sample size instead but power pruning may be less accurate"
           ))
         }
       }
-      if (any(is.na(ncase))) {
+      if (anyNA(ncase)) {
         stop("sample size missing for at least 1 summary set")
       }
-      dat1 <- dat1[order(ncase, decreasing = TRUE), ]
+      # Permute dat1 and ncase identically; sort() would drop NA values and
+      # misalign ncase from dat1's rows.
+      ncase_ord <- order(ncase, decreasing = TRUE)
+      dat1 <- dat1[ncase_ord, ]
       # id.expout<-paste(split_exposure(dat)$exposure,split_outcome(dat)$outcome)
-      ncase <- ncase[order(ncase, decreasing = TRUE)]
+      ncase <- ncase[ncase_ord]
       # dat1$power.prune.ncase<-"drop"
       # dat1$power.prune.ncase[ncase==ncase[1]]<-"keep"
       dat1 <- dat1[ncase == ncase[1], ]
       nexp <- dat1$samplesize.exposure
-      dat1 <- dat1[order(nexp, decreasing = TRUE), ]
-      nexp <- nexp[order(nexp, decreasing = TRUE)]
+      nexp_ord <- order(nexp, decreasing = TRUE)
+      dat1 <- dat1[nexp_ord, ]
+      nexp <- nexp[nexp_ord]
       # dat1$power.prune.nexp<-"drop"
       # dat1$power.prune.nexp[nexp==nexp[1]]<-"keep"
       # dat1$power.prune<-"drop"
@@ -365,14 +369,14 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
         z <- dat2$beta.exposure / dat2$se.exposure
         n <- dat2$samplesize.exposure
         b <- z / sqrt(2 * p * (1 - p) * (n + z^2))
-        if (any(is.na(dat2$ncase.outcome))) {
+        if (anyNA(dat2$ncase.outcome)) {
           stop(paste("number of cases missing for summary set: ", id.subset.unique[j], sep = ""))
         }
         n.cas <- dat2$ncase.outcome
         n.con <- dat2$ncontrol.outcome
         var <- 1 # variance of risk factor assumed to be 1
         r2 <- 2 * b^2 * p * (1 - p) / var
-        if (any(is.na(r2))) {
+        if (anyNA(r2)) {
           warning(
             "beta or allele frequency missing for some SNPs, which could affect accuracy of power pruning"
           )
@@ -386,7 +390,7 @@ power_prune <- function(dat, method = 1, dist.outcome = "binary") {
           iv.se <- 1 / sqrt(mean(dat2$samplesize.outcome, na.rm = TRUE) * r2sum) #standard error of the IV should be proportional to this
         }
         if (dist.outcome == "binary") {
-          if (any(is.na(n.cas)) || any(is.na(n.con))) {
+          if (anyNA(n.cas) || anyNA(n.con)) {
             warning(
               "dist.outcome set to binary but number of cases or controls is missing. Will try using total sample size instead but power pruning will be less accurate"
             )

diff --git a/R/ldsc.r b/R/ldsc.r
@@ -191,7 +191,7 @@ ldsc_rg <- function(id1, id2, ancestry = "infer", snpinfo = NULL, splitsize = 20
     dplyr::inner_join(., d2, by = "rsid")
 
   h1 <- ldsc_h2_internal(d1$z1, d1$l2, d1$n1, d1$l2)
-  h2 <- ldsc_h2_internal(d2$z2, d2$l2, d2$n2, d1$l2)
+  h2 <- ldsc_h2_internal(d2$z2, d2$l2, d2$n2, d2$l2)
 
   dat <- dplyr::inner_join(d1, d2, by = "rsid") %>%
     dplyr::mutate(
@@ -222,8 +222,9 @@ ldsc_rg <- function(id1, id2, ancestry = "infer", snpinfo = NULL, splitsize = 20
 
 
 extract_split <- function(snplist, id, splitsize = 20000) {
-  nsplit <- round(length(snplist) / splitsize)
-  split(snplist, 1:nsplit) %>%
+  n <- length(snplist)
+  chunk_id <- rep(seq_len(ceiling(n / splitsize)), each = splitsize)[seq_len(n)]
+  split(snplist, chunk_id) %>%
     pbapply::pblapply(., function(x) {
       ieugwasr::associations(x, id, proxies = FALSE, x_api_source = x_api_source_header())
     }) %>%

diff --git a/R/leaveoneout.R b/R/leaveoneout.R
@@ -25,7 +25,7 @@ mr_leaveoneout <- function(dat, parameters = default_parameters(), method = mr_i
     exp_id <- combos$id.exposure[i]
     out_id <- combos$id.outcome[i]
     X <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
-    x <- X[mr_keep == TRUE]
+    x <- X[X$mr_keep]
     nsnp <- nrow(x)
     if (nsnp == 0) {
       x <- X[1, ]

diff --git a/R/mr.R b/R/mr.R
@@ -58,7 +58,7 @@ mr <- function(
     exp_id <- combos$id.exposure[i]
     out_id <- combos$id.outcome[i]
     x1 <- dat_dt[id.exposure == exp_id & id.outcome == out_id]
-    x <- x1[mr_keep == TRUE]
+    x <- x1[x1$mr_keep]
 
     if (nrow(x) == 0) {
       message(
@@ -818,8 +818,11 @@ mr_simple_median <- function(b_exp, b_out, se_exp, se_out, parameters = default_
 #' @export
 #' @return MR estimate
 weighted_median <- function(b_iv, weights) {
-  betaIV.order <- b_iv[order(b_iv)]
-  weights.order <- weights[order(b_iv)]
+  # Order b_iv and weights by the same permutation so they stay aligned; sort()
+  # would drop NA b_iv values and misalign the two vectors.
+  ord <- order(b_iv)
+  betaIV.order <- b_iv[ord]
+  weights.order <- weights[ord]
   weights.sum <- cumsum(weights.order) - 0.5 * weights.order
   weights.sum <- weights.sum / sum(weights.order)
   below <- max(which(weights.sum < 0.5))

diff --git a/R/mr_mode.R b/R/mr_mode.R
@@ -95,17 +95,20 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
         phi = phi
       )
       #Penalised mode, not assuming NOME
-      penalty <- stats::pchisq(
-        weights * (BetaIV.boot - beta.boot[i, (nphi + 1):(2 * nphi)])^2,
-        df = 1,
-        lower.tail = FALSE
-      )
-      pen.weights <- weights * pmin(1, penalty * parameters$penk)
-
-      beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- beta(
-        BetaIV.in = BetaIV.boot,
-        seBetaIV.in = sqrt(1 / pen.weights),
-        phi = phi
+      #Penalise each SNP against the weighted-mode estimate for the same phi
+      #value, so the per-SNP penalty is not recycled across phi when nphi > 1.
+      beta.boot[i, (2 * nphi + 1):(3 * nphi)] <- vapply(
+        seq_len(nphi),
+        function(j) {
+          penalty <- stats::pchisq(
+            weights * (BetaIV.boot - beta.boot[i, nphi + j])^2,
+            df = 1,
+            lower.tail = FALSE
+          )
+          pen.weights <- weights * pmin(1, penalty * parameters$penk)
+          beta(BetaIV.in = BetaIV.boot, seBetaIV.in = sqrt(1 / pen.weights), phi = phi[j])
+        },
+        numeric(1)
       )
 
       #Simple mode, assuming NOME
@@ -144,10 +147,21 @@ mr_mode <- function(dat, parameters = default_parameters(), mode_method = "all")
   #Point causal effect estimate using the weighted mode (not asusming NOME)
   beta_WeightedMode <- beta(BetaIV.in = BetaIV, seBetaIV.in = seBetaIV[, 1], phi = phi)
   weights <- 1 / seBetaIV[, 1]^2
-  penalty <- stats::pchisq(weights * (BetaIV - beta_WeightedMode)^2, df = 1, lower.tail = FALSE)
-  pen.weights <- weights * pmin(1, penalty * parameters$penk) # penalized
-
-  beta_PenalisedMode <- beta(BetaIV.in = BetaIV, seBetaIV.in = sqrt(1 / pen.weights), phi = phi)
+  # Penalise each SNP against the weighted-mode estimate for the same phi value,
+  # so the per-SNP penalty is not recycled across phi when length(phi) > 1.
+  beta_PenalisedMode <- vapply(
+    seq_along(phi),
+    function(j) {
+      penalty <- stats::pchisq(
+        weights * (BetaIV - beta_WeightedMode[j])^2,
+        df = 1,
+        lower.tail = FALSE
+      )
+      pen.weights <- weights * pmin(1, penalty * parameters$penk) # penalized
+      beta(BetaIV.in = BetaIV, seBetaIV.in = sqrt(1 / pen.weights), phi = phi[j])
+    },
+    numeric(1)
+  )
 
   #Point causal effect estimate using the weighted mode (asusming NOME)
   beta_WeightedMode_NOME <- beta(BetaIV.in = BetaIV, seBetaIV.in = seBetaIV[, 2], phi = phi)

diff --git a/R/other_formats.R b/R/other_formats.R
@@ -104,7 +104,7 @@ harmonise_ld_dat <- function(x, ld) {
     return(NULL)
   }
 
-  if (any(!snpnames$keep)) {
+  if (!all(snpnames$keep)) {
     message(
       " - the following SNPs could not be aligned to the LD reference panel: \n- ",
       paste(subset(snpnames, !keep)$SNP, collapse = "\n - ")
@@ -123,7 +123,7 @@ harmonise_ld_dat <- function(x, ld) {
   rownames(ld) <- snpnames$SNP
   colnames(ld) <- snpnames$SNP
 
-  if (any(!snpnames$keep)) {
+  if (!all(snpnames$keep)) {
     message("Removing ", sum(!snpnames$keep), " variants due to harmonisation issues")
     ld <- ld[snpnames$keep, snpnames$keep]
     x <- x[snpnames$keep, ]

diff --git a/R/query.R b/R/query.R
@@ -115,18 +115,15 @@ extract_outcome_data_internal <- function(
   )
   outcomes <- unique(outcomes)
 
-  if (proxies == FALSE) {
+  if (proxies %in% c(FALSE, 0)) {
     proxies <- 0
-  } else if (proxies == TRUE) {
+  } else if (proxies %in% c(TRUE, 1)) {
     proxies <- 1
   } else {
     stop("'proxies' argument should be TRUE or FALSE")
   }
 
-  if (
-    (length(snps) < splitsize && length(outcomes) < splitsize) ||
-      (length(outcomes) < splitsize && length(snps) < splitsize)
-  ) {
+  if (length(snps) < splitsize && length(outcomes) < splitsize) {
     d <- ieugwasr::associations(
       variants = snps,
       id = outcomes,
@@ -318,14 +315,14 @@ format_d <- function(d) {
 
   mrcols <- c("beta.outcome", "se.outcome", "effect_allele.outcome")
   d$mr_keep.outcome <- complete.cases(d[, mrcols])
-  if (any(!d$mr_keep.outcome)) {
+  if (!all(d$mr_keep.outcome)) {
     missinginfosnps <- paste(subset(d, !mr_keep.outcome)$SNP, collapse = " ")
     warning(
       "The following SNP(s) are missing required information for the MR tests and will be excluded: ",
       missinginfosnps
     )
   }
-  if (all(!d$mr_keep.outcome)) {
+  if (!any(d$mr_keep.outcome)) {
     warning(
       "None of the provided SNPs can be used for MR analysis, they are missing required information."
     )