diff --git a/NAMESPACE b/NAMESPACE index b0f050187..b0e9e6210 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,6 +57,9 @@ export(estimateDominance) export(estimateEvenness) export(estimateFaith) export(estimateRichness) +export(exportMothur) +export(exportQIIME2) +export(exportRaw) export(full_join) export(getAbundanceClass) export(getAbundant) @@ -413,6 +416,8 @@ importFrom(ape,is.binary) importFrom(ape,is.rooted) importFrom(ape,read.tree) importFrom(ape,reorder.phylo) +importFrom(ape,write.FASTA) +importFrom(ape,write.tree) importFrom(bluster,clusterRows) importFrom(decontam,isContaminant) importFrom(decontam,isNotContaminant) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 241789a42..5ea880f5d 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -105,6 +105,21 @@ setGeneric("convertToBIOM", signature = c("x"), setGeneric("convertToPhyloseq", signature = c("x"), function(x, ...) standardGeneric("convertToPhyloseq")) +#' @rdname export-methods +#' @export +setGeneric("exportRaw", signature = c("x"), function(x, ...) + standardGeneric("exportRaw")) + +#' @rdname export-methods +#' @export +setGeneric("exportQIIME2", signature = c("x"), function(x, ...) + standardGeneric("exportQIIME2")) + +#' @rdname export-methods +#' @export +setGeneric("exportMothur", signature = c("x"), function(x, ...) + standardGeneric("exportMothur")) + #' @rdname isContaminant #' @export setGeneric("addContaminantQC", signature = c("x"), diff --git a/R/exporters.R b/R/exporters.R new file mode 100644 index 000000000..4ff078b6b --- /dev/null +++ b/R/exporters.R @@ -0,0 +1,296 @@ +#' Exporters to common formats for microbiome data outside of R +#' +#' @description +#' There are a few very popular external tools for microbiome analysis, +#' including QIIME2 and mothur. However, R does not currently provide any class +#' to accommodate those data formats. When exporting data from mia to external +#' tools, the best approach is therefore to break a data container into its +#' building blocks (assays, side information, trees, etc.). +#' +#' Thanks to \code{exportRaw}, \code{exportQIIME2} and \code{exportMothur}, +#' it is now possible to export a +#' \code{\link[TreeSummarizedExperiment]{TreeSummarizedExperiment}} object as +#' raw elements or near-ready QIIME2 and mothur formats, respectively. This way, +#' migrating from mia to an external system is still a bad idea, but at least it +#' is fairly straightforward. +#' +#' @param x a \code{\link[TreeSummarizedExperiment]{TreeSummarizedExperiment}} +#' object. +#' +#' @param dpath \code{Character scalar}. +#' +#' @param rowdata.file \code{Character scalar}. (Default: \code{"rowdata"}) +#' +#' @param coldata.file \code{Character scalar}. (Default: \code{"coldata"}) +#' +#' @param assay.dir \code{Character scalar}. (Default: \code{"assays"}) +#' +#' @param rowtree.dir \code{Character scalar}. (Default: \code{"row_trees"}) +#' +#' @param coltree.dir \code{Character scalar}. (Default: \code{"col_trees"}) +#' +#' @param dimred.dir \code{Character scalar}. (Default: \code{"dim_reds"}) +#' +#' @param altexp.dir \code{Character scalar}. (Default: \code{"alt_exps"}) +#' +#' @param assay.type \code{Character scalar}. (Default: \code{"counts"}) +#' +#' @param tree.name \code{Character scalar}. (Default: \code{"phylo"}) +#' +#' @param group.var \code{Character scalar}. (Default: \code{NULL}) +#' +#' @param ... Unused. +#' +#' @returns Directory at \code{dpath} with components of \code{x} each stored +#' as a file in the proper format. +#' +#' @details The output directory contains the elements of \code{x}. For +#' \code{exportQIIME2} and \code{exportMothur}, data will need some more +#' processing using the target tool. For some tips, check the rbiom package +#' vignettes on converting data: +#' \url{https://cmmr.github.io/rbiom/articles/convert.html} +#' +#' @examples +#' library(TreeSummarizedExperiment) +#' +#' tse <- makeTSE() +#' assayNames(tse) <- "counts" +#' names(rowData(tse))[1] <- "Genus" +#' +#' # Export raw TreeSE components in custom directory +#' exportRaw(tse, "out") +#' +#' # Export TreeSE components in near-ready QIIME2 format +#' exportQIIME2(tse, "qiime2_dir") +#' +#' # Export TreeSE components in near-ready mothur format +#' exportMothur(tse, "mothur_dir") +#' +#' @name export-methods +#' @aliases exportRaw exportQIIME2 exportMothur +NULL + + +#' @rdname export-methods +#' @importFrom ape write.tree +setMethod("exportRaw", signature = c(x = "TreeSummarizedExperiment"), + function(x, dpath, rowdata.file = "rowdata", coldata.file = "coldata", + assay.dir = "assays", rowtree.dir = "row_trees", coltree.dir = "col_trees", + dimred.dir = "dim_reds", altexp.dir = "alt_exps"){ + + if( !endsWith(dpath, "/") ) dpath <- paste0(dpath, "/") + if( !dir.exists(dpath) ) dir.create(dpath) + + write.table(rowData(x), paste0(dpath, rowdata.file, ".tsv"), sep = "\t") + + write.table(colData(x), paste0(dpath, coldata.file, ".tsv"), sep = "\t") + + assay.dir <- .create_slot_dir(x, assays, assay.dir, dpath) + + for( assay_name in assayNames(x) ){ + write.table( + assay(x, assay_name), + paste0(assay.dir, assay_name, ".tsv"), + sep = "\t" + ) + } + + rowtree.dir <- .create_slot_dir(x, rowTreeNames, rowtree.dir, dpath) + + for( tree_name in rowTreeNames(x) ){ + write.tree( + rowTree(x, tree_name), paste0(rowtree.dir, tree_name, ".nwk") + ) + } + + coltree.dir <- .create_slot_dir(x, colTreeNames, coltree.dir, dpath) + + for( tree_name in colTreeNames(x) ){ + write.tree( + colTree(x, tree_name), paste0(coltree.dir, tree_name, ".nwk") + ) + } + + dimred.dir <- .create_slot_dir(x, reducedDims, dimred.dir, dpath) + + for( dimred_name in reducedDimNames(x) ){ + write.table( + reducedDim(x, dimred_name), + paste0(dimred.dir, dimred_name, ".tsv"), + sep = "\t" + ) + } + + altexp.dir <- .create_slot_dir(x, altExps, altexp.dir, dpath) + + for( altexp_name in altExpNames(x) ){ + + altexp_path <- paste0(altexp.dir, altexp_name) + + if( is(altExp(x, altexp_name), "SummarizedExperiment") ){ + + exportToRaw(altExp(x, altexp_name), altexp_path) + + }else{ + + write.table(altExp(x, altexp_name), altexp_path, sep = "\t") + + } + } + invisible(NULL) +}) + + +.create_slot_dir <- function(x, FUN, slot.path, main.path = ""){ + + if( length(FUN(x)) != 0L ){ + slot.path <- paste0(main.path, slot.path) + if( !endsWith(slot.path, "/") ) slot.path <- paste0(slot.path, "/") + dir.create(slot.path) + } + return(slot.path) +} + + +#' @rdname export-methods +#' @importFrom ape write.tree write.FASTA +setMethod("exportQIIME2", signature = c(x = "TreeSummarizedExperiment"), + function(x, dpath, assay.type = "counts", tree.name = "phylo", + group.var = NULL){ + + if( !endsWith(dpath, "/") ) dpath <- paste0(dpath, "/") + if( !dir.exists(dpath) ) dir.create(dpath) + + if( !is.null(group.var) ){ + + group <- rowData(x)[[group.var]] + group <- gsub("-", "_", group) + group <- cbind(rownames(x), group) + colnames(group) <- c("Feature ID", group.var) + + write.table( + group, paste0(dpath, group.var, ".tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + } + + row_data <- apply(rowData(x)[taxonomyRanks(x)], 1L, paste, collapse = ";_") + row_data <- gsub("(;_|;_NA)+$", "", row_data) + + row_data <- data.frame(rownames(x), row_data, 1L, row.names = NULL) + colnames(row_data) <- c("Feature ID", "Taxon", "Confidence") + + write.table( + row_data, paste0(dpath, "taxonomy.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + col_data <- as.data.frame(colData(x)) + + col_data[] <- lapply( + col_data, function(col) if( is.factor(col) ) as.character(col) else col + ) + + col_types <- apply( + col_data, 2L, function(col) switch( + type(col), character = "categorical", integer = , double = "numeric") + ) + + col_data <- rbind(col_types, col_data) + col_data <- cbind(`sample-id` = c("#q2:types", colnames(x)), col_data) + + write.table( + col_data, paste0(dpath, "metadata.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + sel_assay <- data.frame(rownames(x), assay(x, assay.type), row.names = NULL) + colnames(sel_assay)[1L] <- "#OTU ID" + + write.table( + sel_assay, paste0(dpath, assay.type, ".tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + row_tree <- rowTree(x, tree.name) + + if( !is.null(row_tree) ){ + write.tree(row_tree, paste0(dpath, "tree.nwk")) + } + + if( !is.null(referenceSeq(x)) ){ + write.FASTA(referenceSeq(x), paste0(dpath, "seqs.fna")) + } + invisible(NULL) +}) + + +#' @rdname export-methods +#' @importFrom ape write.tree write.FASTA +setMethod("exportMothur", signature = c(x = "TreeSummarizedExperiment"), + function(x, dpath, assay.type = "counts", tree.name = "phylo", + group.var = NULL){ + + if( !endsWith(dpath, "/") ) dpath <- paste0(dpath, "/") + if( !dir.exists(dpath) ) dir.create(dpath) + + rownames(x) <- gsub("-", "_", rownames(x), fixed = TRUE) + colnames(x) <- gsub("-", "_", colnames(x), fixed = TRUE) + + if( !is.null(group.var) ){ + + group <- rowData(x)[group.var] + group[[group.var]] <- gsub("-", "_", group[[group.var]]) + + write.table( + group, paste0(dpath, group.var, ".group"), + sep = "\t", quote = FALSE, col.names = FALSE + ) + } + + row_data <- apply(rowData(x)[taxonomyRanks(x)], 1L, paste, collapse = ";") + row_data <- gsub("(;|;NA)+$", "", row_data) + row_data <- gsub("-", "_", row_data, fixed = TRUE) + + sel_assay <- assay(x, assay.type) + row_sums <- rowSums(sel_assay) + + row_data <- data.frame( + names(row_data), row_sums, row_data, row.names = NULL + ) + colnames(row_data) <- c("OTU", "Size", "Taxonomy") + + write.table( + row_data, paste0(dpath, "taxonomy.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + col_data <- data.frame(group = colnames(x), colData(x), row.names = NULL) + + write.table( + col_data, paste0(dpath, "metadata.tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + sel_assay <- data.frame( + rownames(sel_assay), total = row_sums, sel_assay, row.names = NULL + ) + colnames(sel_assay)[1L] <- "Representative_Sequence" + + write.table( + sel_assay, paste0(dpath, assay.type, ".tsv"), + sep = "\t", quote = FALSE, row.names = FALSE + ) + + row_tree <- rowTree(x, tree.name) + + if( !is.null(row_tree) ){ + row_tree$tip.label <- gsub("-", "_", row_tree$tip.label) + write.tree(row_tree, paste0(dpath, "tree.nwk")) + } + + if( !is.null(referenceSeq(x)) ){ + write.FASTA(referenceSeq(x), paste0(dpath, "seqs.fna")) + } + invisible(NULL) +}) diff --git a/man/export-methods.Rd b/man/export-methods.Rd new file mode 100644 index 000000000..9936c8e15 --- /dev/null +++ b/man/export-methods.Rd @@ -0,0 +1,110 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/AllGenerics.R, R/exporters.R +\name{exportRaw} +\alias{exportRaw} +\alias{exportQIIME2} +\alias{exportMothur} +\alias{export-methods} +\alias{exportRaw,TreeSummarizedExperiment-method} +\alias{exportQIIME2,TreeSummarizedExperiment-method} +\alias{exportMothur,TreeSummarizedExperiment-method} +\title{Exporters to common formats for microbiome data outside of R} +\usage{ +exportRaw(x, ...) + +exportQIIME2(x, ...) + +exportMothur(x, ...) + +\S4method{exportRaw}{TreeSummarizedExperiment}( + x, + dpath, + rowdata.file = "rowdata", + coldata.file = "coldata", + assay.dir = "assays", + rowtree.dir = "row_trees", + coltree.dir = "col_trees", + dimred.dir = "dim_reds", + altexp.dir = "alt_exps" +) + +\S4method{exportQIIME2}{TreeSummarizedExperiment}(x, dpath, assay.type = "counts", tree.name = "phylo") + +\S4method{exportMothur}{TreeSummarizedExperiment}( + x, + dpath, + assay.type = "counts", + tree.name = "phylo", + group.var = NULL +) +} +\arguments{ +\item{x}{a \code{\link[TreeSummarizedExperiment]{TreeSummarizedExperiment}} +object.} + +\item{...}{Unused.} + +\item{dpath}{\code{Character scalar}.} + +\item{rowdata.file}{\code{Character scalar}. (Default: \code{"rowdata"})} + +\item{coldata.file}{\code{Character scalar}. (Default: \code{"coldata"})} + +\item{assay.dir}{\code{Character scalar}. (Default: \code{"assays"})} + +\item{rowtree.dir}{\code{Character scalar}. (Default: \code{"row_trees"})} + +\item{coltree.dir}{\code{Character scalar}. (Default: \code{"col_trees"})} + +\item{dimred.dir}{\code{Character scalar}. (Default: \code{"dim_reds"})} + +\item{altexp.dir}{\code{Character scalar}. (Default: \code{"alt_exps"})} + +\item{assay.type}{\code{Character scalar}. (Default: \code{"counts"})} + +\item{tree.name}{\code{Character scalar}. (Default: \code{"phylo"})} + +\item{group.var}{\code{Character scalar}. (Default: \code{NULL})} +} +\value{ +Directory at \code{dpath} with components of \code{x} each stored +as a file in the proper format. +} +\description{ +There are a few very popular external tools for microbiome analysis, +including QIIME2 and mothur. However, R does not currently provide any class +to accommodate those data formats. When exporting data from mia to external +tools, the best approach is therefore to break a data container into its +building blocks (assays, side information, trees, etc.). + +Thanks to \code{exportRaw}, \code{exportQIIME2} and \code{exportMothur}, +it is now possible to export a +\code{\link[TreeSummarizedExperiment]{TreeSummarizedExperiment}} object as +raw elements or near-ready QIIME2 and mothur formats, respectively. This way, +migrating from mia to an external system is still a bad idea, but at least it +is fairly straightforward. +} +\details{ +The output directory contains the elements of \code{x}. For +\code{exportQIIME2} and \code{exportMothur}, data will need some more +processing using the target tool. For some tips, check the rbiom package +vignettes on converting data: +\url{https://cmmr.github.io/rbiom/articles/convert.html} +} +\examples{ +library(TreeSummarizedExperiment) + +tse <- makeTSE() +assayNames(tse) <- "counts" +names(rowData(tse))[1] <- "Genus" + +# Export raw TreeSE components in custom directory +exportRaw(tse, "out") + +# Export TreeSE components in near-ready QIIME2 format +exportQIIME2(tse, "qiime2_dir") + +# Export TreeSE components in near-ready mothur format +exportMothur(tse, "mothur_dir") + +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index d4f4fe208..d9d991444 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -51,7 +51,7 @@ reference: - summary - getDominant - getAbundant -- title: Data loading +- title: Import/export/convert data - contents: - importBIOM - importQIIME2 @@ -61,6 +61,9 @@ reference: - importTaxpasta - convertFromDADA2 - convertFromPhyloseq + - exportRaw + - exportMothur + - exportQIIME2 - title: Diversity - subtitle: Alpha Diversity