diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 2ad7c57..5f8147a 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -30,11 +30,11 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - uses: r-lib/actions/setup-pandoc@v2
 
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@v6
         with:
           python-version: "3.13"
 
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index bfc9f4d..2b29c92 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -23,7 +23,7 @@ jobs:
     permissions:
       contents: write
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - uses: r-lib/actions/setup-pandoc@v2
 
diff --git a/NAMESPACE b/NAMESPACE
index d51840a..aa87135 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(flux_amf_credentials)
 export(flux_badm)
+export(flux_citations)
 export(flux_discover_files)
 export(flux_download)
 export(flux_extract)
diff --git a/NEWS.md b/NEWS.md
index a5a87c0..5d6342e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,6 @@
 # fluxnet (development version)
 
+* Added `flux_citations()` to generate site-level citations either as plain text or BibTeX entries.
 * Fixed a bug where hourly data wasn't being extracted or read in along with half-hourly data ([#73](https://github.com/EcosystemEcologyLab/fluxnet-package/issues/73))
 
 # fluxnet 0.3.2
diff --git a/R/flux_citations.R b/R/flux_citations.R
new file mode 100644
index 0000000..bb46892
--- /dev/null
+++ b/R/flux_citations.R
@@ -0,0 +1,239 @@
+#' Output required per-dataset citations for FLUXNET data
+#'
+#' Given a vector of site IDs, this returns either a data frame of citations or
+#' BibTeX entries for each site.
+#'
+#' @param site_ids Character vector of site IDs, e.g. `c("AR-Bal", "DE-Gwg")`.
+#' @param output Either `"data.frame"` to return a tibble or `"bibtex"` to
+#'   return (or write) BibTeX entries. See '**Value**' for more details.
+#' @param bibtex_path Path to a .bib file to write BibTeX to, passed to the
+#'   `con` argument of [writeLines()] (an existing file is overwritten). If
+#'   `NULL` (default), BibTeX will be returned as character. Has no effect if
+#'   `output = 'data.frame'`.
+#' @param ... Additional arguments passed to [flux_listall()].
+#'
+#' @returns If `output = 'data.frame'`, a `tibble` is returned with a
+#'   `bibentry` list-column with elements of class [bibentry] and a `citation`
+#'   column with citations formatted in the default style (see the `format()`
+#'   method for [bibentry()] for more details). If `output = 'bibtex'`, BibTeX
+#'   entries are printed (if `bibtex_path` is `NULL`) or written to a file, and
+#'   returned invisibly as a length-one character vector.
+#' @examples
+#' # Return dataframe with bibentries and formatted citations
+#' flux_citations(c("AR-Bal", "DE-Gwg"))
+#'
+#' # Return BibTeX
+#' flux_citations(c("AR-Bal", "DE-Gwg"), output = "bibtex")
+#'
+#' # Write BibTeX entries to a file
+#' \dontrun{
+#' flux_citations(
+#'   c("AR-Bal", "DE-Gwg"),
+#'   output = "bibtex",
+#'   bibtex_path = "references.bib"
+#' )
+#' }
+#'
+#' @export
+flux_citations <- function(
+  site_ids,
+  output = c("data.frame", "bibtex"),
+  bibtex_path = NULL,
+  ...
+) {
+  # Validate `output` first so a typo fails before any other work is done
+  output <- match.arg(output)
+
+  # Not named `list`, which would mask base::list() inside this function
+  site_list <- flux_listall(...)
+  site_citations_raw <- site_list %>%
+    dplyr::filter(.data$site_id %in% site_ids) %>%
+    dplyr::select(dplyr::all_of(c(
+      "site_id",
+      "site_name",
+      "data_hub",
+      "product_citation",
+      "product_id",
+      "oneflux_code_version"
+    )))
+
+  by_hub <- split(
+    site_citations_raw,
+    site_citations_raw$data_hub
+  )
+
+  # Hubs are looked up with `[[` because `$` partially matches list names
+  if (is.null(by_hub[["AmeriFlux"]])) {
+    amf <- dplyr::tibble()
+  } else {
+    amf_pattern <- "^(.+)\\((\\d{4})\\), (.+), Ver\\. .+, (.+), \\(Dataset\\)\\. (.+)$"
+    amf <- parse_product_citations(
+      by_hub[["AmeriFlux"]],
+      pattern = amf_pattern,
+      fields = c("authors", "year", "title", "publisher", "url")
+    ) %>%
+      dplyr::mutate(doi = .data$product_id)
+  }
+
+  if (is.null(by_hub[["ICOS"]])) {
+    icos <- dplyr::tibble()
+  } else {
+    # For ICOS, sometimes there is no author
+    icos_pattern <- "^(.+)?\\s?\\((\\d{4})\\)\\. (.+), FLUXNET, (https.+)$"
+    icos <- parse_product_citations(
+      by_hub[["ICOS"]],
+      pattern = icos_pattern,
+      fields = c("authors", "year", "title", "url")
+    ) %>%
+      dplyr::mutate(
+        publisher = "Ecosystem Thematic Centre",
+        pid = .data$product_id
+      )
+  }
+
+  if (is.null(by_hub[["TERN"]])) {
+    tern <- dplyr::tibble()
+  } else {
+    tern_pattern <- "^(.+)\\((\\d{4})\\): (.+\\.).?Version.+"
+    tern <- parse_product_citations(
+      by_hub[["TERN"]],
+      pattern = tern_pattern,
+      fields = c("authors", "year", "title")
+    ) %>%
+      dplyr::mutate(
+        publisher = "Terrestrial Ecosystem Research Network (TERN)",
+        url = .data$product_id,
+        # Anchored, with literal dots, so only a leading resolver prefix goes
+        doi = stringr::str_remove(
+          .data$product_id,
+          "^https?://(dx\\.)?doi\\.org/"
+        )
+      )
+  }
+
+  combined <- dplyr::bind_rows(amf, icos, tern)
+  if (nrow(combined) == 0) {
+    stop(
+      "No citations could be generated for the requested `site_ids`.",
+      call. = FALSE
+    )
+  }
+  not_cited <- setdiff(site_ids, combined$site_id)
+  if (length(not_cited) > 0) {
+    warning(
+      "No citation could be generated for site(s): ",
+      paste(not_cited, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  # Each hub fills a different subset of these columns (e.g. only ICOS sets
+  # `pid`). Add the absent ones as `NA`, which is what rows from the other
+  # hubs already hold after `bind_rows()`, so `x$pid` etc. below never read a
+  # column that does not exist.
+  citation_fields <- c(
+    "authors",
+    "year",
+    "title",
+    "publisher",
+    "doi",
+    "pid",
+    "url"
+  )
+  for (field in setdiff(citation_fields, names(combined))) {
+    combined[[field]] <- NA_character_
+  }
+
+  bibentries <- combined %>%
+    dplyr::select(-dplyr::all_of("product_citation")) %>%
+    tidyr::nest(.by = c("site_id", "site_name", "product_id")) %>%
+    dplyr::mutate(
+      bibentry = purrr::map(.data$data, \(x) {
+        utils::bibentry(
+          "misc",
+          author = x$authors,
+          title = x$title,
+          year = x$year,
+          publisher = x$publisher,
+          doi = x$doi,
+          pid = x$pid,
+          url = x$url,
+          type = "dataset"
+        )
+      })
+    ) %>%
+    dplyr::select(-dplyr::all_of("data"))
+
+  # Add cite keys
+  # TODO: make these Zotero/BetterBibTex style with a piece of the title in them
+  # to make them more likely to be unique
+  cite_keys <- purrr::map_chr(bibentries$bibentry, make_cite_key)
+  # Until then, suffix repeated keys so a .bib file never has duplicate keys
+  cite_keys <- make.unique(cite_keys, sep = "_")
+  bibentries$bibentry <- purrr::map2(
+    bibentries$bibentry,
+    cite_keys,
+    \(entry, key) {
+      entry$key <- key
+      entry
+    }
+  )
+
+  if (output == "data.frame") {
+    bibentries %>%
+      dplyr::mutate(citation = purrr::map_chr(.data$bibentry, format))
+  } else {
+    bibtex_list <- purrr::map_chr(bibentries$bibentry, \(x) {
+      format(x, style = "bibtex")
+    })
+    bibtex_text <- paste0(bibtex_list, collapse = "\n")
+    if (is.null(bibtex_path)) {
+      cat(bibtex_text) # print with nice formatting
+    } else {
+      writeLines(bibtex_text, bibtex_path)
+    }
+    invisible(bibtex_text)
+  }
+}
+
+#' Split hub-specific citation strings into their components
+#'
+#' @param hub_data Data frame of sites from one data hub, with a
+#'   `product_citation` column.
+#' @param pattern Regular expression with one capture group per element of
+#'   `fields`.
+#' @param fields Names for the captured components, in capture-group order.
+#'
+#' @returns `hub_data` with one added character column per element of `fields`;
+#'   values are `NA` where `pattern` did not match.
+#' @noRd
+parse_product_citations <- function(hub_data, pattern, fields) {
+  parsed <- stringr::str_match(hub_data$product_citation, pattern = pattern)
+  # Column 1 holds the full match. Drop it and attach the capture groups by
+  # position: joining on the matched text would duplicate rows whenever a
+  # citation string is repeated (or missing) for more than one site.
+  parsed <- parsed[, -1, drop = FALSE]
+  colnames(parsed) <- fields
+  dplyr::bind_cols(hub_data, dplyr::as_tibble(parsed))
+}
+
+#' Build a BibTeX cite key of the form `family_year`
+#'
+#' @param entry A single `bibentry` object.
+#'
+#' @returns A length-one character vector. `"noauthor"` stands in for the
+#'   family name when the entry has no author.
+#' @noRd
+make_cite_key <- function(entry) {
+  authors <- entry$author
+  family <- if (length(authors) > 0) authors[[1]]$family else NULL
+  # `tolower(NULL)` is `character(0)`, not `NULL`, so checking the lowered name
+  # with `is.null()` can never detect a missing author; collapse to a string
+  # and test for emptiness instead. Characters other than letters and digits
+  # are dropped so the key stays a single token.
+  family <- gsub("[^[:alnum:]]", "", tolower(paste(family, collapse = "")))
+  if (!nzchar(family)) {
+    family <- "noauthor"
+  }
+  paste(c(family, entry$year), collapse = "_")
+}
diff --git a/man/flux_citations.Rd b/man/flux_citations.Rd
new file mode 100644
index 0000000..538ab76
--- /dev/null
+++ b/man/flux_citations.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/flux_citations.R
+\name{flux_citations}
+\alias{flux_citations}
+\title{Output required per-dataset citations for FLUXNET data}
+\usage{
+flux_citations(
+  site_ids,
+  output = c("data.frame", "bibtex"),
+  bibtex_path = NULL,
+  ...
+)
+}
+\arguments{
+\item{site_ids}{Character vector of site IDs, e.g. \code{c("AR-Bal", "DE-Gwg")}.}
+
+\item{output}{Either \code{"data.frame"} to return a tibble or \code{"bibtex"} to
+return (or write) BibTeX entries. See '\strong{Value}' for more details.}
+
+\item{bibtex_path}{Path to a .bib file to write BibTeX to, passed to the
+\code{con} argument of \code{\link[=writeLines]{writeLines()}} (an existing file is overwritten). If
+\code{NULL} (default), BibTeX will be returned as character. Has no effect if
+\code{output = 'data.frame'}.}
+
+\item{...}{Additional arguments passed to \code{\link[=flux_listall]{flux_listall()}}.}
+}
+\value{
+If \code{output = 'data.frame'}, a \code{tibble} is returned with a
+\code{bibentry} list-column with elements of class \link{bibentry} and a \code{citation}
+column with citations formatted in the default style (see the \code{format()}
+method for \code{\link[=bibentry]{bibentry()}} for more details). If \code{output = 'bibtex'}, BibTeX
+entries are printed (if \code{bibtex_path} is \code{NULL}) or written to a file, and
+returned invisibly as a length-one character vector.
+}
+\description{
+Given a vector of site IDs, this returns either a data frame of citations or
+BibTeX entries for each site.
+}
+\examples{
+# Return dataframe with bibentries and formatted citations
+flux_citations(c("AR-Bal", "DE-Gwg"))
+
+# Return BibTeX
+flux_citations(c("AR-Bal", "DE-Gwg"), output = "bibtex")
+
+# Write BibTeX entries to a file
+\dontrun{
+flux_citations(
+  c("AR-Bal", "DE-Gwg"),
+  output = "bibtex",
+  bibtex_path = "references.bib"
+)
+}
+
+}
diff --git a/tests/testthat/test-flux_citations.R b/tests/testthat/test-flux_citations.R
new file mode 100644
index 0000000..bdeb0e3
--- /dev/null
+++ b/tests/testthat/test-flux_citations.R
@@ -0,0 +1,38 @@
+test_that("citations work", {
+  citations <- flux_citations(
+    site_ids = c("AR-Bal", "DE-Gwg"),
+    output = "data.frame"
+  )
+  expect_s3_class(citations, "data.frame")
+  expect_s3_class(citations$bibentry[[1]], "bibentry")
+})
+
+test_that("writing to bibtex works", {
+  tmp <- withr::local_tempfile(fileext = ".bib")
+  flux_citations(
+    site_ids = c("AR-Bal", "DE-Gwg"),
+    output = "bibtex",
+    bibtex_path = tmp
+  )
+  # Trim indentation so the check does not depend on how fields are indented
+  expect_identical(
+    trimws(readLines(tmp)[3]),
+    "title = {AmeriFlux FLUXNET-1F AR-Bal Balcarce BA},"
+  )
+})
+
+test_that("an invalid `output` is rejected", {
+  expect_error(flux_citations("AR-Bal", output = "json"))
+})
+
+test_that("unknown site IDs give an informative error", {
+  expect_error(
+    flux_citations("XX-Xxx"),
+    "No citations could be generated"
+  )
+})
+
+test_that("cite keys fall back to 'noauthor'", {
+  entry <- utils::bibentry("misc", title = "A dataset", year = "2024")
+  expect_identical(make_cite_key(entry), "noauthor_2024")
+})