Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,11 @@ jobs:
R_KEEP_PKG_SOURCE: yes

steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6

- uses: r-lib/actions/setup-pandoc@v2

- uses: actions/setup-python@v4
- uses: actions/setup-python@v6
with:
python-version: "3.13"

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
permissions:
contents: write
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6

- uses: r-lib/actions/setup-pandoc@v2

Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export(flux_amf_credentials)
export(flux_badm)
export(flux_citations)
export(flux_discover_files)
export(flux_download)
export(flux_extract)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# fluxnet (development version)

* Added `flux_citations()` to generate site-level citations either as plain text or BibTeX entries.
* Fixed a bug where hourly data wasn't being extracted or read in along with half-hourly data ([#73](https://github.com/EcosystemEcologyLab/fluxnet-package/issues/73))

# fluxnet 0.3.2
Expand Down
187 changes: 187 additions & 0 deletions R/flux_citations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#' Output required per-dataset citations for FLUXNET data
#'
#' Given a vector of site IDs, this returns either a data frame of citations
#' or BibTeX entries for each site.
#'
#' Citations are parsed from the `product_citation` column returned by
#' [flux_listall()] for the AmeriFlux, ICOS and TERN data hubs; sites from any
#' other data hub are not included in the output. BibTeX cite keys are built
#' as `<first author family name>_<year>` (or `noauthor_<year>` when a
#' citation has no author) and a numeric suffix is appended where needed to
#' keep keys unique within the result.
#'
#' @param site_ids Character vector of site IDs, e.g. `c("AR-Bal", "DE-Gwg")`.
#'   Site IDs that are not found are reported with a warning; an error is
#'   raised if none of them are found.
#' @param output Either `"data.frame"` to return a tibble or `"bibtex"` to
#'   return (or write) BibTeX entries. See '**Value**' for more details.
#' @param bibtex_path Path to a .bib file to write BibTeX to, passed to the
#'   `con` argument of [writeLines()]. When a file path is given, an existing
#'   file is overwritten. If `NULL` (default), BibTeX will be printed and
#'   returned as character. Has no effect if `output = 'data.frame'`.
#' @param ... Additional arguments passed to [flux_listall()].
#'
#' @returns If `output = 'data.frame'`, a `tibble` is returned with a
#'   `bibentry` list-column with elements of class [bibentry] and a `citation`
#'   column with citations formatted in the default style (see the `format()`
#'   method for [bibentry()] for more details). If `output = 'bibtex'`, the
#'   BibTeX entries are returned invisibly as a length-one character vector,
#'   after being printed (if `bibtex_path` is `NULL`) or written to
#'   `bibtex_path`.
#' @examples
#' # Return dataframe with bibentries and formatted citations
#' flux_citations(c("AR-Bal", "DE-Gwg"))
#'
#' # Return BibTeX
#' flux_citations(c("AR-Bal", "DE-Gwg"), output = "bibtex")
#'
#' # Write BibTeX entries to a file
#' \dontrun{
#' flux_citations(
#'   c("AR-Bal", "DE-Gwg"),
#'   output = "bibtex",
#'   bibtex_path = "references.bib"
#' )
#' }
#'
#' @export
flux_citations <- function(
  site_ids,
  output = c("data.frame", "bibtex"),
  bibtex_path = NULL,
  ...
) {
  # Validate `output` first so a bad value fails before flux_listall() runs.
  output <- match.arg(output)

  site_list <- flux_listall(...)
  site_citations_raw <- site_list %>%
    dplyr::filter(.data$site_id %in% site_ids) %>%
    dplyr::select(dplyr::all_of(c(
      "site_id",
      "site_name",
      "data_hub",
      "product_citation",
      "product_id",
      "oneflux_code_version"
    )))

  if (nrow(site_citations_raw) == 0) {
    stop("None of the requested `site_ids` were found.", call. = FALSE)
  }
  missing_ids <- setdiff(site_ids, site_citations_raw$site_id)
  if (length(missing_ids) > 0) {
    warning(
      "No citation found for site ID(s): ",
      paste(missing_ids, collapse = ", "),
      call. = FALSE
    )
  }

  by_hub <- split(
    site_citations_raw,
    site_citations_raw$data_hub
  )

  # Split each citation string into named fields using the capture groups of
  # `pattern`. `str_match()` returns exactly one row per input string (all
  # `NA` when the pattern does not match), so the fields can be bound on
  # column-wise without joining on the citation text.
  parse_citations <- function(hub_sites, pattern, fields) {
    parts <- stringr::str_match(hub_sites$product_citation, pattern = pattern)
    # Column 1 is the full match; only the capture groups are needed.
    parts <- parts[, -1, drop = FALSE]
    colnames(parts) <- fields
    dplyr::bind_cols(hub_sites, dplyr::as_tibble(parts))
  }

  if (!is.null(by_hub[["AmeriFlux"]])) {
    amf_pattern <- "^(.+)\\((\\d{4})\\), (.+), Ver\\. .+, (.+), \\(Dataset\\)\\. (.+)$"
    amf <- parse_citations(
      by_hub[["AmeriFlux"]],
      amf_pattern,
      c("authors", "year", "title", "publisher", "url")
    ) %>%
      dplyr::mutate(doi = .data$product_id)
  } else {
    amf <- dplyr::tibble()
  }

  if (!is.null(by_hub[["ICOS"]])) {
    # For ICOS, sometimes there is no author
    icos_pattern <- "^(.+)?\\s?\\((\\d{4})\\)\\. (.+), FLUXNET, (https.+)$"
    icos <- parse_citations(
      by_hub[["ICOS"]],
      icos_pattern,
      c("authors", "year", "title", "url")
    ) %>%
      dplyr::mutate(
        publisher = "Ecosystem Thematic Centre",
        pid = .data$product_id
      )
  } else {
    icos <- dplyr::tibble()
  }

  if (!is.null(by_hub[["TERN"]])) {
    tern_pattern <- "^(.+)\\((\\d{4})\\): (.+\\.).?Version.+"
    tern <- parse_citations(
      by_hub[["TERN"]],
      tern_pattern,
      c("authors", "year", "title")
    ) %>%
      dplyr::mutate(
        publisher = "Terrestrial Ecosystem Research Network (TERN)",
        url = .data$product_id,
        # Strip the resolver prefix to leave the bare DOI
        doi = stringr::str_remove(
          .data$product_id,
          "^https?://(dx\\.)?doi\\.org/"
        )
      )
  } else {
    tern <- dplyr::tibble()
  }

  combined <- dplyr::bind_rows(amf, icos, tern)
  if (nrow(combined) == 0) {
    stop(
      "None of the requested sites belong to a supported data hub ",
      "(AmeriFlux, ICOS, TERN).",
      call. = FALSE
    )
  }

  # Not every hub supplies every field (e.g. only ICOS sets `pid`). Make sure
  # each column exists so it can be read below without "unknown column"
  # warnings; `bibentry()` drops fields that are `NA`.
  bib_fields <- c("authors", "title", "year", "publisher", "doi", "pid", "url")
  for (field in setdiff(bib_fields, names(combined))) {
    combined[[field]] <- NA_character_
  }
  combined <- combined %>%
    dplyr::select(-dplyr::all_of("product_citation"))

  bibentries <- combined %>%
    tidyr::nest(.by = c("site_id", "site_name", "product_id")) %>%
    dplyr::mutate(
      bibentry = purrr::map(.data$data, \(x) {
        utils::bibentry(
          "misc",
          author = x$authors,
          title = x$title,
          year = x$year,
          publisher = x$publisher,
          doi = x$doi,
          pid = x$pid,
          url = x$url,
          type = "dataset"
        )
      })
    ) %>%
    dplyr::select(-dplyr::all_of("data"))

  # Add cite keys
  # TODO: make these Zotero/BetterBibTex style with a piece of the title in them
  # to make them more likely to be unique
  cite_key <- function(entry) {
    family <- entry$author[[1]]$family
    # A missing author yields a zero-length value here rather than a usable
    # name, so test the length instead of is.null() after tolower().
    if (length(family) == 0) {
      family <- "noauthor"
    } else {
      family <- tolower(paste(family, collapse = ""))
    }
    paste(c(family, entry$year), collapse = "_")
  }
  keys <- purrr::map_chr(bibentries$bibentry, cite_key)
  # Repeated keys (same lead author and year for several sites) would make the
  # BibTeX output invalid, so de-duplicate them with a numeric suffix.
  keys <- make.unique(keys, sep = "_")
  bibentries$bibentry <- purrr::map2(bibentries$bibentry, keys, \(entry, key) {
    entry$key <- key
    entry
  })

  if (output == "data.frame") {
    bibentries %>%
      dplyr::mutate(citation = purrr::map_chr(.data$bibentry, format))
  } else {
    bibtex_list <- purrr::map_chr(bibentries$bibentry, \(x) {
      format(x, style = "bibtex")
    })
    bibtex_text <- paste0(bibtex_list, collapse = "\n")
    if (is.null(bibtex_path)) {
      cat(bibtex_text, "\n", sep = "") # print with nice formatting
    } else {
      writeLines(bibtex_text, bibtex_path)
    }
    invisible(bibtex_text) # return character atomic vector
  }
}

54 changes: 54 additions & 0 deletions man/flux_citations.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions tests/testthat/test-flux_citations.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
test_that("citations work", {
  sites <- c("AR-Bal", "DE-Gwg")
  result <- flux_citations(site_ids = sites, output = "data.frame")

  # The result is a data frame ...
  expect_s3_class(result, "data.frame")

  # ... whose `bibentry` list-column holds bibentry objects
  first_entry <- result$bibentry[[1]]
  expect_s3_class(first_entry, "bibentry")
})

test_that("writing to bibtex works", {
  tmp <- withr::local_tempfile(fileext = ".bib")
  flux_citations(
    site_ids = c("AR-Bal", "DE-Gwg"),
    output = "bibtex",
    bibtex_path = tmp
  )

  # The third line of the written file is expected to be the first entry's
  # title field. Compare with expect_identical() so a failure shows the actual
  # line, and trim whitespace so the check does not depend on the width of the
  # BibTeX field indentation.
  expect_identical(
    trimws(readLines(tmp)[3]),
    "title = {AmeriFlux FLUXNET-1F AR-Bal Balcarce BA},"
  )
})
Loading