diff --git a/NAMESPACE b/NAMESPACE
index 4d7ad6c..8cca4f1 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,6 +33,7 @@ export(convertMlrTaskToOMLDataSet)
 export(convertOMLDataSetToMlr)
 export(convertOMLFlowToMlr)
 export(convertOMLMlrRunToBMR)
+export(convertOMLRunEvalsToDT)
 export(convertOMLRunToBMR)
 export(convertOMLTaskToMlr)
 export(deleteOMLObject)
diff --git a/R/convertOMLSetupParamsToDT.R b/R/convertOMLSetupParamsToDT.R
new file mode 100644
index 0000000..4e41a3f
--- /dev/null
+++ b/R/convertOMLSetupParamsToDT.R
@@ -0,0 +1,71 @@
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing.
+
+# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e.
+# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2
+
+
+
+#' @title Extract Parameters from an OpenML run into a flat structure
+#'
+#' @description
+#' Expands the list column \code{setup_parameters} of \code{run.evals} into one
+#' column per hyperparameter and binds these columns to the remaining columns
+#' of \code{run.evals}. The boolean \code{verbose} parameter is dropped.
+#'
+#' @param run.evals [\code{data.frame}]\cr
+#'   Result of calling \code{listOMLRunEvaluations(..., setup = TRUE)}.
+#' @param drop.constant [\code{logical(1)}]\cr
+#'   Should constant columns be dropped before returning the result?
+#'   Default is \code{TRUE}.
+#'
+#' @return [\code{\link{data.table}}].
+#' @family run-related functions
+#' @export
+convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) {
+  assert_data_frame(run.evals)
+  assert_true(!is.null(run.evals$setup_parameters))
+  assert_flag(drop.constant)
+  setup_params = run.evals$setup_parameters
+  out = lapply(setup_params, function(params) {
+    # Drop the boolean "verbose" flag. The filtered table must be assigned,
+    # otherwise the subsetting has no effect. %in% never yields NA, so rows
+    # with a missing name or type are kept.
+    is.verbose = params$parameter_name %in% "verbose" & params$data_type %in% "boolean"
+    params = params[which(!is.verbose), ]
+    # call directly: all arguments are explicit, so no data.table "[" is needed
+    convertValueByType(params$parameter_name, params$value, params$data_type)
+  })
+  # fill = TRUE pads parameters that are missing for a run with NA
+  # NOTE(review): a setup that yields no columns presumably contributes no row
+  # here - confirm that the rows of dt still line up with run.evals.
+  dt = rbindlist(out, fill = TRUE)
+  if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE]
+  run.evals$setup_parameters = NULL
+  return(cbind(run.evals, dt))
+}
+
+
+# Convert values according to a parameter's type.
+# Note that this is very unreliable.
+#
+# parameter_name, value and type are parallel vectors with one entry per
+# parameter. Returns a data.table with one column per parameter, named by
+# parameter_name (double quotes removed).
+convertValueByType = function(parameter_name, value, type) {
+  value = Map(function(v, t) {
+    # the strings None/none/Null/null are treated as missing values
+    v[v %in% c("None", "none", "Null", "null")] = NA
+    # remove all double-quote characters
+    v = gsub("\"", "", v, fixed = TRUE)
+    if (t %in% c("boolean", "bool")) {
+      v = as.logical(v)
+    } else if (t %in% c("float", "number")) {
+      v = as.numeric(v)
+    } else if (t %in% c("int", "integer", "int or None", "integer or None")) {
+      v = suppressWarnings(as.integer(v))
+    }
+    return(v)
+  }, value, type)
+  names(value) = gsub("\"", "", parameter_name, fixed = TRUE)
+  return(as.data.table(value))
+}
diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R index 16c8900..47cb411 100644 --- a/R/listOMLRunEvaluations.R +++ b/R/listOMLRunEvaluations.R @@ -1,19 +1,28 @@ .listOMLRunEvaluations = function(task.id = NULL, flow.id = NULL, run.id = NULL, uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL, - evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE) { + evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE, + setup = FALSE) { if (is.null(task.id) && is.null(flow.id) && is.null(run.id) && is.null(uploader.id) && is.null(tag)) stop("Please hand over at least one of the following: task.id, flow.id, 
run.id, uploader.id, tag") if (is.null(evaluation.measure)) showInfo(verbosity, "Suggestion: Use the 'evaluation.measure' argument to restrict the results to only one measure.") - api.call = generateAPICall(api.call = "json/evaluation/list", task.id = task.id, + if (!setup) api.call = "json/evaluation/list" else api.call = "json/evaluation/setup/list" + api.call = generateAPICall(api.call = api.call, task.id = task.id, flow.id = flow.id, run.id = run.id, uploader.id = uploader.id, tag = tag, evaluation.measure = evaluation.measure, limit = limit, offset = offset) content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity) if (is.null(content)) return(data.frame()) - evals = fromJSON(txt = content, simplifyVector = FALSE)$evaluations$evaluation + lst_content = fromJSON(txt = content, simplifyVector = FALSE) + evals = lst_content$evaluations$evaluation + + if (setup) { + param_list = lapply(evals, function(x) { + parameters = as.data.table(cleanupSetupParameters(x$parameters)) + }) + } evals = rbindlist(lapply(evals, function(x) { if (is.null(x$value)) x$value = NA @@ -75,7 +84,7 @@ values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)), stringsAsFactors = FALSE) } - + if (setup) evals$setup_parameters = param_list return(evals) } @@ -101,6 +110,9 @@ #' @param extend.flow.name [\code{logical(1)}]\cr #' Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner. #' Default is \code{TRUE}. +#' @param setup [\code{logical(1)}]\cr +#' Adds a column \code{setup_parameters} that contains the runs setup, i.e. the hyperparameters set +#' for the run. #' #' @return [\code{data.frame}]. 
#' @family list diff --git a/R/listOMLSetup.R b/R/listOMLSetup.R index 828021a..bdd72e4 100644 --- a/R/listOMLSetup.R +++ b/R/listOMLSetup.R @@ -11,32 +11,7 @@ setup = fromJSON(txt = content)$setups$setup sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id) - # Get parameters and clean them up - param = setup$parameter - if (!is.null(names(param))) { - # if elements have a name, it refers to parameter - param = param[!vlapply(param, function(x) length(x) == 0)] - param = as.data.frame(param, stringsAsFactors = FALSE) - param = cbind(param, join_id = 1, stringsAsFactors = FALSE) - } else { - # add names - param = setNames(param, 1:length(param)) - # filter out NULL or empty elements - param = param[!vlapply(param, function(x) length(x) == 0)] - # inside each element, replace empty values with NA - param = lapply(param, function(x) { - replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_) - }) - param = rbindlist(param, fill = TRUE, idcol = "join_id") - param = as.data.frame(param, stringsAsFactors = FALSE) - } - - list.cols = colnames(param)[vlapply(param, is.list)] - for (col in list.cols) { - ind = which(vlapply(param[[col]], function(i) length(i) == 0)) - param[[col]][ind] = NA_character_ - param[[col]] = unlist(param[[col]], recursive = FALSE) - } + param = cleanupSetupParameters(setup$parameter) ret = merge(param, sid) ret$id = ret$join_id = NULL @@ -66,3 +41,40 @@ #' @export #' @example inst/examples/listOMLSetup.R listOMLSetup = memoise(.listOMLSetup)
+
+
+# Get parameters and clean them up.
+#
+# If param has names, it is converted to a data.frame as a whole and gets
+# join_id = 1. Otherwise its elements are numbered, empty elements are
+# dropped and the remaining ones are row-bound, with the element number
+# stored in join_id. Zero-length entries of list columns become
+# NA_character_ and the list columns are unlisted. Returns a data.frame.
+cleanupSetupParameters = function(param) {
+  if (!is.null(names(param))) {
+    # if elements have a name, it refers to parameter
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+    param = cbind(param, join_id = 1, stringsAsFactors = FALSE)
+  } else {
+    # add names; seq_along() also handles an empty list, where 1:length()
+    # would yield c(1, 0) and make setNames() fail
+    param = setNames(param, seq_along(param))
+    # filter out NULL or empty elements
+    param = param[!vlapply(param, function(x) length(x) == 0)]
+    # inside each element, replace empty values with NA
+    param = lapply(param, function(x) {
+      replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_)
+    })
+    param = rbindlist(param, fill = TRUE, idcol = "join_id")
+    param = as.data.frame(param, stringsAsFactors = FALSE)
+  }
+
+  list.cols = colnames(param)[vlapply(param, is.list)]
+  for (col in list.cols) {
+    ind = which(vlapply(param[[col]], function(i) length(i) == 0))
+    param[[col]][ind] = NA_character_
+    param[[col]] = unlist(param[[col]], recursive = FALSE)
+  }
+  return(param)
+}
diff --git a/man/chunkOMLlist.Rd b/man/chunkOMLlist.Rd index 7ae1bbe..5c941ef 100644 --- a/man/chunkOMLlist.Rd +++ b/man/chunkOMLlist.Rd @@ -4,7 +4,7 @@ \alias{chunkOMLlist} \title{Do chunked listings} \usage{ -chunkOMLlist(listfun, ..., total.limit = 1e+05, chunk.limit = 1000) +chunkOMLlist(listfun, ..., total.limit = 100000, chunk.limit = 1000) } \arguments{ \item{listfun}{[\code{character(1)}]\cr diff --git a/man/convertOMLMlrRunToBMR.Rd b/man/convertOMLMlrRunToBMR.Rd index d1fc61f..9337464 100644 --- a/man/convertOMLMlrRunToBMR.Rd +++ b/man/convertOMLMlrRunToBMR.Rd @@ -17,7 +17,8 @@ One or more \code{\link{OMLMlrRun}s}} Converts one or more \code{\link{OMLMlrRun}}s to a \code{\link[mlr]{BenchmarkResult}}. 
} \seealso{ -Other run-related functions: \code{\link{convertOMLRunToBMR}}, +Other run-related functions: \code{\link{convertOMLRunEvalsToDT}}, + \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/convertOMLRunEvalsToDT.Rd b/man/convertOMLRunEvalsToDT.Rd new file mode 100644 index 0000000..679156d --- /dev/null +++ b/man/convertOMLRunEvalsToDT.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convertOMLSetupParamsToDT.R +\name{convertOMLRunEvalsToDT} +\alias{convertOMLRunEvalsToDT} +\title{Extract Parameters from an OpenML run into a flat structure} +\usage{ +convertOMLRunEvalsToDT(run.evals, drop.constant = TRUE) +} +\arguments{ +\item{run.evals}{[\code{data.frame}]\cr +Result of calling listOMLRunEvaluations(..., setup = TRUE).} + +\item{drop.constant}{[\code{logical(1)}]\cr +Should constant columns be dropped before returning the result?} +} +\value{ +[\code{\link{data.table}}]. +} +\description{ +Extract Parameters from an OpenML run into a flat structure +} +\seealso{ +Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunToBMR}}, + \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, + \code{\link{listOMLRuns}}, + \code{\link{makeOMLRunParameter}}, + \code{\link{makeOMLRun}}, \code{\link{tagOMLObject}}, + \code{\link{uploadOMLRun}} +} +\concept{run-related functions} diff --git a/man/convertOMLRunToBMR.Rd b/man/convertOMLRunToBMR.Rd index 6d3e627..bc4cb3b 100644 --- a/man/convertOMLRunToBMR.Rd +++ b/man/convertOMLRunToBMR.Rd @@ -27,6 +27,7 @@ Converts an \code{\link{OMLRun}} to a \code{\link[mlr]{BenchmarkResult}}. 
} \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/deleteOMLObject.Rd b/man/deleteOMLObject.Rd index 1fbcdd9..a57a2ea 100644 --- a/man/deleteOMLObject.Rd +++ b/man/deleteOMLObject.Rd @@ -47,6 +47,7 @@ Other flow-related functions: \code{\link{convertOMLFlowToMlr}}, \code{\link{makeOMLFlow}}, \code{\link{tagOMLObject}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/getOMLRun.Rd b/man/getOMLRun.Rd index 3709418..77be328 100644 --- a/man/getOMLRun.Rd +++ b/man/getOMLRun.Rd @@ -46,6 +46,7 @@ Other downloading functions: \code{\link{getOMLDataSetQualities}}, \code{\link{getOMLStudy}}, \code{\link{getOMLTask}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/listOMLRunEvaluations.Rd b/man/listOMLRunEvaluations.Rd index 701d35d..1913ed2 100644 --- a/man/listOMLRunEvaluations.Rd +++ b/man/listOMLRunEvaluations.Rd @@ -7,7 +7,8 @@ listOMLRunEvaluations(task.id = NULL, flow.id = NULL, run.id = NULL, uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL, evaluation.measure = NULL, - show.array.measures = FALSE, extend.flow.name = TRUE) + show.array.measures = FALSE, extend.flow.name = TRUE, + setup = FALSE) } \arguments{ \item{task.id}{[\code{integer}]\cr @@ -55,6 +56,10 @@ Default is \code{FALSE}.} \item{extend.flow.name}{[\code{logical(1)}]\cr Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} 
containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner. Default is \code{TRUE}.} + +\item{setup}{[\code{logical(1)}]\cr +Adds a column \code{setup_parameters} that contains the runs setup, i.e. the hyperparameters set +for the run.} } \value{ [\code{data.frame}]. diff --git a/man/listOMLRuns.Rd b/man/listOMLRuns.Rd index c6b0d2e..efb7ed4 100644 --- a/man/listOMLRuns.Rd +++ b/man/listOMLRuns.Rd @@ -76,6 +76,7 @@ Other listing functions: \code{\link{chunkOMLlist}}, \code{\link{listOMLTasks}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/makeOMLRun.Rd b/man/makeOMLRun.Rd index f0a74f4..00c2cb1 100644 --- a/man/makeOMLRun.Rd +++ b/man/makeOMLRun.Rd @@ -75,6 +75,7 @@ More details about the elements of a \code{OMLRun} can be found in the } \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, diff --git a/man/makeOMLRunParameter.Rd b/man/makeOMLRunParameter.Rd index be45430..ad6a008 100644 --- a/man/makeOMLRunParameter.Rd +++ b/man/makeOMLRunParameter.Rd @@ -23,6 +23,7 @@ Construct a run parameter object for a run. 
} \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRun}}, diff --git a/man/tagging.Rd b/man/tagging.Rd index a88e838..8e18a7c 100644 --- a/man/tagging.Rd +++ b/man/tagging.Rd @@ -54,6 +54,7 @@ Other flow-related functions: \code{\link{convertOMLFlowToMlr}}, \code{\link{makeOMLFlow}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, diff --git a/man/uploadOMLRun.Rd b/man/uploadOMLRun.Rd index 22e1bf5..879113e 100644 --- a/man/uploadOMLRun.Rd +++ b/man/uploadOMLRun.Rd @@ -53,6 +53,7 @@ Other uploading functions: \code{\link{makeOMLStudy}}, \code{\link{uploadOMLFlow}}, \code{\link{uploadOMLStudy}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, diff --git a/tests/testthat/test_server_listOMLRunEvaluations.R b/tests/testthat/test_server_listOMLRunEvaluations.R index 2bfe263..ff5cd7b 100644 --- a/tests/testthat/test_server_listOMLRunEvaluations.R +++ b/tests/testthat/test_server_listOMLRunEvaluations.R @@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", { expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m")) }) })
+
+test_that("listOMLRunEvaluations with setup = TRUE", {
+  with_main_server({
+    setOMLConfig(server = "https://test.openml.org/api/v1")
+    task.id = 6L
+
+    # setup = TRUE must add the list column "setup_parameters"
+    run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20)
+    expect_data_frame(run.evals, min.rows = 1L, col.names = "unique")
+    expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"),
+      names(run.evals))
+  })
+})