From 116b271f097ad572f1a45a3b02d484d282f7635c Mon Sep 17 00:00:00 2001 From: pfistfl Date: Tue, 19 Nov 2019 16:08:57 +0100 Subject: [PATCH 1/3] Add runevals --- R/getOMLRunEvaluations.R | 129 ++++++++++++++++++++++++++++++++++++++ R/listOMLRunEvaluations.R | 10 ++- 2 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 R/getOMLRunEvaluations.R diff --git a/R/getOMLRunEvaluations.R b/R/getOMLRunEvaluations.R new file mode 100644 index 0000000..7d023c4 --- /dev/null +++ b/R/getOMLRunEvaluations.R @@ -0,0 +1,129 @@ +# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing. + +# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e. +# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2 + + + +# #' @title Get performances and hyperparameters for flows or tasks +# #' +# #' @description +# #' Given an run id, the corresponding \code{\link{OMLRun}} including all server +# #' and user computed metrics is downloaded if not already available in cache. +# #' +# #' @param flow.id [\code{integer(1)}]\cr +# #' The flow ID. +# #' @param task.id [\code{integer(1)}]\cr +# #' The task ID. +# #' @param eval_measure [\code{character}] +# #' @template arg_cache_only +# #' @param only.xml [\code{logical(1)}]\cr +# #' Should only the XML be downloaded? +# #' @template arg_verbosity +# #' @return [\code{\link{OMLRun}}]. +# #' @family downloading functions +# #' @family run-related functions +# #' @example inst/examples/getOMLRun.R +# #' @export +# getOMLRun = function(run.id, cache.only = FALSE, only.xml = FALSE, verbosity = NULL) { +# id = asCount(run.id) +# assertFlag(cache.only) + +# down = downloadOMLObject(id, object = "run", cache.only = cache.only, only.xml = only.xml, verbosity = verbosity) +# f = down$files +# doc = down$doc + +# run.args = filterNull(list( +# run.id = xmlREValI(doc, "/oml:run/oml:run_id"), +# uploader = xmlREValI(doc, "/oml:run/oml:uploader"), +# uploader.name = xmlOValS(doc, "/oml:run/oml:uploader.name"), +# task.id = xmlREValI(doc, "/oml:run/oml:task_id"), +# task.type = xmlOValS(doc, "/oml:run/oml:task_type"), +# task.evaluation.measure = xmlOValS(doc, "/oml:run/oml:task_evaluation_measure"), +# flow.id = xmlRValI(doc, "/oml:run/oml:flow_id"), +# flow.name = xmlOValS(doc, "/oml:run/oml:flow_name"), +# setup.id = xmlREValI(doc, "/oml:run/oml:setup_id"), +# setup.string = xmlOValS(doc, "/oml:run/oml:setup_string"), +# error.message = xmlOValS(doc, "/oml:run/oml:error_message"), +# tags = xmlOValsMultNsS(doc, "/oml:run/oml:tag"), +# input.data = parseData(doc, "/oml:run/oml:input_data"), +# output.data = parseData(doc, "/oml:run/oml:output_data"), +# parameter.setting = list() +# )) + +# # parse parameters +# ns.pars = getNodeSet(doc, "/oml:run/oml:parameter_setting") +# run.args[["parameter.setting"]] = lapply(seq_along(ns.pars), function(i) { +# args = filterNull(list( +# name = xmlRValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:name", sep = "")), +# value = xmlRValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:value", sep = "")), +# component = xmlOValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:component", sep = "")) +# )) +# do.call(makeOMLRunParameter, args) +# }) +# par.names = vcapply(run.args[["parameter.setting"]], function(x) x$name) +# run.args[["parameter.setting"]] = setNames(run.args[["parameter.setting"]], par.names) +# #setClasses(run.args[["parameter.setting"]], "OMLRunParList") + +# # get the predictions +# f = findCachedRun(run.args$run.id) + +# if (!f$predictions.arff$found) { +# showInfo(verbosity, "No ARFF file containing the predictions found.") +# pred = NULL +# } else { +# #showInfo(verbosity, "Predictions found in cache.") +# pred = arff.reader(f$predictions.arff$path) +# } +# run.args[["predictions"]] = pred + +# return(do.call(makeOMLRun, run.args)) +# } + +# parseData = function(doc, path) { +# # parse datasets +# path.ds = paste(path, "oml:dataset", sep = "/") +# ns.datasets = getNodeSet(doc, path.ds) +# datasets = lapply(seq_along(ns.datasets), function(i) { +# list( +# data.id = xmlRValR(doc, paste(path.ds, "[", i, "]/oml:did", sep = "")), +# name = xmlRValS(doc, paste(path.ds, "[", i, "]/oml:name", sep = "")), +# url = xmlRValS(doc, paste(path.ds, "[", i, "]/oml:url", sep = "")) +# )}) +# datasets = convertListOfRowsToDataFrame(datasets, strings.as.factors = FALSE) + +# # parse files +# path.fls = paste(path, "oml:file", sep = "/") +# ns.fls = getNodeSet(doc, path.fls) +# files = lapply(seq_along(ns.fls), function(i) { +# list( +# data.id = xmlRValR(doc, paste(path.fls, "[", i, "]/oml:did", sep = "")), +# name = xmlRValS(doc, paste(path.fls, "[", i, "]/oml:name", sep = "")), +# url = xmlRValS(doc, paste(path.fls, "[", i, "]/oml:url", sep = "")) +# )}) +# files = convertListOfRowsToDataFrame(files, strings.as.factors = FALSE) + +# # parse evaluations +# path.evals = paste(path, "oml:evaluation", sep = "/") +# ns.evals = getNodeSet(doc, path.evals) + +# evals = setDF(rbindlist(lapply(ns.evals, function(node) { +# children = xmlChildren(node) +# row = list( +# as.integer(xmlValue(children[["did"]])), +# xmlValue(children[["name"]]), +# xmlValue(children[["flow_id"]]), +# xmlValue(children[["label"]]), +# as.numeric(xmlValue(children[["value"]])), +# as.numeric(xmlValue(children[["stdev"]])), +# xmlValue(children[["array_data"]]), +# as.integer(xmlValue(children[["sample_size"]])) +# ) +# cv.info = xmlAttrs(node)[c("repeat", "fold")] +# if (is.null(cv.info)) cv.info = c(NA, NA) +# row = c(row, cv.info) +# names(row) = c("data.id", "name", "flow_id", "label", "value", "stdev", "array.data", "sample.size", "repeat", "fold") +# row +# }), fill = TRUE)) +# makeOMLIOData(datasets = datasets, files = files, evaluations = evals) +# } diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R index 16c8900..d497364 100644 --- a/R/listOMLRunEvaluations.R +++ b/R/listOMLRunEvaluations.R @@ -1,19 +1,23 @@ .listOMLRunEvaluations = function(task.id = NULL, flow.id = NULL, run.id = NULL, uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL, - evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE) { + evaluation.measure = NULL, show.array.measures = FALSE, extend.flow.name = TRUE, + setup = FALSE) { if (is.null(task.id) && is.null(flow.id) && is.null(run.id) && is.null(uploader.id) && is.null(tag)) stop("Please hand over at least one of the following: task.id, flow.id, run.id, uploader.id, tag") if (is.null(evaluation.measure)) showInfo(verbosity, "Suggestion: Use the 'evaluation.measure' argument to restrict the results to only one measure.") - api.call = generateAPICall(api.call = "json/evaluation/list", task.id = task.id, + if (!setup) api.call = "json/evaluation/list" else api.call = "json/evaluation/setup/list" + api.call = generateAPICall(api.call = api.call, task.id = task.id, flow.id = flow.id, run.id = run.id, uploader.id = uploader.id, tag = tag, evaluation.measure = evaluation.measure, limit = limit, offset = offset) content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity) if (is.null(content)) return(data.frame()) - evals = fromJSON(txt = content, simplifyVector = FALSE)$evaluations$evaluation + browser() + lst_content = fromJSON(txt = content, simplifyVector = FALSE) + evals = lst_content$evaluations$evaluation evals = rbindlist(lapply(evals, function(x) { if (is.null(x$value)) x$value = NA From 957dca1ab8fded64ec13a60ee5f5bd41b7bebf0d Mon Sep 17 00:00:00 2001 From: pfistfl Date: Wed, 20 Nov 2019 14:53:02 +0100 Subject: [PATCH 2/3] Add tests and make a proposal for changes --- R/convertOMLSetupParamsToDT.R | 47 +++++++ R/getOMLRunEvaluations.R | 129 ------------------ R/listOMLRunEvaluations.R | 9 +- R/listOMLSetup.R | 57 ++++---- .../test_server_listOMLRunEvaluations.R | 13 ++ 5 files changed, 98 insertions(+), 157 deletions(-) create mode 100644 R/convertOMLSetupParamsToDT.R delete mode 100644 R/getOMLRunEvaluations.R diff --git a/R/convertOMLSetupParamsToDT.R b/R/convertOMLSetupParamsToDT.R new file mode 100644 index 0000000..c0bcab6 --- /dev/null +++ b/R/convertOMLSetupParamsToDT.R @@ -0,0 +1,47 @@ +# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing. + +# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e. +# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2 + + + +#' @title Extract Parameters from an OpenML run into a flat structure +#' +#' @param run.evals [\code{data.frame}]\cr +#' Result of caling listOMLRunEvaluations(..., setup = TRUE). +#' @param drop.constant [\code{logical(1)]\cr +#' Should constant columns be dropped before returning the result? +#' +#' @return [\code{\link{data.table}}]. +#' @family run-related functions +#' @export +convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) { + assert_data_frame(run.evals) + assert_true(!is.null(run.evals$setup_parameters)) + assert_flag(drop.constant) + setup_params = run.evals$setup_parameters + out = lapply(setup_params, function(params) { + params[!(parameter_name == "verbose" & data_type == "boolean"), ] + params[, convertValueByType(parameter_name, value, data_type)] + }) + dt = rbindlist(out, fill = TRUE) + if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with =FALSE] + run.evals$setup_parameters = NULL + return(cbind(run.evals, dt)) +} + + +# Convert values according to a parameter's type. +# Note that this is very unreliably. +convertValueByType = function(parameter_name, value, type) { + value = Map(function(v, t) { + v[v == "None" | v == "none" | v == "Null" | v == "null"] = NA + v = gsub(""", "", v) + if (t %in% c("boolean", "bool")) v = as.logical(v) + else if (t %in% c("float", "number")) v = as.numeric(v) + else if (t %in% c("int", "integer", "int or None", "integer or None")) v = suppressWarnings(as.integer(v)) + return(v) + }, value, type) + names(value) = gsub(""", "", parameter_name) + return(as.data.table(value)) +} diff --git a/R/getOMLRunEvaluations.R b/R/getOMLRunEvaluations.R deleted file mode 100644 index 7d023c4..0000000 --- a/R/getOMLRunEvaluations.R +++ /dev/null @@ -1,129 +0,0 @@ -# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/limit/100 works, while https://test.openml.org/api/v1/evaluation/setup/list/flow/6794 returns nothing. - -# Result size limits are okay, as long as I can somehow reliably iterate with the offset, i.e. -# https://test.openml.org/api/v1/evaluation/setup/list/flow/2117/task/403/function/predictive_accuracy/limit/1/offset/2 - - - -# #' @title Get performances and hyperparameters for flows or tasks -# #' -# #' @description -# #' Given an run id, the corresponding \code{\link{OMLRun}} including all server -# #' and user computed metrics is downloaded if not already available in cache. -# #' -# #' @param flow.id [\code{integer(1)}]\cr -# #' The flow ID. -# #' @param task.id [\code{integer(1)}]\cr -# #' The task ID. -# #' @param eval_measure [\code{character}] -# #' @template arg_cache_only -# #' @param only.xml [\code{logical(1)}]\cr -# #' Should only the XML be downloaded? -# #' @template arg_verbosity -# #' @return [\code{\link{OMLRun}}]. -# #' @family downloading functions -# #' @family run-related functions -# #' @example inst/examples/getOMLRun.R -# #' @export -# getOMLRun = function(run.id, cache.only = FALSE, only.xml = FALSE, verbosity = NULL) { -# id = asCount(run.id) -# assertFlag(cache.only) - -# down = downloadOMLObject(id, object = "run", cache.only = cache.only, only.xml = only.xml, verbosity = verbosity) -# f = down$files -# doc = down$doc - -# run.args = filterNull(list( -# run.id = xmlREValI(doc, "/oml:run/oml:run_id"), -# uploader = xmlREValI(doc, "/oml:run/oml:uploader"), -# uploader.name = xmlOValS(doc, "/oml:run/oml:uploader.name"), -# task.id = xmlREValI(doc, "/oml:run/oml:task_id"), -# task.type = xmlOValS(doc, "/oml:run/oml:task_type"), -# task.evaluation.measure = xmlOValS(doc, "/oml:run/oml:task_evaluation_measure"), -# flow.id = xmlRValI(doc, "/oml:run/oml:flow_id"), -# flow.name = xmlOValS(doc, "/oml:run/oml:flow_name"), -# setup.id = xmlREValI(doc, "/oml:run/oml:setup_id"), -# setup.string = xmlOValS(doc, "/oml:run/oml:setup_string"), -# error.message = xmlOValS(doc, "/oml:run/oml:error_message"), -# tags = xmlOValsMultNsS(doc, "/oml:run/oml:tag"), -# input.data = parseData(doc, "/oml:run/oml:input_data"), -# output.data = parseData(doc, "/oml:run/oml:output_data"), -# parameter.setting = list() -# )) - -# # parse parameters -# ns.pars = getNodeSet(doc, "/oml:run/oml:parameter_setting") -# run.args[["parameter.setting"]] = lapply(seq_along(ns.pars), function(i) { -# args = filterNull(list( -# name = xmlRValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:name", sep = "")), -# value = xmlRValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:value", sep = "")), -# component = xmlOValS(doc, paste("/oml:run/oml:parameter_setting[", i, "]/oml:component", sep = "")) -# )) -# do.call(makeOMLRunParameter, args) -# }) -# par.names = vcapply(run.args[["parameter.setting"]], function(x) x$name) -# run.args[["parameter.setting"]] = setNames(run.args[["parameter.setting"]], par.names) -# #setClasses(run.args[["parameter.setting"]], "OMLRunParList") - -# # get the predictions -# f = findCachedRun(run.args$run.id) - -# if (!f$predictions.arff$found) { -# showInfo(verbosity, "No ARFF file containing the predictions found.") -# pred = NULL -# } else { -# #showInfo(verbosity, "Predictions found in cache.") -# pred = arff.reader(f$predictions.arff$path) -# } -# run.args[["predictions"]] = pred - -# return(do.call(makeOMLRun, run.args)) -# } - -# parseData = function(doc, path) { -# # parse datasets -# path.ds = paste(path, "oml:dataset", sep = "/") -# ns.datasets = getNodeSet(doc, path.ds) -# datasets = lapply(seq_along(ns.datasets), function(i) { -# list( -# data.id = xmlRValR(doc, paste(path.ds, "[", i, "]/oml:did", sep = "")), -# name = xmlRValS(doc, paste(path.ds, "[", i, "]/oml:name", sep = "")), -# url = xmlRValS(doc, paste(path.ds, "[", i, "]/oml:url", sep = "")) -# )}) -# datasets = convertListOfRowsToDataFrame(datasets, strings.as.factors = FALSE) - -# # parse files -# path.fls = paste(path, "oml:file", sep = "/") -# ns.fls = getNodeSet(doc, path.fls) -# files = lapply(seq_along(ns.fls), function(i) { -# list( -# data.id = xmlRValR(doc, paste(path.fls, "[", i, "]/oml:did", sep = "")), -# name = xmlRValS(doc, paste(path.fls, "[", i, "]/oml:name", sep = "")), -# url = xmlRValS(doc, paste(path.fls, "[", i, "]/oml:url", sep = "")) -# )}) -# files = convertListOfRowsToDataFrame(files, strings.as.factors = FALSE) - -# # parse evaluations -# path.evals = paste(path, "oml:evaluation", sep = "/") -# ns.evals = getNodeSet(doc, path.evals) - -# evals = setDF(rbindlist(lapply(ns.evals, function(node) { -# children = xmlChildren(node) -# row = list( -# as.integer(xmlValue(children[["did"]])), -# xmlValue(children[["name"]]), -# xmlValue(children[["flow_id"]]), -# xmlValue(children[["label"]]), -# as.numeric(xmlValue(children[["value"]])), -# as.numeric(xmlValue(children[["stdev"]])), -# xmlValue(children[["array_data"]]), -# as.integer(xmlValue(children[["sample_size"]])) -# ) -# cv.info = xmlAttrs(node)[c("repeat", "fold")] -# if (is.null(cv.info)) cv.info = c(NA, NA) -# row = c(row, cv.info) -# names(row) = c("data.id", "name", "flow_id", "label", "value", "stdev", "array.data", "sample.size", "repeat", "fold") -# row -# }), fill = TRUE)) -# makeOMLIOData(datasets = datasets, files = files, evaluations = evals) -# } diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R index d497364..4dd5314 100644 --- a/R/listOMLRunEvaluations.R +++ b/R/listOMLRunEvaluations.R @@ -15,10 +15,15 @@ content = doAPICall(api.call, file = NULL, method = "GET", verbosity = verbosity) if (is.null(content)) return(data.frame()) - browser() lst_content = fromJSON(txt = content, simplifyVector = FALSE) evals = lst_content$evaluations$evaluation + if (setup) { + param_list = lapply(evals, function(x) { + parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,] + }) + } + evals = rbindlist(lapply(evals, function(x) { if (is.null(x$value)) x$value = NA if (is.null(x$array_data)) x$array_data = NA else x$array_data = collapse(x$array_data) @@ -79,7 +84,7 @@ values = list(flow.version = flow.version, flow.source = flow.source, learner.name = learner.name)), stringsAsFactors = FALSE) } - + if (setup) evals$setup_parameters = param_list return(evals) } diff --git a/R/listOMLSetup.R b/R/listOMLSetup.R index 828021a..bdd72e4 100644 --- a/R/listOMLSetup.R +++ b/R/listOMLSetup.R @@ -11,32 +11,7 @@ setup = fromJSON(txt = content)$setups$setup sid = data.frame(join_id = 1:length(setup$setup_id), setup_id = setup$setup_id) - # Get parameters and clean them up - param = setup$parameter - if (!is.null(names(param))) { - # if elements have a name, it refers to parameter - param = param[!vlapply(param, function(x) length(x) == 0)] - param = as.data.frame(param, stringsAsFactors = FALSE) - param = cbind(param, join_id = 1, stringsAsFactors = FALSE) - } else { - # add names - param = setNames(param, 1:length(param)) - # filter out NULL or empty elements - param = param[!vlapply(param, function(x) length(x) == 0)] - # inside each element, replace empty values with NA - param = lapply(param, function(x) { - replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_) - }) - param = rbindlist(param, fill = TRUE, idcol = "join_id") - param = as.data.frame(param, stringsAsFactors = FALSE) - } - - list.cols = colnames(param)[vlapply(param, is.list)] - for (col in list.cols) { - ind = which(vlapply(param[[col]], function(i) length(i) == 0)) - param[[col]][ind] = NA_character_ - param[[col]] = unlist(param[[col]], recursive = FALSE) - } + param = cleanupSetupParameters(setup$parameter) ret = merge(param, sid) ret$id = ret$join_id = NULL @@ -66,3 +41,33 @@ #' @export #' @example inst/examples/listOMLSetup.R listOMLSetup = memoise(.listOMLSetup) + + +# Get parameters and clean them up +cleanupSetupParameters = function(param) { + if (!is.null(names(param))) { + # if elements have a name, it refers to parameter + param = param[!vlapply(param, function(x) length(x) == 0)] + param = as.data.frame(param, stringsAsFactors = FALSE) + param = cbind(param, join_id = 1, stringsAsFactors = FALSE) + } else { + # add names + param = setNames(param, 1:length(param)) + # filter out NULL or empty elements + param = param[!vlapply(param, function(x) length(x) == 0)] + # inside each element, replace empty values with NA + param = lapply(param, function(x) { + replace(x, which(vlapply(x, function(i) length(i) == 0)), NA_character_) + }) + param = rbindlist(param, fill = TRUE, idcol = "join_id") + param = as.data.frame(param, stringsAsFactors = FALSE) + } + + list.cols = colnames(param)[vlapply(param, is.list)] + for (col in list.cols) { + ind = which(vlapply(param[[col]], function(i) length(i) == 0)) + param[[col]][ind] = NA_character_ + param[[col]] = unlist(param[[col]], recursive = FALSE) + } + return(param) +} diff --git a/tests/testthat/test_server_listOMLRunEvaluations.R b/tests/testthat/test_server_listOMLRunEvaluations.R index 2bfe263..ff5cd7b 100644 --- a/tests/testthat/test_server_listOMLRunEvaluations.R +++ b/tests/testthat/test_server_listOMLRunEvaluations.R @@ -30,3 +30,16 @@ test_that("listOMLRunEvaluations", { expect_error(listOMLRunEvaluations(task.id = task.id, evaluation.measure = "m")) }) }) + +test_that("listOMLRunEvaluations", { + with_main_server({ + setOMLConfig(server = "https://test.openml.org/api/v1") + task.id = 6L + + # filter only successful runs + run.evals = .listOMLRunEvaluations(task.id = task.id, evaluation.measure = "area_under_roc_curve", setup = TRUE, limit = 20) + expect_data_frame(run.evals, min.rows = 1L, col.names = "unique") + expect_subset(c("run.id", "task.id", "setup.id", "flow.id", "flow.name", "flow.source", "data.name", "setup_parameters"), + names(run.evals)) + }) +}) From a5c67d233520a87a2ba370c41009176ddefa2fe5 Mon Sep 17 00:00:00 2001 From: pfistfl Date: Fri, 22 Nov 2019 14:48:37 +0100 Subject: [PATCH 3/3] lint and docs --- NAMESPACE | 1 + R/convertOMLSetupParamsToDT.R | 8 ++++---- R/listOMLRunEvaluations.R | 5 ++++- man/chunkOMLlist.Rd | 2 +- man/convertOMLMlrRunToBMR.Rd | 3 ++- man/convertOMLRunEvalsToDT.Rd | 28 ++++++++++++++++++++++++++++ man/convertOMLRunToBMR.Rd | 1 + man/deleteOMLObject.Rd | 1 + man/getOMLRun.Rd | 1 + man/listOMLRunEvaluations.Rd | 7 ++++++- man/listOMLRuns.Rd | 1 + man/makeOMLRun.Rd | 1 + man/makeOMLRunParameter.Rd | 1 + man/tagging.Rd | 1 + man/uploadOMLRun.Rd | 1 + 15 files changed, 54 insertions(+), 8 deletions(-) create mode 100644 man/convertOMLRunEvalsToDT.Rd diff --git a/NAMESPACE b/NAMESPACE index 4d7ad6c..8cca4f1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -33,6 +33,7 @@ export(convertMlrTaskToOMLDataSet) export(convertOMLDataSetToMlr) export(convertOMLFlowToMlr) export(convertOMLMlrRunToBMR) +export(convertOMLRunEvalsToDT) export(convertOMLRunToBMR) export(convertOMLTaskToMlr) export(deleteOMLObject) diff --git a/R/convertOMLSetupParamsToDT.R b/R/convertOMLSetupParamsToDT.R index c0bcab6..4e41a3f 100644 --- a/R/convertOMLSetupParamsToDT.R +++ b/R/convertOMLSetupParamsToDT.R @@ -9,7 +9,7 @@ #' #' @param run.evals [\code{data.frame}]\cr #' Result of caling listOMLRunEvaluations(..., setup = TRUE). -#' @param drop.constant [\code{logical(1)]\cr +#' @param drop.constant [\code{logical(1)]\cr #' Should constant columns be dropped before returning the result? #' #' @return [\code{\link{data.table}}]. @@ -21,11 +21,11 @@ convertOMLRunEvalsToDT = function(run.evals, drop.constant = TRUE) { assert_flag(drop.constant) setup_params = run.evals$setup_parameters out = lapply(setup_params, function(params) { - params[!(parameter_name == "verbose" & data_type == "boolean"), ] - params[, convertValueByType(parameter_name, value, data_type)] + params[!(params$parameter_name == "verbose" & params$data_type == "boolean"), ] + params[, convertValueByType(params$parameter_name, params$value, params$data_type)] }) dt = rbindlist(out, fill = TRUE) - if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with =FALSE] + if (drop.constant) dt = dt[, vlapply(dt, function(x) length(unique(x)) > 1), with = FALSE] run.evals$setup_parameters = NULL return(cbind(run.evals, dt)) } diff --git a/R/listOMLRunEvaluations.R b/R/listOMLRunEvaluations.R index 4dd5314..47cb411 100644 --- a/R/listOMLRunEvaluations.R +++ b/R/listOMLRunEvaluations.R @@ -20,7 +20,7 @@ if (setup) { param_list = lapply(evals, function(x) { - parameters = as.data.table(cleanupSetupParameters(x$parameters))[-25,] + parameters = as.data.table(cleanupSetupParameters(x$parameters)) }) } @@ -110,6 +110,9 @@ #' @param extend.flow.name [\code{logical(1)}]\cr #' Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner. #' Default is \code{TRUE}. +#' @param setup [\code{logical(1)}]\cr +#' Adds a column \code{setup_parameters} that contains the runs setup, i.e. the hyperparameters set +#' for the run. #' #' @return [\code{data.frame}]. #' @family list diff --git a/man/chunkOMLlist.Rd b/man/chunkOMLlist.Rd index 7ae1bbe..5c941ef 100644 --- a/man/chunkOMLlist.Rd +++ b/man/chunkOMLlist.Rd @@ -4,7 +4,7 @@ \alias{chunkOMLlist} \title{Do chunked listings} \usage{ -chunkOMLlist(listfun, ..., total.limit = 1e+05, chunk.limit = 1000) +chunkOMLlist(listfun, ..., total.limit = 100000, chunk.limit = 1000) } \arguments{ \item{listfun}{[\code{character(1)}]\cr diff --git a/man/convertOMLMlrRunToBMR.Rd b/man/convertOMLMlrRunToBMR.Rd index d1fc61f..9337464 100644 --- a/man/convertOMLMlrRunToBMR.Rd +++ b/man/convertOMLMlrRunToBMR.Rd @@ -17,7 +17,8 @@ One or more \code{\link{OMLMlrRun}s}} Converts one or more \code{\link{OMLMlrRun}}s to a \code{\link[mlr]{BenchmarkResult}}. } \seealso{ -Other run-related functions: \code{\link{convertOMLRunToBMR}}, +Other run-related functions: \code{\link{convertOMLRunEvalsToDT}}, + \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/convertOMLRunEvalsToDT.Rd b/man/convertOMLRunEvalsToDT.Rd new file mode 100644 index 0000000..679156d --- /dev/null +++ b/man/convertOMLRunEvalsToDT.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convertOMLSetupParamsToDT.R +\name{convertOMLRunEvalsToDT} +\alias{convertOMLRunEvalsToDT} +\title{Extract Parameters from an OpenML run into a flat structure} +\usage{ +convertOMLRunEvalsToDT(run.evals, drop.constant = TRUE) +} +\arguments{ +\item{run.evals}{[\code{data.frame}]\cr +Result of caling listOMLRunEvaluations(..., setup = TRUE).} +} +\value{ +[\code{\link{data.table}}]. +} +\description{ +Extract Parameters from an OpenML run into a flat structure +} +\seealso{ +Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunToBMR}}, + \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, + \code{\link{listOMLRuns}}, + \code{\link{makeOMLRunParameter}}, + \code{\link{makeOMLRun}}, \code{\link{tagOMLObject}}, + \code{\link{uploadOMLRun}} +} +\concept{run-related functions} diff --git a/man/convertOMLRunToBMR.Rd b/man/convertOMLRunToBMR.Rd index 6d3e627..bc4cb3b 100644 --- a/man/convertOMLRunToBMR.Rd +++ b/man/convertOMLRunToBMR.Rd @@ -27,6 +27,7 @@ Converts an \code{\link{OMLRun}} to a \code{\link[mlr]{BenchmarkResult}}. } \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/deleteOMLObject.Rd b/man/deleteOMLObject.Rd index 1fbcdd9..a57a2ea 100644 --- a/man/deleteOMLObject.Rd +++ b/man/deleteOMLObject.Rd @@ -47,6 +47,7 @@ Other flow-related functions: \code{\link{convertOMLFlowToMlr}}, \code{\link{makeOMLFlow}}, \code{\link{tagOMLObject}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/getOMLRun.Rd b/man/getOMLRun.Rd index 3709418..77be328 100644 --- a/man/getOMLRun.Rd +++ b/man/getOMLRun.Rd @@ -46,6 +46,7 @@ Other downloading functions: \code{\link{getOMLDataSetQualities}}, \code{\link{getOMLStudy}}, \code{\link{getOMLTask}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/listOMLRunEvaluations.Rd b/man/listOMLRunEvaluations.Rd index 701d35d..1913ed2 100644 --- a/man/listOMLRunEvaluations.Rd +++ b/man/listOMLRunEvaluations.Rd @@ -7,7 +7,8 @@ listOMLRunEvaluations(task.id = NULL, flow.id = NULL, run.id = NULL, uploader.id = NULL, tag = NULL, limit = NULL, offset = NULL, verbosity = NULL, evaluation.measure = NULL, - show.array.measures = FALSE, extend.flow.name = TRUE) + show.array.measures = FALSE, extend.flow.name = TRUE, + setup = FALSE) } \arguments{ \item{task.id}{[\code{integer}]\cr @@ -55,6 +56,10 @@ Default is \code{FALSE}.} \item{extend.flow.name}{[\code{logical(1)}]\cr Adds a column \code{flow.version} that refers to the version number of the flow and a column \code{flow.source} containing the prefix of the flow that specifies the source of the flow (i.e. weka, R) and a column \code{learner.name} that refers to the learner. Default is \code{TRUE}.} + +\item{setup}{[\code{logical(1)}]\cr +Adds a column \code{setup_parameters} that contains the runs setup, i.e. the hyperparameters set +for the run.} } \value{ [\code{data.frame}]. diff --git a/man/listOMLRuns.Rd b/man/listOMLRuns.Rd index c6b0d2e..efb7ed4 100644 --- a/man/listOMLRuns.Rd +++ b/man/listOMLRuns.Rd @@ -76,6 +76,7 @@ Other listing functions: \code{\link{chunkOMLlist}}, \code{\link{listOMLTasks}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{makeOMLRunParameter}}, diff --git a/man/makeOMLRun.Rd b/man/makeOMLRun.Rd index f0a74f4..00c2cb1 100644 --- a/man/makeOMLRun.Rd +++ b/man/makeOMLRun.Rd @@ -75,6 +75,7 @@ More details about the elements of a \code{OMLRun} can be found in the } \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, diff --git a/man/makeOMLRunParameter.Rd b/man/makeOMLRunParameter.Rd index be45430..ad6a008 100644 --- a/man/makeOMLRunParameter.Rd +++ b/man/makeOMLRunParameter.Rd @@ -23,6 +23,7 @@ Construct a run parameter object for a run. } \seealso{ Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, \code{\link{makeOMLRun}}, diff --git a/man/tagging.Rd b/man/tagging.Rd index a88e838..8e18a7c 100644 --- a/man/tagging.Rd +++ b/man/tagging.Rd @@ -54,6 +54,7 @@ Other flow-related functions: \code{\link{convertOMLFlowToMlr}}, \code{\link{makeOMLFlow}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}}, diff --git a/man/uploadOMLRun.Rd b/man/uploadOMLRun.Rd index 22e1bf5..879113e 100644 --- a/man/uploadOMLRun.Rd +++ b/man/uploadOMLRun.Rd @@ -53,6 +53,7 @@ Other uploading functions: \code{\link{makeOMLStudy}}, \code{\link{uploadOMLFlow}}, \code{\link{uploadOMLStudy}} Other run-related functions: \code{\link{convertOMLMlrRunToBMR}}, + \code{\link{convertOMLRunEvalsToDT}}, \code{\link{convertOMLRunToBMR}}, \code{\link{deleteOMLObject}}, \code{\link{getOMLRun}}, \code{\link{listOMLRuns}},