% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pipeline_vectorized.R
\name{pipeline_vectorized}
\alias{pipeline_vectorized}
\title{Call (nearly) one "Accuracy" function with many parameterizations at once
automatically}
\usage{
pipeline_vectorized(
  fct,
  resp_vars = NULL,
  study_data,
  meta_data,
  label_col,
  ...,
  key_var_names,
  cores = list(mode = "socket", logging = FALSE, load.balancing = TRUE),
  variable_roles = list(resp_vars = list(VARIABLE_ROLES$PRIMARY,
    VARIABLE_ROLES$SECONDARY), group_vars = VARIABLE_ROLES$PROCESS),
  result_groups,
  use_cache = FALSE,
  compute_plan_only = FALSE
)
}
\arguments{
\item{fct}{\link{function} function to call}

\item{resp_vars}{\link{variable list} the names of the measurement variables;
if NULL (default), all variables are used.}

\item{study_data}{\link{data.frame} the data frame that contains the measurements}

\item{meta_data}{\link{data.frame} the data frame that contains metadata
attributes of study data}

\item{label_col}{\link{variable attribute} the name of the column in the metadata
with labels of variables}

\item{...}{additional arguments for the function}

\item{key_var_names}{\link{character} character vector named by arguments to be
filled by metadata KEY_-entries, e.g.,
c(group_vars = KEY_OBSERVER). May be missing;
then all possible combinations will be analyzed.
Cannot contain resp_vars.}

\item{cores}{\link{integer} number of cpu cores to use or a named list with
arguments for \link[parallelMap:parallelStart]{parallelMap::parallelStart} or NULL, if
parallel has already been started by the caller.}

\item{variable_roles}{\link{list} restrict each function argument (referred to by
its name matching a name in \code{names(variable_roles)})
to variables of the role given here.}

\item{result_groups}{\link{character} columns to group results into encapsulated
lists, or NULL to receive a data frame with all
call arguments and their respective results
in a column 'result' -- see
\link{pipeline_recursive_result}}

\item{use_cache}{\link{logical} set to FALSE to omit re-using already distributed
study- and metadata on a parallel cluster}

\item{compute_plan_only}{\link{logical} set to TRUE to omit computations and
return only the compute plan
filled with planned evaluations. Used in
pipelines.}
}
\value{
\itemize{
\item if \code{result_groups} is set, a list with:
\itemize{
\item the values of the first argument named in \code{result_groups}, each
containing the values of the second argument as a similar list, recursively;
}
\item if \code{result_groups} is not set, a data frame with one row per function
call, all the arguments of each call in its columns and a column \code{results}
providing the function calls' results.
}
}
\description{
This is a function to automatically call indicator functions of the
"Accuracy" dimension in a vectorized manner with a set of parameterizations
derived from the metadata.
}
\details{
The function to call is given as first argument. All arguments of the called
functions can be given here, but \code{pipeline_vectorized} can derive technically
possible values (most of them) from the metadata, which can be controlled
using the arguments \code{key_var_names} and \code{variable_roles}. The function
returns an encapsulated list by default, but it can also return a
\link{data.frame}. See also \link{pipeline_recursive_result} for these two options.
The argument \code{use_cache} controls whether the input data (\code{study_data} and
\code{meta_data}) should be passed around when running in parallel or be
distributed beforehand to the compute nodes. All calls will be done in
parallel, if possible. This can be configured, see argument \code{cores}.

If the function is called in a larger framework (such as \link{dq_report}), then
\code{compute_plan_only} can be set to \code{TRUE} to skip the actual function calls and
return only a \link{data.frame} with parameterizations of "Accuracy" functions. Also in
such a scenario, one may not want to start and stop a cluster on entering
and leaving \link{pipeline_vectorized} but rather use an existing cluster. This can
be achieved by setting the \code{cores} argument to \code{NULL}.
}
\examples{
\dontrun{ # really long-running example
load(system.file("extdata/study_data.RData", package = "dataquieR"))
load(system.file("extdata/meta_data.RData", package = "dataquieR"))
a <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER)
)
b <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL
)
b_adj <-
  pipeline_vectorized(
    fct = acc_margins, study_data = study_data,
    meta_data = meta_data, label_col = LABEL, co_vars = c("SEX_1", "AGE_1")
  )
c <- pipeline_vectorized(
  fct = acc_loess, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  variable_roles = list(
    resp_vars = list(VARIABLE_ROLES$PRIMARY),
    group_vars = VARIABLE_ROLES$PROCESS
  )
)
d <- pipeline_vectorized(
  fct = acc_loess, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  variable_roles = list(
    resp_vars = list(VARIABLE_ROLES$PRIMARY, VARIABLE_ROLES$SECONDARY),
    group_vars = VARIABLE_ROLES$PROCESS
  )
)
e <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0"
)

f <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0",
  result_groups = NULL
)
pipeline_recursive_result(f)
g <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0",
  result_groups = c("co_vars")
)
g1 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0",
  result_groups = c("group_vars")
)
g2 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0",
  result_groups = c("group_vars", "co_vars")
)
g3 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  key_var_names = c(group_vars = KEY_OBSERVER), co_vars = "SEX_0",
  result_groups = c("co_vars", "group_vars")
)
g4 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_data, label_col = LABEL,
  co_vars = "SEX_0", result_groups = c("co_vars")
)
meta_datax <- meta_data
meta_datax[9, "KEY_DEVICE"] <- "v00011"
g5 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_datax, label_col = LABEL,
  co_vars = "SEX_0", result_groups = c("co_vars")
)
g6 <- pipeline_vectorized(
  fct = acc_margins, study_data = study_data,
  meta_data = meta_datax, label_col = LABEL,
  co_vars = "SEX_0", result_groups = c("co_vars", "group_vars")
)
}
}
