% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rf_compare.R
\name{rf_compare}
\alias{rf_compare}
\title{Compares models via spatial cross-validation}
\usage{
rf_compare(
  models = NULL,
  xy = NULL,
  repetitions = 30,
  training.fraction = 0.75,
  metrics = c("r.squared", "pseudo.r.squared", "rmse", "nrmse", "auc"),
  distance.step = NULL,
  distance.step.x = NULL,
  distance.step.y = NULL,
  fill.color = viridis::viridis(100, option = "F", direction = -1, alpha = 0.8),
  line.color = "gray30",
  seed = 1,
  verbose = TRUE,
  n.cores = parallel::detectCores() - 1,
  cluster = NULL
)
}
\arguments{
\item{models}{Named list with models resulting from \code{\link[=rf]{rf()}}, \code{\link[=rf_spatial]{rf_spatial()}}, \code{\link[=rf_tuning]{rf_tuning()}}, or \code{\link[=rf_evaluate]{rf_evaluate()}}. Example: \code{models = list(a = model.a, b = model.b)}. Default: \code{NULL}}

\item{xy}{Data frame or matrix with two columns containing coordinates and named "x" and "y". Default: \code{NULL}}

\item{repetitions}{Integer, number of spatial folds to use during cross-validation. Must be lower than the total number of rows available in the model's data. Default: \code{30}}

\item{training.fraction}{Proportion between 0.5 and 0.9 indicating the proportion of records to be used as training set during spatial cross-validation. Default: \code{0.75}}

\item{metrics}{Character vector, names of the performance metrics selected. The possible values are: "r.squared" (\code{cor(obs, pred) ^ 2}), "pseudo.r.squared" (\code{cor(obs, pred)}), "rmse" (\code{sqrt(sum((obs - pred)^2)/length(obs))}), "nrmse" (\code{rmse/(quantile(obs, 0.75) - quantile(obs, 0.25))}), and "auc" (area under the ROC curve, intended for binary responses with values 1 and 0). Default: \code{c("r.squared", "pseudo.r.squared", "rmse", "nrmse", "auc")}}

\item{distance.step}{Numeric, argument \code{distance.step} of \code{\link[=thinning_til_n]{thinning_til_n()}}. Distance step used during the selection of the centers of the training folds. These fold centers are selected by thinning the data until a number of folds equal to or lower than \code{repetitions} is reached. Its default value is 1/1000th the maximum distance within records in \code{xy}. Reduce it if the number of training folds is lower than expected.}

\item{distance.step.x}{Numeric, argument \code{distance.step.x} of \code{\link[=make_spatial_folds]{make_spatial_folds()}}. Distance step used during the growth in the x axis of the buffers defining the training folds. Default: \code{NULL} (1/1000th the range of the x coordinates).}

\item{distance.step.y}{Numeric, argument \code{distance.step.y} of \code{\link[=make_spatial_folds]{make_spatial_folds()}}. Distance step used during the growth in the y axis of the buffers defining the training folds. Default: \code{NULL} (1/1000th the range of the y coordinates).}

\item{fill.color}{Character vector with hexadecimal codes (e.g. "#440154FF" "#21908CFF" "#FDE725FF"), or function generating a palette (e.g. \code{viridis::viridis(100)}). Default: \code{viridis::viridis(100, option = "F", direction = -1, alpha = 0.8)}}

\item{line.color}{Character string, color of the line produced by \code{ggplot2::geom_smooth()}. Default: \code{"gray30"}}

\item{seed}{Integer, random seed to facilitate reproducibility. If set to a given number, the results of the function are always the same. Default: \code{1}.}

\item{verbose}{Logical. If \code{TRUE}, messages and plots generated during the execution of the function are displayed. Default: \code{TRUE}}

\item{n.cores}{Integer, number of cores to use for parallel execution. Creates a socket cluster with \code{parallel::makeCluster()}, runs operations in parallel with \code{foreach} and \verb{\%dopar\%}, and stops the cluster with \code{parallel::stopCluster()} when the job is done. Default: \code{parallel::detectCores() - 1}}

\item{cluster}{A cluster definition generated with \code{parallel::makeCluster()}. If provided, overrides \code{n.cores}. When \code{cluster = NULL} (default value), and \code{model} is provided, the cluster in \code{model}, if any, is used instead. If this cluster is \code{NULL}, then the function uses \code{n.cores} instead. The function does not stop a provided cluster, so it should be stopped with \code{parallel::stopCluster()} afterwards. The cluster definition is stored in the output list under the name "cluster" so it can be passed to other functions via the \code{model} argument, or using the \verb{\%>\%} pipe. Default: \code{NULL}}
}
\value{
A list with three slots:
\itemize{
\item \code{comparison.df}: Data frame with one performance value per spatial fold, metric, and model.
\item \code{spatial.folds}: List with the indices of the training and testing records for each evaluation repetition.
\item \code{plot}: Violin-plot of \code{comparison.df}.
}
}
\description{
Uses \code{\link[=rf_evaluate]{rf_evaluate()}} to compare the performance of several models on independent spatial folds via spatial cross-validation.
}
\examples{

if(interactive()){

  data(
    plants_rf,
    plants_rf_spatial,
    plants_xy
  )

  comparison <- rf_compare(
    models = list(
      `Non spatial` = plants_rf,
      Spatial = plants_rf_spatial
    ),
    repetitions = 5,
    xy = plants_xy,
    metrics = "rmse",
    n.cores = 1
  )

}

}
\seealso{
\code{\link[=rf_evaluate]{rf_evaluate()}}

Other model_workflow: 
\code{\link{rf_evaluate}()},
\code{\link{rf_importance}()},
\code{\link{rf_repeat}()},
\code{\link{rf_tuning}()}
}
\concept{model_workflow}
