% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/parsperrorest.R
\name{parsperrorest}
\alias{parsperrorest}
\title{Perform spatial error estimation and variable importance assessment}
\usage{
parsperrorest(formula, data, coords = c("x", "y"), model.fun,
  model.args = list(), pred.fun = NULL, pred.args = list(),
  smp.fun = partition.loo, smp.args = list(), train.fun = NULL,
  train.param = NULL, test.fun = NULL, test.param = NULL,
  err.fun = err.default, error.fold = TRUE, error.rep = TRUE,
  err.train = TRUE, imp.variables = NULL, imp.permutations = 1000,
  importance = !is.null(imp.variables), distance = FALSE, do.gc = 1,
  do.try = FALSE, progress = 1, out.progress = "", par.args = list(),
  benchmark = FALSE, ...)
}
\arguments{
\item{formula}{A formula specifying the variables used by the \code{model}.
Only simple formulas without interactions or nonlinear terms should
be used, e.g. \code{y~x1+x2+x3} but not \code{y~x1*x2+log(x3)}.
Formulas involving interaction and nonlinear terms may possibly work
for error estimation but not for variable importance assessment,
but should be used with caution.}

\item{data}{a \code{data.frame} with predictor and response variables.
Training and test samples will be drawn from this data set by \code{train.fun}
and \code{test.fun}, respectively.}

\item{coords}{vector of length 2 defining the variables in \code{data} that
contain the x and y coordinates of sample locations.}

\item{model.fun}{Function that fits a predictive model, such as \code{glm}
or \code{rpart}. The function must accept at least two arguments, the first
one being a formula and the second a data.frame with the learning sample.}

\item{model.args}{Arguments to be passed to \code{model.fun}
(in addition to the \code{formula} and \code{data} argument,
which are provided by \code{sperrorest})}

\item{pred.fun}{Prediction function for a fitted model object created
by \code{model.fun}. Must accept at least two arguments: the fitted
\code{object} and a \code{data.frame} \code{newdata} with data
on which to predict the outcome.}

\item{pred.args}{(optional) Arguments to \code{pred.fun} (in addition to the
fitted model object and the \code{newdata} argument,
which are provided by \code{sperrorest})}

\item{smp.fun}{A function for sampling training and test sets from
\code{data}. E.g., \code{\link[=partition.kmeans]{partition.kmeans()}} for
spatial cross-validation using spatial \emph{k}-means clustering.}

\item{smp.args}{(optional) Arguments to be passed to \code{smp.fun}}

\item{train.fun}{(optional) A function for resampling or subsampling the
training sample in order to achieve, e.g., uniform sample sizes on all
training sets, or maintaining a certain ratio of positives and negatives
in training sets.
E.g., \code{\link[=resample.uniform]{resample.uniform()}} or \code{\link[=resample.strat.uniform]{resample.strat.uniform()}}}

\item{train.param}{(optional) Arguments to be passed to \code{train.fun}}

\item{test.fun}{(optional) Like \code{train.fun} but for the test set.}

\item{test.param}{(optional) Arguments to be passed to \code{test.fun}}

\item{err.fun}{A function that calculates selected error measures from the
known responses in \code{data} and the model predictions delivered
by \code{pred.fun}. E.g., \code{\link[=err.default]{err.default()}} (the default).
See example and details below.}

\item{error.fold}{logical (default: \code{TRUE} if \code{importance} is
\code{TRUE}, otherwise \code{FALSE}): calculate error measures on each fold
within a resampling repetition.}

\item{error.rep}{logical (default: \code{TRUE}): calculate error measures
based on the pooled predictions of all folds within a resampling repetition.}

\item{err.train}{logical (default: \code{TRUE}): calculate error measures on
the training set (in addition to the test set estimation).}

\item{imp.variables}{(optional; used if \code{importance = TRUE})
Variables for which permutation-based variable importance assessment
is performed. If \code{importance = TRUE} and \code{imp.variables} is
\code{NULL}, all variables in \code{formula} will be used.}

\item{imp.permutations}{(optional; used if \code{importance = TRUE})
Number of permutations used for variable importance assessment.}

\item{importance}{logical: perform permutation-based variable
importance assessment?}

\item{distance}{logical (default: \code{FALSE}): if \code{TRUE}, calculate
mean nearest-neighbour distances from test samples to training samples using
\code{\link[=add.distance.represampling]{add.distance.represampling()}}}

\item{do.gc}{numeric (default: 1): defines frequency of memory garbage
collection by calling \code{\link[=gc]{gc()}}; if \code{<1}, no garbage collection;
if \code{>=1}, run a \code{gc()} after each repetition;
if \code{>=2}, after each fold}

\item{do.try}{logical (default: \code{FALSE}): if \code{TRUE} (untested!!),
use \code{\link[=try]{try()}} to robustify calls to \code{model.fun} and
\code{err.fun}; use with caution!}

\item{progress}{numeric (default: \code{1}): Whether to show progress
information. For \code{par.mode = 1}, information about elapsed time, estimated time remaining and a
percentage indicator (0\% - 100\%) are shown.
\code{progress = 2} only applies to \code{par.mode = 2} and shows repetition
information only (instead of repetition and fold).
Set to \code{FALSE} for no progress information.}

\item{out.progress}{only used if \code{par.mode = 2}: Optionally write progress output to a file instead of console output.
The default (\code{''}) results in console output for Unix-systems and
file output ('parsperrorest.progress.txt') in the current working directory
for Windows-systems.}

\item{par.args}{list of parallelization parameters:
\code{par.mode} (the parallelization mode),
\code{par.units} (the number of parallel processing units),
\code{par.libs} (libraries to be loaded on cluster workers, character list).
See Details for more information.}

\item{benchmark}{(optional) logical (default: \code{FALSE}): if \code{TRUE},
perform benchmarking and return \code{sperrorestbenchmarks} object}

\item{...}{currently not used}
}
\value{
A list (object of class \code{sperrorest}) with (up to) six components:
\item{error.rep}{a \code{sperrorestreperror} object containing
predictive performances at the repetition level}
\item{error.fold}{a \code{sperroresterror} object containing predictive
performances at the fold level}
\item{represampling}{a \code{\link[=represampling]{represampling()}} object}
\item{importance}{a \code{sperrorestimportance} object containing
permutation-based variable importances at the fold level}
\item{benchmarks}{a \code{sperrorestbenchmarks} object containing
information on the system the code is running on, starting and
finishing times, number of available CPU cores, parallelization mode,
number of parallel units, and runtime performance}
\item{package.version}{a \code{sperrorestpackageversion} object containing
information about the \code{sperrorest} package version}
}
\description{
\code{parsperrorest} is a flexible interface for multiple types of
parallelized spatial and non-spatial cross-validation
and bootstrap error estimation and parallelized permutation-based
assessment of spatial variable importance.
}
\details{
Two \code{par.mode} options are available. The default mode is
\code{par.mode = 1}. Here, \code{\link[pbapply:pblapply]{pbapply::pblapply()}} is used which
either calls \code{\link[parallel:mclapply]{parallel::mclapply()}} (on Unix-systems) or
\code{\link[parallel:parApply]{parallel::parApply()}} (on Windows-systems). \code{par.mode = 2}
uses \code{\link[foreach:foreach]{foreach::foreach()}}. While this approach is not as efficient,
it may work in cases in which \code{par.mode = 1} fails.

\code{par.libs} only applies to \code{par.mode = 1} on Windows-systems.

This parallelized version of \code{\link[=sperrorest]{sperrorest()}} may substantially
decrease computation time. However, please note that problems
may occur depending on which function is used for cross-validation.
While the \code{\link[rpart:rpart]{rpart::rpart()}} example (see Examples) here works fine, you may
encounter problems with other functions.

For \code{par.mode = 2}, you may encounter missing repetitions in the results
if repetitions finish too quickly. In this case, consider using
\code{\link[=sperrorest]{sperrorest()}}.

If you define a custom \code{pred.fun} which consists of multiple custom
defined child functions, make sure to define \code{pred.fun} and all child
functions in one call. Otherwise you will encounter errors in \code{par.mode = 2}
caused by how \code{foreach} loads the parent environment.
}
\note{
(1) Optionally save fitted models, training and test samples in the
results object; (2) Optionally save intermediate results in some file, and
enable the function to continue an interrupted sperrorest call where it
was interrupted. (3) Optionally have sperrorest dump the result of each
repetition into a file, and skip repetitions for which a file already exists.
}
\examples{
\dontrun{
# Load the example data set and define the model formula used throughout.
data(ecuador) # Muenchow et al. (2012), see ?ecuador
fo <- slides ~ dem + slope + hcurv + vcurv + log.carea + cslope

# Example of a classification tree fitted to this data:
library(rpart)
# Prediction wrapper passed as pred.fun below: keeps only the second column
# of the predict() output.
mypred.rpart <- function(object, newdata) predict(object, newdata)[, 2]
ctrl <- rpart.control(cp = 0.005) # show the effects of overfitting
fit <- rpart(fo, data = ecuador, control = ctrl)

# Non-spatial 5-repeated 15-fold cross-validation
# (partition.cv, par.mode = 1 with 2 parallel units):
# Same prediction wrapper as above, defined a second time.
mypred.rpart <- function(object, newdata) predict(object, newdata)[,2]
par.nsp.res <- parsperrorest(data = ecuador, formula = fo,
                             model.fun = rpart, model.args = list(control = ctrl),
                             pred.fun = mypred.rpart,
                             progress = TRUE,
                             smp.fun = partition.cv, 
                             smp.args = list(repetition = 1:5, nfold = 15), 
                             par.args = list(par.units = 2, par.mode = 1),
                             error.rep = TRUE, error.fold = TRUE)
# Inspect repetition-level errors, fold-level errors and the resampling object.
summary(par.nsp.res$error.rep)
summary(par.nsp.res$error.fold)
summary(par.nsp.res$represampling)
# plot(par.nsp.res$represampling, ecuador)

# Spatial 5-repeated 15-fold cross-validation
# (partition.kmeans, par.mode = 2 with 2 parallel units):
par.sp.res <- parsperrorest(data = ecuador, formula = fo,
                            model.fun = rpart, model.args = list(control = ctrl),
                            pred.fun = mypred.rpart,
                            progress = TRUE,
                            smp.fun = partition.kmeans, 
                            smp.args = list(repetition = 1:5, nfold = 15), 
                            par.args = list(par.units = 2, par.mode = 2),
                            error.rep = TRUE, error.fold = TRUE)
# Same three summaries for the spatial run.
summary(par.sp.res$error.rep)
summary(par.sp.res$error.fold)
summary(par.sp.res$represampling)
# plot(par.sp.res$represampling, ecuador)

# Collect the repetition-level training and test AUROC values of both runs
# into one data frame, one column per combination.
smry <- data.frame(
    nonspat.training = unlist(summary(par.nsp.res$error.rep, level = 1)$train.auroc),
    nonspat.test     = unlist(summary(par.nsp.res$error.rep, level = 1)$test.auroc),
    spatial.training = unlist(summary(par.sp.res$error.rep, level = 1)$train.auroc),
    spatial.test     = unlist(summary(par.sp.res$error.rep, level = 1)$test.auroc))
# Draw the four AUROC distributions side by side.
boxplot(smry, col = c('red','red','red','green'), 
    main = 'Training vs. test, nonspatial vs. spatial',
    ylab = 'Area under the ROC curve')
}    
}
\references{
Brenning, A. 2012. Spatial cross-validation and bootstrap for
the assessment of prediction rules in remote sensing: the R package 'sperrorest'.
2012 IEEE International Geoscience and Remote Sensing Symposium (IGARSS),
23-27 July 2012, p. 5372-5375.

Brenning, A. 2005. Spatial prediction models for landslide hazards: review,
comparison and evaluation. Natural Hazards and Earth System Sciences, 5(6): 853-862.

Brenning, A., S. Long & P. Fieguth. Forthcoming. Detecting rock glacier flow
structures using Gabor filters and IKONOS imagery.
Submitted to Remote Sensing of Environment.

Russ, G. & A. Brenning. 2010a. Data mining in precision agriculture:
Management of spatial information. In 13th International Conference on
Information Processing and Management of Uncertainty, IPMU 2010; Dortmund;
28 June - 2 July 2010. Lecture Notes in Computer Science, 6178 LNAI: 350-359.

Russ, G. & A. Brenning. 2010b. Spatial variable importance assessment for
yield prediction in Precision Agriculture. In Advances in Intelligent
Data Analysis IX, Proceedings, 9th International Symposium,
IDA 2010, Tucson, AZ, USA, 19-21 May 2010.
Lecture Notes in Computer Science, 6065 LNCS: 184-195.
}
\seealso{
\code{\link[=sperrorest]{sperrorest()}}
}
