% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/prob-gain_capture.R
\name{gain_capture}
\alias{gain_capture}
\alias{gain_capture.data.frame}
\alias{gain_capture_vec}
\title{Gain capture}
\usage{
gain_capture(data, ...)

\method{gain_capture}{data.frame}(data, truth, ..., estimator = NULL,
  na_rm = TRUE)

gain_capture_vec(truth, estimate, estimator = NULL, na_rm = TRUE, ...)
}
\arguments{
\item{data}{A \code{data.frame} containing the \code{truth} and \code{estimate}
columns.}

\item{...}{A set of unquoted column names or one or more
\code{dplyr} selector functions to choose which variables contain the
class probabilities. If \code{truth} is binary, only 1 column should be selected.
Otherwise, there should be as many columns as factor levels of \code{truth}.}

\item{truth}{The column identifier for the true class results
(that is a \code{factor}). This should be an unquoted column name although
this argument is passed by expression and supports
\link[rlang:quasiquotation]{quasiquotation} (you can unquote column
names). For \code{_vec()} functions, a \code{factor} vector.}

\item{estimator}{One of \code{"binary"}, \code{"macro"}, or \code{"macro_weighted"} to
specify the type of averaging to be done. \code{"binary"} is only relevant for
the two class case. The other two are general methods for calculating
multiclass metrics. The default will automatically choose \code{"binary"} or
\code{"macro"} based on \code{truth}.}

\item{na_rm}{A \code{logical} value indicating whether \code{NA}
values should be stripped before the computation proceeds.}

\item{estimate}{If \code{truth} is binary, a numeric vector of class probabilities
corresponding to the "relevant" class. Otherwise, a matrix with as many
columns as factor levels of \code{truth}. \emph{It is assumed that these are in the
same order as the levels of \code{truth}.}}
}
\value{
A \code{tibble} with columns \code{.metric}, \code{.estimator},
and \code{.estimate} and 1 row of values.

For grouped data frames, the number of rows returned will be the same as
the number of groups.

For \code{gain_capture_vec()}, a single \code{numeric} value (or \code{NA}).
}
\description{
\code{gain_capture()} is a measure of performance similar to an AUC calculation,
but applied to a gain curve.
}
\details{
\code{gain_capture()} calculates the area \emph{under} the gain curve, but \emph{above}
the baseline, and then divides that by the area \emph{under} a perfect gain curve,
but \emph{above} the baseline. It is meant to represent the amount of potential
gain "captured" by the model.
}
\section{Relevant level}{


There is no common convention on which factor level should
automatically be considered the "event" or "positive" result.
In \code{yardstick}, the default is to use the \emph{first} level. This
behavior is controlled by a global option called
\code{yardstick.event_first}, which is set to \code{TRUE} when the
package is loaded. Set it to \code{FALSE} if the last level of the
factor should be considered the level of interest. For multiclass
extensions involving one-vs-all comparisons (such as macro averaging),
this option is ignored and the "one" level is always the relevant result.
}

\section{Multiclass}{


Macro and macro-weighted averaging are available for this metric.
The default is to select macro averaging if a \code{truth} factor with more
than 2 levels is provided. Otherwise, a standard binary calculation is done.
See \code{vignette("multiclass", "yardstick")} for more information.
}

\examples{
# Two class
# With a binary `truth`, only one class probability column is selected.
data("two_class_example")
gain_capture(two_class_example, truth, Class1)

# Multiclass
# With more than two levels, select as many class probability columns as
# there are factor levels of `truth`.
library(dplyr)
data(hpc_cv)

# You can use the col1:colN tidyselect syntax
hpc_cv \%>\%
  filter(Resample == "Fold01") \%>\%
  gain_capture(obs, VF:L)

# Groups are respected
# For grouped data frames, one row is returned per group.
hpc_cv \%>\%
  group_by(Resample) \%>\%
  gain_capture(obs, VF:L)

# Weighted macro averaging
# Request it explicitly through the `estimator` argument.
hpc_cv \%>\%
  group_by(Resample) \%>\%
  gain_capture(obs, VF:L, estimator = "macro_weighted")

# Vector version
# Supply a matrix of class probabilities
# The matrix is passed positionally as `estimate`; its columns are assumed
# to be in the same order as the levels of `truth`.
fold1 <- hpc_cv \%>\%
  filter(Resample == "Fold01")

gain_capture_vec(
   truth = fold1$obs,
   matrix(
     c(fold1$VF, fold1$F, fold1$M, fold1$L),
     ncol = 4
   )
)

# Visualize gain_capture() --------------------------------------------------

# Visually, this represents the area under the black curve, but above the
# 45 degree line, divided by the area of the shaded triangle.
# The plot itself is drawn from the output of gain_curve().
library(ggplot2)
autoplot(gain_curve(two_class_example, truth, Class1))

}
\seealso{
\code{\link[=gain_curve]{gain_curve()}} to compute the full gain curve.

Other class probability metrics: \code{\link{mn_log_loss}},
  \code{\link{pr_auc}}, \code{\link{roc_auc}}
}
\author{
Max Kuhn
}
\concept{class probability metrics}
