% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/summarise_plus.R
\name{summarise_plus}
\alias{summarise_plus}
\title{Fast And Powerful Yet Simple To Use Summarise}
\usage{
summarise_plus(
  data_frame,
  class = NULL,
  values,
  statistics = c("sum", "freq"),
  formats = list(),
  types = "",
  weight = NULL,
  nesting = "deepest",
  merge_back = FALSE,
  na.rm = .qol_options[["na.rm"]],
  print_miss = .qol_options[["print_miss"]],
  monitor = .qol_options[["monitor"]],
  notes = TRUE
)
}
\arguments{
\item{data_frame}{A data frame to summarise.}

\item{class}{A vector containing all grouping variables.}

\item{values}{A vector containing all variables that should be summarised.}

\item{statistics}{Available functions:
\itemize{
\item "sum"       -> Weighted and unweighted sum
\item "sum_wgt"   -> Sum of all weights
\item "freq"      -> Unweighted frequency
\item "freq_g0"   -> Unweighted frequency of all values greater than zero
\item "pct_group" -> Weighted and unweighted percentages within the respective group
\item "pct_total" -> Weighted and unweighted percentages compared to the grand total
\item "mean"      -> Weighted and unweighted mean
\item "median"    -> Weighted and unweighted median
\item "mode"      -> Weighted and unweighted mode
\item "min"       -> Minimum
\item "max"       -> Maximum
\item "sd"        -> Weighted and unweighted standard deviation
\item "variance"  -> Weighted and unweighted variance
\item "first"     -> First value
\item "last"      -> Last value
\item "pn"        -> Weighted and unweighted percentiles (any p1, p2, p3, ... possible)
\item "missing"   -> Missings generated by the value variables
}}

\item{formats}{A list specifying which formats should be applied to which
class variables.}

\item{types}{A character vector specifying the different combinations of group
variables which should be computed when using nesting = "all". If left empty all
possible combinations will be computed.}

\item{weight}{Put in a weight variable to compute weighted results.}

\item{nesting}{The predefined value is "deepest" meaning that only the fully
nested version of all class variables will be computed. If set to "all", all
possible combinations will be computed in one data table. The option "single"
only outputs the ungrouped summary of all class variables in one data table.}

\item{merge_back}{Newly summarised variables can be merged back to the original
data frame if TRUE. Only works if nesting = "deepest" and no formats are defined.}

\item{na.rm}{FALSE by default. If TRUE, removes all NA values from the class variables.}

\item{print_miss}{FALSE by default. If TRUE, outputs all possible categories of the
grouping variables based on the provided formats, even if there are no observations
for a combination.}

\item{monitor}{FALSE by default. If TRUE, outputs two charts to visualize the
function's time consumption.}

\item{notes}{TRUE by default. Prints notifications about NA values produced by
class variables during summarise.}
}
\value{
Returns a summarised data table.
}
\description{
\code{\link[=summarise_plus]{summarise_plus()}} creates a new aggregated data table with the desired grouping.
It can output only the deepest nested combination of the grouping variables (default)
or every possible combination of the grouping variables at once,
with just one small change. Besides the normal summary functions like sum, mean
or median, you can also calculate their respective weighted versions by just
setting a weight variable.
}
\details{
\code{\link[=summarise_plus]{summarise_plus()}} is based on the 'SAS' procedure Proc Summary, which provides
efficient and readable ways to perform complex aggregations.

Normally you would compute new categorical variables beforehand - probably even in
different forms, if you wanted to have different categorizations - and bloat up
the data set. After all this recoding footwork you could finally use multiple
summaries to compute all the stats you need to then put them back together. With this
function this is no longer necessary.

In \code{\link[=summarise_plus]{summarise_plus()}} you put in the original data frame and let the recoding happen
via format containers. This is very efficient, since new variables and categories
are only created just before the summarise happens.

Additionally, you can specify whether you only want to produce the fully nested version
of all group variables or whether you want to produce every possible combination in
one go. All with a single option.

The function is optimized to always take the fastest route, depending on the options
specified.
}
\examples{
# Example formats
# Each format assigns a label to a set of values. The Total label spans the
# whole value range and therefore overlaps the more specific categories.
age. <- discrete_format(
    "Total"          = 0:100,
    "under 18"       = 0:17,
    "18 to under 25" = 18:24,
    "25 to under 55" = 25:54,
    "55 to under 65" = 55:64,
    "65 and older"   = 65:100)

sex. <- discrete_format(
    "Total"  = 1:2,
    "Male"   = 1,
    "Female" = 2)

# Income is defined with interval_format instead of discrete_format.
income. <- interval_format(
    "Total"              = 0:99999,
    "below 500"          = 0:499,
    "500 to under 1000"  = 500:999,
    "1000 to under 2000" = 1000:1999,
    "2000 and more"      = 2000:99999)

# Example data frame
# NOTE(review): presumably creates 1000 example observations - confirm in the
# dummy_data documentation.
my_data <- dummy_data(1000)

# Call function
# Deepest nesting: only the fully nested combination of year, sex and age is
# computed. A weight variable is supplied, so weighted results are computed.
all_nested <- my_data |>
    summarise_plus(class      = c(year, sex, age),
                   values     = income,
                   statistics = c("sum", "pct_group", "pct_total", "sum_wgt", "freq"),
                   formats    = list(sex = sex., age = age.),
                   weight     = weight,
                   nesting    = "deepest",
                   na.rm      = TRUE)

# Nesting all: every possible combination of the class variables is computed
# in one data table. p1 and p99 request the 1st and 99th percentiles.
all_possible <- my_data |>
    summarise_plus(class      = c(year, sex, age, income),
                   values     = c(probability),
                   statistics = c("sum", "p1", "p99", "min", "max", "freq", "freq_g0"),
                   formats    = list(sex    = sex.,
                                     age    = age.,
                                     income = income.),
                   weight     = weight,
                   nesting    = "all",
                   na.rm      = TRUE)

# Formats can also be passed as characters
# Nesting single: outputs the ungrouped summary of all class variables in one
# data table. No weight argument is passed in this call.
single <- my_data |>
    summarise_plus(class      = c(year, age, sex),
                   values     = weight,
                   statistics = c("sum", "mean"),
                   formats    = list(sex = "sex.", age = "age."),
                   nesting    = "single")

# Merge the summarised variables back to the original data frame. This call
# uses the deepest nesting and defines no formats, as merge_back requires.
merge_back <- my_data |>
    summarise_plus(class      = c(year, age, sex),
                   values     = weight,
                   statistics = c("sum", "mean"),
                   nesting    = "deepest",
                   merge_back = TRUE)

# Restrict nesting all to selected combinations of class variables via types.
certain_types <- my_data |>
    summarise_plus(class      = c(year, sex, age),
                   values     = c(probability),
                   statistics = c("sum", "mean", "freq"),
                   formats    = list(sex = sex.,
                                     age = age.),
                   types      = c("year", "year + age", "age + sex"),
                   weight     = weight,
                   nesting    = "all",
                   na.rm      = TRUE)

}
\seealso{
Creating formats: \code{\link[=discrete_format]{discrete_format()}} and \code{\link[=interval_format]{interval_format()}}.

Functions that also make use of formats: \code{\link[=frequencies]{frequencies()}}, \code{\link[=crosstabs]{crosstabs()}},
\code{\link[=any_table]{any_table()}}, \code{\link[=recode]{recode()}}, \code{\link[=recode_multi]{recode_multi()}}, \code{\link[=transpose_plus]{transpose_plus()}}, \code{\link[=sort_plus]{sort_plus()}}.
}
