% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/df_explicit_na.R
\name{df_explicit_na}
\alias{df_explicit_na}
\title{Encode Categorical Missing Values in a Data Frame}
\usage{
df_explicit_na(
  data,
  omit_columns = NULL,
  char_as_factor = TRUE,
  logical_as_factor = FALSE,
  na_level = "<Missing>"
)
}
\arguments{
\item{data}{(\code{data.frame})\cr data set.}

\item{omit_columns}{(\code{character})\cr names of variables from \code{data} that should
not be modified by this function.}

\item{char_as_factor}{(\code{flag})\cr whether to convert character variables
in \code{data} to factors.}

\item{logical_as_factor}{(\code{flag})\cr whether to convert logical variables
in \code{data} to factors.}

\item{na_level}{(\code{string})\cr string used to replace all \code{NA} or empty
values in columns that are not listed in \code{omit_columns}.}
}
\value{
A \code{data.frame} with the chosen modifications applied.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}

This is a helper function to encode missing entries across groups of categorical
variables in a data frame.
}
\details{
Missing entries are \code{NA} values or empty strings; they are replaced with
the value given by \code{na_level}. If a factor variable includes missing
values, the missing value level is inserted as the last level.
Similarly, in case character or logical variables should be converted to factors
with the \code{char_as_factor} or \code{logical_as_factor} options, the missing values will
be set as the last level.
}
\examples{
# Toy data with one column per variable type, each holding a missing entry
# except the numeric column `z`.
my_data <- data.frame(
  u = c(TRUE, FALSE, NA, TRUE), # logical with one `NA`
  v = factor(c("A", NA, NA, NA), levels = c("Z", "A")), # factor with `NA` entries and an unused level
  w = c("A", "B", NA, "C"), # character with `NA`
  x = c("D", "E", "F", NA), # character with `NA`
  y = c("G", "H", "I", ""), # character with an empty string instead of `NA`
  z = c(1, 2, 3, 4), # numeric, no missing values
  stringsAsFactors = FALSE
)

# Example 1
# Encode missing values in all character or factor columns.
df_explicit_na(my_data)
# Also convert logical columns to factor columns.
df_explicit_na(my_data, logical_as_factor = TRUE)
# Encode missing values in a subset of columns: `x` and `y` are passed as
# `omit_columns` and are therefore not modified.
df_explicit_na(my_data, omit_columns = c("x", "y"))

# Example 2
# Here we purposefully convert all `M` values to `NA` in the `SEX` variable.
# After running `df_explicit_na` the `NA` values are encoded as `<Missing>` but they are not
# included when generating `rtables`.
adsl <- tern_ex_adsl
adsl$SEX[adsl$SEX == "M"] <- NA
adsl <- df_explicit_na(adsl)

# If you want the `NA` values to be displayed in the table use the `na_level` argument.
# Start again from a fresh copy of `tern_ex_adsl` and pass a custom replacement string.
adsl <- tern_ex_adsl
adsl$SEX[adsl$SEX == "M"] <- NA
adsl <- df_explicit_na(adsl, na_level = "Missing Values")

# Example 3
# Numeric variables that have missing values are not altered. This means that any `NA` value in
# a numeric variable will not be included in the summary statistics, nor will they be included
# in the denominator value for calculating the percent values.
adsl <- tern_ex_adsl
# Set every `AGE` value below 30 to `NA` to create missing numeric entries.
adsl$AGE[adsl$AGE < 30] <- NA
adsl <- df_explicit_na(adsl)

}
\seealso{
\code{\link[=sas_na]{sas_na()}} and \code{\link[=explicit_na]{explicit_na()}} for other missing data helper functions.
}
