% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/imputation_storage.R
\name{reduce_imputed_data}
\alias{reduce_imputed_data}
\title{Reduce Imputed Data for Efficient Storage}
\usage{
reduce_imputed_data(imputed_data, original_data, vars)
}
\arguments{
\item{imputed_data}{A data.frame containing the full imputed dataset with an
\code{IMPID} column identifying each imputation. Typically the output from
\code{\link[=get_imputed_data]{get_imputed_data()}}.}

\item{original_data}{A data.frame containing the original dataset before
imputation, with missing values in the outcome column.}

\item{vars}{A \code{vars} object as created by \code{\link[rbmi:set_vars]{rbmi::set_vars()}}.}
}
\value{
A data.frame containing only the rows from \code{imputed_data} that
correspond to originally missing outcome values. All columns from
\code{imputed_data} are preserved.
}
\description{
Extracts only the imputed records (those whose outcome value was originally missing) from
a full imputed dataset. This significantly reduces storage requirements when
working with many imputations, as observed values are identical across all
imputations and only need to be stored once in the original data.
}
\details{
Storage savings depend on the proportion of missing data. For example:
\itemize{
\item Original: 1000 rows, 44 missing values
\item Full imputed (1000 imputations): 1,000,000 rows
\item Reduced (1000 imputations): 44,000 rows (4.4\% of full size)
}

Use \code{\link[=expand_imputed_data]{expand_imputed_data()}} to reconstruct the full imputed dataset when
needed for analysis.
}
\examples{
library(rbmi)
library(dplyr)

# Example with package data
data("ADMI", package = "rbmiUtils")
data("ADEFF", package = "rbmiUtils")

# Prepare original data to match ADMI structure
original <- ADEFF |>
  mutate(
    TRT = TRT01P,
    USUBJID = as.character(USUBJID)
  )

vars <- set_vars(
  subjid = "USUBJID",
  visit = "AVISIT",
  group = "TRT",
  outcome = "CHG"
)

# Reduce to only imputed values
reduced <- reduce_imputed_data(ADMI, original, vars)

# Compare sizes
cat("Full imputed rows:", nrow(ADMI), "\n")
cat("Reduced rows:", nrow(reduced), "\n")
cat("Compression:", round(100 * nrow(reduced) / nrow(ADMI), 1), "\%\n")

}
\seealso{
\itemize{
\item \code{\link[rbmi:impute]{rbmi::impute()}} which creates the imputed datasets this function operates on
\item \code{\link[=expand_imputed_data]{expand_imputed_data()}} to reconstruct the full dataset
\item \code{\link[=get_imputed_data]{get_imputed_data()}} to extract imputed data from an rbmi imputation object
}
}
