% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gformula.R
\name{gformula_binary_eof}
\alias{gformula_binary_eof}
\title{Estimation of Binary End-of-Follow-Up Outcome Under the Parametric G-Formula}
\usage{
gformula_binary_eof(
  obs_data,
  id,
  time_name,
  covnames,
  covtypes,
  covparams,
  covfits_custom = NA,
  covpredict_custom = NA,
  histvars = NULL,
  histories = NA,
  basecovs = NA,
  censor_name = NULL,
  censor_model = NA,
  outcome_name,
  ymodel,
  ymodel_fit_custom = NULL,
  ymodel_predict_custom = NULL,
  intvars = NULL,
  interventions = NULL,
  int_times = NULL,
  int_descript = NULL,
  ref_int = 0,
  visitprocess = NA,
  restrictions = NA,
  yrestrictions = NA,
  baselags = FALSE,
  nsimul = NA,
  sim_data_b = FALSE,
  seed,
  nsamples = 0,
  parallel = FALSE,
  ncores = NA,
  ci_method = "percentile",
  threads,
  model_fits = FALSE,
  boot_diag = FALSE,
  show_progress = TRUE,
  ipw_cutoff_quantile = NULL,
  ipw_cutoff_value = NULL,
  int_visit_type = NULL,
  sim_trunc = TRUE,
  ...
)
}
\arguments{
\item{obs_data}{Data table containing the observed data.}

\item{id}{Character string specifying the name of the ID variable in \code{obs_data}.}

\item{time_name}{Character string specifying the name of the time variable in \code{obs_data}.}

\item{covnames}{Vector of character strings specifying the names of the time-varying covariates in \code{obs_data}.}

\item{covtypes}{Vector of character strings specifying the "type" of each time-varying covariate included in \code{covnames}. The possible "types" are: \code{"binary"}, \code{"normal"}, \code{"categorical"}, \code{"bounded normal"}, \code{"zero-inflated normal"}, \code{"truncated normal"}, \code{"absorbing"}, \code{"categorical time"}, and \code{"custom"}.}

\item{covparams}{List of vectors, where each vector contains information for
one parameter used in the modeling of the time-varying covariates (e.g.,
model statement, family, link function, etc.). Each vector
must be the same length as \code{covnames} and in the same order.
If a parameter is not required for a certain covariate, it
should be set to \code{NA} at that index.}

\item{covfits_custom}{Vector containing custom fit functions for time-varying covariates that
do not fall within the pre-defined covariate types. It should be in
the same order as \code{covnames}. If a custom fit function is not
required for a particular covariate (e.g., if the first
covariate is of type \code{"binary"} but the second is of type \code{"custom"}), then that
index should be set to \code{NA}. The default is \code{NA}.}

\item{covpredict_custom}{Vector containing custom prediction functions for time-varying
covariates that do not fall within the pre-defined covariate types.
It should be in the same order as \code{covnames}. If a custom
prediction function is not required for a particular
covariate, then that index should be set to \code{NA}. The default is \code{NA}.}

\item{histvars}{List of vectors. The kth vector specifies the names of the variables for which the kth history function
in \code{histories} is to be applied.}

\item{histories}{Vector of history functions to apply to the variables specified in \code{histvars}. The default is \code{NA}.}

\item{basecovs}{Vector of character strings specifying the names of baseline covariates in \code{obs_data}. These covariates are not simulated using a model but rather carry their value over all time points from the first time point of \code{obs_data}. These covariates should not be included in \code{covnames}. The default is \code{NA}.}

\item{censor_name}{Character string specifying the name of the censoring variable in \code{obs_data}. Only applicable when using inverse probability weights to estimate the natural course means / risk from the observed data. See "Details".}

\item{censor_model}{Model statement for the censoring variable. Only applicable when using inverse probability weights to estimate the natural course means / risk from the observed data. See "Details".}

\item{outcome_name}{Character string specifying the name of the outcome variable in \code{obs_data}.}

\item{ymodel}{Model statement for the outcome variable.}

\item{ymodel_fit_custom}{Function specifying a custom outcome model. See the vignette "Using Custom Outcome Models in gfoRmula" for details. The default is \code{NULL}.}

\item{ymodel_predict_custom}{Function obtaining predictions from the custom outcome model specified in \code{ymodel_fit_custom}. See the vignette "Using Custom Outcome Models in gfoRmula" for details. The default is \code{NULL}.}

\item{intvars}{(Deprecated. See the \code{...} argument) List, whose elements are vectors of character strings. The kth vector in \code{intvars} specifies the name(s) of the variable(s) to be intervened
on in each round of the simulation under the kth intervention in \code{interventions}.}

\item{interventions}{(Deprecated. See the \code{...} argument) List, whose elements are lists of vectors. Each list in \code{interventions} specifies a unique intervention on the relevant variable(s) in \code{intvars}. Each vector contains a function
implementing a particular intervention on a single variable, optionally
followed by one or more "intervention values" (i.e.,
integers used to specify the treatment regime).}

\item{int_times}{(Deprecated. See the \code{...} argument) List, whose elements are lists of vectors. The kth list in \code{int_times} corresponds to the kth intervention in \code{interventions}. Each vector specifies the time points in which the relevant intervention is applied on the corresponding variable in \code{intvars}.
When an intervention is not applied, the simulated natural course value is used. By default, this argument is set so that all interventions are applied in all time points.}

\item{int_descript}{Vector of character strings, each describing an intervention. It must
be in the same order as the specified interventions (see the \code{...} argument).}

\item{ref_int}{Integer denoting the intervention to be used as the
reference for calculating the end-of-follow-up mean ratio and mean difference. 0 denotes the
natural course, while subsequent integers denote user-specified
interventions in the order that they are
named in \code{interventions}. The default is 0.}

\item{visitprocess}{List of vectors. Each vector contains as its first entry
the covariate name of a visit process; its second entry
the name of a covariate whose modeling depends on the
visit process; and its third entry the maximum number
of consecutive visits that can be missed before an
individual is censored. The default is \code{NA}.}

\item{restrictions}{List of vectors. Each vector contains as its first entry a covariate for which
\emph{a priori} knowledge of its distribution is available; its second entry a condition
under which no knowledge of its distribution is available and that must be \code{TRUE}
for the distribution of that covariate given that condition to be estimated via a parametric
model or other fitting procedure; its third entry a function for estimating the distribution
of that covariate given the condition in the second entry is false such that \emph{a priori} knowledge
of the covariate distribution is available; and its fourth entry a value used by the function in the
third entry. The default is \code{NA}.}

\item{yrestrictions}{List of vectors. Each vector contains as its first entry
a condition and its second entry an integer. When the
condition is \code{TRUE}, the outcome variable is simulated
according to the fitted model; when the condition is \code{FALSE},
the outcome variable takes on the value in the second entry.
The default is \code{NA}.}

\item{baselags}{Logical scalar for specifying the convention used for lagi and lag_cumavgi terms in the model statements when pre-baseline times are not
included in \code{obs_data} and when the current time index, \eqn{t}, is such that \eqn{t < i}. If this argument is set to \code{FALSE}, the values
of all lagi and lag_cumavgi terms in this context are set to 0 (for non-categorical covariates) or the reference
level (for categorical covariates). If this argument is set to \code{TRUE}, the values of lagi and lag_cumavgi terms
are set to their values at time 0. The default is \code{FALSE}.}

\item{nsimul}{Number of subjects for whom to simulate data. By default, this argument is set
equal to the number of subjects in \code{obs_data}.}

\item{sim_data_b}{Logical scalar indicating whether to return the simulated data set. If bootstrap samples are used (i.e., \code{nsamples} is set to a value greater than 0), this argument must be set to \code{FALSE}. The default is \code{FALSE}.}

\item{seed}{Starting seed for simulations and bootstrapping.}

\item{nsamples}{Integer specifying the number of bootstrap samples to generate.
The default is 0.}

\item{parallel}{Logical scalar indicating whether to parallelize simulations of
different interventions to multiple cores.}

\item{ncores}{Integer specifying the number of CPU cores to use in parallel
simulation. This argument is required when \code{parallel} is set to \code{TRUE}.
In many applications, users may wish to set this argument equal to \code{parallel::detectCores() - 1}.}

\item{ci_method}{Character string specifying the method for calculating the bootstrap 95\% confidence intervals, if applicable. The options are \code{"percentile"} and \code{"normal"}.}

\item{threads}{Integer specifying the number of threads to be used in \code{data.table}. See \code{\link[data.table]{setDTthreads}} for further details.}

\item{model_fits}{Logical scalar indicating whether to return the fitted models. Note that if this argument is set to \code{TRUE}, the output of this function may use a lot of memory. The default is \code{FALSE}.}

\item{boot_diag}{Logical scalar indicating whether to return the parametric g-formula estimates as well as the coefficients, standard errors, and variance-covariance matrices of the parameters of the fitted models in the bootstrap samples. The default is \code{FALSE}.}

\item{show_progress}{Logical scalar indicating whether to print a progress bar for the number of bootstrap samples completed in the R console. This argument is only applicable when \code{parallel} is set to \code{FALSE} and bootstrap samples are used (i.e., \code{nsamples} is set to a value greater than 0). The default is \code{TRUE}.}

\item{ipw_cutoff_quantile}{Percentile by which to truncate inverse probability weights. The default is \code{NULL} (i.e., no truncation). See "Details".}

\item{ipw_cutoff_value}{Cutoff value by which to truncate inverse probability weights. The default is \code{NULL} (i.e., no truncation). See "Details".}

\item{int_visit_type}{Vector of logicals. The kth element is a logical specifying whether to carry forward the intervened value (rather than the natural value) of the treatment variable(s) when performing a carry forward restriction type for the kth intervention in \code{interventions}.
When the kth element is set to \code{FALSE}, the natural value of the treatment variable(s) in the kth intervention in \code{interventions} will be carried forward.
By default, this argument is set so that the intervened value of the treatment variable(s) is carried forward for all interventions.}

\item{sim_trunc}{Logical scalar indicating whether to truncate simulated covariates to their range in the observed data set. This argument is only applicable for covariates of type \code{"normal"}, \code{"bounded normal"}, \code{"truncated normal"}, and \code{"zero-inflated normal"}. The default is \code{TRUE}.}

\item{...}{Other arguments, including (a) those that specify the interventions and (b) those that are passed to the functions in \code{covpredict_custom}. To specify interventions, users can supply arguments with the following naming requirements:
\itemize{
\item{Each intervention argument begins with a prefix of \code{intervention}.}
\item{After the prefix, the intervention number is specified and followed by a period.}
\item{After the period, the treatment variable name is specified.}
}
Each intervention argument takes as input a list with the following elements:
\itemize{
\item{The first element specifies the intervention function.}
\item{The subsequent elements specify any intervention values.}
\item{(Optional) The named element \code{int_times} specifies the time points to apply the intervention. By default, all interventions are applied at all time points.}
}
For example, an "always treat" intervention on \code{A} is given by \cr
\code{intervention1.A = list(static, rep(1, time_points))} \cr
See the vignette "A Simplified Approach for Specifying Interventions in gfoRmula" and the "Examples" section for more examples.}
}
\value{
An object of class "gformula_binary_eof". The object is a list with the following components:
\item{result}{Results table containing the estimated outcome probability for all interventions (including the natural course) at the last time point as well as the "cumulative percent intervened on" and the "average percent intervened on". If bootstrapping was used, the results table includes the bootstrap end-of-follow-up mean ratio, standard error, and 95\% confidence interval.}
\item{coeffs}{A list of the coefficients of the fitted models.}
\item{stderrs}{A list of the standard errors of the coefficients of the fitted models.}
\item{vcovs}{A list of the variance-covariance matrices of the parameters of the fitted models.}
\item{rmses}{A list of root mean square error (RMSE) values of the fitted models.}
\item{fits}{A list of the fitted models for the time-varying covariates and outcome. If \code{model_fits} is set to \code{FALSE}, a value of \code{NULL} is given.}
\item{sim_data}{A list of data tables of the simulated data. Each element in the list corresponds to one of the interventions. If the argument \code{sim_data_b} is set to \code{FALSE}, a value of \code{NA} is given.}
\item{IP_weights}{A numeric vector specifying the inverse probability weights. See "Details".}
\item{bootests}{A data.table containing the bootstrap replicates of the parametric g-formula estimates. If \code{boot_diag} is set to \code{FALSE}, a value of \code{NULL} is given.}
\item{bootcoeffs}{A list, where the kth element is a list containing the coefficients of the fitted models corresponding to the kth bootstrap sample. If \code{boot_diag} is set to \code{FALSE}, a value of \code{NULL} is given.}
\item{bootstderrs}{A list, where the kth element is a list containing the standard errors of the coefficients of the fitted models corresponding to the kth bootstrap sample. If \code{boot_diag} is set to \code{FALSE}, a value of \code{NULL} is given.}
\item{bootvcovs}{A list, where the kth element is a list containing the variance-covariance matrices of the parameters of the fitted models corresponding to the kth bootstrap sample. If \code{boot_diag} is set to \code{FALSE}, a value of \code{NULL} is given.}
\item{...}{Some additional elements.}

The results for the g-formula simulation under various interventions for the last time point are printed with the \code{\link{print.gformula_binary_eof}} function. To generate graphs comparing the mean estimated and observed covariate values over time, use the \code{\link{plot.gformula_binary_eof}} function.
}
\description{
Based on an observed data set, this internal function estimates the outcome probability at
end-of-follow-up under multiple user-specified interventions using the parametric g-formula. See McGrath et al. (2020) for
further details concerning the application and implementation of the parametric g-formula.
}
\details{
To assess model misspecification in the parametric g-formula, users can obtain inverse probability (IP) weighted estimates of the natural course means of the time-varying covariates from the observed data.
See Chiu et al. (2023) for details.
In addition to the general requirements described in McGrath et al. (2020), the requirements for the input data set and the call to the gformula function for such analyses are described below.

Users need to include a column in \code{obs_data} with a time-varying censoring variable.
Users need to indicate the name of the censoring variable and a model statement for the censoring variable with parameters \code{censor_name} and \code{censor_model}, respectively.
Finally, users can specify how to truncate IP weights with the \code{ipw_cutoff_quantile} or \code{ipw_cutoff_value} parameters.

In addition to the package output described in McGrath et al. (2020), the output will display estimates of the "cumulative percent intervened on" and the "average percent intervened on". When using a custom intervention function, users need to specify whether each individual at that time point is eligible to contribute person-time to the percent intervened on calculations. Specifically, this must be specified in the \code{eligible_pt} column of \code{newdf}. By default, \code{eligible_pt} is set to \code{TRUE} for each individual at each time point in custom interventions.
}
\examples{

## Estimating the effect of threshold interventions on the mean of a binary
## end of follow-up outcome
\donttest{
id <- 'id_num'
time_name <- 'time'
covnames <- c('cov1', 'cov2', 'treat')
outcome_name <- 'outcome'
histories <- c(lagged, cumavg)
histvars <- list(c('treat', 'cov1', 'cov2'), c('cov1', 'cov2'))
covtypes <- c('binary', 'zero-inflated normal', 'normal')
covparams <- list(covmodels = c(cov1 ~ lag1_treat + lag1_cov1 + lag1_cov2 + cov3 +
                                  time,
                                cov2 ~ lag1_treat + cov1 + lag1_cov1 + lag1_cov2 +
                                  cov3 + time,
                                treat ~ lag1_treat + cumavg_cov1 +
                                  cumavg_cov2 + cov3 + time))
ymodel <- outcome ~  treat + cov1 + cov2 + lag1_cov1 + lag1_cov2 + cov3
intervention1.treat <- list(static, rep(0, 7))
intervention2.treat <- list(threshold, 1, Inf)
int_descript <- c('Never treat', 'Threshold - lower bound 1')
nsimul <- 10000
ncores <- 2

gform_bin_eof <- gformula_binary_eof(obs_data = binary_eofdata, id = id,
                                     time_name = time_name,
                                     covnames = covnames,
                                     outcome_name = outcome_name,
                                     covtypes = covtypes,
                                     covparams = covparams,
                                     ymodel = ymodel,
                                     intervention1.treat = intervention1.treat,
                                     intervention2.treat = intervention2.treat,
                                     int_descript = int_descript,
                                     histories = histories, histvars = histvars,
                                     basecovs = c("cov3"), seed = 1234,
                                     parallel = TRUE, nsamples = 5,
                                     nsimul = nsimul, ncores = ncores)
gform_bin_eof
}

}
\references{
Chiu YH, Wen L, McGrath S, Logan R, Dahabreh IJ, Hernán MA. Evaluating model specification when using the parametric g-formula in the presence of censoring. American Journal of Epidemiology. 2023;192:1887–1895.

McGrath S, Lin V, Zhang Z, Petito LC, Logan RW, Hernán MA, and JG Young. gfoRmula: An R package for estimating the effects of sustained treatment strategies via the parametric g-formula. Patterns. 2020;1:100008.

Robins JM. A new approach to causal inference in mortality studies with a sustained exposure period: application to the healthy worker survivor effect. Mathematical Modelling. 1986;7:1393–1512. [Errata (1987) in Computers and Mathematics with Applications 14, 917-921. Addendum (1987) in Computers and Mathematics with Applications 14, 923-945. Errata (1987) to addendum in Computers and Mathematics with Applications 18, 477.].
}
\seealso{
\code{\link{gformula}}
}
