% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/BayesPET.R
\name{generate_data}
\alias{generate_data}
\title{Generate two-arm trial data with enrollment, event, and censoring processes,
and return data formatted for event-time prediction}
\usage{
generate_data(
  N,
  E_target,
  E_cutoff,
  p_trt,
  cov_type,
  cov_dist,
  logHR.trt = NULL,
  enroll_rate,
  dist.event,
  dist.censor,
  blinded = TRUE,
  event.scale = NULL,
  event.shape = NULL,
  censor.scale = NULL,
  censor.shape = NULL,
  beta.event,
  beta.censor,
  event.scale_trt = NULL,
  event.shape_trt = NULL,
  beta.event_trt = if (is.null(logHR.trt)) beta.event else NULL,
  assess_window = 0,
  seed = 123
)
}
\arguments{
\item{N}{Integer. Total planned sample size (maximum number of subjects
that can be enrolled in the trial).}

\item{E_target}{Integer. Target number of events for the final analysis.}

\item{E_cutoff}{Integer. Target number of events for the interim analysis.}

\item{p_trt}{Scalar randomization probability to the experimental arm, \eqn{\gamma \in (0,1)}.}

\item{cov_type}{Character vector specifying the distribution for each component
of the covariate vector \eqn{\boldsymbol{Z} = (Z_1,\dots,Z_p)} used in the proportional hazards model.
Each element must be \code{"binary"} or \code{"continuous"}.
If \code{NULL}, no covariates are used.}

\item{cov_dist}{Numeric vector of the same length as \code{cov_type}, giving
parameters for the covariate-generating distribution of each \eqn{Z_j}:
\itemize{
\item \code{"binary"}: \eqn{Z_j \sim \mathrm{Bernoulli}(p_j)} with
\eqn{p_j =} \code{cov_dist[j]}.
\item \code{"continuous"}: \eqn{Z_j \sim N(0, \sigma_j^2)} with
\eqn{\sigma_j =} \code{cov_dist[j]}.
}}

\item{logHR.trt}{Numeric scalar giving the log hazard ratio for the experimental
versus control arm in the event-time model. When \code{NULL} (default), the two treatment
arms are generated from separate proportional hazards models with
arm-specific baseline parameters and covariate effects.}

\item{enroll_rate}{Positive numeric scalar specifying the enrollment rate.}

\item{dist.event}{Character. Baseline distribution for event times:
\code{"Weibull"} or \code{"Loglogistic"}. This distribution family is the same
for both arms.}

\item{dist.censor}{Character. Baseline distribution for random censoring times:
\code{"Weibull"} or \code{"Loglogistic"}.}

\item{blinded}{Logical. If \code{TRUE} (default), the generated interim dataset is blinded
and treatment assignments in \code{data.eventcensor$trt} are set to \code{NA}.
If \code{FALSE}, treatment assignments in \code{data.eventcensor$trt} are coded as \code{0} for
control and \code{1} for the experimental group.}

\item{event.scale}{Numeric scalar > 0. Control-arm event baseline scale parameter.}

\item{event.shape}{Numeric scalar > 0. Control-arm event baseline shape parameter.}

\item{censor.scale}{Numeric scalar > 0. Random censoring baseline scale parameter.}

\item{censor.shape}{Numeric scalar > 0. Random censoring baseline shape parameter.}

\item{beta.event}{Numeric vector. Regression coefficients for baseline
covariates in the event-time proportional hazards model; must have the
same length and ordering as \code{cov_type}.}

\item{beta.censor}{Numeric vector. Regression coefficients for baseline
covariates in the random censoring-time proportional hazards model; must
have the same length and ordering as \code{cov_type}.}

\item{event.scale_trt}{Numeric scalar > 0. Experimental-arm event
baseline scale parameter (used when \code{logHR.trt = NULL}).}

\item{event.shape_trt}{Numeric scalar > 0. Experimental-arm event baseline shape parameter
(used when \code{logHR.trt = NULL}).}

\item{beta.event_trt}{Numeric vector. Regression coefficients for baseline
covariates in the experimental-arm event-time proportional hazards model, used when
\code{logHR.trt = NULL}. Must have the same length and ordering as \code{cov_type}.
Defaults to \code{beta.event} when \code{logHR.trt = NULL}, and to \code{NULL} otherwise.}

\item{assess_window}{Numeric scalar >= 0. Assessment window width. If > 0,
observed event/censoring times are coarsened to the midpoint of the window
containing \eqn{\min(T_\mathrm{event}, T_\mathrm{censor})}. Defaults to \code{0}.}

\item{seed}{Integer or \code{NULL}. Random seed for data generation. If
\code{NULL}, no random seed is set. Defaults to \code{123}.}
}
\value{
A list with elements:
\itemize{
\item \code{data.enroll}: A data frame of observed enrollment information up to the
interim data cut. Columns: subject index \code{No}, subject enrollment calendar
time \code{enrolltime}, enrollment interarrival time \code{interarrivaltime},
enrollment status \code{enrollstatus} (1 = enrolled, 0 = administratively
censored enrollment process).
\item \code{data.eventcensor}: A data frame of observed survival outcomes at the
interim cut. Columns include subject index \code{No}, treatment assignment
indicator \code{trt} (\code{NA} if \code{blinded = TRUE}), observed time \code{time}
(administratively censored at interim cut), event status \code{eventstatus}
(1 = event, 0 = right censored),
random censoring status \code{censorstatus} (1 = random censoring before
the interim data cut; 0 = otherwise), followed by covariates.
\item \code{truesurvival}: Full underlying data without administrative censoring:
\code{No}, \code{trt}, \code{t_event} (true underlying event time),
\code{t_randcensor} (true underlying random censoring time),
\code{t_event.obs} (underlying follow-up time without administrative censoring), \cr
\code{t_event.obswithintervalassess} (underlying follow-up time without
administrative censoring but applied with assessment windows),  \code{status}
(1 = event before random censoring), \code{enrollmenttime}, plus covariates.
\item \code{event.interim.obs}: Observed number of events at the interim
data cut. If the prespecified interim event cutoff \code{E_cutoff} cannot be reached,
this equals the maximum number of events observed.
\item \code{event.max}: Number of events that would occur without administrative censoring
(i.e., after accounting only for random censoring).
\item \code{cuttime.true}: The true calendar time at which the cumulative number
of observed events reaches the target event count. If the target number
of events cannot be reached, this is the calendar time of the last observed
event or censoring.
\item \code{event.final.obs}: The latent true number of events at \code{cuttime.true}.
}
}
\description{
Simulates data from a two-arm clinical trial with a time-to-event endpoint.
The data generating process incorporates staggered enrollment, event times,
and random censoring, with event and censoring distributions specified as
Weibull or log-logistic. Treatment and covariate effects are incorporated
through a proportional hazards structure.
}
\details{
Subjects are randomized independently to the experimental arm with
probability \code{p_trt}. Baseline
covariates are generated independently based on \code{cov_type} and
\code{cov_dist}. Binary covariates follow a Bernoulli distribution, while
continuous covariates follow a normal distribution with mean zero and
standard deviation determined by \code{cov_dist}.

Interarrival times between successive enrollments are drawn from an exponential
distribution with rate \code{enroll_rate}; details of the enrollment model are
documented in \code{\link{fit_enroll}}. Calendar enrollment times are obtained by cumulative
summation of these interarrival times.

Event times and random censoring times are generated from Weibull or
log-logistic baseline distributions, as specified by \code{dist.event}
and \code{dist.censor}. For the Weibull model, the baseline survival function
is parameterized as \deqn{S_0(t) = \exp\{-\lambda_0 t^{\rho}\}, \quad t \ge 0,} where
\eqn{\rho > 0} is the shape and \eqn{\lambda_0 > 0} is the baseline hazard
scale. For the log-logistic model, the baseline survival function is
\deqn{S_0(t) = \{1 + (t / b)^a\}^{-1}, \quad t \ge 0,} where \eqn{a > 0} and \eqn{b > 0}
denote the shape and scale parameters, respectively.
Parameter calibration via marginal median survival can be performed using
\code{\link{convert_median}} prior to simulation.

Covariate effects are incorporated through a
proportional hazards structure for both the event and censoring
processes. When \code{logHR.trt} is provided, the treatment effect is
modeled through a proportional hazards formulation. When
\code{logHR.trt} is \code{NULL}, the two treatment arms are allowed to
differ through separate baseline parameters and covariate effects.
The random censoring mechanism does not depend on treatment assignment.
}
\examples{
## --- Weibull event/censoring with a common PH treatment effect ---
data.weibull <- generate_data(
  N = 80, E_target = 50, E_cutoff = 25, p_trt = 0.5,
  cov_type = c("binary", "continuous"),
  cov_dist = c(0.5, sqrt(2)),
  beta.event  = c(0.2, 0.2),
  beta.censor = c(0, 0),
  logHR.trt = log(0.5),
  enroll_rate = 50/3, beta.event_trt = NULL,
  dist.event = "Weibull", dist.censor = "Weibull",
  event.scale = 1/5^3, event.shape = 3,
  censor.scale = 10^(-6), censor.shape = 6,
  blinded = TRUE,
  assess_window = 2,
  seed = 1
)
names(data.weibull)

## --- Log-logistic event/censoring with a common PH treatment effect ---
data.logl <- generate_data(
  N = 80, E_target = 50, E_cutoff = 25, p_trt = 0.5,
  cov_type = c("binary", "continuous"),
  cov_dist = c(0.5, sqrt(2)),
  beta.event  = c(0.2, 0.2),
  beta.censor = c(0, 0),
  logHR.trt = log(0.5),
  enroll_rate = 50/3, beta.event_trt = NULL,
  dist.event = "Loglogistic", dist.censor = "Loglogistic",
  event.scale = 6, event.shape = 6,
  censor.scale = 20, censor.shape = 4,
  blinded = TRUE,
  assess_window = 2,
  seed = 1
)
summary(data.logl$truesurvival$t_event)
### true underlying event time without administrative censoring

## --- Weibull arm-specific models (logHR.trt = NULL) ---
data.weibull.nonPH <- generate_data(
  N = 80, E_target = 50, E_cutoff = 25, p_trt = 0.5,
  cov_type = c("binary", "continuous"),
  cov_dist = c(0.5, sqrt(2)),
  beta.event  = c(0.2, 0.2),
  beta.censor = c(0, 0),
  logHR.trt = NULL,
  enroll_rate = 50/3,
  dist.event = "Weibull", dist.censor = "Weibull",
  event.scale = 1/5^3, event.shape = 3,     # control
  event.scale_trt = 1/6^3, event.shape_trt = 3, # experiment
  beta.event_trt = c(0.15, 0.2),
  censor.scale = 10^(-6), censor.shape = 6,
  blinded = TRUE,
  assess_window = 2,
  seed = 1
)
data.weibull.nonPH$cuttime.true

}
