% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generateData.R
\name{generateData}
\alias{generateData}
\title{Generate synthetic data with missing values for missoNet}
\usage{
generateData(
  n,
  p,
  q,
  rho,
  missing.type = "MCAR",
  X = NULL,
  Beta = NULL,
  E = NULL,
  Theta = NULL,
  Sigma.X = NULL,
  Beta.row.sparsity = 0.2,
  Beta.elm.sparsity = 0.2,
  seed = NULL
)
}
\arguments{
\item{n}{Integer. Sample size (number of observations). Must be at least 2.}

\item{p}{Integer. Number of predictor variables. Must be at least 1.}

\item{q}{Integer. Number of response variables. Must be at least 2.}

\item{rho}{Numeric scalar or vector of length \code{q}. Proportion of missing
values for each response variable. Values must be in [0, 1). If scalar,
the same missing rate is applied to all responses.}

\item{missing.type}{Character string specifying the missing data mechanism.
One of:
\itemize{
\item \code{"MCAR"} (default): Missing Completely At Random
\item \code{"MAR"}: Missing At Random (depends on predictors)
\item \code{"MNAR"}: Missing Not At Random (depends on response values)
}}

\item{X}{Optional \code{n x p} matrix. User-supplied predictor matrix. If
\code{NULL} (default), predictors are simulated from a multivariate normal
distribution with mean zero and covariance \code{Sigma.X}.}

\item{Beta}{Optional \code{p x q} matrix. Regression coefficient matrix. If
\code{NULL} (default), a sparse coefficient matrix is generated with
sparsity controlled by \code{Beta.row.sparsity} and \code{Beta.elm.sparsity}.}

\item{E}{Optional \code{n x q} matrix. Error/noise matrix. If \code{NULL}
(default), errors are simulated from a multivariate normal distribution
with mean zero and precision matrix \code{Theta}.}

\item{Theta}{Optional \code{q x q} positive definite matrix. Precision matrix
(inverse covariance) for the response variables. If \code{NULL} (default),
a block-structured precision matrix is generated with four types of graph
structures. Only used when \code{E = NULL}.}

\item{Sigma.X}{Optional \code{p x p} positive definite matrix. Covariance
matrix for the predictors. If \code{NULL} (default), an AR(1) covariance
structure with correlation 0.7 is used. Only used when \code{X = NULL}.}

\item{Beta.row.sparsity}{Numeric in [0, 1]. Proportion of rows in \code{Beta}
that contain at least one non-zero element (the active rows), so larger values
give a denser matrix. Default is 0.2. Only used when \code{Beta = NULL}.}

\item{Beta.elm.sparsity}{Numeric in [0, 1]. Proportion of non-zero elements
within the active rows of \code{Beta}, so larger values give denser rows.
Default is 0.2. Only used when \code{Beta = NULL}.}

\item{seed}{Optional integer. Random seed for reproducibility.}
}
\value{
A list containing:
\item{X}{\code{n x p} matrix. Predictor matrix (either user-supplied or simulated).}
\item{Y}{\code{n x q} matrix. Complete response matrix without missing values.}
\item{Z}{\code{n x q} matrix. Response matrix with missing values (coded as NA).}
\item{Beta}{\code{p x q} matrix. Regression coefficient matrix used in generation.}
\item{Theta}{\code{q x q} matrix or NULL. Precision matrix (if used in generation).}
\item{rho}{Numeric vector of length \code{q}. Missing rates for each response.}
\item{missing.type}{Character string. The missing mechanism used.}
}
\description{
Generates synthetic data from a conditional Gaussian graphical model with
user-specified missing data mechanisms. This function is designed for simulation
studies and testing of the missoNet package, supporting three types of
missingness: Missing Completely At Random (MCAR), Missing At Random (MAR),
and Missing Not At Random (MNAR).
}
\details{
The function generates data through the following model:
\deqn{Y = XB + E}
where:
\itemize{
\item \eqn{X \in \mathbb{R}^{n \times p}} is the predictor matrix
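\item \eqn{B \in \mathbb{R}^{p \times q}} is the coefficient matrix (argument \code{Beta})
\item \eqn{E \in \mathbb{R}^{n \times q}} is the error matrix, each row of which follows a multivariate normal distribution \eqn{\mathcal{N}_q(0, \Theta^{-1})}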
\item \eqn{Y \in \mathbb{R}^{n \times q}} is the complete response matrix
}

Missing values are then introduced to create \eqn{Z} (the observed response
matrix with NAs) according to the specified mechanism:

\strong{MCAR}: Each element of column \eqn{j} is missing with probability
\code{rho[j]}, independently of all variables.

\strong{MAR}: Missingness depends on the predictors through a logistic model:
\deqn{P(Z_{ij} = \mathrm{NA}) = c_j \times \mathrm{logit}^{-1}\left((XB)_{ij}\right)}
where \eqn{c_j} is calibrated to achieve the target missing rate \code{rho[j]}.

\strong{MNAR}: In each column \eqn{j}, the lowest \code{rho[j]} proportion of
values is set as missing, so missingness depends on the response values themselves.
}
\examples{
# Example 1: Basic usage with default settings
# (default MCAR mechanism; the scalar rho is applied to every response)
sim.dat <- generateData(n = 300, p = 50, q = 20, rho = 0.1, seed = 857)

# Check dimensions and missing rate
dim(sim.dat$X)      # 300 x 50
dim(sim.dat$Z)      # 300 x 20
mean(is.na(sim.dat$Z))  # approximately 0.1

# Example 2: Variable missing rates with MAR mechanism
# rho.vec has length q = 20, i.e. one target missing rate per response
rho.vec <- seq(0.05, 0.25, length.out = 20)
sim.dat <- generateData(n = 300, p = 50, q = 20, 
                       rho = rho.vec, 
                       missing.type = "MAR")

# Example 3: High sparsity in coefficient matrix
sim.dat <- generateData(n = 500, p = 100, q = 30,
                       rho = 0.15,
                       Beta.row.sparsity = 0.1,  # 10\% active predictors
                       Beta.elm.sparsity = 0.3)  # 30\% active in each row

# Example 4: User-supplied matrices
# n, p and q have no defaults, so they are still passed explicitly
n <- 300; p <- 50; q <- 20
X <- matrix(rnorm(n*p), n, p)
Beta <- matrix(rnorm(p*q) * rbinom(p*q, 1, 0.1), p, q)  # about 10\% non-zero
Theta <- diag(q) + 0.1  # Simple precision structure: 1.1 on the diagonal, 0.1 elsewhere

sim.dat <- generateData(X = X, Beta = Beta, Theta = Theta,
                       n = n, p = p, q = q,
                       rho = 0.2, missing.type = "MNAR")

\donttest{
# Example 5: Use generated data with missoNet
library(missoNet)
sim.dat <- generateData(n = 400, p = 50, q = 10, rho = 0.15)

# Split into training and test sets (first 300 rows vs. last 100 rows)
train.idx <- 1:300
test.idx <- 301:400

# Fit missoNet on the training rows, using the response matrix Z that contains NAs
fit <- missoNet(X = sim.dat$X[train.idx, ], 
               Y = sim.dat$Z[train.idx, ],
               lambda.beta = 0.1, 
               lambda.theta = 0.1)

# Predict responses for the held-out test rows
pred <- predict(fit, newx = sim.dat$X[test.idx, ])
}

}
\seealso{
\code{\link{missoNet}} for fitting models to data with missing values,
\code{\link{cv.missoNet}} for cross-validation
}
\author{
Yixiao Zeng \email{yixiao.zeng@mail.mcgill.ca}, Celia M. T. Greenwood
}
