\name{SIMEXBoost}
\alias{SIMEXBoost}

\title{Boosting Method with SIMEX Correction for High-Dimensional Error-Prone Data
}
\description{
This function performs variable selection and estimation for (ultra)high-dimensional data subject to covariate measurement error, such as those generated by \code{ME_Data}.

}
\usage{
SIMEXBoost(Y,Xstar,zeta=c(0,0.25,0.5,0.75,1),B=500,type="normal",sigmae,Iter=100,
Lambda=0,Extrapolation="linear")
}


\arguments{
  \item{Y}{Responses in the dataset. If \code{type} is specified as "normal", "binary", or "poisson", then \code{Y} should be an n-dimensional vector; if \code{type} is given by "AFT-normal" or "AFT-loggamma", then \code{Y} should be an (n,2) matrix of interval-censored responses, where the first column is the lower bound of an interval-censored response and the second column is the upper bound of an interval-censored response.
}

  \item{Xstar}{
An (n,p) matrix of the error-prone covariates.
}

  \item{zeta}{A sequence of values used in the procedure of the SIMEX method. A default sequence is given by \code{c(0,0.25,0.5,0.75,1)}.
}
  \item{B}{The number of repetitions in the SIMEX method. The default value is 500.

}
  \item{type}{
\code{type} specifies the regression model. "normal" means the linear regression model with the error term generated by the standard normal distribution; "binary" means the logistic regression model; "poisson" means the Poisson regression model. In addition, the accelerated failure time (AFT) model is also considered to fit length-biased and interval-censored survival data. Specifically, "AFT-normal" represents the AFT model with the error term following a normal distribution; "AFT-loggamma" represents the AFT model with the error term following a log-gamma distribution.
}
  \item{sigmae}{
A (p,p) covariance matrix of the noise term in the classical measurement error model.
}

  \item{Iter}{The number of iterations for the boosting procedure. The default value is 100.

}

  \item{Lambda}{A tuning parameter that aims to deal with the collinearity of covariates. \code{Lambda=0} means that no L2-norm penalty is involved, and it is taken as the default value.

}

  \item{Extrapolation}{An extrapolation function for the SIMEX method. Two choices are included: "linear" means a linear function; "quadratic" means a quadratic function. The default argument is "linear".

}



}

\value{
\item{BetaHatCorrect}{the estimator obtained by SIMEXBoost.}

}


\details{
This function aims to address variable selection and estimation for (ultra)high-dimensional data subject to covariate measurement error. In the SIMEX method, the inputs \code{B}, \code{zeta}, and \code{Extrapolation} are user-specified. Normally, larger values of \code{B} and \code{zeta} give a more precise estimator, at the cost of longer computational time. More detailed descriptions of the SIMEX method can be found in the following references.
}

\references{
Chen, L.-P. (2023). De-noising boosting methods for variable selection and estimation subject to error-prone variables. \emph{Statistics and Computing}, 33:38.

Chen, L.-P. and Qiu, B. (2023). Analysis of length-biased and partly interval-censored survival data with mismeasured covariates. \emph{Biometrics}. To appear. \doi{10.1111/biom.13898}

Chen, L.-P. and Yi, G. Y. (2021). Analysis of noisy survival data with graphical proportional hazards measurement error models. \emph{Biometrics}, 77, 956--969.

Hastie, T., Tibshirani, R. and Friedman, J. (2008). \emph{The Elements of Statistical Learning: Data Mining, Inference, and Prediction}. Springer, New York.


}


\author{
Bangxu Qiu and Li-Pang Chen
}

\seealso{
  \code{\link{ME_Data}},
  \code{\link{Boost_VSE}}
  }
\examples{

##### Example 1: linear model, default extrapolation #####

# Simulate n = 400 observations of p = 20 covariates.
n <- 400
p <- 20
X1 <- matrix(rnorm(p * n), nrow = n, ncol = p, byrow = TRUE)

# Only the first three coefficients are non-zero; the noise covariance
# supplied as sigmae is 0.1 times the identity matrix.
beta0 <- c(1, 1, 1, rep(0, p - 3))
Sigma_e <- diag(0.1, p)

data <- ME_Data(X1, beta = beta0, type = "normal", sigmae = Sigma_e)

Y <- data$response
Xstar <- data$ME_covariate

# B and Iter are set well below their defaults (500 and 100).
SIMEXBoost(Y, Xstar, B = 2, zeta = c(0, 0.5, 1),
           type = "normal", Iter = 3, sigmae = Sigma_e)



##### Example 2: AFT model with type "AFT-loggamma" #####

# Same construction as above, now with p = 100 covariates.
n <- 400
p <- 100
X1 <- matrix(rnorm(p * n), nrow = n, ncol = p, byrow = TRUE)

beta0 <- c(1, 1, 1, rep(0, p - 3))
Sigma_e <- diag(0.1, p)

data <- ME_Data(X1, beta = beta0, pr0 = 0.3,
                type = "AFT-loggamma", sigmae = Sigma_e)

Y <- data$response
Xstar <- data$ME_covariate

SIMEXBoost(Y, Xstar, B = 2, zeta = c(0, 0.5, 1),
           type = "AFT-loggamma", Iter = 3, sigmae = Sigma_e)

}

\keyword{function}
\keyword{core}
