% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/method_glm.R
\name{method_glm}
\alias{method_glm}
\title{Mass imputation using the generalized linear model method}
\usage{
method_glm(
  y_nons,
  X_nons,
  X_rand,
  svydesign,
  weights = NULL,
  family_outcome = "gaussian",
  start_outcome = NULL,
  vars_selection = FALSE,
  pop_totals = NULL,
  pop_size = NULL,
  control_outcome = control_out(),
  control_inference = control_inf(),
  verbose = FALSE,
  se = TRUE
)
}
\arguments{
\item{y_nons}{target variable from non-probability sample}

\item{X_nons}{a \code{model.matrix} with auxiliary variables from non-probability sample}

\item{X_rand}{a \code{model.matrix} with auxiliary variables from probability sample}

\item{svydesign}{a svydesign object}

\item{weights}{case / frequency weights from non-probability sample}

\item{family_outcome}{family for the glm model}

\item{start_outcome}{start parameters (default \code{NULL})}

\item{vars_selection}{whether variable selection should be conducted}

\item{pop_totals}{population totals from the \code{nonprob} function}

\item{pop_size}{population size from the \code{nonprob} function}

\item{control_outcome}{controls passed by the \code{control_out} function}

\item{control_inference}{controls passed by the \code{control_inf} function (currently not used, for further development)}

\item{verbose}{parameter passed from the main \code{nonprob} function}

\item{se}{whether standard errors should be calculated}
}
\value{
a \code{nonprob_method} class which is a \code{list} with the following entries

\describe{
\item{model_fitted}{fitted model, either a \code{glm.fit} or a \code{cv.ncvreg} object}
\item{y_nons_pred}{predicted values for the non-probability sample}
\item{y_rand_pred}{predicted values for the probability sample or population totals}
\item{coefficients}{coefficients for the model (if available)}
\item{svydesign}{an updated \code{surveydesign2} object (new column \code{y_hat_MI} is added)}
\item{y_mi_hat}{estimated population mean for the target variable}
\item{vars_selection}{whether variable selection was performed}
\item{var_prob}{variance for the probability sample component (if available)}
\item{var_nonprob}{variance for the non-probability sample component}
\item{var_total}{total variance; if possible it should be \code{var_prob + var_nonprob}, if not, just a scalar}
\item{model}{model type (character \code{"glm"})}
\item{family}{family type (character \code{"glm"})}
}
}
\description{
Model for the outcome for the mass imputation estimator using generalized linear
models via the \code{stats::glm} function. Estimation of the mean is done using the \eqn{S_B}
probability sample or known population totals.
}
\details{
Analytical variance

The variance of the mean is estimated based on the following approach

(a) non-probability part  (\eqn{S_A} with size \eqn{n_A}; denoted as \code{var_nonprob} in the result)

\deqn{
\hat{V}_1 = \frac{1}{n_A^2}\sum_{i=1}^{n_A} \hat{e}_i^2 \left\lbrace \boldsymbol{h}(\boldsymbol{x}_i; \hat{\boldsymbol{\beta}})^\prime\hat{\boldsymbol{c}}\right\rbrace^2,
}

where \eqn{\hat{e}_i = y_i - m(\boldsymbol{x}_i; \hat{\boldsymbol{\beta}})} and
\deqn{\widehat{\boldsymbol{c}}=\left\lbrace n_A^{-1} \sum_{i \in A} \dot{\boldsymbol{m}}\left(\boldsymbol{x}_i ; \boldsymbol{\beta}^*\right) \boldsymbol{h}\left(\boldsymbol{x}_i ; \boldsymbol{\beta}^*\right)^{\prime}\right\rbrace^{-1} N^{-1} \sum_{i \in B} w_i \dot{\boldsymbol{m}}\left(\boldsymbol{x}_i ; \boldsymbol{\beta}^*\right).}

Under the linear regression model \eqn{\boldsymbol{h}\left(\boldsymbol{x}_i ; \widehat{\boldsymbol{\beta}}\right)=\boldsymbol{x}_i} and \eqn{\widehat{\boldsymbol{c}}=\left(n_A^{-1} \sum_{i \in A} \boldsymbol{x}_i \boldsymbol{x}_i^{\prime}\right)^{-1} N^{-1} \sum_{i \in B} w_i \boldsymbol{x}_i .}

(b) probability part (\eqn{S_B} with size \eqn{n_B}; denoted as \code{var_prob} in the result)

This part uses functionalities of the \code{{survey}} package and the variance is estimated using the following
equation:

\deqn{
\hat{V}_2=\frac{1}{N^2} \sum_{i=1}^{n_B} \sum_{j=1}^{n_B} \frac{\pi_{i j}-\pi_i \pi_j}{\pi_{i j}}
\frac{m(\boldsymbol{x}_i; \hat{\boldsymbol{\beta}})}{\pi_i} \frac{m(\boldsymbol{x}_j; \hat{\boldsymbol{\beta}})}{\pi_j}.
}

Note that \eqn{\hat{V}_2} in principle can be estimated in various ways depending on the type of the design and whether population size is known or not.

Furthermore, if only population totals/means are known and assumed to be fixed we set \eqn{\hat{V}_2=0}.

Information on the case when \code{svydesign} is not available:
\enumerate{
\item variance is estimated only for the non-probability part with \eqn{\hat{V}_1} defined above.
\item the point estimator \eqn{\hat{\mu}_y} for linear regression is computed as \eqn{\mu_x^\prime\hat{\boldsymbol{\beta}}},
where \eqn{\mu_x} is the vector of population means.
\item for non-linear functions such as logistic or Poisson regression we use a simplification, i.e. we report
the point estimate as \eqn{\exp(\mu_x^\prime\hat{\boldsymbol{\beta}})} for Poisson and \eqn{\frac{\exp(\mu_x^\prime\hat{\boldsymbol{\beta}})}{1+\exp(\mu_x^\prime\hat{\boldsymbol{\beta}})}} for logistic regression.
}
}
\examples{

data(admin)
data(jvs)
jvs_svy <- svydesign(ids = ~ 1,  weights = ~ weight, strata = ~ size + nace + region, data = jvs)

res_glm <- method_glm(y_nons = admin$single_shift,
                      X_nons = model.matrix(~ region + private + nace + size, admin),
                      X_rand = model.matrix(~ region + private + nace + size, jvs),
                      svydesign = jvs_svy)

res_glm

}
\references{
Kim, J. K., Park, S., Chen, Y., & Wu, C. (2021). Combining non-probability and probability survey samples
through mass imputation. Journal of the Royal Statistical Society Series A: Statistics in Society,
184(3), 941-963.
}
