% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/LeverageSampling.R
\name{LeverageSampling}
\alias{LeverageSampling}
\title{Basic and shrinkage leverage sampling for Generalised Linear Models}
\usage{
LeverageSampling(rf,Y,X,N,S_alpha,family)
}
\arguments{
\item{rf}{sample size}

\item{Y}{response data or Y}

\item{X}{covariate data or X matrix that has all the covariates (first column is for the intercept)}

\item{N}{size of the big data}

\item{S_alpha}{shrinkage factor in between 0 and 1}

\item{family}{a character string, one of "linear", "logistic" or "poisson", specifying the regression model from Generalised Linear Models}
}
\value{
The output of \code{LeverageSampling} gives a list of

\code{Beta_Estimates} estimated model parameters in a data.frame after sampling

\code{Variance_Epsilon_Estimates} matrix of estimated variance for epsilon in a data.frame after sampling (valid only for linear regression)

\code{Sample_Basic_Leverage} list of indexes for the optimal samples obtained based on basic leverage

\code{Sample_Shrinkage_Leverage} list of indexes for the optimal samples obtained based on shrinkage leverage

\code{Sampling_Probability} matrix of calculated sampling probabilities for basic and shrinkage leverage
}
\description{
Using this function, samples are drawn from big data under linear, logistic and Poisson regression to describe the data.
Sampling probabilities are obtained based on the basic and shrinkage leverage methods.
}
\details{
Leverage sampling algorithm for big data under Generalised Linear Models (linear, logistic and Poisson regression).

First, a random sample of size \eqn{min(rf)/2} is obtained and used to estimate the model parameters. Using the estimated parameters,
leverage scores are evaluated for leverage sampling.

Using the estimated leverage scores, a sample of size \eqn{rf} is obtained. Finally,
the sample of size \eqn{rf} is used to estimate the model parameters.

\strong{NOTE} : If the input parameters do not satisfy the given domain conditions,
the necessary error messages will be provided to help the user proceed.

If \eqn{rf} does not satisfy its required conditions then an error message will be produced.

If the big data \eqn{X,Y} has any missing values then an error message will be produced.

The big data size \eqn{N} is compared with the sizes of \eqn{X,Y} and if they are not aligned an error
message will be produced.

If \eqn{0 < \alpha_{S} < 1} is not satisfied an error message will be produced.

A character vector is provided for \code{family} and if it is not one of the three types an error message
will be produced.
}
\examples{
# Linear regression: set up the inputs and generate the data with GenGLMdata
Dist <- "Normal"
Dist_Par <- list(Mean = 0, Variance = 1, Error_Variance = 0.5)
No_Of_Var <- 2
Beta <- c(-1, 2, 1)
N <- 5000
Family <- "linear"
Full_Data <- GenGLMdata(Dist, Dist_Par, No_Of_Var, Beta, N, Family)

# Values passed as rf, and the complete data set
rf <- rep(100 * c(6, 10), 50)
Original_Data <- Full_Data$Complete_Data

# The first column is passed as Y, the remaining columns as X
Results <- LeverageSampling(rf = rf,
                            Y = as.matrix(Original_Data[, 1]),
                            X = as.matrix(Original_Data[, -1]),
                            N = nrow(Original_Data),
                            S_alpha = 0.95,
                            family = "linear")

plot_Beta(Results)

# Logistic regression: set up the inputs and generate the data with GenGLMdata
Dist <- "Normal"
Dist_Par <- list(Mean = 0, Variance = 1)
No_Of_Var <- 2
Beta <- c(-1, 2, 1)
N <- 5000
Family <- "logistic"
Full_Data <- GenGLMdata(Dist, Dist_Par, No_Of_Var, Beta, N, Family)

# Values passed as rf, and the complete data set
rf <- rep(100 * c(6, 10), 25)
Original_Data <- Full_Data$Complete_Data

# The first column is passed as Y, the remaining columns as X
Results <- LeverageSampling(rf = rf,
                            Y = as.matrix(Original_Data[, 1]),
                            X = as.matrix(Original_Data[, -1]),
                            N = nrow(Original_Data),
                            S_alpha = 0.95,
                            family = "logistic")

plot_Beta(Results)

# Poisson regression: set up the inputs and generate the data with GenGLMdata
# (NULL is passed in place of the distribution parameters)
Dist <- "Normal"
No_Of_Var <- 2
Beta <- c(-1, 0.5, 0.5)
N <- 5000
Family <- "poisson"
Full_Data <- GenGLMdata(Dist, NULL, No_Of_Var, Beta, N, Family)

# Values passed as rf, and the complete data set
rf <- rep(100 * c(6, 10), 25)
Original_Data <- Full_Data$Complete_Data

# The first column is passed as Y, the remaining columns as X
Results <- LeverageSampling(rf = rf,
                            Y = as.matrix(Original_Data[, 1]),
                            X = as.matrix(Original_Data[, -1]),
                            N = nrow(Original_Data),
                            S_alpha = 0.95,
                            family = "poisson")

plot_Beta(Results)

}
\references{
\insertRef{ma2014statistical}{NeEDS4BigData}

\insertRef{ma2015leveraging}{NeEDS4BigData}
}
