% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/PLreg.R
\name{PLreg}
\alias{PLreg}
\alias{PLreg.fit}
\title{Power Logit Regression Models for Bounded Variables}
\usage{
PLreg(
  formula,
  data,
  subset,
  na.action,
  family = c("NO", "LO", "TF", "PE", "SN", "SLASH", "Hyp"),
  zeta = NULL,
  link = c("logit", "probit", "cloglog", "cauchit", "loglog"),
  link.sigma = NULL,
  type = c("pML", "ML"),
  control = PLreg.control(...),
  model = TRUE,
  y = TRUE,
  x = FALSE,
  ...
)

PLreg.fit(
  X,
  y,
  S = NULL,
  family,
  type = "pML",
  zeta = zeta,
  link = "logit",
  link.sigma = "log",
  control = PLreg.control()
)
}
\arguments{
\item{formula}{a symbolic description of the model. See details for further information.}

\item{data, subset, na.action}{arguments controlling formula processing via \code{\link{model.frame}}.}

\item{family}{a description of the symmetric distribution to be used for generating the power logit model.
Supported families include "\code{NO}", "\code{LO}", "\code{TF}", "\code{PE}", "\code{Hyp}", "\code{SN}"
and "\code{SLASH}", which correspond to the power logit normal, type II logistic,
Student-t, power exponential, hyperbolic, sinh-normal, and slash distributions, respectively.}

\item{zeta}{a numeric value or numeric vector that represents the extra parameter of the distribution. For the
PL-NO and PL-LO models, no extra parameter is needed.}

\item{link}{an optional character that specifies the link function of the median submodel (mu).
The "\code{logit}", "\code{probit}", "\code{cloglog}", "\code{cauchit}",
"\code{loglog}" functions are supported. The \code{logit} function is the default.}

\item{link.sigma}{an optional character that specifies the link function of the dispersion submodel (sigma).
The "\code{log}", "\code{sqrt}" functions are supported. The default is \code{log}.}

\item{type}{character specifying the type of estimator for the skewness parameter.
Currently, penalized maximum likelihood ("\code{pML}") and maximum likelihood ("\code{ML}") are supported.
If the skewness parameter is fixed, \code{ML} type is used.}

\item{control}{a list of control arguments specified via \code{\link{PLreg.control}}.}

\item{model, y, x}{logicals. If \code{TRUE} the corresponding components of the fit
(model frame, response, model matrix) are returned.  For \code{\link{PLreg.fit}}, \code{y} must
be the numeric response vector (with values in (0,1)).}

\item{...}{arguments passed to \code{\link{PLreg.control}}.}

\item{X}{numeric regressor matrix for the median submodel.}

\item{S}{numeric regressor matrix for the dispersion submodel.}
}
\value{
\code{PLreg} returns an object of class "\code{PLreg}" with the following
components (\code{PLreg.fit} returns the elements up to \code{v}).
\item{coefficients}{a list with the "\code{median}", "\code{dispersion}" and
"\code{skewness}" (if \code{lambda = NULL}) coefficients.}
\item{residuals}{a vector of the raw residuals (the difference between the
observed and the fitted response).}
\item{fitted.values}{a vector with the fitted values of the median submodel.}
\item{optim}{a list with the output from \code{optim}. When lambda is not fixed,
if \code{type = "pML"}, the output refers to the iterative process of
the median and dispersion parameters only and, if \code{type = "ML"},
to the maximization of the likelihood for all the parameters.}
\item{family}{a character specifying the \code{family} used.}
\item{method}{the method argument passed to the optim call.}
\item{control}{the control arguments passed to the optim call.}
\item{start}{a vector with the starting values used in the iterative process.}
\item{nobs}{number of observations.}
\item{df.null}{residual degrees of freedom in the null model
(constant median and dispersion), i.e., \eqn{n-3}.}
\item{df.residual}{residual degrees of freedom in the fitted model.}
\item{lambda}{value of the skewness parameter lambda
(\code{NULL} when lambda is not fixed).}
\item{loglik}{log-likelihood of the fitted model.}
\item{vcov}{covariance matrix of all the parameters.}
\item{pseudo.r.squared}{pseudo R-squared value.}
\item{Upsilon.zeta}{an overall goodness-of-fit measure.}
\item{link}{a list with elements "\code{median}" and "\code{dispersion}" containing the
link objects for the respective models.}
\item{converged}{logical indicating successful convergence of the
iterative process.}
\item{zeta}{a numeric specifying the value of zeta used in the estimation
process.}
\item{type}{a character specifying the estimation method used.}
\item{v}{a vector with the v(z) values for all the observations (see Queiroz and
Ferrari (2022)).}
\item{call}{the original function call.}
\item{formula}{the formula used.}
\item{terms}{a list with elements "\code{median}", "\code{dispersion}" and "\code{full}" containing
the term objects for the respective models.}
\item{levels}{a list with elements "\code{median}", "\code{dispersion}" and "\code{full}" containing
the levels of the categorical regressors.}
\item{contrasts}{a list with elements "\code{median}" and "\code{dispersion}"
containing the contrasts corresponding to levels from the respective models.}
\item{model}{the full model frame (if \code{model = TRUE}).}
\item{y}{the response variable (if \code{y = TRUE}).}
\item{x}{a list with elements "\code{median}" and "\code{dispersion}" with the matrices from
the median and dispersion submodels (if \code{x = TRUE}).}
}
\description{
\code{PLreg} is used to fit power logit regression models for continuous and bounded variables via the maximum likelihood approach.
Both median and dispersion of the response variable are modeled through
parametric functions.
}
\details{
The power logit regression models, proposed by Queiroz and Ferrari (2022), are useful in
situations when the response variable is continuous and bounded on the unit interval (0, 1).
The median and the dispersion parameters are modeled through parametric link
functions. The models depend on a skewness parameter (called \eqn{\lambda}). When the skewness parameter is fixed
and equal to 1, the power logit models coincide with the GJS regression models
(Lemonte and Bazan, 2016). Queiroz and Ferrari (2022) suggest using a penalized maximum
likelihood method to estimate the parameters. This method is implemented in
\code{PLreg} by default when \eqn{\lambda} is not fixed. If convergence is not reached,
maximum likelihood estimation is performed. The estimation
process uses \code{\link{optim}}. If no starting values are specified,
the \code{PLreg} function uses those suggested by Queiroz and Ferrari (2022).
This function also fits the log-log regression models by setting \eqn{\lambda}
at zero (\eqn{\lambda = 0} represents \eqn{\lambda \rightarrow 0^+}).\cr \cr
The formulation of the model has the same structure as in the usual functions
\code{\link{lm}} and \code{\link{glm}}. The argument
\code{formula} may comprise three parts (separated by the symbols "\code{~}" and "\code{|}"),
namely: the observed response variable in the unit interval; the predictor of the median submodel,
with link function \code{link}; and the predictor of the dispersion submodel, with link function
\code{link.sigma}. If the model has constant dispersion, the third part may be omitted and the link function for sigma
is "\code{log}" by default. The skewness parameter \code{lambda} may be
treated as fixed or not (default). If \code{lambda} is fixed, its value
must be specified in the \code{control} argument. \cr \cr
Some methods are available for objects of class "\code{PLreg}",
see \code{\link{plot.PLreg}}, \code{\link{summary.PLreg}},
\code{\link{coef.PLreg}}, \code{\link{vcov.PLreg}}, and
\code{\link{residuals.PLreg}}, for details and other methods.
}
\examples{
#### Body fat data
data("bodyfat_Aeolus")

#Initial model with zeta = 2
fit1 <- PLreg(percentfat ~ days + sex + year, data = bodyfat_Aeolus,
             family = "PE", zeta = 2)
summary(fit1)
# Choosing the best value for zeta
# extra.parameter(fit1, lower = 1, upper = 4, grid = 15)

# Using zeta = 1.7
fit2 <- PLreg(percentfat ~ days + sex + year, data = bodyfat_Aeolus,
             family = "PE", zeta = 1.7)
summary(fit2)

# Fixing lambda = 1
fit3 <- PLreg(percentfat ~ days + sex + year, data = bodyfat_Aeolus,
             family = "PE", zeta = 1.7,
             control = PLreg.control(lambda = 1))
summary(fit3)

# Comparing the AIC and Upsilon values between fit2 and fit3
AIC(fit2) < AIC(fit3) # TRUE
fit2$Upsilon.zeta < fit3$Upsilon.zeta #TRUE

#### Firm cost data
data("Firm")

fitPL <- PLreg(firmcost ~ sizelog + indcost | sizelog + indcost,
              data = Firm,
              family = "SLASH",
              zeta = 2.13)
summary(fitPL)
#extra.parameter(fitPL, lower = 1.2, upper = 4, grid = 10)
#plot(fitPL, type = "standardized")
#envelope(fitPL, type = "standardized")
\donttest{
fitPL_wo72 <- PLreg(firmcost ~ sizelog + indcost | sizelog + indcost,
                   data = Firm[-72,],
                   family = "SLASH",
                   zeta = 2.13)
fitPL_wo15 <- PLreg(firmcost ~ sizelog + indcost | sizelog + indcost,
                   data = Firm[-15,],
                   family = "SLASH",
                   zeta = 2.13)
fitPL_wo16 <- PLreg(firmcost ~ sizelog + indcost | sizelog + indcost,
                   data = Firm[-16,],
                   family = "SLASH",
                   zeta = 2.13)

coef.mu      <- coef(fitPL)[1:3]
coef.mu_wo72 <- coef(fitPL_wo72)[1:3]
coef.mu_wo15 <- coef(fitPL_wo15)[1:3]
coef.mu_wo16 <- coef(fitPL_wo16)[1:3]

plot(Firm$indcost, Firm$firmcost,
    pch = "+",
    xlab = "indcost",
    ylab = "firmcost")
#identify(Firm$indcost, Firm$firmcost)
covariate = matrix(c(rep.int(1, 1000),
                    rep(median(Firm$sizelog), 1000),
                    seq(0, 1.22, length.out = 1000)),
                  ncol = 3)
lines(covariate[,3],
     as.vector(fitPL$link$median$linkinv(covariate\%*\%coef.mu)),
     type = "l")
lines(covariate[,3],
     as.vector(fitPL$link$median$linkinv(covariate\%*\%coef.mu_wo72)),
     type = "l", lty = 2, col = "blue")
lines(covariate[,3],
     as.vector(fitPL$link$median$linkinv(covariate\%*\%coef.mu_wo15)),
     type = "l", lty = 3, col = "red")
lines(covariate[,3],
     as.vector(fitPL$link$median$linkinv(covariate\%*\%coef.mu_wo16)),
     type = "l", lty = 4, col = "green")
parameters = c("pML",
              "pML w/o 72",
              "pML w/o 15",
              "pML w/o 16")
legend(x = 0.5,
      y = 0.8,
      legend = parameters,
      col = c("black", "blue", "red", "green"),
      lty = c(1, 2, 3, 4),
      cex = 0.6)}

}
\references{
Queiroz, F. F. and Ferrari, S. L. P. (2022). Power logit regression
for modeling bounded data. \emph{arXiv}:2202.01697. \cr \cr
Lemonte, A. J. and Bazan, J. L. (2016). New class of Johnson SB distributions
and its associated regression model for rates and proportions. \emph{Biometrical Journal}. 58:727-746.
}
\seealso{
\code{\link{summary.PLreg}}, \code{\link{PLreg.control}}, \code{\link{residuals.PLreg}}
}
\author{
Francisco Felipe de Queiroz (\email{ffelipeq@outlook.com}) and Silvia L. P. Ferrari.
}
