% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/fpsden.r
\name{fpsden}
\alias{cvpsden}
\alias{fpsden}
\alias{iwlspsden}
\alias{lpsden}
\alias{nlpsden}
\title{MLE Fitting of P-splines Density Estimator}
\usage{
fpsden(x, lambdaseq = NULL, breaks = NULL, xrange = NULL, nseg = 10,
  degree = 3, design.knots = NULL, ord = 2)

lpsden(x, beta = NULL, bsplines = NULL, nbinwidth = 1, log = TRUE)

nlpsden(pvector, x, bsplines = NULL, nbinwidth = 1, finitelik = FALSE)

cvpsden(lambda = 1, counts, bsplines, ord = 2)

iwlspsden(counts, bsplines, ord = 2, lambda = 10)
}
\arguments{
\item{x}{quantiles}

\item{lambdaseq}{vector of \eqn{\lambda}'s (or scalar) to be considered in profile likelihood. Required.}

\item{breaks}{histogram breaks (as in \code{\link[graphics:hist]{hist}} function)}

\item{xrange}{vector of minimum and maximum of B-spline (support of density)}

\item{nseg}{number of segments between knots}

\item{degree}{degree of B-splines (0 is constant, 1 is linear, etc.)}

\item{design.knots}{spline knots for splineDesign function}

\item{ord}{order of difference used in the penalty term}

\item{beta}{vector of B-spline coefficients (required)}

\item{bsplines}{matrix of B-splines}

\item{nbinwidth}{scaling to convert count frequency into proper density}

\item{log}{logical, if TRUE then log density}

\item{pvector}{vector of B-spline coefficients (the parameter vector \code{beta})}

\item{finitelik}{logical, should log-likelihood return finite value for invalid parameters}

\item{lambda}{penalty coefficient}

\item{counts}{counts from histogram binning}
}
\value{
Log-likelihood for original data is given by \code{\link[evmix:fpsden]{lpsden}} and its
  wrapper for negative log-likelihood by \code{\link[evmix:fpsden]{nlpsden}}. Cross-validation
  sum of squared errors is provided by \code{\link[evmix:fpsden]{cvpsden}}. Poisson regression
  fitting by IWLS is carried out in \code{\link[evmix:fpsden]{iwlspsden}}. Fitting function
  \code{\link[evmix:fpsden]{fpsden}} returns a simple list with the
  following elements

\tabular{ll}{
 \code{call}:                \tab \code{optim} call\cr
 \code{x}:                   \tab data vector \code{x}\cr
 \code{xrange}:              \tab range of support of B-splines\cr
 \code{degree}:              \tab degree of B-splines\cr
 \code{nseg}:                \tab number of internal segments\cr
 \code{design.knots}:        \tab knots used in \code{\link[splines:splineDesign]{splineDesign}}\cr
 \code{ord}:                 \tab order of penalty term\cr
 \code{binned}:              \tab histogram results\cr
 \code{breaks}:              \tab histogram breaks\cr
 \code{mids}:                \tab histogram mid-bins\cr
 \code{counts}:              \tab histogram counts\cr
 \code{nbinwidth}:           \tab scaling factor to convert counts to density\cr
 \code{bsplines}:            \tab B-splines matrix used for binned counts\cr
 \code{databsplines}:        \tab B-splines matrix used for data\cr
 \code{counts}:              \tab histogram counts\cr
 \code{lambdaseq}:           \tab \eqn{\lambda} vector for profile likelihood or scalar for fixed \eqn{\lambda}\cr
 \code{cvlambda}:            \tab CV MSE for each \eqn{\lambda}\cr
 \code{mle} and \code{beta}: \tab vector of MLE of coefficients\cr
 \code{nllh}:                \tab negative log-likelihood for original data\cr
 \code{n}:                   \tab total original sample size\cr
 \code{lambda}:              \tab Estimated or fixed \eqn{\lambda}\cr
}
}
\description{
Maximum likelihood estimation for P-splines density estimation. Histogram binning
produces frequency counts, which are modelled by constrained B-splines in a Poisson regression. A penalty
based on differences in the sequence of B-spline coefficients is used to smooth/interpolate the counts.
Iterated weighted least squares (IWLS) for a mixed model representation of the P-splines regression,
conditional on a particular penalty coefficient, is used for estimating the B-spline coefficients.
Leave-one-out cross-validation deviances are available for estimation of the penalty coefficient.
}
\details{
The P-splines density estimator is fitted using maximum likelihood estimation, following
the approach of Eilers and Marx (1996). Histogram binning produces frequency counts, which are
modelled by constrained B-splines in a Poisson regression. A penalty
based on differences in the sequence of B-spline coefficients is used to smooth/interpolate the counts.

The B-splines are defined as in Eilers and Marx (1996), so that those which meet the boundary are simply
shifted and truncated versions of the internal B-splines. No renormalisation is carried out. They are not
"natural" B-splines, which are also commonly in use. Note that natural B-splines can be obtained by suitable
linear combinations of these B-splines. Hence, in practice there is little difference in the fit obtained
from either B-spline definition, even with the penalty constraining the coefficients. If the user desires
they can force the use of natural B-splines, by prior specification of the \code{design.knots}
with appropriate replication of the boundaries, see \code{\link[evmix:psden]{dpsden}}.

Iterated weighted least squares (IWLS) for a mixed model representation of the P-splines regression,
conditional on a particular penalty coefficient, is used for estimating the B-spline coefficients which
is equivalent to maximum likelihood estimation. Leave-one-out cross-validation deviances are available
for estimation of the penalty coefficient.

The parameter vector is the B-spline coefficients \code{beta}, no matter whether the penalty coefficient is
fixed or estimated. The penalty coefficient \code{lambda} is treated separately.

The log-likelihood functions \code{\link[evmix:fpsden]{lpsden}} and \code{\link[evmix:fpsden]{nlpsden}}
evaluate the likelihood for the original dataset, using the fitted P-splines density estimator. The
negative log-likelihood is output as \code{nllh} from the fitting function \code{\link[evmix:fpsden]{fpsden}}.
They do not provide the likelihood for the Poisson regression of the histogram counts, which is usually
evaluated using the deviance. The deviance (via CVMSE for Poisson counts) is also output as \code{cvlambda}
from the fitting function \code{\link[evmix:fpsden]{fpsden}}.

The \code{\link[evmix:fpsden]{iwlspsden}} function performs the IWLS. The
\code{\link[evmix:fpsden]{cvpsden}} function calculates the leave-one-out cross-validation
sum of the squared errors. They are not designed to be used directly by users. No checks of the
inputs are carried out.
}
\note{
The data and B-spline coefficients are both vectors. Infinite and missing sample values are dropped.

No initial values for the coefficients are needed.

It is advised to specify the range of support \code{xrange}, using finite end-points. This is
especially important when the support is bounded. By default \code{xrange} is simply the range of the
input data \code{range(x)}.

Further, it is advised to always set the histogram bin \code{breaks}, especially if the support is bounded.
By default \code{10*ln(n)} equi-spaced bins are defined between \code{xrange}.
}
\section{Acknowledgments}{
 The Poisson regression and leave-one-out cross-validation functions
are based on the code of Eilers and Marx (1996) available from Brian Marx's website
\url{http://www.stat.lsu.edu/faculty/marx}, which is gratefully acknowledged.
}
\examples{
\dontrun{
set.seed(1)
par(mfrow = c(1, 1))

x = rnorm(1000)
xx = seq(-4, 4, 0.01)
y = dnorm(xx)

# Plenty of histogram bins (100)
breaks = seq(-4, 4, length.out=101)

# P-spline fitting with cubic B-splines, 2nd order penalty and 10 internal segments
# CV search for penalty coefficient.
fit = fpsden(x, lambdaseq = 10^seq(-5, 5, 0.25), breaks = breaks,
             xrange = c(-4, 4), nseg = 10, degree = 3, ord = 2)
psdensity = exp(fit$bsplines \%*\% fit$mle)

hist(x, freq = FALSE, breaks = seq(-4, 4, length.out=101), xlim = c(-6, 6))
lines(xx, y, col = "black") # true density

lines(fit$mids, psdensity/fit$nbinwidth, lwd = 2, col = "blue") # P-splines density

# check density against dpsden function
with(fit, lines(xx, dpsden(xx, beta, nbinwidth, design = design.knots),
                lwd = 2, col = "red", lty = 2))

# vertical lines for all knots
with(fit, abline(v = design.knots, col = "red"))

# internal knots
with(fit, abline(v = design.knots[(degree + 2):(length(design.knots) - degree - 1)], col = "blue"))

# boundary knots (support of B-splines)
with(fit, abline(v = design.knots[c(degree + 1, length(design.knots) - degree)], col = "green"))

legend("topright", c("True Density","P-spline density","Using dpsdens function"),
  col=c("black", "blue", "red"), lty = c(1, 1, 2))
legend("topleft", c("Internal Knots", "Boundaries", "Extra Knots"),
  col=c("blue", "green", "red"), lty = 1)
}
}
\author{
Alfadino Akbar and Carl Scarrott \email{carl.scarrott@canterbury.ac.nz}
}
\references{
\url{http://www.math.canterbury.ac.nz/~c.scarrott/evmix}

\url{http://en.wikipedia.org/wiki/Cross-validation_(statistics)}

\url{http://en.wikipedia.org/wiki/B-spline}

\url{http://www.stat.lsu.edu/faculty/marx}

Eilers, P.H.C. and Marx, B.D. (1996). Flexible smoothing with B-splines and penalties.
Statistical Science 11(2), 89-121.
}
\seealso{
\code{\link[evmix:kden]{kden}}.

Other psden fpsden: \code{\link{dpsden}},
  \code{\link{ppsden}}, \code{\link{psden}},
  \code{\link{qpsden}}, \code{\link{rpsden}}
}

