# file modreg/R/loess.R
# copyright (C) 1998 B. D. Ripley
#
loess <-
function(formula, data=NULL, weights, subset, na.action, model = FALSE,
         span = 0.75, enp.target, degree = 2, parametric = FALSE, 
         drop.square = FALSE, normalize = TRUE,
         family = c("gaussian", "symmetric"),
         method = c("loess", "model.frame"),
         control = loess.control(...), ...)
{
  mt <- terms(formula, data = data)
  mf <- match.call(expand.dots=FALSE)
  mf$model <- mf$span <- mf$enp.target <- mf$degree <-
    mf$parametric <- mf$drop.square <- mf$normalize <- mf$family <-
      mf$control <- mf$... <- NULL
  mf[[1]] <- as.name("model.frame")
  mf <- eval(mf, sys.frame(sys.parent()))
  if (match.arg(method) == "model.frame") return(mf)
  y <- model.response(mf, "numeric")
  w <- model.weights(mf)
  if(is.null(w)) w <- rep(1, length(y))
  nmx <- as.character(attr(mt, "variables"))[-(1:2)]
  x <- mf[, nmx, drop=F]
  if(any(sapply(x, is.factor))) stop("predictors must all be numeric")
  x <- as.matrix(x)
  D <- ncol(x)
  nmx <- colnames(x)
  names(nmx) <- nmx
  drop.square <- match(nmx, nmx[drop.square], 0) > 0
  parametric <- match(nmx, nmx[parametric], 0) > 0
  if(!match(degree, 0:2, 0)) stop("degree must be 0, 1 or 2")
  iterations <- if(family=="gaussian") 1 else control$iterations
  if(!missing(enp.target))
    if(!missing(span))
      warning("both span and enp.target specified: span will be used")
    else { # White book p.321
      tau <- switch(degree+1, 1, D+1, (D+1)*(D+2)/2) - sum(drop.square)
      span <- 1.2 * tau/enp.target
    }
  fit <- simpleLoess(y, x, w, span, degree, parametric, drop.square,
                     normalize, control$statistics, control$surface,
                     control$cell, iterations, control$trace.hat)
  fit$call <- match.call()
  fit$terms <- mt
  fit$xnames <- nmx
  fit$x <- x
  fit$y <- y
  fit$weights <- w
  if(model) fit$model <- mf
  fit
}

loess.control <-
  function(surface = c("interpolate", "direct"),
           statistics = c("approximate", "exact"),
           trace.hat = c("exact", "approximate"),
           cell = 0.2, iterations = 4, ...)
{
  list(surface=match.arg(surface),
       statistics=match.arg(statistics), 
       trace.hat=match.arg(trace.hat),
       cell=cell, iterations=iterations)
}


simpleLoess <-
  function(y, x, weights, span = 0.75, degree = 2, parametric = FALSE,
           drop.square = FALSE, normalize = TRUE,
           statistics = "approximate", surface = "interpolate",
           cell = 0.2, iterations = 1, trace.hat = "exact")
{
## loess_ translated to R.
  
  D <- NCOL(x);
  N <- NROW(x);
  x <- as.matrix(x)
  max.kd <-  max(N, 200)
  robust <- rep(1, N)
  divisor<- rep(1, D)
  if(normalize && D > 1) {
    trim <- ceiling(0.1 * N)
    divisor <-
      sqrt(apply(apply(x, 2, sort)[seq(trim+1, N-trim), , drop = FALSE],
                 2, var))
    x <- x/rep(divisor, rep(N, D))
  }
  sum.drop.sqr <- sum(drop.square)
  sum.parametric <- sum(parametric)
  nonparametric <- sum(!parametric)
  order.parametric <- order(parametric)
  x <- x[, order.parametric]
  order.drop.sqr <- (2 - drop.square)[order.parametric]
  if(degree==1 && sum.drop.sqr)
    stop("Specified the square of a factor predictor to be dropped when degree = 1")
  if(D == 1 && sum.drop.sqr)
    stop("Specified the square of a predictor to be dropped with only one numeric predictor")
  if(sum.parametric == D) stop("Specified parametric for all predictors")

  for(j in 1:iterations) {
    robust <- weights * robust
    if(j > 1) statistics <- "none"
    if(surface == "interpolate" && statistics == "approximate")
      statistics <- if(trace.hat == "approximate") "2.approx"
      else if(trace.hat == "exact") "1.approx"
    surf.stat <- paste(surface, statistics, sep="/")
    z <- .C("loess_raw",
            as.double(y),
            as.double(x),
            as.double(weights),
            as.double(robust),
            as.integer(D),
            as.integer(N),
            as.double(span),
            as.integer(degree),
            as.integer(nonparametric),
            as.integer(order.drop.sqr),
            as.integer(sum.drop.sqr),
            as.double(span*cell),
            as.character(surf.stat),
            fitted.values = double(N),
            parameter = integer(7),
            a = integer(max.kd),
            xi = double(max.kd),
            vert = double(2*D),
            vval = double((D+1)*max.kd),
            diagonal = double(N),
            trL = double(1),
            delta1 = double(1),
            delta2 = double(1),
            as.integer(surf.stat == "interpolate/exact"))
    if(j==1) {
      trace.hat.out <- z$trL
      one.delta <- z$delta1
      two.delta <- z$delta2
    }
    fitted.residuals <- y - z$fitted.values
    if(j < iterations)
      robust <- .Fortran("lowesw",
                         as.double(fitted.residuals),
                         as.integer(N),
                         robust = double(N),
                         double(N))$robust
  }
  if(surface == "interpolate")
  {
    pars <- z$parameter
    names(pars) <- c("d", "n", "vc", "nc", "nv", "liv", "lv")
    enough <- (D + 1) * pars["nv"]
    fit.kd <- list(parameter=pars, a=z$a[1:pars[4]], xi=z$xi[1:pars[4]],
                   vert=z$vert, vval=z$vval[1:enough])
  }
  if(iterations > 1) {
      pseudovalues <- .Fortran("lowesp",
                               as.integer(N),
                               as.double(y),
                               as.double(z$fitted.values),
                               as.double(weights),
                               as.double(robust),
                               double(N),
                               pseudovalues = double(N))$pseudovalues
      z <- .C("loess_raw",
              as.double(pseudovalues),
              as.double(x),
              as.double(weights),
              as.double(weights),
              as.integer(D),
              as.integer(N),
              as.double(span),
              as.integer(degree),
              as.integer(nonparametric),
              as.integer(order.drop.sqr),
              as.integer(sum.drop.sqr),
              as.integer(span*cell),
              as.character(surf.stat),
              temp = double(N),
              parameter = integer(7),
              a = integer(max.kd),
              xi = double(max.kd),
              vert = double(2*D),
              vval = double((D+1)*max.kd),
              diagonal = double(N),
              trL = double(1),
              delta1 = double(1),
              delta2 = double(1),
              as.integer(0))
      pseudo.resid <- pseudovalues - z$temp
  }
  sum.squares <- if(iterations <= 1) sum(weights * fitted.residuals^2)
    else sum(weights * pseudo.resid^2)
  enp <- one.delta + 2*trace.hat.out - N
  s <- sqrt(sum.squares/one.delta)
  pars <- list(robust=robust, span=span, degree=degree, normalize=normalize,
               parametric=parametric, drop.square=drop.square,
               surface=surface, cell=cell, family=
               if(iterations <= 1) "gaussian" else "symmetric",
               iterations=iterations)
  fit <- list(n=N, fitted=z$fitted.values, residuals=fitted.residuals,
       enp=enp, s=s, one.delta=one.delta, two.delta=two.delta,
       trace.hat=trace.hat.out, divisor=divisor)
  fit$pars <- pars
  if(surface == "interpolate") fit$kd <- fit.kd
  class(fit) <- "loess"
  fit
}

predict.loess <-
  function(object, newdata = NULL, se = FALSE)
{
  if(!inherits(object, "loess"))
    stop("First argument must be a loess object")
  if(is.null(newdata) & (se == FALSE)) return(fitted(object))
  
  if(is.null(newdata)) newx <- .Alias(object$x)
  else {
    vars <- as.character(attr(delete.response(terms(object)), "variables"))[-1]
    if(length(vars) > 1 || NCOL(newdata) > 1) {
      if(any(!match(vars, colnames(newdata), F)))
        stop("newdata does not contain the variables needed")
      newx <- as.matrix(newdata[, vars, drop=F])
    } else newx <- as.matrix(newdata)
    }
  res <- predLoess(object$y, object$x, newx, object$s, object$weights,
                   object$pars$robust, object$pars$span, object$pars$degree, 
                   object$pars$normalize, object$pars$parametric,
                   object$pars$drop.square, object$pars$surface,
                   object$pars$cell, object$pars$family, 
                   object$kd, object$divisor, se=se)
  if(se) {
    res$df <- object$one.delta^2/object$two.delta
  }
  res
}

predLoess <-
  function(y, x, newx, s, weights, robust, span, degree, 
        normalize, parametric, drop.square, surface, cell, family, 
        kd, divisor, se=F)
{
## translation of pred_
  D <- NCOL(x); N <- NROW(x); M <- NROW(newx)
  x <- as.matrix(x); newx <- as.matrix(newx)
  newx <- newx/rep(divisor, rep(M, D))
  x <- x/rep(divisor, rep(N, D))
  sum.drop.sqr <- sum(drop.square)
  sum.parametric <- sum(parametric)
  nonparametric <- sum(!parametric)
  order.parametric <- order(parametric)
  x <- x[, order.parametric, drop=FALSE]
  x.evaluate <- newx[, order.parametric, drop=FALSE]
  order.drop.sqr <- (2 - drop.square)[order.parametric]
  if(surface == "direct") {
    if(se) {
      z <- .C("loess_dfitse",
              as.double(y),
              as.double(x),
              as.double(x.evaluate),
              as.double(weights),
              as.double(robust),
              as.integer(family =="gaussian"),
              as.double(span),
              as.integer(degree),
              as.integer(nonparametric),
              as.integer(order.drop.sqr),
              as.integer(sum.drop.sqr),
              as.integer(D),
              as.integer(N),
              as.integer(M),
              fit = double(M),
              L = double(N*M))[c("fit", "L")]
      fit <- z$fit
      se.fit <- (matrix(z$L^2, M, N)/rep(weights, rep(M, N))) %*% rep(1, N)
      se.fit <- drop(s * sqrt(se.fit))
    } else {
      fit <- .C("loess_dfit",
              as.double(y),
              as.double(x),
              as.double(x.evaluate),
              as.double(weights),
              as.double(span),
              as.integer(degree),
              as.integer(nonparametric),
              as.integer(order.drop.sqr),
              as.integer(sum.drop.sqr),
              as.integer(D),
              as.integer(N),
              as.integer(M),
              fit = double(M))$fit
    }
  } else {                              # interpolate
    ## need to eliminate points outside original range - not in pred_
    inside <- matrix(F, M, ncol = D)
    ranges <- apply(x, 2, range)
    inside <- (x.evaluate <= rep(ranges[2,], rep(M, D))) &
                (x.evaluate >= rep(ranges[1,], rep(M, D)))
    inside <- inside %*% rep(1, D) == D
    M1 <- sum(inside)
    fit <- rep(NA, M)
    if(any(inside))
      fit[inside] <- .C("loess_ifit",
                        as.integer(kd$parameter),
                        as.integer(kd$a), as.double(kd$xi),
                        as.double(kd$vert), as.double(kd$vval),
                        as.integer(M1),
                        as.double(x.evaluate[inside, ]),
                        fit = double(M1))$fit
    if(se){
      se.fit <- rep(NA, M)
      if(any(inside)) {
        L <- .C("loess_ise",
                as.double(y),
                as.double(x),
                as.double(x.evaluate[inside, ]),
                as.double(weights),
                as.double(span),
                as.integer(degree),
                as.integer(nonparametric),
                as.integer(order.drop.sqr),
                as.integer(sum.drop.sqr),
                as.double(span*cell),
                as.integer(D),
                as.integer(N),
                as.integer(M1),
                double(M1),
                L = double(N*M1)
                )$L
        tmp <- (matrix(L^2, M1, N)/rep(weights, rep(M1, N))) %*% rep(1, N)
        se.fit[inside] <- drop(s * sqrt(tmp))
      }
    }
  }
  if(se) list(fit = fit, se.fit = drop(se.fit), residual.scale = s) else fit
}

pointwise <- function(results, coverage)
{
  fit <- results$fit
  lim <- qt(1 - (1 - coverage)/2, results$df) * results$se.fit
  list(fit = fit, lower = fit - lim, upper = fit + lim)
}

print.loess <- function(x, digits=max(3, .Options$digits-3), ...)
{
  if(!is.null(cl <- x$call)) {
    cat("Call:\n")
    dput(cl)
  }
  cat("\nNumber of Observations:", x$n, "\n")
  cat("Equivalent Number of Parameters:", format(round(x$enp, 2)), "\n")
  if(x$pars$family == "gaussian")
    cat("Residual Standard Error:", format(signif(x$s, digits)), "\n")
  else cat("Residual Scale Estimate:", format(signif(x$s, digits)), "\n")
}

summary.loess <- function(object, ...)
{
  class(object) <- "summary.loess"
  object
}

print.summary.loess <- function(x, digits=max(3, .Options$digits-3), ...)
{
  if(!is.null(cl <- x$call)) {
    cat("Call:\n")
    dput(cl)
  }
  cat("\nNumber of Observations:", x$n, "\n")
  cat("Equivalent Number of Parameters:", format(round(x$enp, 2)), "\n")
  if(x$pars$family == "gaussian")
    cat("Residual Standard Error:", format(signif(x$s, digits)), "\n")
  else cat("Residual Scale Estimate:", format(signif(x$s, digits)), "\n")
  cat("Trace of smoother matrix:", format(round(x$trace.hat, 2)), "\n")
  cat("\nControl settings:\n")
  cat("  normalize: ", x$pars$normalize, "\n")
  cat("  span     : ", format(x$pars$span), "\n")
  cat("  degree   : ", x$pars$degree, "\n")
  cat("  family   : ", x$pars$family)
  if(x$pars$family != "gaussian")
    cat("       iterations =", x$pars$iterations, "\n") else cat("\n")
  cat("  surface  : ", x$pars$surface)
  if(x$pars$surface == "interpolate")
    cat("     cell =", format(x$pars$cell), "\n") else cat("\n")
}

scatter.smooth <-
function(x, y, span = 2/3, degree = 1,
         family = c("symmetric", "gaussian"), 
         xlab = deparse(substitute(x)), ylab = deparse(substitute(y)),
         ylim = range(y, prediction$y), evaluation = 50, ...)
{
  if(inherits(x, "formula")) {
    if(length(x) < 3) stop("need response in formula")
    thiscall <- match.call()
    thiscall$x <- x[[3]]
    thiscall$y <- x[[2]]
    return(invisible(eval(thiscall, sys.parent())))
  }
  prediction <- loess.smooth(x, y, span, degree, family, evaluation)
  plot(x, y, ylim = ylim, xlab = xlab, ylab = ylab, ...)
  lines(prediction)
  invisible()
}

loess.smooth <-
  function(x, y, span = 2/3, degree = 1, family = c("symmetric", "gaussian"), 
           evaluation = 50, ...)
{
  notna <- x[!(is.na(x) | is.na(y))]
  new.x <- seq(min(notna), max(notna), length = evaluation)
  
  control <- loess.control(...)
#  x <- matrix(x, ncol = 1)
#  n <- length(y)
  robust <- w <- rep(1, length(y))
  family <- match.arg(family)
  iterations <- if(family == "gaussian") 1 else control$iterations
  fit <- simpleLoess(y, x, w, span, degree, FALSE, FALSE,
                     normalize=F, "none", "interpolate",
                     control$cell, iterations, control$trace.hat)
  kd <- fit$kd
  z <- .C("loess_ifit",
            as.integer(kd$parameter),
            as.integer(kd$a), as.double(kd$xi),
            as.double(kd$vert), as.double(kd$vval),
            as.integer(evaluation),
            as.double(new.x),
            fit = double(evaluation))$fit
  list(x = new.x, y = z)
}

# panel.smooth <-
#   function(x, y, span = 2/3, degree = 1, family = c("symmetric", "gaussian"), 
#         zero.line = F, evaluation = 50, ...)
# {
#   points(x, y, ...)
#   lines(loess.smooth(x, y, span, degree, family, evaluation), ...)
#   if(zero.line) abline(h = 0, ...)
# }


anova.loess <-
  function(object, ...)
{
  objects <- list(object, ...)
  responses <- as.character(lapply(objects,
                                   function(x) as.character(x$terms[[2]])))
  sameresp <- responses == responses[1]
  ## calculate the number of models
  if (!all(sameresp)) {
    objects <- objects[sameresp]
    warning(paste("Models with response", deparse(responses[!sameresp]), 
                  "removed because response differs from", "model 1"))
  }
  nmodels <- length(objects)
  if(nmodels <= 1) stop("no models to compare")
  models <- as.character(lapply(objects, function(x) x$call))
  descr <- paste("Model ", format(1:nmodels), ": ", models, 
                 sep = "", collapse = "\n")
  ## extract statistics
  delta1 <- sapply(objects, function(x) x$one.delta)
  delta2 <- sapply(objects, function(x) x$two.delta)
  s <- sapply(objects, function(x) x$s)
  enp <- sapply(objects, function(x) x$enp)
  rss <- s^2*delta1
  max.enp <- order(enp)[nmodels]
  d1diff <- abs(diff(delta1))
  dfnum <- c(d1diff^2/abs(diff(delta2)))
  dfden <- (delta1^2/delta2)[max.enp]
  Fvalue <- c(NA, (abs(diff(rss))/d1diff)/s[max.enp]^2)
  pr <- 1 - pf(Fvalue, dfnum, dfden)
  ans <- data.frame(ENP = round(enp,2), RSS = rss, "F-value" = Fvalue,
                    "Pr(>F)" = pr, check.names = FALSE)
  attr(ans, "heading") <-
    paste(descr, "\n\n", "Analysis of Variance:   denominator df ",
          format(round(dfden,2)), "\n", sep = "")
  class(ans) <- c("anova", "data.frame")
  ans
}
