##' Fits a classical twin model for quantitative traits.
##'
##' @title Classic twin model for quantitative traits
##' @return   Returns an object of class \code{twinlm}.
##' @author Klaus K. Holst
##' @seealso \code{\link{bptwin}}, \code{\link{twinsim}}
##' @export
##' @examples
##' ## Simulate data
##' set.seed(1)
##' d <- twinsim(1000,b1=c(1,-1),b2=c(),acde=c(1,1,0,1))
##' ## E(y|z1,z2) = z1 - z2. var(A) = var(C) = var(E) = 1
##' 
##' ## E.g to fit the data to an ACE-model without any confounders we simply write
##' ace <- twinlm(y ~ 1, data=d, DZ="DZ", zyg="zyg", id="id")
##' ace
##' ## An AE-model could be fitted as
##' ae <- twinlm(y ~ 1, data=d, DZ="DZ", zyg="zyg", id="id", type="ae")
##' ## LRT:
##' lava::compare(ae,ace)
##' ## AIC
##' AIC(ae)-AIC(ace)
##' ## To adjust for the covariates we simply alter the formula statement
##' ace2 <- twinlm(y ~ x1+x2, data=d, DZ="DZ", zyg="zyg", id="id", type="ace")
##' ## Summary/GOF
##' summary(ace2)
##' ## An interaction could be analyzed as:
##' ace3 <- twinlm(y ~ x1+x2 + x1:I(x2<0), data=d, DZ="DZ", zyg="zyg", id="id", type="ace")
##' ace3
##' ## Categorical variables are also supported
##' d2 <- transform(d,x2cat=cut(x2,3,labels=c("Low","Med","High")))
##' ace4 <- twinlm(y ~ x1+x2cat, data=d2, DZ="DZ", zyg="zyg", id="id", type="ace")
##' ## plot the model structure
##' \dontrun{
##' plot(ace4)
##' }
##' @keywords models
##' @keywords regression
##' @param formula Formula specifying effects of covariates on the response
##' @param data \code{data.frame} with one observation pr row. In
##'     addition a column with the zygosity (DZ or MZ given as a factor) of
##'     each individual much be
##'     specified as well as a twin id variable giving a unique pair of
##'     numbers/factors to each twin pair
##' @param id The name of the column in the dataset containing the twin-id variable.
##' @param zyg The name of the column in the dataset containing the
##'     zygosity variable
##' @param DZ Character defining the level in the zyg variable
##'     corresponding to the dyzogitic twins. If this argument is missing,
##'     the reference level (i.e. the first level) will be interpreted as
##'     the dyzogitic twins
##' @param group Optional. Variable name defining group for interaction analysis (e.g., gender)
##' @param strata Strata variable name
##' @param weight Weight matrix if needed by the chosen estimator. For use
##'     with Inverse Probability Weights
##' @param type Character defining the type of analysis to be
##'     performed. Should be a subset of "aced" (additive genetic factors, common
##'     environmental factors, unique environmental factors, dominant
##'     genetic factors).
##' @param twinnum The name of the column in the dataset numbering the
##'     twins (1,2). If it does not exist in \code{data} it will
##'     automatically be created.
##' @param binary If \code{TRUE} a liability model is fitted. Note that if the right-hand-side of the formula is a factor, character vector, og logical variable, then the liability model is automatically chosen (wrapper of the \code{bptwin} function).
##' @param keep Vector of variables from \code{data} that are not
##'     specified in \code{formula}, to be added to data.frame of the SEM
##' @param estimator Choice of estimator/model
##' @param control Control argument parsed on to the optimization routine
##' @param constrain Development argument
##' @param messages Control amount of messages shown 
##' @param ... Additional arguments parsed on to lower-level functions
twinlm <- function(formula, data, id, zyg, DZ, group=NULL, strata=NULL, weight=NULL, type=c("ace"), twinnum="twinnum", binary=FALSE,keep=weight,estimator="gaussian",constrain=TRUE,control=list(),messages=1,...) {

  cl <- match.call(expand.dots=TRUE)
  opt <- options(na.action="na.pass")
  mf <- model.frame(formula,data)
  mt <- attr(mf, "terms")
  y <- model.response(mf, "any")
  formula <- update(formula, ~ . + 1)
  yvar <- getoutcome(formula)
  if (missing(zyg)) stop("Zygosity variable not specified")
  if (!(zyg%in%colnames(data))) stop("'zyg' not found in data")
  if (!(id%in%colnames(data))) stop("'id' not found in data")
  if (missing(id)) stop("Twin-pair variable not specified")

  if (binary | is.factor(data[,yvar]) | is.character(data[,yvar]) | is.logical(data[,yvar])) {
    args <- as.list(cl)
    args[[1]] <- NULL
    return(do.call("bptwin",args))
  }
  
  formulaId <- unlist(Specials(formula,"cluster"))
  formulaStrata <- unlist(Specials(formula,"strata"))
  formulaSt <- paste("~.-cluster(",formulaId,")-strata(",paste(formulaStrata,collapse="+"),")")
  formula <- update(formula,formulaSt)
  if (!is.null(formulaId)) {
    id <- formulaId
    cl$id <- id
  }
  if (!is.null(formulaStrata)) strata <- formulaStrata
  cl$formula <- formula
 
  if (!is.null(strata)) {
    dd <- split(data,interaction(data[,strata]))
    nn <- unlist(lapply(dd,nrow))
    dd[which(nn==0)] <- NULL
    if (length(dd)>1) {
      fit <- lapply(seq(length(dd)),function(i) {
        if (messages>0) message("Strata '",names(dd)[i],"'")
        cl$data <- dd[[i]]
        eval(cl)
      })
      res <- list(model=fit)
      res$strata <- names(res$model) <- names(dd)
      class(res) <- c("twinlm.strata","twinlm")
      res$coef <- unlist(lapply(res$model,coef))
      res$vcov <- blockdiag(lapply(res$model,vcov))
      res$N <- length(dd)
      res$idx <- seq(length(coef(res$model[[1]])))
      rownames(res$vcov) <- colnames(res$vcov) <- names(res$coef)
      return(res)
    }
  }
  
  type <- tolower(type)
  ## if ("u" %in% type) type <- c("ue")
  
  varnames <- all.vars(formula)
  latentnames <- c("a1","a2","c1","c2","d1","d2","e1","e2")
  if (any(latentnames%in%varnames))
    stop(paste(paste(latentnames,collapse=",")," reserved for names of latent variables.",sep=""))
  
  mm <- model.matrix(formula,mf)
  options(opt)
  
  covars <- colnames(mm)
  hasIntercept <- FALSE
  if (attr(terms(formula),"intercept")==1) {
      hasIntercept <- TRUE
      covars <- covars[-1]
  }
  if(length(covars)<1) covars <- NULL
  
  zygstat <- data[,zyg]
  if(!is.factor(zygstat)) {
    zygstat <- as.factor(zygstat)
  }
  zyglev <- levels(zygstat)
  if (length(zyglev)>2) stop("More than two zygosity levels found. For opposite sex (OS) analysis use the 'sex' argument")

  ## To wide format:
  num <- NULL; if (twinnum%in%colnames(data)) num <- twinnum
  data <- cbind(data[,c(yvar,keep,num,zyg,id,group)],mm)
  ddd <- fast.reshape(data,id=id,varying=c(yvar,keep,covars,group),keep=zyg,num=num,sep=".",labelnum=TRUE)
    
  if (missing(DZ)) {
    warning("Using first level, `",zyglev[1],"', in status variable as indicator for 'dizygotic'", sep="")
    DZ <- zyglev[1]    
  }
  OS <- NULL
  if (!is.null(OS)) {
    wide3 <- ddd[which(ddd[,zyg]==OS),,drop=FALSE]
    MZ <- setdiff(zyglev,c(DZ,OS))  
  } else {
    wide3 <- NULL  
    MZ <- setdiff(zyglev,DZ)
  }
  wide1 <- ddd[which(ddd[,zyg]==MZ),,drop=FALSE]
  wide2 <- ddd[which(ddd[,zyg]==DZ),,drop=FALSE]
  
  
  ## ###### The SEM
  outcomes <- paste(yvar,".",1:2,sep="")
  model1<- lvm()  
  regression(model1,to=outcomes,from=c("a1","c1","d1"),silent=TRUE) <-
    rep(c("lambda[a]","lambda[c]","lambda[d]"),2)
  regression(model1,to=outcomes,from=c("e1","e2")) <- rep("lambda[e]",2)
  latent(model1) <- c("a1","c1","d1","e1","e2")
  intercept(model1,latent(model1)) <- 0
  if (!is.null(covars))
    for (i in 1:length(covars)) {
      regression(model1, from=paste(covars[i],".1",sep=""), to=outcomes[1],silent=TRUE) <- paste("beta[",i,"]",sep="")
      regression(model1, from=paste(covars[i],".2",sep=""), to=outcomes[2],silent=TRUE) <- paste("beta[",i,"]",sep="")
    }
  covariance(model1,outcomes) <- 0
  covariance(model1, latent(model1))  <- 1
  if (!type%in%c("sat","flex")) {    
    intercept(model1,outcomes) <- "mu"
  }
  if (type%in%c("u","flex","sat")) {
    kill(model1) <- ~e1+e2
    covariance(model1,outcomes) <- "v1"
  }
  
  model2 <- cancel(model1,c(outcomes[2],"a1","d1"))
  regression(model2,to=outcomes[2],from=c("a2","d2"),silent=TRUE) <-
    c("lambda[a]","lambda[d]")  
  covariance(model2,a1~a2)  <- 0.5
  covariance(model2,d1~d2)  <- 0.25
  latent(model2) <- c("a2","d2")
  intercept(model2, c("a2","d2")) <- 0
  covariance(model2, c("a2","d2")) <- 1

  model3 <- model2
  covariance(model3, a1~a2) <- "r1"
  covariance(model3, d1~d2) <- "r2"
  constrain(model3, r1~ra) <- function(x) tanh(x)
  constrain(model3, r2~rd) <- function(x) tanh(x)

  if (type=="flex") {
     intercept(model1,outcomes) <- "mu1"
     intercept(model2,outcomes) <- "mu2"
     intercept(model3,outcomes) <- "mu3"
     covariance(model1,outcomes) <- "var(MZ)"
     covariance(model2,outcomes) <- "var(DZ)"
     covariance(model3,outcomes) <- "var(OS)"
   }
  if (type=="sat") {
     covariance(model1,outcomes) <- c("var(MZ)1","var(MZ)2")
     covariance(model2,outcomes) <- c("var(DZ)1","var(DZ)2")
     covariance(model3,outcomes) <- c("var(OS)1","var(OS)2")
  }
  if (type%in%c("u","flex","sat")) {
    if (constrain) {
      if (type=="sat") {
        model1 <- covariance(model1,outcomes,constrain=TRUE,rname="atanh(rhoMZ)",cname="covMZ",lname="log(var(MZ)).1",l2name="log(var(MZ)).2")
        model2 <- covariance(model2,outcomes,constrain=TRUE,rname="atanh(rhoDZ)",cname="covDZ",lname="log(var(DZ)).1",l2name="log(var(DZ)).2")
        model3 <- covariance(model3,outcomes,constrain=TRUE,rname="atanh(rhoOS)",cname="covOS",lname="log(var(OS)).1",l2name="log(var(OS)).2")
      } else {
        if (type=="flex") {
          model1 <- covariance(model1,outcomes,constrain=TRUE,rname="atanh(rhoMZ)",cname="covMZ",lname="log(var(MZ))")
          model2 <- covariance(model2,outcomes,constrain=TRUE,rname="atanh(rhoDZ)",cname="covDZ",lname="log(var(DZ))")
          model3 <- covariance(model3,outcomes,constrain=TRUE,rname="atanh(rhoOS)",cname="covOS",lname="log(var(OS))")
        }  else {
          model1 <- covariance(model1,outcomes,constrain=TRUE,rname="atanh(rhoMZ)",cname="covMZ",lname="log(var)")
          model2 <- covariance(model2,outcomes,constrain=TRUE,rname="atanh(rhoDZ)",cname="covDZ",lname="log(var)")
          model3 <- covariance(model3,outcomes,constrain=TRUE,rname="atanh(rhoOS)",cname="covOS",lname="log(var)")          
        }        
      }     
    } else {
      covariance(model1,outcomes[1],outcomes[2]) <- "covMZ"
      covariance(model2,outcomes[1],outcomes[2]) <- "covDZ"
      covariance(model3,outcomes[1],outcomes[2]) <- "covOS"
    }
  }
  if (!is.null(covars) & type%in%c("flex","sat")) {
    sta <- ""
    if (type=="sat") sta <- "b"
       for (i in 1:length(covars)) {
         regression(model1, from=paste(covars[i],".1",sep=""), to=outcomes[1],silent=TRUE) <- paste("beta1[",i,"]",sep="")         
         regression(model1, from=paste(covars[i],".2",sep=""), to=outcomes[2],silent=TRUE) <- paste("beta1",sta,"[",i,"]",sep="")
         regression(model2, from=paste(covars[i],".1",sep=""), to=outcomes[1],silent=TRUE) <- paste("beta2[",i,"]",sep="")
         regression(model2, from=paste(covars[i],".2",sep=""), to=outcomes[2],silent=TRUE) <- paste("beta2",sta,"[",i,"]",sep="")
         regression(model3, from=paste(covars[i],".1",sep=""), to=outcomes[1],silent=TRUE) <- paste("beta3[",i,"]",sep="")
         regression(model3, from=paste(covars[i],".2",sep=""), to=outcomes[2],silent=TRUE) <- paste("beta3",sta,"[",i,"]",sep="")
       }
  }
  
  full <- list(MZ=model1,DZ=model2,OS=model3)
  isA <- length(grep("a",type))>0 & type!="sat"
  isC <- length(grep("c",type))>0
  isD <- length(grep("d",type))>0
  isE <- length(grep("e",type))>0 | type=="sat" | type=="u"
  if (!isA) {
    kill(model1) <- c("a1")
    kill(model2) <- c("a1","a2")
    kill(model3) <- c("a1","a2","ra")
    constrain(model3,r1~1) <- NULL
  }
  if (!isD) {
    kill(model1) <- c("d1")
    kill(model2) <- c("d1","d2")
    kill(model3) <- c("d1","d2","rd")
    constrain(model3,r2~1) <- NULL
  }
  if (!isC) {
    kill(model1) <- c("c1")
    kill(model2) <- c("c1")
    kill(model3) <- c("c1")
  }
  if (!isE) {
    kill(model1) <- c("e1","e2")
    kill(model2) <- c("e1","e2")
    kill(model3) <- c("e1","e2")
  }

  ## Full rank covariate/design matrix?
  for (i in covars) {
    myvars <- paste(i,c(1,2),sep=".")
    dif <- wide1[,myvars[1]]-wide1[,myvars[2]]   
    mykeep <- myvars
    if (all(na.omit(dif)==00)) {
      mykeep <- mykeep[-2]
    }   
    trash <- setdiff(myvars,mykeep)
    if (length(mykeep)==1) {
      regression(model1, to=outcomes[2], from=mykeep) <- lava:::regfix(model1)$label[trash,outcomes[2]]
      kill(model1) <- trash
    }

    dif <- wide2[,myvars[1]]-wide2[,myvars[2]]   
    mykeep <- myvars
    if (all(na.omit(dif)==00)) {
      mykeep <- mykeep[-2]
    }  
    trash <- setdiff(myvars,mykeep)
    if (length(mykeep)==1) {
      regression(model2, to=outcomes[2], from=mykeep) <- lava:::regfix(model2)$label[trash,outcomes[2]]
      kill(model2) <- trash
    }

    if (!missing(OS)) {
      dif <- wide3[,myvars[1]]-wide3[,myvars[2]]   
      mykeep <- myvars
      if (all(na.omit(dif)==00)) {
        mykeep <- mykeep[-2]
      }  
      trash <- setdiff(myvars,mykeep)
      if (length(mykeep)==1) {
        regression(model3, to=outcomes[2], from=mykeep) <- lava:::regfix(model3)$label[trash,outcomes[2]]
        kill(model3) <- trash
      }      
    }
  }
  if (!is.null(weight)) {
    weight <- paste(weight,1:2,sep=".")
    estimator <- "weighted"
  }

  newkeep <- unlist(sapply(keep, function(x) paste(x, 1:2, 
                                                   sep = ".")))
  mm <- list(MZ = model1, DZ = model2)
  dd <- list(wide1, wide2)
  if (!is.null(OS)) {
    mm <- c(mm, OS = list(model3))
    dd <- c(dd, list(wide3))
  }
  names(dd) <- names(mm)

  if (is.null(estimator)) return(multigroup(mm, dd, missing=TRUE,fix=FALSE,keep=newkeep,type=2))
  optim <- list(method="nlminb2",refit=FALSE,gamma=1,start=rep(0.1,length(coef(mm[[1]]))*length(mm)))


  if (is.Surv(data[,yvar])) {
      l <- survreg(formula,mf,dist="gaussian")
      beta <- coef(l)
      sigma <- l$scale
  } else {
      l <- lm(formula,mf)
      beta <- coef(l)
      sigma <- summary(l)$sigma
  }
  start <- rep(sigma/sqrt(nchar(type)),nchar(type))
  if (hasIntercept) {
      start <- c(beta[1],start)
      start <- c(start,beta[-1])
  } else start <- c(start,beta)
  if (type=="sat") {
      start <- c(rep(log(sigma^2),2),0.5)
      if (hasIntercept) {
          start <- c(rep(beta[1],2),start)
          beta <- beta[-1]
      }
      start <- c(rep(start,2),rep(beta,4))
  }
  if (type=="flex") {
      start <- c(log(sigma^2),0.5)
      if (hasIntercept) {
          start <- c(beta[1],start)
          beta <- beta[-1]
      }
      start <- c(rep(start,2),rep(beta,2))
  }
  if (type=="u") {
      start <- c(log(sigma^2),0.5,0.5)
      if (hasIntercept) {
          start <- c(beta[1],start)
          beta <- beta[-1]
      }
      start <- c(start,beta)
  }
  names(start) <- NULL
  optim$start <- start
  if (length(control)>0) {
    optim[names(control)] <- control
  }

  if (is.Surv(data[,yvar])) {
    require("lava.tobit")
    if (is.null(optim$method))
       optim$method <- "nlminb1"
    e <- estimate(mm,dd,control=optim,...)
  } else {
    e <- estimate(mm,dd,weight=weight,estimator=estimator,fix=FALSE,control=optim,...)
  }

  if (!is.null(optim$refit) && optim$refit) {
    optim$method <- "NR"
    optim$start <- pars(e)
    if (is.Surv(data[,yvar])) {
      e <- estimate(mm,dd,estimator=estimator,fix=FALSE,control=optim,...)      
    } else {
      e <- estimate(mm,dd,weight=weight,estimator=estimator,fix=FALSE,control=optim,...)
    }
  }

  res <- list(coefficients=e$opt$estimate, vcov=Inverse(information(e)), estimate=e, model=mm, full=full, call=cl, data=data, zyg=zyg, id=id, twinnum=twinnum, type=type, model.mz=model1, model.dz=model2, model.dzos=model3, data.mz=wide1, data.dz=wide2, data.os=wide3, OS=!is.null(OS), constrain=constrain, outcomes=outcomes)
  class(res) <- "twinlm"
  return(res)
}
