% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/Treee.R
\name{Treee}
\alias{Treee}
\title{Classification Trees with Uncorrelated Linear Discriminant Analysis Terminal
Nodes}
\usage{
Treee(
  datX,
  response,
  ldaType = c("forward", "all"),
  nodeModel = c("ULDA", "mode"),
  pruneMethod = c("pre", "post"),
  numberOfPruning = 10L,
  maxTreeLevel = 20L,
  minNodeSize = NULL,
  pThreshold = NULL,
  prior = NULL,
  misClassCost = NULL,
  missingMethod = c("medianFlag", "newLevel"),
  kSample = -1,
  verbose = TRUE
)
}
\arguments{
\item{datX}{A data frame of predictor variables.}

\item{response}{A vector of response values corresponding to \code{datX}.}

\item{ldaType}{A character string specifying the type of LDA to use. Options
are \code{"forward"} for forward ULDA or \code{"all"} for full ULDA. Default is
\code{"forward"}.}

\item{nodeModel}{A character string specifying the type of model used in each
node. Options are \code{"ULDA"} for Uncorrelated LDA, or \code{"mode"} for predicting
based on the most frequent class. Default is \code{"ULDA"}.}

\item{pruneMethod}{A character string specifying the pruning method. \code{"pre"}
performs pre-pruning based on p-value thresholds, and \code{"post"} performs
cross-validation-based post-pruning. Default is \code{"pre"}.}

\item{numberOfPruning}{An integer specifying the number of folds for
cross-validation during post-pruning. Default is \code{10}.}

\item{maxTreeLevel}{An integer controlling the maximum depth of the tree.
Increasing this value allows for deeper trees with more nodes. Default is
\code{20}.}

\item{minNodeSize}{An integer controlling the minimum number of samples
required in a node. Setting a higher value may lead to earlier stopping and
smaller trees. If not specified, it defaults to one plus the number of
response classes.}

\item{pThreshold}{A numeric value used as a threshold for pre-pruning based
on p-values. Lower values result in more conservative trees. If not
specified, defaults to \code{0.01} for pre-pruning and \code{0.6} for post-pruning.}

\item{prior}{A numeric vector of prior probabilities for each class. If
\code{NULL}, the prior is automatically calculated from the data.}

\item{misClassCost}{A square matrix \eqn{C}, where each element \eqn{C_{ij}}
represents the cost of classifying an observation into class \eqn{i} given
that it truly belongs to class \eqn{j}. If \code{NULL}, a default matrix with
equal misclassification costs for all class pairs is used. Default is
\code{NULL}.}

\item{missingMethod}{A character string specifying how missing values should
be handled. Options include \code{'mean'}, \code{'median'}, \code{'meanFlag'},
\code{'medianFlag'} for numerical variables, and \code{'mode'}, \code{'modeFlag'},
\code{'newLevel'} for factor variables. Options ending in \code{'Flag'} also add a
missing flag, while \code{'newLevel'} replaces missing values with a
new factor level. Default is \code{c("medianFlag", "newLevel")}.}

\item{kSample}{An integer specifying the number of samples to use for
downsampling during tree construction. Set to \code{-1} to disable downsampling.}

\item{verbose}{A logical value. If \code{TRUE}, progress messages and detailed
output are printed during tree construction and pruning. Default is
\code{TRUE}.}
}
\value{
An object of class \code{Treee} containing the fitted tree, which is a
list of nodes, each an object of class \code{TreeeNode}. Each \code{TreeeNode}
contains:
\itemize{
\item \code{currentIndex}: The node index in the tree.
\item \code{currentLevel}: The depth of the current node in the tree.
\item \code{idxRow}, \code{idxCol}: Row and column indices indicating which part of the original data was used for this node.
\item \code{currentLoss}: The training error for this node.
\item \code{accuracy}: The training accuracy for this node.
\item \code{stopInfo}: Information on why the node stopped growing.
\item \code{proportions}: The observed frequency of each class in this node.
\item \code{prior}: The (adjusted) class prior probabilities used for ULDA or mode prediction.
\item \code{misClassCost}: The misclassification cost matrix used in this node.
\item \code{parent}: The index of the parent node.
\item \code{children}: A vector of indices of this node's direct children.
\item \code{splitFun}: The splitting function used for this node.
\item \code{nodeModel}: Indicates the model fitted at the node (\code{'ULDA'} or \code{'mode'}).
\item \code{nodePredict}: The fitted model at the node, either a ULDA object or the plurality class.
\item \code{alpha}: The p-value from a two-sample t-test used to evaluate the strength of the split.
\item \code{childrenTerminal}: A vector of indices representing the terminal nodes that are descendants of this node.
\item \code{childrenTerminalLoss}: The total training error accumulated from all nodes listed in \code{childrenTerminal}.
}
}
\description{
This function fits a classification tree where each node has an Uncorrelated
Linear Discriminant Analysis (ULDA) model. It can also handle missing values
and perform downsampling. The resulting tree can be pruned either through
pre-pruning or post-pruning methods.
}
\examples{
fit <- Treee(datX = iris[, -5], response = iris[, 5], verbose = FALSE)
# Use cross-validation to prune the tree
fitCV <- Treee(datX = iris[, -5], response = iris[, 5], pruneMethod = "post", verbose = FALSE)
head(predict(fit, iris)) # prediction
plot(fit) # plot the overall tree
plot(fit, datX = iris[, -5], response = iris[, 5], node = 1) # plot a certain node
}
\references{
Wang, S. (2024). FoLDTree: A ULDA-Based Decision Tree Framework
for Efficient Oblique Splits and Feature Selection. \emph{arXiv preprint
arXiv:2410.23147}. Available at \url{https://arxiv.org/abs/2410.23147}.

Wang, S. (2024). A New Forward Discriminant Analysis Framework Based On
Pillai's Trace and ULDA. \emph{arXiv preprint arXiv:2409.03136}. Available
at \url{https://arxiv.org/abs/2409.03136}.
}
