% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tree_distance_kendall-colijn.R
\encoding{UTF-8}
\name{KendallColijn}
\alias{KendallColijn}
\alias{KCVector}
\alias{PathVector}
\alias{SplitVector}
\alias{KCDiameter}
\title{Kendall–Colijn distance}
\usage{
KendallColijn(tree1, tree2 = NULL, Vector = KCVector)

KCVector(tree)

PathVector(tree)

SplitVector(tree)

KCDiameter(tree)
}
\arguments{
\item{tree1, tree2}{Trees of class \code{phylo}, with leaves labelled identically,
or lists of such trees to undergo pairwise comparison.  Where implemented,
\code{tree2 = NULL} will compute distances between each pair of trees in the list
\code{tree1} using a fast algorithm based on Day (1985).}

\item{Vector}{Function converting a tree to a numeric vector.

\code{KCVector}, the default, returns the number of edges between the common
ancestor of each pair of leaves and the root of the tree
\insertCite{@per @Kendall2016}{TreeDist}.

\code{PathVector} returns the number of edges between each pair of leaves
\insertCite{@per @Steel1993}{TreeDist}.

\code{SplitVector} returns the size of the smallest split that contains each
pair of leaves (per \insertCite{SmithSpace;nobrackets}{TreeDist}).}

\item{tree}{A tree of class \code{\link[ape:read.tree]{phylo}}.}
}
\value{
\code{KendallColijn()} returns an array of numerics providing the
distances between each pair of trees in \code{tree1} and \code{tree2},
or between each pair of trees in \code{tree1} if \code{tree2 = NULL}.

\code{KCDiameter()} returns the value of Kendall & Colijn's (2016)
metric distance between two pectinate trees with \emph{n} leaves ordered in
the opposite direction, which I suggest (without any attempt at a proof) may
be a useful proxy for the diameter (i.e. maximum value) of the K–C
metric.
}
\description{
Calculate the Kendall–Colijn tree distance, a measure related to the
path difference.
}
\details{
The Kendall–Colijn distance works by measuring, for each pair of
leaves, the distance between the most recent common ancestor of those leaves
and the root node.
For a given tree, this produces a vector of values recording the
distance-from-the-root of each most recent common ancestor of each pair of
leaves.

Two trees are compared by taking the Euclidean distance between the
respective vectors.  This is calculated by taking the square root of the sum
of the squares of the differences between the vectors.

An analogous distance can be created from any vector representation of a
tree.
The split size vector metric \insertCite{SmithSpace}{TreeDist} is an attempt
to mimic the Kendall–Colijn metric in situations where the position of
the root should not be afforded special significance; and the path distance
\insertCite{Steel1993}{TreeDist} is a familiar alternative whose underlying
vector measures the distance of the last common ancestor of each pair
of leaves from the leaves themselves, i.e. the length of the path from one
leaf to another.

None of these vector-based methods performs as well as other tree distances
in measuring similarities in the relationships implied by a pair of trees
\insertCite{SmithDist}{TreeDist}; in particular, the Kendall–Colijn
metric is strongly influenced by tree balance, and may not be appropriate
for a suite of common applications \insertCite{SmithSpace}{TreeDist}.
}
\section{Functions}{
\itemize{
\item \code{KCVector()}: Creates a vector that characterises a rooted tree,
as described in \insertCite{Kendall2016;textual}{TreeDist}.

\item \code{PathVector()}: Creates a vector reporting the number of edges
between each pair of leaves, per the path metric of
\insertCite{Steel1993;textual}{TreeDist}.

\item \code{SplitVector()}: Creates a vector reporting the size of the
smallest split containing each pair of leaves, per the metric proposed in
\insertCite{SmithSpace;textual}{TreeDist}.

}}
\examples{
KendallColijn(TreeTools::BalancedTree(8), TreeTools::PectinateTree(8))

set.seed(0)
KendallColijn(TreeTools::BalancedTree(8), lapply(rep(8, 3), ape::rtree))
KendallColijn(lapply(rep(8, 4), ape::rtree))

KendallColijn(lapply(rep(8, 4), ape::rtree), Vector = SplitVector)

# Notice that changing tree shape close to the root results in much
# larger differences
tree1 <- ape::read.tree(text = "(a, (b, (c, (d, (e, (f, (g, h)))))));")
tree2 <- ape::read.tree(text = "(a, ((b, c), (d, (e, (f, (g, h))))));")
tree3 <- ape::read.tree(text = "(a, (b, (c, (d, (e, ((f, g), h))))));")
trees <- c(tree1, tree2, tree3)
KendallColijn(trees)
KendallColijn(trees, Vector = SplitVector)
KCDiameter(trees)
KCDiameter(4)
}
\references{
\insertAllCited{}
}
\seealso{
\href{https://CRAN.R-project.org/package=treespace/vignettes/introduction.html}{\code{treespace::treeDist}}
is a more sophisticated, if more cumbersome, implementation that supports
lambda > 0, i.e. use of edge lengths in tree comparison.

Other tree distances: 
\code{\link{JaccardRobinsonFoulds}()},
\code{\link{MASTSize}()},
\code{\link{MatchingSplitDistance}()},
\code{\link{NNIDist}()},
\code{\link{NyeSimilarity}()},
\code{\link{PathDist}()},
\code{\link{Robinson-Foulds}},
\code{\link{SPRDist}()},
\code{\link{TreeDistance}()}
}
\author{
\href{https://orcid.org/0000-0001-5660-1727}{Martin R. Smith}
(\href{mailto:martin.smith@durham.ac.uk}{martin.smith@durham.ac.uk})
}
\concept{tree distances}
