% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fdakmeans.R
\name{fdakmeans}
\alias{fdakmeans}
\title{Performs k-means clustering for functional data with amplitude and phase
separation}
\usage{
fdakmeans(
  x,
  y = NULL,
  n_clusters = 1L,
  seeds = NULL,
  seeding_strategy = c("kmeans++", "exhaustive-kmeans++", "exhaustive", "hclust"),
  warping_class = c("affine", "dilation", "none", "shift", "srsf"),
  centroid_type = "mean",
  metric = c("l2", "pearson"),
  cluster_on_phase = FALSE,
  use_verbose = TRUE,
  warping_options = c(0.15, 0.15),
  maximum_number_of_iterations = 100L,
  number_of_threads = 1L,
  parallel_method = 0L,
  distance_relative_tolerance = 0.001,
  use_fence = FALSE,
  check_total_dissimilarity = TRUE,
  compute_overall_center = FALSE,
  add_silhouettes = TRUE
)
}
\arguments{
\item{x}{A numeric vector of length \eqn{M} or a numeric matrix of shape
\eqn{N \times M} or an object of class \code{\link[funData:funData-class]{funData::funData}}. If a numeric
vector or matrix, it specifies the grid(s) of size \eqn{M} on which each of
the \eqn{N} curves has been observed. If an object of class
\code{\link[funData:funData-class]{funData::funData}}, it contains the whole functional data set and the \code{y}
argument is not used.}

\item{y}{Either a numeric matrix of shape \eqn{N \times M} or a numeric array
of shape \eqn{N \times L \times M} or an object of class \code{\link[fda:fd]{fda::fd}}. If a
numeric matrix or array, it specifies the \eqn{N}-sample of
\eqn{L}-dimensional curves observed on grids of size \eqn{M}. If an object
of class \code{\link[fda:fd]{fda::fd}}, it contains all the necessary information about the
functional data set to be able to evaluate it on user-defined grids.}

\item{n_clusters}{An integer value specifying the number of clusters.
Defaults to \code{1L}.}

\item{seeds}{An integer value or vector specifying the indices of the initial
centroids. If an integer vector, it is interpreted as the indices of the
initial centroids and should therefore be of length \code{n_clusters}. If an
integer value, it is interpreted as the index of the first initial centroid
and subsequent centroids are chosen according to the k-means++ strategy. It
can be \code{NULL} in which case the argument \code{seeding_strategy} is used to
automatically provide suitable indices. Defaults to \code{NULL}.}

\item{seeding_strategy}{A character string specifying the strategy for
choosing the initial centroids in case the argument \code{seeds} is set to
\code{NULL}. Choices are
\href{https://en.wikipedia.org/wiki/K-means\%2B\%2B}{\code{"kmeans++"}},
\code{"exhaustive-kmeans++"} which performs an exhaustive search over the choice
of the first centroid, \code{"exhaustive"} which tries all combinations of
initial centroids or \code{"hclust"} which first performs hierarchical
clustering using Ward's linkage criterion to identify initial centroids.
Defaults to \code{"kmeans++"}, which is the fastest strategy.}

\item{warping_class}{A string specifying the warping class. Choices are
\code{"affine"}, \code{"dilation"}, \code{"none"}, \code{"shift"} or \code{"srsf"}. Defaults to
\code{"affine"}. The SRSF class is the only class which is boundary-preserving.}

\item{centroid_type}{A string specifying the type of centroid to compute.
Choices are \code{"mean"}, \code{"median"}, \code{"medoid"}, \code{"lowess"} or \code{"poly"}.
Defaults to \code{"mean"}. If LOWESS approximation is chosen, the user can
append an integer between 0 and 100 as in \code{"lowess20"}. This number will be
used as the smoother span. This gives the proportion of points in the plot
which influence the smooth at each value. Larger values give more
smoothness. The default value is 10\%. If polynomial approximation is
chosen, the user can append a positive integer as in \code{"poly3"}. This
number will be used as the degree of the polynomial model. The default
value is \code{4L}.}

\item{metric}{A string specifying the metric used to compare curves. Choices
are \code{"l2"} or \code{"pearson"}. Defaults to \code{"l2"}. Used only when
\code{warping_class != "srsf"}. For the boundary-preserving warping class, the
L2 distance between the SRSFs of the original curves is used.}

\item{cluster_on_phase}{A boolean specifying whether clustering should be
based on phase variation or amplitude variation. Defaults to \code{FALSE} which
implies amplitude variation.}

\item{use_verbose}{A boolean specifying whether the algorithm should output
details of the steps to the console. Defaults to \code{TRUE}.}

\item{warping_options}{A numeric vector supplied as a helper to the chosen
\code{warping_class} to decide on warping parameter bounds. This is used only
when \code{warping_class != "srsf"}.}

\item{maximum_number_of_iterations}{An integer specifying the maximum number
of iterations before the algorithm stops if no other convergence criterion
was met. Defaults to \code{100L}.}

\item{number_of_threads}{An integer value specifying the number of threads
used for parallelization. Defaults to \code{1L}. This is used only when
\code{warping_class != "srsf"}.}

\item{parallel_method}{An integer value specifying the type of desired
parallelization for template computation. If \code{0L}, templates are computed
in parallel. If \code{1L}, parallelization occurs within a single template
computation (only for the medoid method as of now). Defaults to \code{0L}. This
is used only when \code{warping_class != "srsf"}.}

\item{distance_relative_tolerance}{A numeric value specifying a relative
tolerance on the distance update between two iterations. If all
observations have not sufficiently improved in that sense, the algorithm
stops. Defaults to \code{1e-3}. This is used only when \code{warping_class != "srsf"}.}

\item{use_fence}{A boolean specifying whether the fence algorithm should be
used to robustify the algorithm against outliers. Defaults to \code{FALSE}. This
is used only when \code{warping_class != "srsf"}.}

\item{check_total_dissimilarity}{A boolean specifying whether an additional
stopping criterion based on improvement of the total dissimilarity should
be used. Defaults to \code{TRUE}. This is used only when \code{warping_class != "srsf"}.}

\item{compute_overall_center}{A boolean specifying whether the overall center
should be also computed. Defaults to \code{FALSE}. This is used only when
\code{warping_class != "srsf"}.}

\item{add_silhouettes}{A boolean specifying whether silhouette values should
be computed for each observation for internal validation of the clustering
structure. Defaults to \code{TRUE}.}
}
\value{
An object of class \code{\link{caps}}.
}
\description{
This function provides implementations of the k-means clustering algorithm
for functional data, with possible joint amplitude and phase separation. A
number of warping classes are implemented to achieve this separation.
}
\examples{
#----------------------------------
# Extracts 15 out of the 30 simulated curves in `simulated30_sub` data set
idx <- c(1:5, 11:15, 21:25)
x <- simulated30_sub$x[idx, ]
y <- simulated30_sub$y[idx, , ]

#----------------------------------
# Runs a k-means clustering with affine alignment, searching for 2 clusters
out <- fdakmeans(
  x = x,
  y = y,
  n_clusters = 2,
  warping_class = "affine"
)

#----------------------------------
# Then visualize the results
# Either with ggplot2 via ggplot2::autoplot(out)
# or using graphics::plot()
# You can visualize the original and aligned curves with:
plot(out, type = "amplitude")
# Or the estimated warping functions with:
plot(out, type = "phase")
}
