% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cluster_events.R
\name{cluster_events}
\alias{cluster_events}
\alias{bp_kmeans}
\alias{bp_clara}
\alias{bp_dbscan}
\alias{bp_mclust}
\alias{bp_density_cut}
\title{Clustering with trimming}
\usage{
bp_kmeans(.data, .parameter, .column_name, .k, .trim = 0, ...)

bp_clara(.data, .parameter, .column_name, .k, .trim = 0, ...)

bp_dbscan(.data, .parameter, .column_name, .eps = 0.2, .MinPts = 50, ...)

bp_mclust(
  .data,
  .parameter,
  .column_name,
  .k,
  .trim = 0,
  .sample_frac = 0.05,
  .max_subset = 500,
  ...
)

bp_density_cut(.data, .parameter, .column_name, .k, .trim = 0, ...)
}
\arguments{
\item{.data}{A tidy data.frame.}

\item{.parameter}{A character giving the name of column(s) where populations
are identified.}

\item{.column_name}{A character giving the name of the column to store the
population information.}

\item{.k}{Numeric giving the number of expected clusters, or a set of initial
cluster centers.}

\item{.trim}{A numeric between 0 and 1, giving the fraction of points to
remove by marking them NA.}

\item{...}{Additional arguments passed to appropriate methods, see below.}

\item{.eps}{Reachability distance, see \code{\link[fpc:dbscan]{fpc::dbscan()}}.}

\item{.MinPts}{Reachability minimum no. of points, see \code{\link[fpc:dbscan]{fpc::dbscan()}}.}

\item{.sample_frac}{A numeric between 0 and 1 giving the fraction of points
to use in initialisation of \code{Mclust()}.}

\item{.max_subset}{A numeric giving the maximum number of events to use in
the initialisation of \code{Mclust()}, see below.}
}
\value{
The data.frame in \code{.data} with the cluster classification added in
the column given by \code{.column_name}.
}
\description{
Cluster identification with various algorithms and subsequent trimming of each cluster
}
\section{Additional parameters}{

Information on the additional arguments that are passed on can be found here:

\describe{
\item{clara}{\code{\link[cluster:clara]{cluster::clara()}}}
\item{kmeans}{\code{\link[=kmeans]{kmeans()}}}
\item{dbscan}{\code{\link[fpc:dbscan]{fpc::dbscan()}}}
\item{mclust}{\code{\link[mclust:Mclust]{mclust::Mclust()}}}
\item{density_cut}{\code{\link[=approx_adjust]{approx_adjust()}}}
}
}

\section{Default parameters to \code{clara()}}{

\code{\link[cluster:clara]{cluster::clara()}} is by default called with the following parameters:

\describe{
\item{samples}{100}
\item{pamLike}{TRUE}
}
}

\section{Parameters to dbscan}{

It requires some trial and error to get the right parameters for the
density based clustering, but the parameters usually stay stable throughout an
entire experiment and over time (assuming that there is only little drift in
the flow cytometer). There is no guarantee that the correct number of clusters
is returned, and it might be better to use this on the forward - side
scatter discrimination.

Scaling of the parameters seems to be appropriate in most cases for the
forward - side scatter discrimination and is automatically performed.
}

\section{Parameters to mclust}{

Mclust is slow and memory hungry on large datasets. Using a subset of the
data to initialise the clustering greatly improves the speed. I have found
that a subset of 500 events works well, and that a subset of 5000 events gives
no markedly better clustering, but initialisation with 500 events makes
the clustering complete about 12 times faster than with 5000 events.
}

\section{Parameters to density_cut}{

This simple function works by smoothing a density function until the desired number
of clusters is found. The clusters are then separated at the lowest
point between two clusters.
}

\examples{
library(beadplexr)
library(dplyr)
library(ggplot2)

data("lplex")

lplex[[1]] \%>\%
  # Speed things up a bit by selecting one fourth of the events.
  # Probably not something you'd usually do
  dplyr::sample_frac(0.25) \%>\%
  bp_kmeans(.parameter = c("FSC-A", "SSC-A"),
            .column_name = "population", .trim = 0.1, .k = 2) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()

library(beadplexr)
library(dplyr)
library(ggplot2)

data("lplex")

lplex[[1]] \%>\%
  # Speed things up a bit by selecting one fourth of the events.
  # Probably not something you'd usually do
  dplyr::sample_frac(0.25) \%>\%
  bp_clara(.parameter = c("FSC-A", "SSC-A"),
           .column_name = "population", .trim = 0.1, .k = 2) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()

lplex[[1]] \%>\%
  # Speed things up a bit by selecting one fourth of the events.
  # Probably not something you'd usually do
  dplyr::sample_frac(0.25) \%>\%
  bp_clara(.parameter = c("FSC-A", "SSC-A"),
           .column_name = "population", .trim = 0, .k = 2) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()

\dontrun{
library(beadplexr)
library(dplyr)
library(ggplot2)

data("lplex")

lplex[[1]] \%>\%
  # Speed things up a bit by selecting one fourth of the events.
  # Probably not something you'd usually do
  dplyr::sample_frac(0.25) \%>\%
  bp_dbscan(.parameter = c("FSC-A", "SSC-A"), .column_name = "population",
            .eps = 0.2, .MinPts = 50) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()

pop1 <- lplex[[1]] \%>\%
  # Speed things up a bit by selecting one fourth of the events.
  # Probably not something you'd usually do
  dplyr::sample_frac(0.25) \%>\%
  bp_dbscan(.parameter = c("FSC-A", "SSC-A"), .column_name = "population",
    .eps = 0.2, .MinPts = 50) \%>\%
  dplyr::filter(population == "1")

pop1 \%>\%
  bp_dbscan(.parameter = c("FL6-H", "FL2-H"), .column_name = "population",
    .eps = 0.2, .MinPts = 50) \%>\%
  .$population \%>\% unique

pop1 \%>\%
  bp_dbscan(.parameter = c("FL6-H", "FL2-H"), .column_name = "population",
    .eps = 0.2, .MinPts = 50, scale = FALSE) \%>\%
  .$population \%>\% unique
}
library(beadplexr)
library(magrittr)
library(ggplot2)

data("lplex")

lplex[[1]] \%>\%
  bp_mclust(.parameter = c("FSC-A", "SSC-A"),
           .column_name = "population", .trim = 0, .k = 2) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()
library(beadplexr)
library(magrittr)
library(ggplot2)

data("lplex")

lplex[[1]] \%>\%
  bp_density_cut(.parameter = c("FSC-A"),
           .column_name = "population", .trim = 0, .k = 2) \%>\%
  ggplot() +
  aes(x = `FSC-A`, y = `SSC-A`, colour = population) +
  geom_point()

}
\seealso{
\code{\link[=trim_population]{trim_population()}}, \code{\link[=identify_analyte]{identify_analyte()}}.

Mclust and dbscan seem to do an excellent job at separating on the forward
and side scatter parameters. Mclust and clara both perform well separating
beads in the APC channel, but clara is about 3 times faster than Mclust.
}
