% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CHICKN_W1.R
\name{CHICKN_W1}
\alias{CHICKN_W1}
\title{Chromatogram Hierarchical Compressive K-means with Nystrom approximation}
\usage{
CHICKN_W1(
  Data,
  K = 2,
  k_total,
  K_W1 = NULL,
  kernel_type = "Gaussian",
  distance_type = "W1",
  Freq = NULL,
  ncores = 2,
  max_neighbors = 32,
  nblocks = 64,
  N0 = 10000,
  max_Nsize = 32,
  DoPreimage = FALSE,
  DIR_output = tempfile(),
  DIR_tmp = tempfile(),
  BIG = FALSE,
  verbose = FALSE,
  ...
)
}
\arguments{
\item{Data}{A Filebacked Big Matrix n x N.}

\item{K}{Number of clusters at each call of the clustering method. Default is 2.}

\item{k_total}{An upper bound of the total number of clusters.}

\item{K_W1}{A Filebacked Big Matrix. Nystrom kernel matrix \eqn{s \times N},
where N is the number of signals in the training collection and s is the Nystrom sample size.
The default is NULL, in which case it is generated using the \code{\link{Nystrom_kernel}} function.}

\item{kernel_type}{Kernel function type c('Gaussian', 'Laplacian').}

\item{distance_type}{Distance function type. The available types are Wasserstein-1 ('W1') and Euclidean ('Euclide').
The default value is 'W1'.}

\item{Freq}{A frequency matrix m x n with frequency vectors in rows.
If NULL, the frequency vectors are generated by the \code{\link{GenerateFrequencies}} function.}

\item{ncores}{Number of cores. Default is 2.}

\item{max_neighbors}{Number of neighbors used to estimate the kernel parameter \code{gamma}. Default is 32.}

\item{nblocks}{Number of blocks on which the regression is performed. Default is 64.}

\item{N0}{Number of data vectors used for the variance estimation in \code{\link{EstimSigma}}. Default is 10000.}

\item{max_Nsize}{Number of neighbors used to compute consensus chromatograms. Default is 32.}

\item{DoPreimage}{logical that controls whether to compute the consensus chromatograms. Default is FALSE.}

\item{DIR_output}{A directory to save the results.}

\item{DIR_tmp}{A directory for temporary files.}

\item{BIG}{logical parameter that controls whether the resulting consensus chromatograms are stored as a Filebacked Big Matrix ('Centroid_preimage.bk').
Default is FALSE.}

\item{verbose}{logical that indicates whether to display the processing steps.}

\item{...}{Additional arguments passed on to \code{\link{COMPR}}.}
}
\value{
A list with the following attributes:
\itemize{
\item \code{gamma} is the estimated kernel parameter.
\item \code{CompressedData} is the Nystrom kernel matrix.
\item \code{sigma} is the estimated variance.
\item \code{Frequency} is the frequency matrix m x n.
\item \code{Clusters} is the cluster assignment.
}
}
\description{
An implementation of the complete pipeline of
the CHICKN algorithm.
}
\details{
\code{CHICKN_W1} compresses the data by computing a Nystrom kernel approximation and
applying the sketching operator from \insertCite{DBLP:journals/corr/KerivenBGP16}{chickn}.
See \code{\link{Nystrom_kernel}} and \code{\link{Sketch}} functions.
Then clusters are recovered by operating on the compressed data version.
It can use a kernel function based on either the
Wasserstein-1 or the Euclidean distance. It generates the following files in the \code{DIR_output} directory:
\itemize{
\item 'Cluster_assign_out.bk' is a Filebacked Big Matrix N x \code{maxLevel}+1, which stores the cluster assignment at each hierarchical level.
\item 'Centroids_out.bk' is a Filebacked Big Matrix with the resulting cluster centroids in columns.
}
}
\examples{
\donttest{
data("UPS2")
N = ncol(UPS2)
n= nrow(UPS2)
X_FBM = bigstatsr::FBM(init = UPS2, ncol=N, nrow = n)$save()
output  <- CHICKN_W1(Data = X_FBM, K = 2, k_total =8, max_neighbors = 10, ncores = 2, 
                     N0 = N, DoPreimage = FALSE)
}                     
}
\references{
\itemize{
\item \insertRef{wang2019scalable}{chickn}
\item \insertRef{DBLP:journals/corr/KerivenBGP16}{chickn}
}
}
\seealso{
\code{\link{Nystrom_kernel}}, \code{\link{GenerateFrequencies}},
\code{\link{hcc_parallel}}, \code{\link{Preimage}}, \href{https://github.com/privefl/bigstatsr}{bigstatsr}
}
