% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/mllib_clustering.R
\name{spark.bisectingKmeans}
\alias{spark.bisectingKmeans}
\alias{spark.bisectingKmeans,SparkDataFrame,formula-method}
\alias{summary,BisectingKMeansModel-method}
\alias{predict,BisectingKMeansModel-method}
\alias{fitted,BisectingKMeansModel-method}
\alias{write.ml,BisectingKMeansModel,character-method}
\title{Bisecting K-Means Clustering Model}
\usage{
spark.bisectingKmeans(data, formula, ...)

\S4method{spark.bisectingKmeans}{SparkDataFrame,formula}(
  data,
  formula,
  k = 4,
  maxIter = 20,
  seed = NULL,
  minDivisibleClusterSize = 1
)

\S4method{summary}{BisectingKMeansModel}(object)

\S4method{predict}{BisectingKMeansModel}(object, newData)

\S4method{fitted}{BisectingKMeansModel}(object, method = c("centers", "classes"))

\S4method{write.ml}{BisectingKMeansModel,character}(object, path, overwrite = FALSE)
}
\arguments{
\item{data}{a SparkDataFrame for training.}

\item{formula}{a symbolic description of the model to be fitted. Currently only a few formula
operators are supported, including '~', '.', ':', '+', and '-'.
Note that the response variable of formula is empty in spark.bisectingKmeans.}

\item{...}{additional argument(s) passed to the method.}

\item{k}{the desired number of leaf clusters. Must be > 1.
The actual number could be smaller if there are no divisible leaf clusters.}

\item{maxIter}{maximum iteration number.}

\item{seed}{the random seed.}

\item{minDivisibleClusterSize}{The minimum number of points (if greater than or equal to 1.0)
or the minimum proportion of points (if less than 1.0) of a
divisible cluster. Note that it is an expert parameter. The
default value should be good enough for most cases.}

\item{object}{a fitted bisecting k-means model.}

\item{newData}{a SparkDataFrame for testing.}

\item{method}{type of fitted results, \code{"centers"} for cluster centers
or \code{"classes"} for assigned classes.}

\item{path}{the directory where the model is saved.}

\item{overwrite}{whether to overwrite the output path if it already exists. Default is FALSE,
which means an exception is thrown if the output path exists.}
}
\value{
\code{spark.bisectingKmeans} returns a fitted bisecting k-means model.

\code{summary} returns summary information of the fitted model, which is a list.
        The list includes the model's \code{k} (number of cluster centers),
        \code{coefficients} (model cluster centers),
        \code{size} (number of data points in each cluster), \code{cluster}
        (cluster centers of the transformed data; \code{cluster} is \code{NULL} if
        \code{is.loaded} is \code{TRUE}),
        and \code{is.loaded} (whether the model is loaded from a saved file).

\code{predict} returns the predicted values based on a bisecting k-means model.

\code{fitted} returns a SparkDataFrame containing fitted values.
}
\description{
Fits a bisecting k-means clustering model against a SparkDataFrame.
Users can call \code{summary} to print a summary of the fitted model, \code{predict} to make
predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models.

\code{fitted} gets the fitted result from a bisecting k-means model.
Note: A model loaded from a saved file does not support \code{fitted}.
}
\note{
spark.bisectingKmeans since 2.2.0

summary(BisectingKMeansModel) since 2.2.0

predict(BisectingKMeansModel) since 2.2.0

fitted(BisectingKMeansModel) since 2.2.0

write.ml(BisectingKMeansModel, character) since 2.2.0
}
\examples{
\dontrun{
sparkR.session()
t <- as.data.frame(Titanic)
df <- createDataFrame(t)
model <- spark.bisectingKmeans(df, Class ~ Survived, k = 4)
summary(model)

# get fitted result from a bisecting k-means model
fitted.model <- fitted(model, "centers")
showDF(fitted.model)

# fitted values on training data
fitted <- predict(model, df)
head(select(fitted, "Class", "prediction"))

# save fitted model to input path
path <- "path/to/model"
write.ml(model, path)

# can also read back the saved model and print
savedModel <- read.ml(path)
summary(savedModel)
}
}
\seealso{
\link{predict}, \link{read.ml}, \link{write.ml}
}
