% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generics.R, R/mllib_clustering.R
\docType{methods}
\name{spark.gaussianMixture}
\alias{spark.gaussianMixture}
\alias{spark.gaussianMixture,SparkDataFrame,formula-method}
\alias{summary,GaussianMixtureModel-method}
\alias{predict,GaussianMixtureModel-method}
\alias{predict,GaussianMixtureModel,SparkDataFrame-method}
\alias{write.ml,GaussianMixtureModel,character-method}
\title{Multivariate Gaussian Mixture Model (GMM)}
\usage{
spark.gaussianMixture(data, formula, ...)

\S4method{spark.gaussianMixture}{SparkDataFrame,formula}(data, formula,
  k = 2, maxIter = 100, tol = 0.01)

\S4method{summary}{GaussianMixtureModel}(object)

\S4method{predict}{GaussianMixtureModel}(object, newData)

\S4method{write.ml}{GaussianMixtureModel,character}(object, path,
  overwrite = FALSE)
}
\arguments{
\item{data}{a SparkDataFrame for training.}

\item{formula}{a symbolic description of the model to be fitted. Currently only a few formula
operators are supported, including '~', '.', ':', '+', and '-'.
Note that the formula has no response variable in \code{spark.gaussianMixture}: its
left-hand side is empty, e.g. \code{~ V1 + V2}.}

\item{...}{additional arguments passed to the method.}

\item{k}{number of independent Gaussians in the mixture model.}

\item{maxIter}{maximum iteration number.}

\item{tol}{the convergence tolerance.}

\item{object}{a fitted Gaussian mixture model.}

\item{newData}{a SparkDataFrame for testing.}

\item{path}{the directory where the model is saved.}

\item{overwrite}{whether to overwrite the output path if it already exists. Default is FALSE,
which means an exception is thrown if the output path exists.}
}
\value{
\code{spark.gaussianMixture} returns a fitted multivariate Gaussian mixture model.

\code{summary} returns a summary of the fitted model, which is a list.
        The list includes the model's \code{lambda} (mixing weights), \code{mu} (component means),
        \code{sigma} (component covariance matrices), \code{loglik} (log-likelihood),
        and \code{posterior} (posterior probabilities).

\code{predict} returns a SparkDataFrame containing predicted labels in a column named
        "prediction".
}
\description{
Fits a multivariate Gaussian mixture model against a SparkDataFrame, similarly to
\code{mvnormalmixEM()} from R's mixtools package. Users can call \code{summary} to print a
summary of the fitted model,
\code{predict} to make predictions on new data, and \code{write.ml}/\code{read.ml}
to save/load fitted models.
}
\note{
spark.gaussianMixture since 2.1.0

summary(GaussianMixtureModel) since 2.1.0

predict(GaussianMixtureModel) since 2.1.0

write.ml(GaussianMixtureModel, character) since 2.1.0
}
\examples{
\dontrun{
sparkR.session()
library(mvtnorm)
set.seed(100)
a <- rmvnorm(4, c(0, 0))
b <- rmvnorm(6, c(3, 4))
data <- rbind(a, b)
df <- createDataFrame(as.data.frame(data))
model <- spark.gaussianMixture(df, ~ V1 + V2, k = 2)
summary(model)

# fitted values on training data
fitted <- predict(model, df)
head(select(fitted, "V1", "prediction"))

# save the fitted model to the given path
path <- "path/to/model"
write.ml(model, path)

# can also read back the saved model and print
savedModel <- read.ml(path)
summary(savedModel)
}
}
\seealso{
mixtools: \url{https://cran.r-project.org/package=mixtools}

\link{predict}, \link{read.ml}, \link{write.ml}
}
