% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/install.R
\name{install.spark}
\alias{install.spark}
\title{Download and Install Apache Spark to a Local Directory}
\usage{
install.spark(hadoopVersion = "2.7", mirrorUrl = NULL,
  localDir = NULL, overwrite = FALSE)
}
\arguments{
\item{hadoopVersion}{Version of Hadoop to install. Default is \code{"2.7"}. It can take other
version numbers in the format "x.y", where x and y are integers.
If \code{hadoopVersion = "without"}, the "Hadoop free" build is installed.
See
\href{http://spark.apache.org/docs/latest/hadoop-provided.html}{
"Hadoop Free" Build} for more information.
Other patched version names can also be used, e.g. \code{"cdh4"}.}

\item{mirrorUrl}{base URL of the repositories to use. The directory layout should follow
\href{http://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}.}

\item{localDir}{a local directory where Spark is installed. The directory contains
version-specific folders of Spark packages. The default is the path to
the cache directory:
\itemize{
  \item Mac OS X: \file{~/Library/Caches/spark}
  \item Unix: \env{$XDG_CACHE_HOME} if defined, otherwise \file{~/.cache/spark}
  \item Windows: \file{\%LOCALAPPDATA\%\\Apache\\Spark\\Cache}.
}}

\item{overwrite}{If \code{TRUE}, download and overwrite the existing tar file in \code{localDir}
and force a re-install of Spark (in case the local directory or file is corrupted).}
}
\value{
the (invisible) local directory where Spark is found or installed
}
\description{
\code{install.spark} downloads and installs Spark to a local directory if
it is not found. If \env{SPARK_HOME} is set in the environment and that directory is found, that directory is
returned. The Spark version we use is the same as the SparkR version. Users can specify a desired
Hadoop version, the remote mirror site, and the directory where the package is installed locally.
}
\details{
The full URL of the remote file is inferred from \code{mirrorUrl} and \code{hadoopVersion}.
\code{mirrorUrl} specifies the remote path to a Spark folder. It is followed by a subfolder
named after the Spark version (that corresponds to SparkR), and then the tar filename.
The filename is composed of four parts, i.e. [Spark version]-bin-[Hadoop version].tgz.
For example, the full path for a Spark 2.0.0 package for Hadoop 2.7 from
\code{http://apache.osuosl.org} is:
\code{http://apache.osuosl.org/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz}.
For \code{hadoopVersion = "without"}, [Hadoop version] in the filename is then
\code{without-hadoop}.
}
\note{
install.spark since 2.1.0
}
\examples{
\dontrun{
install.spark()
}
}
\seealso{
See available Hadoop versions:
         \href{http://spark.apache.org/downloads.html}{Apache Spark}
}
