% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/randomSVD.R
\name{big_randomSVD}
\alias{big_randomSVD}
\title{Randomized partial SVD}
\usage{
big_randomSVD(
  X,
  fun.scaling = big_scale(center = FALSE, scale = FALSE),
  ind.row = rows_along(X),
  ind.col = cols_along(X),
  k = 10,
  tol = 1e-04,
  verbose = FALSE,
  ncores = 1,
  fun.prod = big_prodVec,
  fun.cprod = big_cprodVec
)
}
\arguments{
\item{X}{An object of class \link[=FBM-class]{FBM}.}

\item{fun.scaling}{A function that returns a named list of
\code{mean} and \code{sd} for every column, used to scale each of their elements
as follows: \deqn{\frac{X_{i,j} - mean_j}{sd_j}.}
Default doesn't use any scaling.}

\item{ind.row}{An optional vector of the row indices that are used.
If not specified, all rows are used. \strong{Don't use negative indices.}}

\item{ind.col}{An optional vector of the column indices that are used.
If not specified, all columns are used. \strong{Don't use negative indices.}}

\item{k}{Number of singular vectors/values to compute. Default is \code{10}.
\strong{This algorithm should be used to compute only a few singular vectors/values.}}

\item{tol}{Precision parameter of \link[RSpectra:svds]{svds}. Default is \code{1e-4}.}

\item{verbose}{Should some progress be printed? Default is \code{FALSE}.}

\item{ncores}{Number of cores used. Default doesn't use parallelism.
You may use \link{nb_cores}.}

\item{fun.prod}{Function that takes 6 arguments (in this order):
\itemize{
\item a matrix-like object \code{X},
\item a vector \code{x},
\item a vector of row indices \code{ind.row} of \code{X},
\item a vector of column indices \code{ind.col} of \code{X},
\item a vector of column centers (corresponding to \code{ind.col}),
\item a vector of column scales (corresponding to \code{ind.col}),
}
and computes the product of \code{X} (subsetted and scaled) with \code{x}.}

\item{fun.cprod}{Same as \code{fun.prod}, but for the \emph{transpose} of \code{X}.}
}
\value{
A named list (an S3 class "big_SVD") of
\itemize{
\item \code{d}, the singular values,
\item \code{u}, the left singular vectors,
\item \code{v}, the right singular vectors,
\item \code{niter}, the number of iterations of the algorithm,
\item \code{nops}, the number of matrix-vector multiplications used,
\item \code{center}, the centering vector,
\item \code{scale}, the scaling vector.
}

Note that to obtain the Principal Components, you must use
\link[=predict.big_SVD]{predict} on the result. See examples.
}
\description{
Partial SVD (or PCA) of a Filebacked Big Matrix, based on the
algorithm in RSpectra (by Yixuan Qiu and Jiali Mei).\cr
This algorithm is linear in time in all dimensions and is very
memory-efficient. Thus, it can be used on very large Filebacked Big Matrices.
}
\note{
The idea of using this Implicitly Restarted Arnoldi Method algorithm
comes from G. Abraham, Y. Qiu, and M. Inouye,
FlashPCA2: principal component analysis of biobank-scale genotype datasets,
bioRxiv: \doi{10.1101/094714}.
\cr
It proved to be faster than our implementation of the "blanczos" algorithm
in Rokhlin, V., Szlam, A., & Tygert, M. (2010).
A Randomized Algorithm for Principal Component Analysis.
SIAM Journal on Matrix Analysis and Applications, 31(3), 1100-1124.
\doi{10.1137/080736417}.
}
\examples{
# Fix the RNG seed so that the random row split below is reproducible
set.seed(1)

# Example data, and the number of singular vectors/values to compute
X <- big_attachExtdata()
K <- 10

# Using only half of the data for "training":
# draw a random, sorted subset of about half of the row indices
n <- nrow(X)
ind <- sort(sample(n, n/2))
# Partial SVD restricted to the training rows, with column scaling
test <- big_randomSVD(X, fun.scaling = big_scale(), ind.row = ind, k = K)
str(test)

# Reference PCA from prcomp on the same rows, centered and scaled
pca <- prcomp(X[ind, ], center = TRUE, scale. = TRUE)

# Check that both approaches used the same column centers and scales
all.equal(test$center, pca$center)
all.equal(test$scale,  pca$scale)

# The result has its own S3 class; use predict() on it to get the scores
class(test)
scores <- predict(test)
# Scores and loadings are the same or opposite (equal up to sign),
# so each of these plots should show points along a diagonal
plot(scores, pca$x[, 1:K])
plot(test$v, pca$rotation[, 1:K])
# Left singular vectors, then the plot method of the result
plot(test$u)
plot(test, type = "scores")

# Projecting on new data: the rows that were not used for training
ind2 <- setdiff(rows_along(X), ind)
scores.test2 <- predict(test, X, ind.row = ind2)
# Same projection with prcomp, dropping the training rows
scores.test3 <- predict(pca, X[-ind, ])
plot(scores.test2, scores.test3[, 1:K])

}
\seealso{
\link[RSpectra:svds]{svds}
}
