% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/cyt_splsda.R
\name{cyt_splsda}
\alias{cyt_splsda}
\title{Analyze data with Sparse Partial Least Squares Discriminant Analysis
(sPLS-DA).}
\usage{
cyt_splsda(
  data,
  group_col = NULL,
  group_col2 = NULL,
  multilevel_col = NULL,
  batch_col = NULL,
  ind_names = FALSE,
  colors = NULL,
  output_file = NULL,
  ellipse = FALSE,
  bg = FALSE,
  conf_mat = FALSE,
  var_num,
  cv_opt = NULL,
  fold_num = 5,
  scale = c("none", "log2", "log10", "zscore", "custom"),
  custom_fn = NULL,
  tune = FALSE,
  tune_folds = 5,
  comp_num = 2,
  pch_values,
  style = NULL,
  roc = FALSE,
  verbose = FALSE,
  seed = 123
)
}
\arguments{
\item{data}{A matrix or data frame containing the variables. Columns not
specified by \code{group_col} or \code{group_col2} are assumed to be continuous
variables for analysis.}

\item{group_col}{A string specifying the column name that contains the first
grouping variable. If \code{group_col2} is not provided, an overall analysis will
be performed.}

\item{group_col2}{A string specifying the second grouping column. Default is
\code{NULL}.}

\item{multilevel_col}{A string specifying the column name that identifies
repeated measurements (e.g., patient or sample IDs). If provided, a
multilevel analysis will be performed. Default is \code{NULL}.}

\item{batch_col}{A string specifying the column that identifies the batch or study for each sample.
Default is \code{NULL}.}

\item{ind_names}{If \code{TRUE}, the row names of the first (or second) data matrix are used as names.
Default is \code{FALSE}. If a character vector is provided, these values will be used as names.
If 'pch' is set, plotting shapes are drawn in place of the names. See \code{?mixOmics::plotIndiv} for details.}

\item{colors}{A vector of colors for the groups or treatments. If
\code{NULL}, a random palette (using \code{rainbow}) is generated based on
the number of groups.}

\item{output_file}{Optional string specifying the name of the file
to be created.  When \code{NULL} (default), plots are drawn on
the current graphics device. Ensure that the file
extension matches the desired format (e.g., ".pdf" for PDF output,
".png" for PNG output, or ".tiff" for TIFF output).}

\item{ellipse}{Logical. Whether to draw a 95\% confidence ellipse on the
figures. Default is \code{FALSE}.}

\item{bg}{Logical. Whether to draw the prediction background in the figures.
Default is \code{FALSE}.}

\item{conf_mat}{Logical. Whether to print the confusion matrix for the
classifications. Default is \code{FALSE}.}

\item{var_num}{Numeric. The number of variables to be used in the sPLS-DA model.}

\item{cv_opt}{Character. Option for cross-validation method: either
"loocv" or "Mfold". Default is \code{NULL}.}

\item{fold_num}{Numeric. The number of folds to use if \code{cv_opt} is
"Mfold". Default is 5.}

\item{scale}{Character string specifying a transformation to apply to the
numeric predictor columns prior to model fitting.  Options are
"none", "log2", "log10", "zscore", or "custom".  When
"custom" is selected a user defined function must be supplied via
\code{custom_fn}.  Defaults to "none".}

\item{custom_fn}{A custom transformation function used when
\code{scale = "custom"}.  Ignored otherwise.  It should take a numeric
vector and return a numeric vector of the same length.}

\item{tune}{Logical.  If \code{TRUE}, performs tuning of \code{ncomp} and
\code{keepX} via cross-validation.  Default is \code{FALSE}.}

\item{tune_folds}{Integer.  Number of folds in cross-validation when
tuning.  Default is 5.}

\item{comp_num}{Numeric. The number of components to calculate in the sPLS-DA
model. Default is 2.}

\item{pch_values}{A vector of integers specifying the plotting characters
(pch values) to be used in the plots.}

\item{style}{Character. If set to \code{"3D"} or \code{"3d"} and
\code{comp_num} equals 3, a 3D plot is generated using the
\code{plot3D} package. Default is \code{NULL}.}

\item{roc}{Logical. Whether to compute and plot the ROC curve for the model.
Default is \code{FALSE}.}

\item{verbose}{A logical value indicating whether to print additional
informational output to the console. When \code{TRUE}, the function will
display progress messages and intermediate results; when
\code{FALSE} (the default), it runs quietly.}

\item{seed}{An integer specifying the seed for reproducibility (default is 123).}
}
\value{
Plots consisting of the classification figures, component figures
with Variable Importance in Projection (VIP) scores, and classifications
based on VIP scores greater than 1. ROC curves and confusion matrices are also
produced if requested.
}
\description{
This function conducts Sparse Partial Least Squares Discriminant Analysis
(sPLS-DA) on the provided data. It uses the specified \code{group_col} (and
optionally \code{group_col2}) to define class labels while assuming the remaining
columns contain continuous variables. The function supports transformations
via the \code{scale} parameter and generates a series of plots,
including classification plots, scree plots, loadings plots, and VIP score
plots. Optionally, ROC curves are produced when \code{roc} is \code{TRUE}.
Additionally, cross-validation is supported via LOOCV or Mfold methods. When
both \code{group_col} and \code{group_col2} are provided and differ, the function
analyzes each treatment level separately.
}
\details{
When \code{verbose} is set to \code{TRUE}, additional information about the analysis and confusion matrices
are printed to the console. These can be suppressed by keeping \code{verbose = FALSE}.
}
\examples{
# Loading Sample Data
data_df <- ExampleData1[,-c(3)]
data_df <- dplyr::filter(data_df, Group != "ND", Treatment != "Unstimulated")

cyt_splsda(data_df, output_file = NULL,
colors = c("black", "purple"), bg = FALSE, scale = "log2",
conf_mat = FALSE, var_num = 25, cv_opt = NULL, comp_num = 2,
pch_values = c(16, 4), style = NULL, ellipse = TRUE,
group_col = "Group", group_col2 = "Treatment", roc = FALSE, verbose = FALSE)

}
\references{
Lê Cao, K.-A., Boitard, S. and Besse, P. (2011).
Sparse PLS Discriminant Analysis: biologically relevant feature selection
and graphical displays for multiclass problems. \emph{BMC Bioinformatics}
\bold{12}:253.
}
\author{
Xiaohua Douglas Zhang and Shubh Saraswat
}
