% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stability_functions_adjusted.R
\encoding{UTF-8}
\name{stabilitySechidis}
\alias{stabilitySechidis}
\title{Stability Measure Sechidis}
\usage{
stabilitySechidis(features, sim.mat, threshold = 0.9, impute.na = NULL)
}
\arguments{
\item{features}{\code{list (length >= 2)}\cr
Chosen features per dataset. Each element of the list contains the features for one dataset.
The features must be given by their names (\code{character}) or indices (\code{integerish}).}

\item{sim.mat}{\code{numeric matrix}\cr
Similarity matrix which contains the similarity structure of all features based on
all datasets. The similarity values must be in the range of [0, 1] where 0 indicates
very low similarity and 1 indicates very high similarity. If the list elements of
\code{features} are integerish vectors, then the feature numbering must correspond to the
ordering of \code{sim.mat}. If the list elements of \code{features} are character
vectors, then \code{sim.mat} must be named and the names of \code{sim.mat} must correspond
to the entries in \code{features}.}

\item{threshold}{\code{numeric(1)}\cr
Threshold for indicating which features are similar and which are not. Two features
are considered as similar, if and only if the corresponding entry of \code{sim.mat} is greater
than or equal to \code{threshold}.}

\item{impute.na}{\code{numeric(1)}\cr
In some scenarios, the stability cannot be assessed based on all feature sets.
E.g. if some of the feature sets are empty, the respective pairwise comparisons yield NA as result.
With which value should these missing values be imputed? \code{NULL} means no imputation.}
}
\value{
\code{numeric(1)} Stability value.
}
\description{
The stability of feature selection is defined as the robustness of
the sets of selected features with respect to small variations in the data on which the
feature selection is conducted. To quantify stability, several datasets from the
same data generating process can be used. Alternatively, a single dataset can be
split into parts by resampling. Either way, all datasets used for feature selection must
contain exactly the same features. The feature selection method of interest is
applied on all of the datasets and the sets of chosen features are recorded.
The stability of the feature selection is assessed based on the sets of chosen features
using stability measures.
}
\details{
The stability measure is defined as
\deqn{1 - \frac{\mathop{\mathrm{trace}}(CS)}{\mathop{\mathrm{trace}}(C \Sigma)}} with (\eqn{p \times p})-matrices
\deqn{(S)_{ij} = \frac{m}{m-1}\left(\frac{h_{ij}}{m} - \frac{h_i}{m} \frac{h_j}{m}\right)} and
\deqn{(\Sigma)_{ii} = \frac{q}{mp} \left(1 - \frac{q}{mp}\right),}
\deqn{(\Sigma)_{ij} = \frac{\frac{1}{m} \sum_{k=1}^{m} |V_k|^2 - \frac{q}{m}}{p^2 - p} - \frac{q^2}{m^2 p^2}, i \neq j.}
The matrix \eqn{C} is created from matrix \code{sim.mat} by setting all values of \code{sim.mat} that are smaller
than \code{threshold} to 0. If you want \eqn{C} to be equal to \code{sim.mat}, use \code{threshold = 0}.
}
\note{
This stability measure is not corrected for chance.
Unlike the other stability measures in this R package that are not corrected for chance,
\code{stabilitySechidis} does not allow a \code{correction.for.chance} to be applied.
This is because for \code{stabilitySechidis}, no finite upper bound is known at the moment,
see \link{listStabilityMeasures}.
}
\section{Notation}{
 For the definition of all stability measures in this package,
the following notation is used:
Let \eqn{V_1, \ldots, V_m} denote the sets of chosen features
for the \eqn{m} datasets, i.e. \code{features} has length \eqn{m} and
\eqn{V_i} is the set of features given by the \eqn{i}-th entry of \code{features}.
Furthermore, let \eqn{h_j} denote the number of sets that contain feature
\eqn{X_j} so that \eqn{h_j} is the absolute frequency with which feature \eqn{X_j}
is chosen.
Analogously, let \eqn{h_{ij}} denote the number of sets that include both \eqn{X_i} and \eqn{X_j}.
Also, let \eqn{q = \sum_{j=1}^p h_j = \sum_{i=1}^m |V_i|} and \eqn{V = \bigcup_{i=1}^m V_i}.
}

\examples{
feats = list(1:3, 1:4, 1:5)
mat = 0.92 ^ abs(outer(1:10, 1:10, "-"))
stabilitySechidis(features = feats, sim.mat = mat)
}
\references{
Sechidis K, Papangelou K, Nogueira S, Weatherall J, Brown G (2020).
\dQuote{On the Stability of Feature Selection in the Presence of Feature Correlations.}
In \emph{Machine Learning and Knowledge Discovery in Databases}, 327--342.
Springer International Publishing.
\doi{10.1007/978-3-030-46150-8_20}.
}
\seealso{
\link{listStabilityMeasures}
}
