% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{davidClustering_kappa}
\alias{davidClustering_kappa}
\title{Cluster rows of a Kappa-statistic-matrix by the hierarchical fuzzy multi-linkage partitioning method proposed by DAVID}
\usage{
davidClustering_kappa(
  kappaMatrix,
  kappaThr = 0.35,
  initialGroupMembership = 3L,
  multiLinkageThr = 0.5,
  mergeRule = 1L
)
}
\arguments{
\item{kappaMatrix}{A numeric matrix of Kappa statistics, which is likely returned by \code{\link{rowKappa}} or \code{\link{colKappa}}}

\item{kappaThr}{Numeric, the threshold of the Kappa statistic, which is used to select initial seeds. Default value: 0.35, as recommended by the authors of the original study based on their experience.}

\item{initialGroupMembership}{Non-negative integer, the minimal number of members in initial groups. Default value: 3.}

\item{multiLinkageThr}{Numeric, the minimal linkage between two groups to be merged. Default value: 0.5.}

\item{mergeRule}{Integer, how two seeds are merged. See below.

Currently the following merge rules are implemented:
\itemize{
\item{1 (OR RULE) length of intersect divided by length of \emph{either} seed no less than \code{multiLinkageThr}. Empirical evidence suggests that it is a bit more coarse-grained than the native DAVID clustering algorithm, but the performance is quite good judged by biological relevance.}
\item{2 (AND RULE) length of intersect divided by length of \emph{both} seeds no less than \code{multiLinkageThr}, which gives slightly fragmented clusters according to empirical experience.}
\item{3 (UNION RULE) length of intersect divided by length of the union no less than \code{multiLinkageThr}, which performs similar to the \emph{AND RULE} above.}
\item{4 (GMEAN RULE) Geometric mean of length of intersect divided by length of \emph{both} seeds no less than \code{multiLinkageThr}, the clusters tend to be highly fragmented.}
\item{5 (AMEAN RULE) Arithmetic mean of length of intersect divided by length of \emph{both} seeds no less than \code{multiLinkageThr}, a few items tend to appear in multiple clusters.}
}}
}
\value{
A list of integer vectors. Each element represents a cluster
and contains the indices of rows belonging to that cluster. Rows can
appear in multiple clusters (fuzzy clustering).
}
\description{
The function implements the hierarchical fuzzy multi-linkage partitioning method used in the DAVID Bioinformatics tool.
}
\note{
The function has only been tested in a few anecdotal examples. Caution and more systematic tests are required before it is applied to critical datasets.
}
\examples{
synData <- matrix(c(rep(c(rep(1, 10), rep(0, 5)), 3),
rep(0, 4), rep(1, 7), rep(0,4),
rep(c(rep(0,5), rep(1,10)), 3),
rep(c(rep(0,3), 1), 4)[-16]), ncol=15, byrow=TRUE)
rownames(synData) <- sprintf("Gene \%s", letters[1:8])
colnames(synData) <- sprintf("t\%d", 1:15)
synKappaMat <- rowKappa(synData)
synKappaMat.round2 <- round(synKappaMat, 2)
davidClustering_kappa(synKappaMat.round2)

}
\references{
Huang et al. The DAVID Gene Functional Classification Tool: a novel
biological module-centric algorithm to functionally analyze large gene
lists. Genome Biology, 2007. \doi{10.1186/gb-2007-8-9-r183}
}
\author{
Jitao David Zhang <jitao_david.zhang@roche.com>
}
