% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/WelchRankTest.R
\name{WelchRankTest}
\alias{WelchRankTest}
\title{Compare two numerical data subsets}
\usage{
WelchRankTest(DF, xVar, indexA, indexB = NULL, cLevel = 0.95)
}
\arguments{
\item{DF}{data frame containing \code{xVar}}

\item{xVar}{numerical variable whose subsets are to be compared}

\item{indexA}{record index defining the first subset of \code{xVar} values}

\item{indexB}{record index defining the second subset of \code{xVar} values
(default NULL means the second subset is all records not contained in
the first)}

\item{cLevel}{confidence level for the test (default = 0.95)}
}
\value{
a named vector with these 5 elements:
\itemize{
\item \code{nA}: the number of records in the first \code{xVar} subset
\item \code{nB}: the number of records in the second \code{xVar} subset
\item \code{medianA}: the median \code{xVar} value in the first subset
\item \code{medianB}: the median \code{xVar} value in the second subset
\item \code{pValue}: the p-value returned by the Welch rank test
}
}
\description{
Uses the Welch rank test (a robust alternative to the classical t-test,
with better resistance to outliers and asymmetry) to compare the
distributions of two subsets of the same numerical variable.  The
result characterizes the subsets in terms of their median values,
and a small p-value (traditionally less than 0.05) implies
significant distributional differences between the two subsets.
}
\examples{
# Two interleaved subsets (a, b, a, b, ...) taken from one evenly spaced grid
x <- seq(-1, 1, length.out = 200)
a <- rep(c("a", "b"), 100)
DF <- data.frame(numVar = x, setVar = a)
indexA <- which(DF$setVar == "a")
WelchRankTest(DF, "numVar", indexA)  # No difference in distribution
# Add 0.2 to every record of the second subset
offset <- rep(c(0, 0.2), 100)
DF$numVar2 <- x + offset
WelchRankTest(DF, "numVar2", indexA) # Significant difference
# Add 10 to the first four records of the first subset to create outliers
xMod <- x
xMod[indexA[1:4]] <- x[indexA[1:4]] + 10
DF$numVar3 <- xMod
WelchRankTest(DF, "numVar3", indexA) # No difference even with outliers
stats::t.test(DF[indexA, "numVar3"], DF[-indexA, "numVar3"]) # Compare t-test
}
