% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gen_informative_sample.R
\name{gen_informative_sample}
\alias{gen_informative_sample}
\title{Generate a finite population and take an informative single or two-stage sample.}
\usage{
gen_informative_sample(clustering = TRUE, two_stage = FALSE,
  theta = c(0.2, 0.7, 1), M = 3, theta_star = matrix(c(0.3, 0.3, 0.3,
  0.31, 0.72, 2.04, 0.58, 0.83, 1), 3, 3, byrow = TRUE), gp_type = "rq",
  N = 10000, T = 15, L = 10, R = 8, I = 4, n = 750,
  noise_to_signal = 0.05, incl_gradient = "medium")
}
\arguments{
\item{clustering}{Boolean input on whether the population is to be generated from clusters of covariance
parameters.  Defaults to \code{clustering = TRUE}.}

\item{two_stage}{Boolean input on whether to use two-stage sampling, with the first stage defining a set
of \code{L} blocks, where membership in blocks is determined by quantiles of observation unit
variance functions.  (They are structured like strata, though they are sub-sampled).
Defaults to \code{two_stage = FALSE}.}

\item{theta}{A numeric vector of global covariance parameters in the case of \code{clustering = FALSE}.
The length, \code{P}, of \code{theta} must be consistent with the selected \code{gp_type}.
Defaults to \code{theta = c(0.2,0.7,1.0)} in the case of \code{clustering = FALSE}.}

\item{M}{Scalar input denoting number of clusters to employ if \code{clustering = TRUE}. Defaults to
\code{M = 3}}

\item{theta_star}{A \emph{P x M} matrix of cluster location values associated with the choice of
\code{M} and the selected \code{gp_type}. Defaults to
\code{matrix(c(0.3,0.3,0.3,0.31,0.72,2.04,0.58,0.83,1.00),3,3,byrow=TRUE)}.}

\item{gp_type}{Input of choice for covariance matrix formulation to be used to generate the functions
for the \code{N} population units.  Choices are \code{c("se","rq")}, where \code{"se"} denotes
the squared exponential covariance function and \code{"rq"} denotes the rational quadratic.
Defaults to \code{gp_type = "rq"}.}

\item{N}{A scalar input denoting the number of population units (or establishments).}

\item{T}{A scalar input denoting the number of time points in each of \code{N}, \emph{T x 1} functions
that contribute to the \emph{N x T} population data matrix, \code{y}.  Defaults to \code{T = 15}.}

\item{L}{A scalar input that denotes the number of blocks in which to assign the population
units to be sub-sampled in the first stage of sampling.  
Defaults to \code{L = 10}.}

\item{R}{A scalar input that denotes the number of blocks to sample from the \code{L} blocks with
probability proportional to the average variance of member functions in each block.
Defaults to \code{R = 8}.}

\item{I}{A scalar input denoting the number of strata to form within each block.  Population units
are divided into equally-sized strata based on variance quantiles. Defaults to \code{I  = 4}.}

\item{n}{Sample size to be generated.  An informative sample under either a single-stage
(\code{two_stage = FALSE}) or 2-stage (\code{two_stage = TRUE}) design is taken, along with
a non-informative, \emph{iid} sample of the same size (\code{n}) from the finite population
(generated with (\code{clustering = TRUE}) or without clustering). Defaults to \code{n = 750}.}

\item{noise_to_signal}{A numeric input in the interval, \code{(0,1)}, denoting the ratio of noise
variance to the average variance of the generated functions, \code{bb_i}.  Defaults to 
\code{noise_to_signal = 0.05}}

\item{incl_gradient}{A character input that sets how steeply the stratum inclusion probabilities
increase from the lowest to the highest stratum.  If set to \code{"high"}, they are proportional
to the exponential of the cluster number.  If set to \code{"medium"}, the inclusion probabilities are proportional
to the square of the cluster number.  Note that population units are assigned to each
stratum proportional to a progressively increasing quantile variance.  The 
\code{incl_gradient} setting is used for both \code{two_stage = TRUE}, in which
case it is applied to strata within block, as well as \code{two_stage = FALSE},
in which case a simple stratified random sample is conducted.  Defaults to 
\code{incl_gradient = "medium"}}
}
\value{
A list object named \code{dat_sim} containing objects related to the generated
        finite population, the informative sample and the non-informative, \emph{iid}, sample.
        Some important objects include:
    \item{H}{A vector of length \code{N}, the population size, with cluster assignments
            for each establishment (unit) in \code{1,...,M} clusters.}
    \item{map.tot}{A \code{data.frame} object including unit label identifiers
                  (under \code{establishment}),
                   the cluster assignment (if \code{clustering = TRUE}), 
                   the block (if \code{two_stage = TRUE}) and stratum assignments 
                   and the sample inclusion probabilities.}
    \item{map.obs}{A \code{data.frame} object configured the same as \code{map.tot}, only
                 confined to those establishments/units selected into the \emph{informative}
                 sample of size \code{n}.}
    \item{map.iid}{A \code{data.frame} object configured the same as \code{map.tot}, only
                 confined to those establishments/units selected into the \emph{non-informative},
                 iid sample of size \code{n}.}
    \item{(y,bb)}{\emph{N x T} \code{matrix} objects containing data responses and de-noised
                 functions, respectively, for each of the \code{N} population units. The order
                 of the \code{N} units is consistent with \code{map.tot}.}
    \item{(y_obs,bb_obs)}{\emph{n x T} \code{matrix} objects containing observed responses and de-noised
                 functions, respectively, for each of the \code{n} units sampled under an
                 informative sampling design. The order of the \code{n} units is consistent
                 with \code{map.obs}.}
    \item{(y_iid,bb_iid)}{\emph{n x T} \code{matrix} objects containing observed responses and de-noised
                 functions, respectively, for each of the \code{n} units sampled under a
                 non-informative / iid sampling design. The order of the \code{n} units is consistent
                 with \code{map.iid}.}
}
\description{
Used to compare performance of sample design-weighted and unweighted estimation procedures.
}
\examples{
\dontrun{
library(growfunctions)
## use gen_informative_sample() to generate an 
## N X T population drawn from a dependent GP
## By default, 3 clusters are used to generate 
## the population.
## A single stage stratified random sample of size n 
## is drawn from the population using I = 4 strata. 
## The resulting sample is informative in that the 
## distribution for this sample is
## different from the population from which 
## it was drawn because the strata inclusion
## probabilities are proportional to a feature 
## of the response, y (in this case, the variance).
## The stratified random sample over-samples 
## large variance strata).
## (The user may also select a 2-stage 
## sample with the first stage
## sampling "blocks" of the population and 
## the second stage sampling strata within blocks). 
dat_sim        <- gen_informative_sample(N = 10000, 
                                n = 500, T = 10,
                                noise_to_signal = 0.1)

## extract n x T observed sample under informative
## stratified sampling design.
y_obs                       <- dat_sim$y_obs
T                           <- ncol(y_obs)
}
}
\author{
Terrance Savitsky \email{tds151@gmail.com}
}
\seealso{
\code{\link{gpdpgrow}}, \code{\link{gmrfdpgrow}}
}

