% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gMAP.R
\name{gMAP}
\alias{gMAP}
\alias{print.gMAP}
\alias{fitted.gMAP}
\alias{coef.gMAP}
\alias{as.matrix.gMAP}
\alias{summary.gMAP}
\title{Meta-Analytic-Predictive Analysis for Generalized Linear Models}
\usage{
gMAP(
  formula,
  family = gaussian,
  data,
  weights,
  offset,
  tau.strata,
  tau.dist = c("HalfNormal", "TruncNormal", "Uniform", "Gamma", "InvGamma", "LogNormal",
    "TruncCauchy", "Exp", "Fixed"),
  tau.prior,
  tau.strata.pred = 1,
  beta.prior,
  prior_PD = FALSE,
  REdist = c("normal", "t"),
  t.df = 5,
  contrasts = NULL,
  iter = getOption("RBesT.MC.iter", 6000),
  warmup = getOption("RBesT.MC.warmup", 2000),
  thin = getOption("RBesT.MC.thin", 4),
  init = getOption("RBesT.MC.init", 1),
  chains = getOption("RBesT.MC.chains", 4),
  cores = getOption("mc.cores", 1L)
)

\method{print}{gMAP}(x, digits = 3, probs = c(0.025, 0.5, 0.975), ...)

\method{fitted}{gMAP}(object, type = c("response", "link"), probs = c(0.025, 0.5, 0.975), ...)

\method{coef}{gMAP}(object, probs = c(0.025, 0.5, 0.975), ...)

\method{as.matrix}{gMAP}(x, ...)

\method{summary}{gMAP}(
  object,
  type = c("response", "link"),
  probs = c(0.025, 0.5, 0.975),
  ...
)
}
\arguments{
\item{formula}{the model formula describing the linear predictor
and encoding the grouping; see details}

\item{family}{the family of distributions defining the statistical
model (\code{binomial}, \code{gaussian}, or \code{poisson})}

\item{data}{optional data frame containing the variables of the
model. If not found in \code{data}, the variables are taken from
\code{environment(formula)}.}

\item{weights}{optional weight vector; see details below.}

\item{offset}{offset term in statistical model used for Poisson
data}

\item{tau.strata}{sets the exchangeability stratum per study. That
is, it is expected that each study belongs to a single
stratum. Default is to assign all studies to stratum 1. See section
Differential Discounting below.}

\item{tau.dist}{type of prior distribution for \code{tau};
supported priors are \code{HalfNormal} (default),
\code{TruncNormal}, \code{Uniform}, \code{Gamma}, \code{InvGamma},
\code{LogNormal}, \code{TruncCauchy}, \code{Exp} and \code{Fixed}.}

\item{tau.prior}{parameters of prior distribution for \code{tau};
see section prior specification below.}

\item{tau.strata.pred}{the index for the prediction stratum; default is 1.}

\item{beta.prior}{mean and standard deviation for normal priors of
regression coefficients, see section prior specification below.}

\item{prior_PD}{logical to indicate if the prior predictive distribution should be sampled (no conditioning on the data). Defaults to \code{FALSE}.}

\item{REdist}{type of random effects distribution. \code{normal} (default) or \code{t}.}

\item{t.df}{degrees of freedom if random-effects distribution is \code{t}.}

\item{contrasts}{an optional list; See \code{contrasts.arg} from
\code{\link[stats:model.matrix.default]{model.matrix.default}}.}

\item{iter}{number of iterations (including warmup).}

\item{warmup}{number of warmup iterations.}

\item{thin}{period of saving samples.}

\item{init}{positive number to specify uniform range on
unconstrained space for random initialization. See
\code{\link[rstan:stan]{stan}}.}

\item{chains}{number of Markov chains.}

\item{cores}{number of cores for parallel sampling of chains.}

\item{x, object}{\code{gMAP} analysis object created by \code{gMAP} function}

\item{digits}{number of displayed significant digits.}

\item{probs}{defines quantiles to be reported.}

\item{...}{optional arguments are ignored}

\item{type}{sets reported scale (\code{response} (default) or \code{link}).}
}
\value{
The function returns an S3 object of type \code{gMAP}. See
the methods section below for applicable functions to query the
object.
}
\description{
Meta-Analytic-Predictive (MAP) analysis for generalized linear
models suitable for normal, binary, or Poisson data. Model
specification and overall syntax mainly follow
\code{\link[stats:glm]{glm}} conventions.
}
\details{
The meta-analytic-predictive (MAP) approach derives a prior from
historical data using a hierarchical model.  The statistical model is
formulated as a generalized linear mixed model for binary, normal
(with fixed \eqn{\sigma}) and Poisson endpoints:

\deqn{y_{ih}|\theta_{ih} \sim f(y_{ih} | \theta_{ih})}{y_ih|\theta_ih ~ f(y_ih | \theta_ih)}

Here, \eqn{i=1,\ldots,N} is the index for observations, and
\eqn{h=1,\ldots,H} is the index for the grouping (usually studies).
The model assumes the linear predictor for a transformed mean as

\deqn{g(\theta_{ih}; x_{ih},\beta) = x_{ih} \, \beta + \epsilon_h}{g(\theta_ih; x_ih,\beta) = x_ih \beta + \epsilon_h}

with \eqn{x_{ih}}{x_ih} being the row vector of \eqn{k} covariates for
observation \eqn{i}.  The variance component is by default assumed to be
normally distributed

\deqn{\epsilon_h \sim N(0,\tau^2), \qquad h=1,\ldots,H}{\epsilon_h ~ N(0,\tau^2), h=1,...,H}

Lastly, the Bayesian implementation assumes independent normal
priors for the \eqn{k} regression coefficients and a prior for the
between-group standard deviation \eqn{\tau} (see \code{tau.dist}
for available distributions).

The MAP prior will then be derived from the above model as the
conditional distribution of \eqn{\theta_{\star}}{\theta_*} given the
available data and the vector of covariates \eqn{x_{\star}}{x_*}
defining the overall intercept

\deqn{\theta_{\star}| x_{\star},y .}{\theta_*| x_*,y .}

A simple and common case arises for one observation (summary
statistic) per trial. For a normal endpoint, the model then simplifies
to the standard normal-normal hierarchical model. In the above
notation, \eqn{i=h=1,\ldots,H} and

\deqn{y_h|\theta_h \sim N(\theta_h,s_h^2)}{y_h|\theta_h ~ N(\theta_h,s_h^2)}
\deqn{\theta_h = \mu + \epsilon_h}{\theta_h = \mu + \epsilon_h}
\deqn{\epsilon_h \sim N(0,\tau^2),}{\epsilon_h ~ N(0,\tau^2),}

where the more common \eqn{\mu} is used for the only (intercept)
parameter \eqn{\beta_1}. Since there are no covariates, the MAP
prior is simply \eqn{Pr(\theta_{\star} |
y_1,\ldots,y_H)}{Pr(\theta_* | y_1,\ldots,y_H)}.

The hierarchical model is a compromise between the two extreme
cases of full pooling (\eqn{\tau=0}, full borrowing, no
discounting) and no pooling (\eqn{\tau=\infty}, no borrowing,
stratification). The information content of the
historical data grows with H (number of historical data items)
indefinitely for full pooling whereas no information is
gained in a stratified analysis. For a fixed
\eqn{\tau}, the maximum effective sample
size of the MAP prior is \eqn{n_\infty} (\eqn{H\rightarrow
\infty}{H->\infty}), which for a normal endpoint with fixed
\eqn{\sigma} is

\deqn{n_\infty = \left(\frac{\tau^2}{\sigma^2}\right)^{-1},}{n_\infty = (\tau^2/\sigma^2)^-1}

(\emph{Neuenschwander et al., 2010}). Hence, the ratio
\eqn{\tau/\sigma} limits the amount of information a MAP prior is
equivalent to. This allows for a classification of \eqn{\tau}
values in relation to \eqn{\sigma}, which is crucial to define a
prior \eqn{P_\tau}. The following classification is useful in a
clinical trial setting:

\tabular{lcc}{
Heterogeneity \tab \eqn{\tau/\sigma} \tab \eqn{n_\infty} \cr
small \tab 0.0625 \tab 256 \cr
moderate \tab 0.125 \tab 64 \cr
substantial \tab 0.25 \tab 16 \cr
large \tab 0.5 \tab 4 \cr
very large \tab 1.0 \tab 1
}

The above formula for \eqn{n_\infty} assumes a known
\eqn{\tau}. This is unrealistic as the between-trial heterogeneity
parameter is often not well estimable, in particular if the number
of trials is small (H small). The above table helps to specify a
prior distribution for \eqn{\tau} appropriate for the given context
which defines the crucial parameter \eqn{\sigma}. For binary and
Poisson endpoints, normal approximations can be used to determine
\eqn{\sigma}. See examples below for concrete cases.

The design matrix \eqn{X} is defined by the formula for the linear
predictor and is always of the form \code{response ~ predictor |
grouping}, which follows \code{\link[stats:glm]{glm}}
conventions. The syntax has been extended to include a
specification of the grouping (for example study) factor of the
data with a vertical bar, \code{|}. The bar separates the
optionally specified grouping level, i.e. in the binary endpoint
case \code{cbind(r, n-r) ~ 1 | study}. By default it is assumed
that each row corresponds to an individual group (for which an
individual parameter is estimated). Specifics for the different
endpoints are:

\describe{

\item{normal}{\code{family=gaussian} assumes an identity link
function. The \code{response} should be given as matrix with two
columns with the first column being the observed mean value
\eqn{y_{ih}}{y_ih} and the second column the standard error
\eqn{se_{ih}}{se_ih} (of the mean). Additionally, it is recommended
to specify with the \code{weights} argument the number of units
which contributed to the (mean) measurement
\eqn{y_{ih}}{y_ih}. This information is used to estimate
\eqn{\sigma}.}

\item{binary}{\code{family=binomial} assumes a logit link
function. The \code{response} must be given as two-column matrix
with number of responders \eqn{r} (first column) and non-responders
\eqn{n-r} (second column).}

\item{Poisson}{\code{family=poisson} assumes a log link
function. The \code{response} is a vector of counts. The total
exposure times can be specified by an \code{offset}, which will be
linearly added to the linear predictor. The \code{offset} can be
given as part of the formula, \code{y ~ 1 + offset(log(exposure))}
or as the \code{offset} argument to \code{gMAP}. Note that the
exposure unit must be given as log-offset.}

}
}
\section{Methods (by generic)}{
\itemize{
\item \code{print(gMAP)}: displays a summary of the gMAP analysis.

\item \code{fitted(gMAP)}: returns the quantiles of the posterior shrinkage
estimates for each data item used during the analysis of the given
\code{gMAP} object. User can choose with \code{type} if the result is on
the response or the link scale.

\item \code{coef(gMAP)}: returns the quantiles of the posterior
distribution of the regression coefficients.

\item \code{as.matrix(gMAP)}: extracts the posterior sample of the model.

\item \code{summary(gMAP)}: returns the summaries of a gMAP
analysis. Output is a \code{gMAPsummary} object, which is a list containing
\describe{
\item{\code{tau}}{posterior summary of the heterogeneity standard deviation}
\item{\code{beta}}{posterior summary of the regression coefficients}
\item{\code{theta.pred}}{summary of the predictive distribution (given in dependence on the \code{type} argument either on \code{response} or \code{link} scale)}
\item{\code{theta}}{posterior summary of the mean estimate (also depends on the \code{type} argument)}
}

}}
\section{Differential Discounting}{


The above model assumes the same between-group standard deviation
\eqn{\tau}, which implies that the data are equally relevant. This
assumption can be relaxed to more than one \eqn{\tau}. That is,

\deqn{\epsilon_h \sim N(0,\tau_{s(h)}^2)}{\epsilon_h ~ N(0,\tau_s(h)^2)}

where \eqn{s(h)} assigns group \eqn{h} to one of \eqn{S}
between-group heterogeneity strata.

For example, in a situation with two randomized and four
observational studies, one may want to assume \eqn{\tau_1} (for
trials 1 and 2) and \eqn{\tau_2} (for trials 3-6) for the
between-trial standard deviations of the control means. More
heterogeneity (less relevance) for the observational studies can
then be expressed by appropriate priors for \eqn{\tau_1} and
\eqn{\tau_2}. In this case, \eqn{S=2} and the strata assignments
(see \code{tau.strata} argument) would be \eqn{s(1)=s(2)=1,
s(3)=\ldots=s(6)=2}.
}

\section{Prior Specification}{


The prior distribution for the regression coefficients \eqn{\beta}
is normal.

\itemize{
\item If a single number is given, then this is used as the standard
deviation and the default mean of 0 is used.

\item If a vector is given, it must be of the same length
as number of covariates defined and is used as standard
deviation.

\item If a matrix with a single row is given, its first column will be
used as mean and the second column will be used as standard deviation
for all regression coefficients.

\item Lastly, a two-column matrix (mean and standard deviation columns)
with as many rows as regression coefficients can be given.
}

It is recommended to always specify a \code{beta.prior}. Per
default a mean of 0 is set. The standard deviation is set to 2 for
the binary case, to 100 * \code{sd(y)} for the normal case and to
\code{sd(log(y + 0.5 + offset))} for the Poisson case.

For the between-trial heterogeneity \eqn{\tau} prior, a dispersion
parameter must always be given for each exchangeability
stratum. For the different \code{tau.prior} distributions, two
parameters are needed out of which one is set to a default value if
applicable:

\tabular{lccl}{
Prior \tab \eqn{a} \tab \eqn{b} \tab default \cr
\code{HalfNormal}  \tab \eqn{\mu = 0} \tab  \eqn{\sigma} \tab \cr
\code{TruncNormal} \tab \eqn{\mu} \tab  \eqn{\sigma} \tab \eqn{\mu = 0} \cr
\code{Uniform}     \tab a \tab b \tab a = 0 \cr
\code{Gamma}       \tab \eqn{\alpha} \tab \eqn{\beta} \tab \cr
\code{InvGamma}    \tab \eqn{\alpha} \tab \eqn{\beta} \tab \cr
\code{LogNormal}   \tab \eqn{\mu_{\log}}{\mu_log} \tab \eqn{\sigma_{\log}}{\sigma_log} \tab \cr
\code{TruncCauchy} \tab \eqn{\mu} \tab \eqn{\sigma} \tab \eqn{\mu = 0} \cr
\code{Exp}         \tab \eqn{\beta} \tab 0 \tab \cr
\code{Fixed}       \tab a \tab 0 \tab \cr
}

For a prior distribution with a default location parameter, a
vector of length equal to the number of exchangeability strata can
be given. Otherwise, a two-column matrix with as many rows as
exchangeability strata must be given, except for a single \eqn{\tau}
stratum, for which a vector of length two defines the parameters a
and b.
}

\section{Random seed}{
 The MAP analysis is performed using
Markov-Chain-Monte-Carlo (MCMC) in \code{\link[rstan]{rstan}}. MCMC
is a stochastic algorithm. To obtain exactly reproducible results
you must use the \code{\link[base:set.seed]{set.seed}} function
before calling \code{gMAP}. See \code{\link[=RBesT-package]{RBesT}}
overview page for global options on setting further MCMC simulation
parameters.
}

\examples{
## Setting up dummy sampling for fast execution of example
## Please use 4 chains and 20x more warmup & iter in practice
.user_mc_options <- options(RBesT.MC.warmup=50, RBesT.MC.iter=100,
                            RBesT.MC.chains=2, RBesT.MC.thin=1)

# Binary data example 1

# Mean response rate is ~0.25. For binary endpoints
# a conservative choice for tau is a HalfNormal(0,1) as long as
# the mean response rate is in the range of 0.2 to 0.8. For
# very small or large rates consider the n_infinity approach
# illustrated below.
# for exactly reproducible results, the seed must be set
set.seed(34563)
map_AS <- gMAP(cbind(r, n-r) ~ 1 | study,
               family=binomial,
               data=AS,
               tau.dist="HalfNormal", tau.prior=1,
               beta.prior=2)
print(map_AS)

# obtain numerical summaries
map_sum <- summary(map_AS)
print(map_sum)
names(map_sum)
# [1] "tau"        "beta"       "theta.pred" "theta"
map_sum$theta.pred

\donttest{
# graphical model checks (returns list of ggplot2 plots)
map_checks <- plot(map_AS)
# forest plot with shrinkage estimates
map_checks$forest_model
# density of MAP prior on response scale
map_checks$densityThetaStar
# density of MAP prior on link scale
map_checks$densityThetaStarLink
}

# obtain shrinkage estimates
fitted(map_AS)

# regression coefficients
coef(map_AS)

# finally fit MAP prior with parametric mixture
map_mix <- mixfit(map_AS, Nc=2)
plot(map_mix)$mix

\donttest{
# optionally select number of components automatically via AIC
map_automix <- automixfit(map_AS)
plot(map_automix)$mix
}

# Normal example 2, see normal vignette

# Prior considerations

# The general principle to derive a prior for tau can be based on the
# n_infinity concept as discussed in Neuenschwander et al., 2010.
# This assumes a normal approximation which applies for the colitis
# data set as:
p_bar <- mean(with(colitis, r/n))
s <- round(1/sqrt(p_bar * (1-p_bar)), 1)
# s is the approximate sampling standard deviation and a
# conservative prior is tau ~ HalfNormal(0,s/2)
tau_prior_sd <- s/2

# Evaluate HalfNormal prior for tau
tau_cat <- c(pooling=0
            ,small=0.0625
            ,moderate=0.125
            ,substantial=0.25
            ,large=0.5
            ,veryLarge=1
            ,stratified=Inf)
# Interval probabilities (basically saying we are assuming
# heterogeneity to be smaller than very large)
diff(2*pnorm(tau_cat * s, 0, tau_prior_sd))
# Cumulative probabilities as 1-F
1 - 2*(pnorm(tau_cat * s, 0, tau_prior_sd) - 0.5)

## Recover user set sampling defaults
options(.user_mc_options)

}
\references{
Neuenschwander B, Capkun-Niggli G, Branson M,
Spiegelhalter DJ. Summarizing historical information on controls in
clinical trials. \emph{Clin Trials}. 2010; 7(1):5-18

Schmidli H, Gsteiger S, Roychoudhury S, O'Hagan A, Spiegelhalter D,
Neuenschwander B.  Robust meta-analytic-predictive priors in
clinical trials with historical control information.
\emph{Biometrics} 2014;70(4):1023-1032.

Weber S, Li Y, Seaman III J.W., Kakizume T, Schmidli H. Applying
Meta-Analytic Predictive Priors with the R Bayesian evidence
synthesis tools. \emph{JSS} 2021; 100(19):1-32
}
\seealso{
\code{\link{plot.gMAP}}, \code{\link{forest_plot}}, \code{\link{automixfit}}, \code{\link{predict.gMAP}}
}
