% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/calibrate_trial.R
\name{calibrate_trial}
\alias{calibrate_trial}
\title{Calibrate trial specification}
\usage{
calibrate_trial(
  trial_spec,
  n_rep = 1000,
  cores = NULL,
  base_seed = NULL,
  fun = NULL,
  target = 0.05,
  search_range = c(0.9, 1),
  tol = target/10,
  dir = 0,
  init_n = 2,
  iter_max = 25,
  resolution = 5000,
  kappa = 0.5,
  pow = 1.95,
  lengthscale = 1,
  scale_x = TRUE,
  noisy = is.null(base_seed),
  narrow = !noisy & !is.null(base_seed),
  prev_x = NULL,
  prev_y = NULL,
  path = NULL,
  overwrite = FALSE,
  version = NULL,
  compress = TRUE,
  sparse = TRUE,
  progress = NULL,
  export = NULL,
  export_envir = parent.frame(),
  verbose = FALSE,
  plot = FALSE
)
}
\arguments{
\item{trial_spec}{\code{trial_spec} object, generated and validated by the
\code{\link[=setup_trial]{setup_trial()}}, \code{\link[=setup_trial_binom]{setup_trial_binom()}} or \code{\link[=setup_trial_norm]{setup_trial_norm()}} function.}

\item{n_rep}{single integer, the number of simulations to run at each
evaluation. Values \verb{< 100} are not permitted; values \verb{< 1000} are permitted
but recommended against.}

\item{cores}{\code{NULL} or single integer. If \code{NULL}, a default value/cluster set
by \code{\link[=setup_cluster]{setup_cluster()}} will be used to control whether simulations are run in
parallel on a default cluster or sequentially in the main process; if a
cluster/value has not been specified by \code{\link[=setup_cluster]{setup_cluster()}}, \code{cores} will
then be set to the value stored in the global \code{"mc.cores"} option (if
previously set by \verb{options(mc.cores = <number of cores>)}), and \code{1} if that
option has not been specified.\cr
If the resulting number of \code{cores = 1}, computations will be run
sequentially in the primary process, and if \code{cores > 1}, a new parallel
cluster will be set up using the \code{parallel} library and removed once the
function completes. See \code{\link[=setup_cluster]{setup_cluster()}} for details.}

\item{base_seed}{single integer or \code{NULL} (default); the random seed used as
the basis for all simulation runs (see \code{\link[=run_trials]{run_trials()}}) and random number
generation within the rest of the calibration process; if used, the global
random seed will be restored after the function has been run.\cr
\strong{Note:} providing a \code{base_seed} is highly recommended, as this will
generally lead to faster and more stable calibration.}

\item{fun}{\code{NULL} (the default), in which case the trial specification will
be calibrated using the default process described above and further in
\strong{Details}; otherwise a user-supplied function used during the calibration
process, which should have a structure as described in \strong{Details}.}

\item{target}{single finite numeric value (defaults to \code{0.05}); the target
value for \code{y} to calibrate the \code{trial_spec} object to.}

\item{search_range}{finite numeric vector of length \code{2}; the lower and upper
boundaries in which to search for the best \code{x}. Defaults to \code{c(0.9, 1.0)}.}

\item{tol}{single finite numeric value (defaults to \code{target / 10}); the
accepted tolerance (in the direction(s) specified by \code{dir}); when
a \code{y}-value within the accepted tolerance of the target is obtained, the
calibration stops.\cr
\strong{Note:} \code{tol} should be specified to be sensible considering \code{n_rep};
e.g., if the probability of superiority is targeted with \code{n_rep == 1000}, a
\code{tol} of \code{0.01} will correspond to \code{10} simulated trials.\cr
A too low \code{tol} relative to \code{n_rep} may lead to very slow calibration or
calibration that cannot succeed regardless of the number of iterations.\cr
\strong{Important:} even when a large number of simulations are conducted,
using a very low \code{tol} may lead to calibration not succeeding as it may
also be affected by other factors, e.g., the total number of simulated
patients, the possible maximum differences in simulated outcomes, and the
number of posterior draws (\code{n_draws} in the \code{\link[=setup_trial]{setup_trial()}} family of
functions), which affects the minimum differences in posterior
probabilities when simulating trials and thus can affect calibration,
including when using the default calibration function. Increasing the number
of posterior draws or the number of repetitions should be attempted if the
desired tolerance cannot be achieved.}

\item{dir}{single numeric value; specifies the direction(s) of the tolerance
range. If \code{0} (the default) the tolerance range will be \code{target - tol} to
\code{target + tol}. If \verb{< 0}, the range will be \code{target - tol} to \code{target}, and
if \verb{> 0}, the range will be \code{target} to \code{target + tol}.}

\item{init_n}{single integer \verb{>= 2}. The number of initial evaluations
evenly spread over the \code{search_range}, with one evaluation at each boundary
(thus, the default value of \code{2} is the minimum permitted value; if
calibrating according to a different target than the default, a higher
value may be sensible).}

\item{iter_max}{single integer \verb{> 0} (default \code{25}). The maximum number of
new evaluations after the initial grid (with size specified by \code{init_n})
has been set up. If calibration is unsuccessful after the maximum number
of iterations, the \code{prev_x} and \code{prev_y} arguments (described below) may be
used to start a new calibration process re-using previous evaluations.}

\item{resolution}{single integer (defaults to \code{5000}), size of the grid at
which the predictions used to select the next value to evaluate at are
made.\cr
\strong{Note:} memory use will substantially increase with higher values. See
also the \code{narrow} argument below.}

\item{kappa}{single numeric value \verb{> 0} (default \code{0.5}); corresponding to the
width of the uncertainty bounds used to find the next target to evaluate.
See \strong{Details}.}

\item{pow}{single numerical value in the \verb{[1, 2]} range (default \code{1.95}),
controlling the smoothness of the Gaussian process. See \strong{Details}.}

\item{lengthscale}{single numerical value (defaults to \code{1}) or numerical
vector of length \code{2}; values must be finite and non-negative. If a single
value is provided, this will be used as the \code{lengthscale} hyperparameter;
if a numerical vector of length \code{2} is provided, the second value must be
higher than the first and the optimal \code{lengthscale} in this range will be
found using an optimisation algorithm. If any value is \code{0}, a small amount
of noise will be added as lengthscales must be \verb{> 0}. Controls smoothness
in combination with \code{pow}. See \strong{Details}.}

\item{scale_x}{single logical value; if \code{TRUE} (the default) the \code{x}-values
will be scaled to the \verb{[0, 1]} range according to the minimum/maximum
values provided. If \code{FALSE}, the model will use the original scale. If
distances on the original scale are small, scaling may be preferred. The
returned values will always be on the original scale. See \strong{Details}.}

\item{noisy}{single logical value; if \code{FALSE}, a noiseless process is
assumed, and interpolation between values is performed (i.e., with no
uncertainty at the \code{x}-values assumed). If \code{TRUE}, the \code{y}-values are
assumed to come from a noisy process, and regression is performed (i.e.,
some uncertainty at the evaluated \code{x}-values will be assumed and included
in the predictions). Specifying \code{FALSE} requires a \code{base_seed} supplied,
and is generally recommended, as this will usually lead to faster and more
stable calibration. If a low \code{n_rep} is used (or if trials are calibrated
to metrics other than the default), specifying \code{TRUE} may be
necessary even when using a valid \code{base_seed}. Defaults to \code{FALSE} if a
\code{base_seed} is supplied and \code{TRUE} if not.}

\item{narrow}{single logical value. If \code{FALSE}, predictions are evenly spread
over the full \code{x}-range. If \code{TRUE}, the prediction grid will be spread
evenly over an interval consisting of the two \code{x}-values with
corresponding \code{y}-values closest to the target in opposite directions. Can
only be \code{TRUE} when a \code{base_seed} is provided and \code{noisy} is \code{FALSE} (the
default value is \code{TRUE} in that case, otherwise it is \code{FALSE}), and only if
the function can safely be assumed to be only monotonically increasing or
decreasing (which is generally reasonable if the default is used for
\code{fun}), in which case this will lead to a faster search and a smoother
prediction grid in the relevant region without increasing memory use.}

\item{prev_x, prev_y}{numeric vectors of equal lengths, corresponding to
previous evaluations. If provided, these will be used in the calibration
process (added before the initial grid is set up, with values in the grid
matching values in \code{prev_x} leading to those evaluations being skipped).}

\item{path}{single character string or \code{NULL} (the default); if a valid file
path is provided, the calibration results will either be saved to this path
(if the file does not exist or if \code{overwrite} is \code{TRUE}, see below) or the
previous results will be loaded and returned (if the file exists,
\code{overwrite} is \code{FALSE}, and if the input \code{trial_spec} and central control
settings are identical to the previous run, otherwise an error is
produced). Results are saved/loaded using the \code{\link[=saveRDS]{saveRDS()}} / \code{\link[=readRDS]{readRDS()}}
functions.}

\item{overwrite}{single logical, defaults to \code{FALSE}, in which case previous
results are loaded if a valid file path is provided in \code{path} and the
object in \code{path} contains the same input \code{trial_spec} and the previous
calibration used the same central control settings (otherwise, the function
errors). If \code{TRUE} and a valid file path is provided in \code{path}, the
complete calibration function will be run with results saved using
\code{\link[=saveRDS]{saveRDS()}}, regardless of whether or not a previous result was saved
in \code{path}.}

\item{version}{passed to \code{\link[=saveRDS]{saveRDS()}} when saving calibration results,
defaults to \code{NULL} (as in \code{\link[=saveRDS]{saveRDS()}}), which means that the current
default version is used. Ignored if calibration results are not saved.}

\item{compress}{passed to \code{\link[=saveRDS]{saveRDS()}} when saving calibration results,
defaults to \code{TRUE} (as in \code{\link[=saveRDS]{saveRDS()}}), see \code{\link[=saveRDS]{saveRDS()}} for other options.
Ignored if calibration results are not saved.}

\item{sparse, progress, export, export_envir}{passed to \code{\link[=run_trials]{run_trials()}}, see
description there.}

\item{verbose}{single logical, defaults to \code{FALSE}. If \code{TRUE}, the function
will print details on calibration progress.}

\item{plot}{single logical, defaults to \code{FALSE}. If \code{TRUE}, the function
will print plots of the Gaussian process model predictions and return
them as part of the final object; requires the \code{ggplot2} package installed.}
}
\value{
A list of special class \code{"trial_calibration"}, which contains the
following elements that can be extracted using \code{$} or \code{[[}:
\itemize{
\item \code{success}: single logical, \code{TRUE} if the calibration succeeded with
the best result being within the tolerance range, \code{FALSE} if the
calibration process ended after all allowed iterations without
obtaining a result within the tolerance range.
\item \code{best_x}: single numerical value, the \code{x}-value (on the original,
input scale) at which the best \code{y}-value was found, regardless of
\code{success}.
\item \code{best_y}: single numerical value, the best \code{y}-value obtained,
regardless of \code{success}.
\item \code{best_trial_spec}: the best calibrated version of the original
\code{trial_spec} object supplied, regardless of \code{success} (i.e., the
returned trial specification object is only adequately calibrated if
\code{success} is \code{TRUE}).
\item \code{best_sims}: the trial simulation results (from \code{\link[=run_trials]{run_trials()}})
leading to the best \code{y}-value, regardless of \code{success}. If no new
simulations have been conducted (e.g., if the best \code{y}-value is from
one of the \code{prev_y}-values), this will be \code{NULL}.
\item \code{evaluations}: a two-column \code{data.frame} containing the variables
\code{x} and \code{y}, corresponding to all \code{x}-values and \code{y}-values (including
values supplied through \code{prev_x}/\code{prev_y}).
\item \code{input_trial_spec}: the unaltered, uncalibrated, original
\code{trial_spec}-object provided to the function.
\item \code{elapsed_time}: the total run time of the calibration process.
\item \code{control}: list of the most central settings provided to the
function.
\item \code{fun}: the function used for calibration; if \code{NULL} was supplied
when starting the calibration, the default function (described in
\strong{Details}) is returned after being used in the function.
\item \code{adaptr_version}: the version of the \code{adaptr} package used to run
the calibration process.
\item \code{plots}: list containing \code{ggplot2} plot objects of each Gaussian
process suggestion step, only included if \code{plot} is \code{TRUE}.
}
}
\description{
This function calibrates a trial specification using a Gaussian process-based
Bayesian optimisation algorithm.
The function calibrates an input trial specification object (using repeated
calls to \code{\link[=run_trials]{run_trials()}} while adjusting the trial specification) to a
\code{target} value within a \code{search_range} in a single input dimension (\code{x}) in
order to find an optimal value (\code{y}).\cr
The default (and expectedly most common use case) is to calibrate a trial
specification to adjust the \code{superiority} and \code{inferiority} thresholds to
obtain a certain probability of superiority; if used with a trial
specification with identical underlying outcomes (no between-arm
differences), this probability is an estimate of the Bayesian analogue of the
total type-1 error rate for the outcome driving the adaptations, and if
between-arm differences are present, this corresponds to an estimate of the
Bayesian analogue of the power.\cr
The default is to perform the calibration while varying single, constant,
symmetric thresholds for \code{superiority} / \code{inferiority} throughout a trial
design, as described in \strong{Details}, and the default values have been chosen
to function well in this case.\cr
Advanced users may use the function to calibrate trial specifications
according to other metrics - see \strong{Details} for how to specify a custom
function used to modify (or recreate) a trial specification object during
the calibration process.\cr
The underlying Gaussian process model and its control hyperparameters are
described under \strong{Details}, and the model is partially based on code from
Gramacy 2020 (with permission; see \strong{References}).
}
\details{
\strong{Default calibration}
\cr\cr
If \code{fun} is \code{NULL} (as default), the default calibration strategy will be
employed. Here, the target \code{y} is the probability of superiority (as
described in \code{\link[=check_performance]{check_performance()}} and \code{\link[=summary]{summary()}}), and the function will
calibrate constant stopping thresholds for superiority and inferiority (as
described in \code{\link[=setup_trial]{setup_trial()}}, \code{\link[=setup_trial_binom]{setup_trial_binom()}}, and
\code{\link[=setup_trial_norm]{setup_trial_norm()}}), which corresponds to the Bayesian analogues of the
type 1 error rate if there are no differences between arms in the trial
specification, which we expect to be the most common use case, or the power,
if there are differences between arms in the trial specification.\cr

The stopping calibration process will, in the default case, use the input \code{x}
as the stopping threshold for superiority and \code{1 - x} as the stopping
threshold for inferiority, respectively, i.e., stopping thresholds will be
constant and symmetric.\cr

The underlying default function calibrated is typically essentially
noiseless if a high enough number of simulations are used with an
appropriate random \code{base_seed}, and generally monotonically decreasing. The
default values for the control hyperparameters have been set to normally
work well in this case (including \code{init_n}, \code{kappa}, \code{pow}, \code{lengthscale},
\code{narrow}, \code{scale_x}, etc.). Thus, few initial grid evaluations are used in
this case, and if a \code{base_seed} is provided, a noiseless process is assumed
and narrowing of the search range with each iteration is performed, and the
uncertainty bounds used in the acquisition function (corresponding to
quantiles from the posterior predictive distribution) are relatively narrow.

\strong{Specifying calibration functions}
\cr\cr
A user-specified calibration function should have the following structure:

\if{html}{\out{<div class="sourceCode">}}\preformatted{# The function must take the arguments x and trial_spec
# trial_spec is the original trial_spec object which should be modified
# (alternatively, it may be re-specified, but the argument should still
# be included, even if ignored)
function(x, trial_spec) \{
  # Calibrate trial_spec, here as in the default function
  trial_spec$superiority <- x
  trial_spec$inferiority <- 1 - x

  # If relevant, known y values corresponding to specific x values may be
  # returned without running simulations (here done as in the default
  # function). In that case, a code block like the one below can be included,
  # with changed x/y values - of note, the other return values should not be
  # changed
  if (x == 1) \{
    return(list(sims = NULL, trial_spec = trial_spec, y = 0))
  \}

  # Run simulations - this block should be included unchanged
  sims <- run_trials(trial_spec, n_rep = n_rep, cores = cores,
                     base_seed = base_seed, sparse = sparse,
                     progress = progress, export = export,
                     export_envir = export_envir)

 # Return results - only the y value here should be changed
 # summary() or check_performance() will often be used here
 list(sims = sims, trial_spec = trial_spec,
      y = summary(sims)$prob_superior)
\}
}\if{html}{\out{</div>}}

\strong{Note:} changes to the trial specification are \strong{not validated}; users who
define their own calibration function need to ensure that changes to
calibrated trial specifications do not lead to invalid values; otherwise,
the procedure is prone to error when simulations are run. Especially, users
should be aware that changing \code{true_ys} in a trial specification generated
using the simplified \code{\link[=setup_trial_binom]{setup_trial_binom()}} and \code{\link[=setup_trial_norm]{setup_trial_norm()}} functions
requires changes in multiple places in the object, including in the functions
used to generate random outcomes, and in these cases (and otherwise if in
doubt) re-generating the \code{trial_spec} instead of modifying should be
preferred as this is safer and leads to proper validation.

\strong{Note:} if the \code{y} values corresponding to certain \code{x} values are known,
then the user may directly return these values without running simulations
(e.g., in the default case an \code{x} of \code{1} will require \verb{>100\%} or \verb{<0\%}
probabilities for stopping rules, which is impossible, and hence the \code{y}
value in this case is by definition \code{0}).

\strong{Gaussian process optimisation function and control hyperparameters}
\cr\cr
The calibration function uses a relatively simple Gaussian process optimisation
function with settings that should work well for the default calibration
function, but can be changed as required, which should be considered if
calibrating according to other targets (effects of using other settings may
be evaluated in greater detail by setting \code{verbose} and \code{plot} to \code{TRUE}).\cr
The function may perform either interpolation (i.e., assuming a noiseless,
deterministic process with no uncertainty at the values already evaluated) or
regression (i.e., assuming a noisy, stochastic process), controlled by the
\code{noisy} argument.\cr

The covariance matrix (or kernel) is defined as:\cr

\verb{exp(-||x - x'||^pow / lengthscale)}\cr

with \verb{||x - x'||} corresponding to a matrix containing the absolute Euclidean
distances of values of \code{x} (and values on the prediction grid), scaled to
the \verb{[0, 1]} range if \code{scale_x} is \code{TRUE} and on their original scale if
\code{FALSE}. Scaling is generally recommended (as this leads to more comparable
and predictable effects of \code{pow} and \code{lengthscale}, regardless of the true
scale), and also recommended if the range of values is smaller than this
range. The absolute distances are raised to the power \code{pow}, which must be a
value in the \verb{[1, 2]} range. Together with \code{lengthscale}, \code{pow} controls the
smoothness of the Gaussian process model, with \code{1} corresponding to less
smoothing (i.e., piecewise straight lines between all evaluations if
\code{lengthscale} is \code{1}) and values \verb{> 1} corresponding to more smoothing. After
raising the absolute distances to the chosen power \code{pow}, the resulting
matrix is divided by \code{lengthscale}. The default is \code{1} (no change), and
values \verb{< 1} lead to faster decay in correlations and thus less smoothing
(more wiggly fits), and values \verb{> 1} lead to more smoothing (less wiggly
fits). If a single specific value is supplied for \code{lengthscale} this is used;
if a range of values is provided, a secondary optimisation process determines
the value to use within that range.\cr

Some minimal noise ("jitter") is always added to the diagonals of the
matrices where relevant to ensure numerical stability; if \code{noisy} is \code{TRUE},
a "nugget" value will be determined using a secondary optimisation process.
\cr

Predictions will be made over an equally spaced grid of \code{x} values of size
\code{resolution}; if \code{narrow} is \code{TRUE}, this grid will only be spread out
between the \code{x} values with corresponding \code{y} values closest to and below and
closest to and above \code{target}, respectively, leading to a finer grid in the
range of relevance (as described above, this should only be used for processes
that are assumed to be noiseless and should only be used if the process can
safely be assumed to be monotonically increasing or decreasing within the
\code{search_range}). To suggest the next \code{x} value for evaluations, the function
uses an acquisition function based on bi-directional uncertainty bounds
(posterior predictive distributions) with widths controlled by the \code{kappa}
hyperparameter. Higher \code{kappa}/wider uncertainty bounds lead to increased
\emph{exploration} (i.e., the algorithm is more prone to select values with high
uncertainty, relatively far from existing evaluations), while lower
\code{kappa}/narrower uncertainty bounds lead to increased \emph{exploitation} (i.e.,
the algorithm is more prone to select values with less uncertainty, closer to
the best predicted mean values). The value in the \code{x} grid leading to one of
the boundaries having the smallest absolute distance to the \code{target} is
chosen (within the narrowed range, if \code{narrow} is \code{TRUE}). See
Greenhill et al, 2020 under \strong{References} for a general description of
acquisition functions.\cr

\strong{IMPORTANT:}
\strong{we recommend that control hyperparameters are explicitly specified}, even
for the default calibration function. Although the default values should be
sensible for the default calibration function, these may change in the
future. Further, we generally recommend users to perform small-scale
comparisons (i.e., with fewer simulations than in the final calibration) of
the calibration process with different hyperparameters for specific use cases
beyond the default (possibly guided by setting the \code{verbose} and \code{plot}
options to \code{TRUE}) before running a substantial number of calibrations or
simulations, as the exact choices may have important influence on the speed
and likelihood of success of the calibration process.\cr
It is the responsibility of the user to specify sensible values for the
settings and hyperparameters.
}
\examples{
\dontrun{
# Setup a trial specification to calibrate
# This trial specification has identical event rates in all arms
# and as the default calibration settings are used, this corresponds to
# assessing the Bayesian type 1 error rate for this design and scenario
binom_trial <- setup_trial_binom(arms = c("A", "B"),
                                 true_ys = c(0.25, 0.25),
                                 data_looks = 1:5 * 200)

# Run calibration using default settings for most parameters
res <- calibrate_trial(binom_trial, n_rep = 1000, base_seed = 23)

# Print calibration summary result
res
}

}
\references{
Gramacy RB (2020). Chapter 5: Gaussian Process Regression. In: Surrogates:
Gaussian Process Modeling, Design and Optimization for the Applied Sciences.
Chapman Hall/CRC, Boca Raton, Florida, USA.
\href{https://bookdown.org/rbg/surrogates/chap5.html}{Available online}.

Greenhill S, Rana S, Gupta S, Vellanki P, Venkatesh S (2020). Bayesian
Optimization for Adaptive Experimental Design: A Review. IEEE Access, 8,
13937-13948. \doi{10.1109/ACCESS.2020.2966228}
}
