# -----------------------------------------------------------------------------
# File: control.R
# Purpose: Functions for setting control parameters for fitting and simulation
# Author: Steffen Maletz
# Last modified: 2026-01-12
# -----------------------------------------------------------------------------


#' @rdname glmstarma_sim.control
#' @title Control Parameters for Simulation of \code{glmstarma} Models
#' @description List of control parameters to be passed to the \code{glmstarma.sim} function.
#' @param return_burn_in Logical; if \code{TRUE}, include the burn-in period in the returned simulated data. Default is \code{FALSE}.
#' @param init_link Character or matrix. Method to initialize first link values in the burn-in period. See details.
#' @param use_sparsity Logical; whether to use sparse matrices for the neighborhood matrices.
#' @param sparsity_threshold Numeric in \eqn{[0, 1]}. Threshold for proportion of non-zero elements for considering neighborhood matrices as sparse (default: \code{2/3}).
#'
#' @details
#' This function validates control arguments for \code{glmstarma.sim}.
#' By default, the initial link values for the burn-in period are generated by calculating the unconditional mean of the process based on the model parameters, ignoring covariates.
#' Different initial link values can submitted as a numeric matrix, with \eqn{p} rows (number of locations) and \code{max_time_lag} columns (maximum time lag of the model).
#'
#' @return A named list of control parameters
#'
#' @seealso \code{\link{glmstarma.sim}}, \code{\link{glmstarma.control}}
#' @export
glmstarma_sim.control <- function(return_burn_in = FALSE, init_link = "parameter", use_sparsity = TRUE, sparsity_threshold = 2 / 3){
    stopifnot("init_link must be 'parameter' or a matrix with first link-values" = (init_link == "parameter") || is.matrix(init_link))
    stopifnot("'return_burn_in' must be logical" = is.logical(return_burn_in))
    stopifnot("'use_sparsity' must be logical" = is.logical(use_sparsity))
    stopifnot("'sparsity_threshold' must be a numeric between 0 and 1" = is.numeric(sparsity_threshold) && sparsity_threshold >= 0 && sparsity_threshold <= 1)
    return(list(return_burn_in = return_burn_in, init_link = init_link, use_sparsity = use_sparsity, sparsity_threshold = sparsity_threshold))
}


#' @rdname glmstarma.control
#' @title Control Parameters for \code{glmstarma} Fitting
#' @description List of control parameters to be passed as an an argument to \code{glmstarma}.
#' @param parameter_init Character or list. Start values for parameter estimation. See details.
#' @param init_link Character or matrix, specifing how to initialize the linear process of the mean model, if regression on the feedback process is included.
#' * \code{"first_obs"}: Use the first (transformed) observed values at each location.
#' * \code{"mean"}: Use the mean of the (transformed) observed values at each location.
#' * \code{"transformed_mean"}: Calculates the mean of the obsverved values at each location and transforms it by the link function.
#' * \code{"zero"}: Use zero as initial value.
#' * (numeric matrix) specifying starting values (rows = location, columns = time, must match maximum temporal order of model)
#' @param dispersion_est_type Character. Estimation of global dispersion parameter either based on deviance (\code{"deviance"}) or pearson residuals (\code{"pearson"}), if applicable.
#' @param use_sparsity Logical; whether to use sparse matrices for the neighborhood matrices.
#' @param sparsity_threshold Numeric in \eqn{[0, 1]}. Threshold for proportion of non-zero elements for considering neighborhood matrices as sparse (default: \code{2/3}).
#' @param method Character. Optimization method to be used. Options are:
#' * \code{"nloptr"} (requires \pkg{nloptr}, default),
#' * \code{"optim"} (base R \code{\link{optim}})
#' @param constrained Logical; whether to use parameter constraints ensuring a stationary solution. Only works with \code{method = "nloptr"}.
#' @param constraint_tol Numeric. Tolerance for constraint satisfaction.
#' @param constrain_method Character. Method for applying parameter constraints.
#' * \code{"sum_of_absolutes"}: Sum of absolute values of parameters is constrained
#' * \code{"absolute_sum"}: Absolute sum of parameters is constrained. (only intended for univariate models)
#' * \code{"soft"}: Constraints for \code{"softplus"} and \code{"softclipping"} link functions (not available for different link functions).
#' @param gradtol Numeric. Tolerance for gradient convergence. See details.
#' @param changetol Numeric. Tolerance for parameter change convergence. See details.
#' @param trace Integer. Level of tracing output. See details.
#' @param fnscale Numeric. Scaling factor for the objective function. See details.
#' @param maxit Integer. Maximum number of iterations. See details.
#' @param abstol Numeric. Absolute convergence tolerance. See details.
#' @param reltol Numeric. Relative convergence tolerance. See details.
#' @param lmm Integer. Limited-memory BFGS parameter. See details.
#' @param factr Numeric. Factor for controlling the convergence tolerance. See details.
#' @param pgtol Numeric. Tolerance for projected gradient convergence. See details.
#' @return A named list of control parameters
#' @details This function is called internally in \code{glmstarma} to validate control parameters in the \code{control} argument.
#'
#'
#' The arguments \code{constraint_tol}, \code{gradtol}, \code{changetol}, \code{trace}, \code{fnscale}, \code{maxit}, \code{abstol}, \code{reltol}, \code{lmm}, \code{factr}, and \code{pgtol} are passed to the optimization routines and control the convergence behavior and output.
#' Some of these arguments are not used by all optimization methods. 
#'
#' The \code{optim} method uses the L-BFGS-B algorithm when non-negative parameters are required, otherwise the BFGS algorithm is used. Stationarity constraints cannot be applied when using \code{optim}.
#' Only if \code{method = "nloptr"} stationarity constraints are supported, and the specified \code{constrain_method} is applied. For optimization we use the SLSQP routine. The constraints implied by \code{constrain_method} are given by:
#' * \code{"sum_of_absolutes"}: \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i} | \alpha_{i\ell} | + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j} | \beta_{j\ell} | \leq 1}
#' * \code{"absolute_sum"}: \deqn{\left|\sum_{i = 1}^q\sum_{\ell = 0}^{a_i}  \alpha_{i\ell}  + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j}  \beta_{j\ell} \right| \leq 1}
#' * \code{"soft"}: \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i}  \max \lbrace 0, \alpha_{i\ell} \rbrace  + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j}  \max\lbrace0, \beta_{j\ell}\rbrace \leq 1} and \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i} | \alpha_{i\ell} | < 1}
#' 
#' Start values for the optimization can be provided as a named list via \code{parameter_init} or as a character. If a named list is provided, these must match the model orders, see \code{\link{glmstarma.sim}}. Otherwise, \code{parameter_init} must be one of the following:
#' * \code{"zero"}: All parameters initialized to (near) zero. If parameters must be non-negative a small value within the feasible region is used.
#' * \code{"random"}: All parameters initialized to random values in the stationary region of the model.
#'
#' In case of a negative binomial family, the global dispersion parameter is always estimated using dispersion_est_type = "pearson". It corresponds to the shape parameter of the negative binomial distribution.
#'
#' @seealso \code{\link{glmstarma}}, \code{\link[nloptr]{nloptr}}, \code{\link{optim}}
#' @examples
#' \donttest{
#' dat <- load_data("chickenpox", directory = tempdir())
#' chickenpox <- dat$chickenpox
#' population_hungary <- dat$population_hungary
#' W_hungary <- dat$W_hungary
#'
#' model_autoregressive <- list(past_obs = rep(1, 7))
#' glmstarma(chickenpox, model_autoregressive, W_hungary, family = vpoisson("log"), 
#'           control = list(parameter_init = "random", init_link = "mean"))
#' }
#' @export
glmstarma.control <- function(parameter_init = "zero", init_link = "first_obs", dispersion_est_type = "deviance", use_sparsity = TRUE, sparsity_threshold = 2 / 3, 
                                method = "nloptr", constrained = TRUE, constraint_tol = 1e-8, constrain_method = "sum_of_absolutes", gradtol = sqrt(.Machine$double.eps), changetol = sqrt(.Machine$double.eps),
                                trace = 0L, fnscale = 1.0, maxit = 10000L, abstol = -Inf, reltol = sqrt(.Machine$double.eps), lmm = 5, factr = 1e7, pgtol = 0.0){
    if(is.character(parameter_init)){
        parameter_init <- match.arg(parameter_init, c("zero", "random"))
    } else {
        stopifnot("If not automatically chosen, parameters must be submitted as list" = is.list(parameter_init))
    }
    constrain_method <- match.arg(constrain_method, c("sum_of_absolutes", "absolute_sum", "soft"))
    if(is.character(init_link)){
        init_link <- match.arg(init_link, c("first_obs", "mean", "transformed_mean", "parameter", "zero"))
    } else {
        stopifnot("init_link has to be a character or a matrix" = is.matrix(init_link))
    }

    dispersion_est_type <- match.arg(dispersion_est_type, c("deviance", "pearson"))
    method <- match.arg(method, c("nloptr", "optim"))
    return(list(constrained = constrained, constraint_tol = constraint_tol, constrain_method = constrain_method, 
                parameter_init = parameter_init, init_link = init_link, 
                use_sparsity = use_sparsity, sparsity_threshold = sparsity_threshold, 
                method = method, trace = trace, gradtol = gradtol, 
                changetol = changetol, fnscale = fnscale, maxit = maxit, 
                abstol = abstol, reltol = reltol, lmm = lmm, factr = factr, 
                pgtol = pgtol, dispersion_est_type = dispersion_est_type))
}


#' @rdname dglmstarma.control
#' @title Control Parameters for \code{dglmstarma} Fitting
#' @description List of control parameters to be passed as an an argument to \code{dglmstarma}.
#' @param parameter_init Character or list. Start values for parameter estimation. See details.
#' @param parameter_init_dispersion Character or list. Start values for dispersion parameter estimation. See details.
#' @param use_sparsity Logical; whether to use sparse matrices for the neighborhood matrices.
#' @param sparsity_threshold Numeric in \eqn{[0, 1]}. Threshold for proportion of non-zero elements for considering neighborhood matrices as sparse (default: \code{2/3}).
#' @param init_link Character or matrix, specifing how to initialize the linear process of the mean model, if regression on the feedback process is included.
#' * \code{"first_obs"}: Use the first (transformed) observed values at each location.
#' * \code{"mean"}: Use the mean of the (transformed) observed values at each location.
#' * \code{"transformed_mean"}: Calculates the mean of the obsverved values at each location and transforms it by the link function.
#' * \code{"zero"}: Use zero as initial value.
#' * (numeric matrix) specifying starting values (rows = location, columns = time, must match maximum temporal order of model)
#' @param init_dispersion Character or matrix, specifing how to initialize the linear process of the dispersion model, if feedback mechanism is included in the dispersion model.
#' * \code{"first_obs"}: Use the first (transformed) values at each location.
#' * \code{"mean"}: Use the mean of the (transformed) values at each location.
#' * \code{"transformed_mean"}: Calculates the mean of the values at each location and transforms it by the link function.
#' * \code{"zero"}: Use zero as initial value.
#' * (numeric matrix) specifying starting values (rows = location, columns = time, must match maximum temporal order of the dispersion model)
#' @param print_progress Logical; whether to print progress information during fitting.
#' @param print_warnings Logical; whether to print warnings if convergence was not achieved (only applicable if \code{print_progress} is \code{TRUE}).
#' @param convergence_threshold Numeric. Convergence threshold for fitting procedure. See details.
#' @param max_fits Integer. Maximum number of iterations between fitting mean and dispersion model. See details.
#' @param use_fast_if_const_dispersion Logical; whether to use a faster fitting method if the dispersion model is constant, i.e. only an intercept model. See details.
#' @param use_backtracking Logical; whether to use backtracking line search when updating parameters in the fitting procedure. Default is \code{TRUE}. See details.
#' @param alpha_shrink Numeric; shrinkage factor for backtracking line search. Default is \code{0.5}.
#' @param alpha_start Numeric; initial step size for backtracking line search. Default is \code{1.0}.
#' @param min_alpha Numeric; minimum step size for backtracking line search. Default is \code{0.05}.
#' @param lower_dispersion Numeric. Lower bound for pseudo observations. See details.
#' @param upper_dispersion Numeric. Upper bound for pseudo observations. See details.
#' @param drop_max_mean_lag Logical; whether to drop the first \code{max_time_lag} observations of the mean model when fitting the dispersion model. Default is \code{TRUE} (recommended).
#' @param previous_param_as_start Logical; whether to use the parameter estimates of the previous fitting step as starting values for the next fitting step when iterating between fitting mean and dispersion model. If \code{FALSE}, the initial parameter values specified via \code{parameter_init} and \code{parameter_init_dispersion} are used for each fitting step. Default is \code{FALSE}.
#' @param method Character. Optimization method to be used. Options are:
#' * \code{"nloptr"} (requires \pkg{nloptr}, default),
#' * \code{"optim"} (base R \code{\link{optim}})
#' @param constrained_mean Logical; whether to use parameter constraints ensuring a stable solution. Only works with \code{method = "nloptr"}.
#' @param constrained_dispersion Logical; whether to use parameter constraints ensuring a stable solution for the dispersion model. Only works with \code{method = "nloptr"}.
#' @param constraint_tol Numeric. Tolerance for fulfilling constraint.
#' @param constrain_method_mean Character. Method for applying parameter constraints.
#' * \code{"sum_of_absolutes"}: Sum of absolute values of parameters is constrained
#' * \code{"absolute_sum"}: Absolute sum of parameters is constrained. (only intended for univariate models)
#' * \code{"soft"}: Constraints for \code{"softplus"} and \code{"softclipping"} link functions (not available for different link functions).
#' @param constrain_method_dispersion Character. Method for applying parameter constraints for the dispersion model.
#' * \code{"sum_of_absolutes"}: Sum of absolute values of parameters is constrained
#' * \code{"absolute_sum"}: Absolute sum of parameters is constrained. (only intended for univariate models)
#' @param gradtol Numeric. Tolerance for gradient convergence. See details.
#' @param changetol Numeric. Tolerance for parameter change convergence. See details.
#' @param trace Integer. Level of tracing output. See details.
#' @param fnscale Numeric. Scaling factor for the objective function. See details.
#' @param maxit Integer. Maximum number of iterations. See details.
#' @param abstol Numeric. Absolute convergence tolerance. See details.
#' @param reltol Numeric. Relative convergence tolerance. See details.
#' @param lmm Integer. Limited-memory BFGS parameter. See details.
#' @param factr Numeric. Factor for controlling the convergence tolerance. See details.
#' @param pgtol Numeric. Tolerance for projected gradient convergence. See details.
#' @return A named list of control parameters
#' @details This function is called internally in \code{dglmstarma} to validate control parameters in the \code{control} argument.
#'
#' The arguments \code{constraint_tol}, \code{gradtol}, \code{changetol}, \code{trace}, \code{fnscale}, \code{maxit}, \code{abstol}, \code{reltol}, \code{lmm}, \code{factr}, and \code{pgtol} are passed to the optimization routines and control the convergence behavior and output.
#' Some of these arguments are not used by all optimization methods. 
#'
#' Iteration between fitting the mean and dispersion model stops when relative change in log-likelihood or absolute change in parameters is below \code{convergence_threshold} or when \code{max_fits} is reached.
#'
#' The \code{optim} method uses the L-BFGS-B algorithm when non-negative parameters are required, otherwise the BFGS algorithm is used. Stability constraints cannot be applied when using \code{optim}.
#' Only if \code{method = "nloptr"} stability constraints are supported, and the specified \code{constrain_method} is applied. For optimization we use the SLSQP routine. The constraints implied by \code{constrain_method} are given by:
#' * \code{"sum_of_absolutes"}: \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i} | \alpha_{i\ell} | + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j} | \beta_{j\ell} | \leq 1}
#' * \code{"absolute_sum"}: \deqn{\left|\sum_{i = 1}^q\sum_{\ell = 0}^{a_i}  \alpha_{i\ell}  + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j}  \beta_{j\ell} \right| \leq 1}
#' * \code{"soft"}: \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i}  \max\lbrace0, \alpha_{i\ell}\rbrace  + \sum_{j = 1}^r \sum_{\ell = 0}^{b_j}  \max\lbrace0, \beta_{j\ell}\rbrace \leq 1} and \deqn{\sum_{i = 1}^q\sum_{\ell = 0}^{a_i} | \alpha_{i\ell} | < 1}
#'
#' To avoid numerical issues when fitting the dispersion model, the pseudo observations are clamped in between \code{lower_dispersion} and \code{upper_dispersion}.
#'
#' If the dispersion model is constant (i.e., only an intercept), setting \code{use_fast_if_const_dispersion = TRUE} the dispersion parameters are estimated using means or colMeans of the Pearson or deviance residuals instead of optimizing the dispersion model.
#' Note that this sets the dispersion_link to identity during fitting.
#'
#' If \code{use_backtracking = TRUE}, the fitting procedure aims to increase the total log-likelihood of the model after each fit by applying a backtracking line search.
#'
#' Start values for the optimization can be provided as a named list via \code{parameter_init} or as a character. If a named list is provided, these must match the model orders, see \code{\link{glmstarma.sim}}. Otherwise, \code{parameter_init} must be one of the following:
#' * \code{"zero"}: All parameters initialized to (near) zero. If parameters must be non-negative a small value within the feasible region is used.
#' * \code{"random"}: All parameters initialized to random values in the stationary region of the model.
#' @seealso \code{\link{dglmstarma}}, \code{\link[nloptr]{nloptr}}, \code{\link{optim}}
#' @examples
#' \donttest{
#' dat <- load_data("chickenpox", directory = tempdir())
#' chickenpox <- dat$chickenpox
#' population_hungary <- dat$population_hungary
#' W_hungary <- dat$W_hungary
#' mean_model <- list(past_obs = 1)
#' dispersion_model <- list(past_obs = 1)
#' dglmstarma(chickenpox, mean_model, dispersion_model, mean_family = vquasipoisson("log"), 
#'           dispersion_link = "log", W_hungary,
#'           control = list(parameter_init = "random", print_progress = FALSE))
#' }
#' @export
dglmstarma.control <- function(parameter_init = "zero", parameter_init_dispersion = "zero",
                                use_sparsity = TRUE, sparsity_threshold = 2 / 3,
                                init_link = "first_obs", init_dispersion = "first_obs",
                                use_backtracking = TRUE, alpha_shrink = 0.5, alpha_start = 1.0, min_alpha = 0.05,
                                print_progress = TRUE, print_warnings = FALSE, convergence_threshold = 1e-6, max_fits = 50L, use_fast_if_const_dispersion = FALSE,
                                lower_dispersion = 1e-7, upper_dispersion = 1e6, drop_max_mean_lag = TRUE, previous_param_as_start = FALSE,
                                method = "nloptr", constrained_mean = TRUE, constrained_dispersion = TRUE, constraint_tol = 1e-8,
                                constrain_method_mean = "sum_of_absolutes", constrain_method_dispersion = "sum_of_absolutes", gradtol = sqrt(.Machine$double.eps), changetol = sqrt(.Machine$double.eps),
                                trace = 0L, fnscale = 1.0, maxit = 10000L, abstol = -Inf, reltol = sqrt(.Machine$double.eps), lmm = 5, factr = 1e7, pgtol = 0.0)
{
    if(is.character(parameter_init)){
        parameter_init <- match.arg(parameter_init, c("zero", "random")) # Evtl. noch andere Methoden ergaenzen
    } else {
        stopifnot("If not automatically chosen, parameters must be submitted as list" = is.list(parameter_init))
    }
    if(is.character(parameter_init)){
        parameter_init <- match.arg(parameter_init, c("zero", "random")) # Evtl. noch andere Methoden ergaenzen
    } else {
        stopifnot("If not automatically chosen, parameters must be submitted as list" = is.list(parameter_init))
    }
    constrain_method_mean <- match.arg(constrain_method_mean, c("sum_of_absolutes", "absolute_sum", "soft"))
    constrain_method_dispersion <- match.arg(constrain_method_dispersion, c("sum_of_absolutes", "absolute_sum"))
    if(is.character(init_link)){
        init_link <- match.arg(init_link, c("first_obs", "mean", "transformed_mean", "zero"))
    } else {
        stopifnot("init_link has to be a character or a matrix" = is.matrix(init_link))
    }
    if(is.character(init_dispersion)){
        init_dispersion <- match.arg(init_dispersion, c("first_obs", "mean", "transformed_mean", "zero"))
    } else {
        stopifnot("init_dispersion has to be a character or a matrix" = is.matrix(init_dispersion))
    }
    method <- match.arg(method, c("nloptr", "optim"))

    use_sparsity <- as.logical(use_sparsity)
    sparsity_threshold <- as.numeric(sparsity_threshold)
    drop_max_mean_lag <- as.logical(drop_max_mean_lag)
    previous_param_as_start <- as.logical(previous_param_as_start)
    print_progress <- as.logical(print_progress)
    print_warnings <- as.logical(print_warnings)
    use_backtracking <- as.logical(use_backtracking)
    stopifnot("'convergence_threshold' must be numeric and non-negative" = is.numeric(convergence_threshold) && convergence_threshold >= 0,
              "'max_fits' must be a positive integer" = is.numeric(max_fits) && max_fits > 0 && (max_fits %% 1 == 0),
              "'lower_dispersion' must be numeric and non-negative" = is.numeric(lower_dispersion) && lower_dispersion >= 0,
              "'upper_dispersion' must be numeric and larger than 'lower_dispersion'" = is.numeric(upper_dispersion) && upper_dispersion > lower_dispersion,
              "'alpha_shrink', 'alpha_start', and 'min_alpha' must be numeric and positive" = is.numeric(alpha_shrink) && alpha_shrink > 0 && is.numeric(alpha_start) && alpha_start > 0 && is.numeric(min_alpha) && min_alpha > 0,
              "'alpha_shrink' must be less than 1" = alpha_shrink < 1,
              "'alpha_start' must be less than or equal to 1" = alpha_start <= 1,
              "'min_alpha' must be less than or equal to 'alpha_start'" = min_alpha <= alpha_start)

    return(list(parameter_init = parameter_init, parameter_init_dispersion = parameter_init_dispersion, use_sparsity = use_sparsity, sparsity_threshold = sparsity_threshold, use_backtracking = use_backtracking, alpha_shrink = alpha_shrink, alpha_start = alpha_start, min_alpha = min_alpha,
                                init_link = init_link, init_dispersion = init_dispersion, print_progress = print_progress, print_warnings = print_warnings, convergence_threshold = convergence_threshold, max_fits = max_fits, use_fast_if_const_dispersion = use_fast_if_const_dispersion,
                                lower_dispersion = lower_dispersion, upper_dispersion = upper_dispersion, drop_max_mean_lag = drop_max_mean_lag, previous_param_as_start = previous_param_as_start, method = method, constrained_mean = constrained_mean, constrained_dispersion = constrained_dispersion, constraint_tol = constraint_tol,
                                constrain_method_mean = constrain_method_mean, constrain_method_dispersion = constrain_method_dispersion, gradtol = gradtol, changetol = changetol, trace = trace, fnscale = fnscale, maxit = maxit, abstol = abstol,
                                reltol = reltol, lmm = lmm, factr = factr, pgtol = pgtol))
}
