#' tscope: The two-stage copula endogeneity (2sCOPE) control function regression
#'
#' @description Fit the two-stage copula endogeneity (2sCOPE) control function regression for addressing regressor
#'   endogeneity.
#'
#' @details The `formula` argument is either in the 1-bar form `Y ~ X | P` or the 2-bar form `Y ~ X | P | W`, where
#' `X` represents the explanatory variable(s) in the `Y` model, `P` represents the continuous
#' endogenous regressors, and `W` represents the exogenous regressors.  If `X` contains no
#' exogenous regressors, then the 2sCOPE model reduces to the simpler model in Park and Gupta (2012)
#' and returns \eqn{P^*} (the copula transformation of `P`) as CCF and \eqn{W^*} (the copula transformation of `W`) as null.
#' When the structural outcome model includes an intercept, copula transformations of regressors in `P` and `W`  use the
#' optimized algorithm (Equation 9 in Qian, Koschmann, and Xie, 2025)  to avoid estimation bias.
#'
#' The function will add copula control function for each endogenous regressor specified in `P`.
#' Only first-order terms of endogenous regressors need to be included in `P`, even when the structural outcome model
#' contains higher-order terms of endogenous regressors. This is because including copula control functions for the
#' first-order endogenous regressors is sufficient to control for endogeneity, while adding control functions for
#' higher-order endogenous terms---such as interactions among endogenous regressors, interactions between endogenous and
#' exogenous regressors, or squared endogenous regressors---is unnecessary and can substantially degrade the performance
#' of copula correction (Qian, Koschmann, and Xie, 2025). This parsimonious treatment of higher-order endogenous
#' regressors is a merit of copula correction.
#'
#' Thus, if `X` contains no higher-order terms of endogenous regressors, the simpler 1-bar form `Y ~ X | P`
#' can be used, and `tscope()` treats all regressors in `X` except those in `P` as exogenous.
#' When `X` includes higher-order endogenous terms, the 2-bar form  `Y ~ X | P | W` should be used to explicitly specify
#'  the exogenous regressors in `W` and ensure that the higher-order endogenous terms are not treated as exogenous variables.
#'
#'  The extra generated regressors are denoted by `ccf:`
#'   followed by the associated endogenous regressor in the model output.
#'   The correlations between the endogenous regressors and the structural error
#'   of the model are denoted by `cor:` followed by the associated endogenous
#'   regressor.
#'
#' @param formula a formula describing the model to be fitted. The details of
#'   model specification are given under ``Details''.
#' @param data  a data frame, list, or environment containing the variables in
#'   the model.
#' @param nboot a single numeric value (at least 1) giving the number of
#'   bootstrap samples used to compute the standard errors of the 2sCOPE model
#'   estimates. `nboot = 1` skips the bootstrap and returns only the parameter
#'   estimates, without standard errors.
#'
#' @return a data.frame of class "`tscope`" containing the following
#'   components: \item{Est}{the coefficients and other contents of the
#'   2sCOPE model. The first section contains the coefficient estimates of
#'   the original regressors. The second section contains the coefficient
#'   estimates of the generated regressors (also known as copula terms or copula control functions).
#'   The third section contains the correlation(s) between the endogenous regressor(s) and
#'   the structural error of the model, which represents the
#'   strength and size of the endogeneity of the model, as well as sigma representing the standard deviation
#'   of the structural error term.}
#'   \item{boot.SE}{standard errors for the coefficient estimates obtained from
#'   bootstrapping} \item{z value}{z score of the associated coefficient
#'   estimate} \item{Pr(>|z|)}{p-value of the associated coefficient estimate}
#'
#' @importFrom stats cor cor.test ecdf lm model.matrix pnorm qnorm reformulate
#'   sd terms
#'
#' @examples
#'
#' data("diapers") #load data
#'
#' #run an OLS model to compare results to 2sCOPE
#' ols <- lm(logVol ~ logPrice+Fshare+week+Q2+Q3+Q4, data = diapers)
#' summary(ols)
#'
#' #run 2sCOPE with 1-bar option
#' tscope_model_1bar <- tscope(logVol ~ logPrice+Fshare+week+Q2+Q3+Q4|logPrice,
#'   data = diapers, nboot = 300)
#' tscope_model_1bar
#'
#' #run 2sCOPE with 2-bar option
#' tscope_model_2bar <- tscope(logVol ~ logPrice+Fshare+week+Q2+Q3+Q4 |logPrice|
#'   Fshare+week+Q2+Q3+Q4,
#'   data = diapers, nboot = 300)
#' tscope_model_2bar
#'
#' #notice how both the 1-bar and 2-bar options produce the same parameter estimates,
#' #and that the results differ from OLS after correcting for endogeneity.
#' #the standard errors are not the same because they are obtained from bootstrapping.
#'
#' #run Park and Gupta (2012) model
#' pg <- tscope(logVol ~ logPrice|logPrice, data = diapers, nboot = 300)
#' pg
#'
#' @references
#' Qian, Y., Koschmann, A., & Xie, H. (2025).
#' \emph{EXPRESS: A Practical Guide to Endogeneity Correction Using Copulas.}
#' Journal of Marketing. <doi:10.1177/00222429251410844>\cr
#'
#' Park, S., & Gupta, S. (2012).
#' \emph{Handling endogenous regressors by joint estimation using copulas.}
#' Marketing Science, 31(4), 567-586.\cr
#'
#' Yang, F., Qian, Y., & Xie, H. (2025).
#' \emph{Addressing Endogeneity Using a Two-Stage Copula Generated Regressor Approach.}
#' Journal of Marketing Research, 62(4), 601-623.
#' <doi:10.1177/00222437241296453>\cr
#'
#' @export
tscope <- function(formula, data, nboot = 500) {
  # Fail fast with a readable message: a replicate count below 1 cannot fill
  # the bootstrap matrix and previously surfaced as an obscure subscript error.
  if (length(nboot) != 1L || is.na(nboot) || nboot < 1) {
    stop("`nboot` must be a single number greater than or equal to 1.",
         call. = FALSE)
  }

  # The flags in `.internal_env` are shared state that outlives this call.
  # Restore them on every exit path (including errors raised while fitting)
  # so that a failed run cannot leave `run_fstat` switched off for later calls.
  # NOTE(review): the flags are presumably read by tscope.fit(), which is
  # defined outside this block -- confirm their exact meaning there.
  on.exit({
    .internal_env$run_fstat <- TRUE
    .internal_env$high_order_terms_warning <- TRUE
    .internal_env$p_and_g_output <- TRUE
    .internal_env$remove_high_order_terms <- TRUE
    .internal_env$p_and_w_remove_w <- TRUE
  }, add = TRUE)

  # Point estimates from the full sample; the first row of the fit holds them.
  est.preliminary <- tscope.fit(formula, data)
  est <- est.preliminary[1, ]
  fstat <- attr(est.preliminary, "f_stat")

  if (nboot == 1) {
    # No bootstrap requested: report the estimates only.
    res <- as.data.frame(est)
    colnames(res) <- "Est"
  } else {
    n_obs <- nrow(data)
    boot_est <- matrix(0, nboot, length(est))
    for (i in seq_len(nboot)) {
      # Resample rows with replacement; sample.int() draws the same stream as
      # sample(1:n, ...) but stays well defined when there are no rows.
      idx <- sample.int(n_obs, size = n_obs, replace = TRUE)
      bootdata <- data[idx, ]
      # Switch the F-statistic flag off before every bootstrap refit.
      .internal_env$run_fstat <- FALSE
      boot_est[i, ] <- tscope.fit(formula, bootdata)[1, ]
    }

    res <- matrix(0, ncol = 4, nrow = length(est))
    res[, 1] <- est
    res[, 2] <- apply(boot_est, 2, sd)
    res[, 3] <- res[, 1] / res[, 2]
    # Two-sided normal p-value, vectorised. Unlike a scalar `if (z > 0)`, this
    # propagates NA/NaN z values (e.g. zero or missing bootstrap SE) instead
    # of aborting the whole call.
    res[, 4] <- 2 * pnorm(-abs(res[, 3]))
    rownames(res) <- names(est)
    colnames(res) <- c("Est", "boot.SE", "z value", "Pr(>|z|)")
  }

  res <- as.data.frame(res)
  class(res) <- c("tscope", "data.frame")
  # Carry the fit metadata along on the returned object.
  attr(res, "nx") <- attr(est.preliminary, "nx")
  attr(res, "np") <- attr(est.preliminary, "np")
  attr(res, "f_stat") <- fstat
  res
}
