library(geex)
library(inferference)
library(dplyr)

This example simulates a simple dataset with a binary exposure A and outcome Y, estimates inverse probability weighted (IPW) means with geex, and then compares the resulting point and variance estimates to closed-form calculations.
n <- 1000
x <- data_frame(
A = rbinom(n, 1, .2),
Y0 = rnorm(n, 0, 1),
Y1 = rnorm(n, 2 * A, 1),
Y = (A*Y1) + (1 - A)*Y0)

ipw_estFUN <- function(data){
A <- data$A
Y <- data$Y
function(theta, phat){
ipw0 <- 1/theta[1]
ipw1 <- 1/theta[2]
# Estimating functions #
c( (1 - A) - theta[1],
A - theta[2],
# Estimating IP weight
Y*(1 - A)*ipw0 - theta[3],
Y*(A)*ipw1 - theta[4],
# Treating IP weight as known
Y*A/phat - theta[5]
)
}
}

phat <- mean(x$A)
out <- m_estimate(ipw_estFUN,
data = x,
inner_args = list(phat = phat),
root_control = setup_root_control(start = c(.5, .5, 0, 0, 0)))

## Comparing point estimates
all.equal(mean(x$Y * x$A/phat), coef(out)[4])
## [1] TRUE
all.equal(phat, coef(out)[2])
## [1] TRUE
## Comparing variance estimates
geex_vcov <- diag(vcov(out)) * n
# estimates match treating propensity as known
all.equal(var(x$Y * x$A/phat) * (n - 1)/n, geex_vcov[5])
## [1] TRUE
# estimates match using influence function approach
y <- x$Y * x$A/phat - mean(x$Y * x$A/phat)
z <- (x$A - phat) / (phat*(1 - phat))
var(y - predict(lm(y ~ z))) - geex_vcov[4] # close
## [1] 0.005655136
An example \(\psi\) function written
in R. This function computes the score functions for a GLM,
plus two counterfactual means estimated by inverse probability
weighting.
eefun <- function(data, model, alpha){
X <- model.matrix(model, data = data)
A <- model.response(model.frame(model, data = data))
Y <- data$Y
function(theta){
p <- length(theta)
p1 <- length(coef(model))
lp <- X %*% theta[1:p1]
rho <- plogis(lp)
hh <- ((rho/alpha)^A * ((1-rho)/(1-alpha))^(1 - A))
IPW <- 1/(exp(sum(log(hh))))
score_eqns <- apply(X, 2, function(x) sum((A - rho) * x))
ce0 <- mean(Y * (A == 0)) * IPW / (1 - alpha)
ce1 <- mean(Y * (A == 1)) * IPW / (alpha)
c(score_eqns,
ce0 - theta[p - 1],
ce1 - theta[p])
}
}

Compare to what inferference gets.
test <- interference(
formula = Y | A ~ X1 | group,
data = vaccinesim,
model_method = 'glm',
allocations = c(.35, .4))
mglm <- glm(A ~ X1, data = vaccinesim, family = binomial)
ce_estimates <- m_estimate(
estFUN = eefun,
data = vaccinesim,
units = 'group',
root_control = setup_root_control(start = c(coef(mglm), .4, .13)),
outer_args = list(alpha = .35, model = mglm)
)
roots(ce_estimates)
## (Intercept)          X1
## -0.36869683 -0.02037916  0.42186669  0.15507946
# Compare parameter estimates
direct_effect(test, allocation = .35)$estimate
## [1] 0.2667872
roots(ce_estimates)[3] - roots(ce_estimates)[4]
##
## 0.2667872
# Compare SE estimates
L <- c(0, 0, 1, -1)
Sigma <- vcov(ce_estimates)
sqrt(t(L) %*% Sigma %*% L) # from geex
##            [,1]
## [1,] 0.03716025
direct_effect(test, allocation = .35)$std.error # from inferference
## [1] 0.02602316
I would expect the two to differ somewhat, since inferference uses a slightly different variance estimator, defined in the web appendix of Perez-Heydrich et al. (2014).
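For reference, a Wald-type 95% confidence interval for the direct effect can be formed from the geex point estimate and sandwich standard error computed above (a quick sketch; it simply recombines the quantities already shown):

# Sketch: Wald 95% CI for the direct effect using the geex estimates above
de_hat <- roots(ce_estimates)[3] - roots(ce_estimates)[4]
se_hat <- sqrt(drop(t(L) %*% Sigma %*% L))
de_hat + c(-1, 1) * qnorm(0.975) * se_hat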
Estimators of causal effects often have the form:
\[\begin{equation} \label{eq:causal} \sum_{i = 1}^m \psi(O_i, \theta) = \sum_{i = 1}^m \begin{pmatrix} \psi(O_i, \nu) \\ \psi(O_i, \beta) \end{pmatrix} = 0, \end{equation}\]
where \(\nu\) are parameters in
nuisance model(s), such as a propensity score model, and \(\beta\) are the target causal parameters.
Even when \(\nu\) represent parameters
in common statistical models, deriving a closed form for a sandwich
variance estimator for \(\beta\) based
on Equation~\(\ref{eq:causal}\) may
involve tedious and error-prone derivative and matrix calculations
[e.g., see the appendices of Lunceford and
Davidian (2004) and Perez-Heydrich et al.
(2014)]. In this example, we show how an analyst can avoid these
calculations and compute the empirical sandwich variance estimator using
geex.
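As a brief reminder of the standard M-estimation result that geex implements, the empirical sandwich variance estimator for the solution \(\hat{\theta}\) of Equation~\(\ref{eq:causal}\) has the form
\[
\hat{\Sigma} = A_m^{-1} B_m \{A_m^{-1}\}^{\top}, \qquad
A_m = \frac{1}{m}\sum_{i = 1}^m \left[ -\frac{\partial \psi(O_i, \theta)}{\partial \theta} \right]_{\theta = \hat{\theta}}, \qquad
B_m = \frac{1}{m}\sum_{i = 1}^m \psi(O_i, \hat{\theta})\, \psi(O_i, \hat{\theta})^{\top},
\]
where geex obtains the derivative matrix \(A_m\) by numerically differentiating \(\psi\) by default. The covariance matrix returned by vcov() corresponds to \(\hat{\Sigma}/m\), which is why the IPW example above multiplied it by n before comparing to a sample variance.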
Lunceford and Davidian (2004) review several estimators of causal effects from observational data. To demonstrate a more complicated estimator involving multiple nuisance models, we implement the doubly robust estimator:
\[\begin{equation} \label{eq:dbr} \hat{\Delta}_{DR} = \frac{1}{m}\sum_{i = 1}^m \left\{ \frac{Z_iY_i - (Z_i - \hat{e}_i) m_1(X_i, \hat{\alpha}_1)}{\hat{e}_i} - \frac{(1 - Z_i)Y_i - (Z_i - \hat{e}_i) m_0(X_i, \hat{\alpha}_0)}{1 - \hat{e}_i} \right\}. \end{equation}\]
This estimator targets the average causal effect, \(\Delta = \E[Y(1) - Y(0)]\), where \(Y(z)\) is the potential outcome for an experimental unit had it been exposed to the level \(z\) of the binary exposure variable \(Z\). The estimated propensity score, \(\hat{e}_i\), is the estimated probability that unit \(i\) received \(z = 1\), and \(m_z(X_i, \hat{\alpha}_z)\) are regression models for the outcome with baseline covariates \(X_i\) and estimated parameters \(\hat{\alpha}_z\). This estimator has the property that if either the propensity score model or the outcome models are correctly specified, then Equation~\(\ref{eq:dbr}\) yields a consistent and asymptotically Normal estimator of \(\Delta\).
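Stacking the score equations of the three nuisance models with the contrast above gives the full estimating function implied by Equation~\(\ref{eq:dbr}\). As a sketch in the notation of Equation~\(\ref{eq:causal}\), writing \(\psi_e\), \(\psi_{m_0}\), and \(\psi_{m_1}\) for the score contributions of the propensity and outcome models with parameters \(\gamma\), \(\alpha_0\), and \(\alpha_1\), and \(e_i = e(X_i, \gamma)\):
\[
\psi(O_i, \theta) =
\begin{pmatrix}
\psi_e(O_i, \gamma) \\
(1 - Z_i)\, \psi_{m_0}(O_i, \alpha_0) \\
Z_i\, \psi_{m_1}(O_i, \alpha_1) \\
\dfrac{Z_iY_i - (Z_i - e_i)\, m_1(X_i, \alpha_1)}{e_i} - \dfrac{(1 - Z_i)Y_i - (Z_i - e_i)\, m_0(X_i, \alpha_0)}{1 - e_i} - \Delta
\end{pmatrix}.
\]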
This estimator and its estimating equations can be translated into an
estFUN as:
dr_estFUN <- function(data, models){
Z <- data$Z
Y <- data$Y
Xe <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$e))
Xm0 <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$m0))
Xm1 <- grab_design_matrix(
data, rhs_formula = grab_fixed_formula(models$m1))
e_pos <- 1:ncol(Xe)
m0_pos <- (max(e_pos) + 1):(max(e_pos) + ncol(Xm0))
m1_pos <- (max(m0_pos) + 1):(max(m0_pos) + ncol(Xm1))
e_scores <- grab_psiFUN(models$e, data)
m0_scores <- grab_psiFUN(models$m0, data)
m1_scores <- grab_psiFUN(models$m1, data)
function(theta){
e <- plogis(Xe %*% theta[e_pos])
m0 <- Xm0 %*% theta[m0_pos]
m1 <- Xm1 %*% theta[m1_pos]
rd_hat <- (Z*Y - (Z - e) * m1)/e - ((1 - Z) * Y - (Z - e) * m0)/(1 - e)
c(e_scores(theta[e_pos]),
m0_scores(theta[m0_pos]) * (Z == 0),
m1_scores(theta[m1_pos]) * (Z == 1),
rd_hat - theta[length(theta)])
}
}

This estFUN presumes that the user will pass a list
containing fitted model objects for the three nuisance models: the
propensity score model and one regression model for each treatment
group. The functions grab_design_matrix and
grab_fixed_formula are geex utilities for
extracting relevant pieces of a model object. The function
grab_psiFUN converts a fitted model object to an estimating
function; for example, for a glm object,
grab_psiFUN uses the fitted object and data to create a function of
theta corresponding to the generalized linear model score
function. The m_estimate function can be wrapped in another
function, wherein nuisance models are fit and passed to
m_estimate.
estimate_dr <- function(data, propensity_formula, outcome_formula){
e_model <- glm(propensity_formula, data = data, family = binomial)
m0_model <- glm(outcome_formula, subset = (Z == 0), data = data)
m1_model <- glm(outcome_formula, subset = (Z == 1), data = data)
models <- list(e = e_model, m0 = m0_model, m1 = m1_model)
nparms <- sum(unlist(lapply(models, function(x) length(coef(x))))) + 1
m_estimate(
estFUN = dr_estFUN,
data = data,
root_control = setup_root_control(start = rep(0, nparms)),
outer_args = list(models = models))
}

The following code provides a function to replicate the simulation settings of Lunceford and Davidian (2004).
library(mvtnorm)
tau_0 <- c(-1, -1, 1, 1)
tau_1 <- tau_0 * -1
Sigma_X3 <- matrix(
c(1, 0.5, -0.5, -0.5,
0.5, 1, -0.5, -0.5,
-0.5, -0.5, 1, 0.5,
-0.5, -0.5, 0.5, 1), ncol = 4, byrow = TRUE)
gen_data <- function(n, beta, nu, xi){
X3 <- rbinom(n, 1, prob = 0.2)
V3 <- rbinom(n, 1, prob = (0.75 * X3 + (0.25 * (1 - X3))))
hold <- rmvnorm(n, mean = rep(0, 4), Sigma_X3)
colnames(hold) <- c("X1", "V1", "X2", "V2")
hold <- cbind(hold, X3, V3)
hold <- apply(hold, 1, function(x){
x[1:4] <- x[1:4] + tau_1^(x[5])*tau_0^(1 - x[5])
x})
hold <- t(hold)[, c("X1", "X2", "X3", "V1", "V2", "V3")]
X <- cbind(Int = 1, hold)
Z <- rbinom(n, 1, prob = plogis(X[, 1:4] %*% beta))
X <- cbind(X[, 1:4], Z, X[, 5:7])
data.frame(
Y = X %*% c(nu, xi) + rnorm(n),
X[ , -1])
}

To show that estimate_dr correctly computes \(\hat{\Delta}_{DR}\), the results from
geex can be compared to computing \(\hat{\Delta}_{DR}\) “by hand” for a
simulated dataset.
dt <- gen_data(1000, c(0, 0.6, -0.6, 0.6), c(0, -1, 1, -1, 2), c(-1, 1, 1))
geex_results <- estimate_dr(dt, Z ~ X1 + X2 + X3, Y ~ X1 + X2 + X3)

e <- predict(glm(Z ~ X1 + X2 + X3, data = dt, family = "binomial"),
type = "response")
m0 <- predict(glm(Y ~ X1 + X2 + X3, data = dt, subset = Z==0), newdata = dt)
m1 <- predict(glm(Y ~ X1 + X2 + X3, data = dt, subset = Z==1), newdata = dt)
del_hat <- with(dt, mean( (Z * Y - (Z - e) * m1)/e)) -
with(dt, mean(((1 - Z) * Y - (Z - e) * m0)/(1 - e)))
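As a final check, the hand-computed \(\hat{\Delta}_{DR}\) should agree with the last root returned by geex (a quick sketch; \(\hat{\Delta}_{DR}\) is the final element because dr_estFUN places it last in theta):

# Sketch: compare the "by hand" estimate with the final parameter from geex
theta_hat <- roots(geex_results)
all.equal(del_hat, unname(theta_hat[length(theta_hat)]))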