require(geepack)
## Loading required package: geepack

G-estimation (univariate/no interaction)

\[Y_i^A - Y_i^{a=0} = \psi A_i, \quad \psi = 2\]

We will search for \(\hat{\psi}\) over a grid from 1.5 to 2.5, in steps of 0.01.
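
The logic of the search, matching the code below: for each candidate \(\psi\), form the residual \(H(\psi) = Y - \psi A\) and fit the logistic treatment model

\[\operatorname{logit} \Pr(A_i = 1 \mid L_i, H_i(\psi)) = \alpha_0 + \alpha_1 L_i + \alpha_2 H_i(\psi).\]

Under conditional exchangeability given \(L\), the true \(\psi\) makes \(H(\psi)\) independent of \(A\) given \(L\), so we choose the \(\hat{\psi}\) whose \(\hat{\alpha}_2\) is closest to 0 (equivalently, whose p value is largest).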

##################################################################
set.seed(123)
# Generate counterfactual outcomes Y1 and Y0, and the observed outcome Y
n <- 1000
beta <- 2.0
L <- rnorm(n, 0, 1)
A <- rbinom(n, 1, 1/(1 + exp(L)))
Y0 <- L + L^2
Y1 <- Y0 + A * beta
Y <- Y1*(A==1) + Y0*(A==0)

data <- data.frame(A = A, Y = Y, L = L, id = 1:n)
grid <- seq(from = 1.5, to = 2.5, by = 0.01) # set by = 0.001 for a finer estimate
Hpsi.coefs <- double(length(grid))
Hpsi.var <- double(length(grid))
j <- 0
for (i in grid){
  psi <- i; j <- j + 1
  data$Hpsi <- data$Y - psi * data$A
  # Alternative: GEE (generalized estimating equations) via geepack;
  # it needs fewer assumptions and is more robust, but takes more time.
  # gee.obj <- geeglm(A ~ L + Hpsi, data = data,
  #             id = id, corstr = "independence", family = binomial(logit))
  # result <- summary(gee.obj)
  # Hpsi.coefs[j] <- coef(gee.obj)["Hpsi"]
  # Hpsi.var[j] <- result$cov.scaled[3,3]

  # Logistic treatment model: at the true psi, the Hpsi coefficient should be 0
  lm0 <- glm(A ~ L + Hpsi, data = data, family = "binomial")
  result <- summary(lm0)
  Hpsi.coefs[j] <- coef(lm0)["Hpsi"]
  Hpsi.var[j] <- result$cov.scaled[3,3]
}

Hpsi.p <- 1 - pchisq(Hpsi.coefs^2 / Hpsi.var, 1) # Wald chi-square statistic, d.f. = 1
plot(grid, abs(Hpsi.coefs), main = "|alpha| ~ psi")

plot(grid, Hpsi.p, main = "p value ~ psi")

store.results <- as.data.frame(cbind(grid, abs(Hpsi.coefs), Hpsi.p))
names(store.results) <- c("grid", "Hpsi.est", "p.value")
store.results[store.results$Hpsi.est == min(store.results$Hpsi.est),]
##    grid     Hpsi.est   p.value
## 45 1.94 0.0004991559 0.9941772
chosen <- which(store.results$p.value > 0.05)
cat("psi estimate = ", store.results$grid[which.max(store.results$p.value)])
## psi estimate =  1.94
cat("95% confidence interval = (", min(store.results$grid[chosen]), ",", max(store.results$grid[chosen]), ")")
## 95% confidence interval = ( 1.81 , 2.07 )
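
As a sanity check on the grid search: for this one-parameter model, the g-estimating equation \(\sum_i H_i(\psi)\,(A_i - \Pr[A_i=1 \mid L_i]) = 0\) with \(H(\psi) = Y - \psi A\) can be solved in closed form. A minimal sketch, reusing the simulated data above (ps.fit, p.score, and psi.hat are names introduced here):

# Closed form: psi.hat = sum(Y*(A - p)) / sum(A*(A - p)),
# where p is the fitted propensity score Pr[A=1|L]
ps.fit <- glm(A ~ L, data = data, family = "binomial")
p.score <- predict(ps.fit, type = "response")
psi.hat <- sum(data$Y * (data$A - p.score)) / sum(data$A * (data$A - p.score))
psi.hat # should land near the grid-search estimate of 1.94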

G-estimation (bivariate/one interaction term)

\[Y_i^A - Y_i^{a=0} = \psi_1 A_i + \psi_2 A_i V_i, \quad \psi_1 = \psi_2 = 1\]

where \(\psi_1\) is the treatment’s main causal effect and \(\psi_2\) is the coefficient of the treatment–covariate interaction \(A V\).

1. We will set two grids from 0.5 to 1.5, in steps of 0.02, and search for \((\hat{\psi}_1,\hat{\psi}_2)\) with a double for-loop.

2. When there are two parameters \(\psi_1\) and \(\psi_2\), there would be two separate p values; what should we do? Answer: we will perform an overall hypothesis test of \(\alpha_1 = \alpha_2 = 0\), which gives us a single p value, as shown below.
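
Concretely, with \(D_0\) the deviance of the reduced model A ~ L and \(D_1(\psi_1,\psi_2)\) the deviance of the full model A ~ L + Hpsi + Hpsi:V, the overall p value computed in the loop below is

\[p(\psi_1,\psi_2) = 1 - F_{\chi^2_2}\!\big(D_0 - D_1(\psi_1,\psi_2)\big),\]

and the 95% confidence region is the set of grid points with \(p(\psi_1,\psi_2) > 0.05\).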

# G-estimation: checking multiple possible values of psi
##################################################################

set.seed(123)
n <- 1000
beta1 <- 1.0
beta2 <- 1.0
L <- rnorm(n, 0, 1)
V <- rnorm(n, 0, 1)
A <- rbinom(n, 1, 1/(1 + exp(L)))
Y0 <- L + L^2
Y1 <- Y0 + A * beta1 + A*V*beta2
Y <- Y1*(A==1) + Y0*(A==0)
data <- data.frame(A = A, Y = Y, L = L, V = V)
grid1 <- seq(from = 0.5, to = 1.5, by = 0.02)
grid2 <- seq(from = 0.5, to = 1.5, by = 0.02)
Hpsi.p <- matrix(0, length(grid1), length(grid2))

# The reduced model A ~ L does not involve Hpsi, so it can be fit once outside the loop
lm0 <- glm(A ~ L, data = data, family = "binomial")
for (ii in 1:length(grid1)){
  for (jj in 1:length(grid2)){
    psi1 <- grid1[ii]
    psi2 <- grid2[jj]
    data$Hpsi <- data$Y - psi1 * data$A - psi2 * data$V * data$A
    lm1 <- glm(A ~ L + Hpsi + Hpsi:V, data = data, family = "binomial")
    # Test "coefficients of Hpsi and Hpsi:V are both 0": the deviance
    # difference follows a chi-square distribution with d.f. = 2
    Hpsi.p[ii,jj] <- 1 - pchisq(lm0$deviance - lm1$deviance, 2)
  }
}


options(rgl.useNULL = TRUE)
options(rgl.printRglwidget = TRUE)
require(rgl)
## Loading required package: rgl
x <- rep(grid1, times = length(grid2)) # psi1 varies fastest (column-major order of Hpsi.p)
y <- rep(grid2, each = length(grid1))
df <- data.frame(x = x, y = y, z = as.vector(Hpsi.p), color = as.vector(Hpsi.p) > 0.05)
with(df, {
  plot3d(
    x = x, y = y, z = z,  type = "s",col=2,size=0.5
  )
})
library(ggplot2)
ggplot(data = df, aes(x = x, y = y, col = color, alpha = z)) + geom_point() +
  xlab("psi1") + ylab("psi2") + ggtitle("Confidence region of (psi1, psi2)")

cat("(psi1,psi2) with maximal p value = (",df$x[which.max(df$z)],
    ",",df$y[which.max(df$z)],")")
## (psi1,psi2) with maximal p value = ( 1.04 , 1.04 )
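
To read the joint 95% confidence region off the grid numerically, a small sketch using df from above (these are the marginal extents of the joint region, not two separate univariate intervals):

accepted <- df[df$z > 0.05,] # grid points not rejected at the 5% level
cat("psi1 extent of joint region = (", min(accepted$x), ",", max(accepted$x), ")")
cat("psi2 extent of joint region = (", min(accepted$y), ",", max(accepted$y), ")")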

G-estimation (two or more interaction terms)

\[Y_i^A - Y_i^{a=0} = \psi_1 A_i + \psi_2 A_i V_{1i} + \psi_3 A_i V_{2i}, \quad \psi_1 = \psi_2 = \psi_3 = 1\]

where \(\psi_1\) is the treatment’s main causal effect, and \(\psi_2\) and \(\psi_3\) are the coefficients of the interactions between treatment and covariates \(V_1\) and \(V_2\).

# G-estimation: checking multiple possible values of psi
##################################################################
set.seed(123)
n <- 1000
beta1 <- 1.0
beta2 <- 1.0
beta3 <- 1.0

L <- rnorm(n, 0, 1)
V1 <- rnorm(n, 0, 1)
V2 <- rnorm(n, 0, 1)
A <- rbinom(n, 1, 1/(1 + exp(L)))
Y0 <- L + L^2
Y1 <- Y0 + A * beta1 + A*V1*beta2 + A*V2*beta3
Y <- Y1*(A==1) + Y0*(A==0)

data <- data.frame(A = A, Y = Y, L = L, V1 = V1, V2 = V2)

f0 <- function(psi){
  psi1 <- psi[1]; psi2 <- psi[2]; psi3 <- psi[3]
  data$Hpsi <- data$Y - psi1 * data$A - psi2 * data$V1 * data$A - psi3 * data$V2 * data$A
  lm0 <- glm(A ~ L, data = data, family = "binomial")
  lm1 <- glm(A ~ L + Hpsi + Hpsi:V1 + Hpsi:V2, data = data, family = "binomial")
  # Maximize the p value: the deviance difference is chi-square with d.f. = 3
  Hpsi.p <- 1 - pchisq(lm0$deviance - lm1$deviance, 3)
  return(-Hpsi.p)
  # OR minimize sum |alpha_i|:
  # Hpsi.sum <- sum(abs(lm1$coefficients[3:5]))
  # return(Hpsi.sum)
}

1. We will not use a triple for-loop to search, because it is too time-consuming (the curse of dimensionality: imagine 10 variables, requiring \(\text{grid}^{10}\) iterations). Instead, we will use a general-purpose optimization algorithm.

2. An optimization algorithm generally does one thing: find the minimum of a function. Here we want either the minimal \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or the maximal p value, so our function returns either \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or minus the p value. Let’s try the latter!

3. The optimization algorithm requires a good “starting point”, from which it travels toward an “optimal point” according to its search rule.

4. In this case, starting from \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)=(0,0,0)\) is too far from the ground truth \((\psi_1,\psi_2,\psi_3)=(1,1,1)\): the p value is numerically zero in a flat region around that start, so the optimizer stops immediately without converging to the truth.

5. Starting from \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)=(0.5,0.5,0.5)\) or \((1,1,1)\), which are close to the ground truth \((\psi_1,\psi_2,\psi_3)=(1,1,1)\), both runs converge (a simple multi-start safeguard is sketched at the end of this section).

optim(par=c(0,0,0),fn=f0,method="Nelder-Mead")
## $par
## [1] 0 0 0
## 
## $value
## [1] 0
## 
## $counts
## function gradient 
##        4       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
optim(par=c(0.5,0.5,0.5),fn=f0,method="Nelder-Mead")
## $par
## [1] 0.8624988 1.0298286 1.0106217
## 
## $value
## [1] -1
## 
## $counts
## function gradient 
##      146       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL
(result<-optim(par=c(1,1,1),fn=f0,method="Nelder-Mead"))
## $par
## [1] 0.8625729 1.0297417 1.0106094
## 
## $value
## [1] -1
## 
## $counts
## function gradient 
##       70       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

Let’s check the p values at \((\psi_1,\psi_2,\psi_3)=(0,0,0)\), \((0.5,0.5,0.5)\), \((1,1,1)\), and at the converged/estimated value \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)\).

-f0(c(0,0,0))
## [1] 0
-f0(c(0.5,0.5,0.5))
## [1] 7.141666e-09
-f0(c(1,1,1))
## [1] 0.2388487
-f0(result$par)
## [1] 1
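
Finally, since the result depends on the starting point (points 4 and 5 above), a cheap safeguard is to run the optimizer from several random starts and keep the best run. A minimal sketch; the 20 starts and the \([0, 2]\) sampling range are arbitrary choices, not part of the analysis above:

set.seed(1)
starts <- matrix(runif(20 * 3, min = 0, max = 2), ncol = 3) # 20 random starting points
fits <- lapply(seq_len(nrow(starts)), function(i)
  optim(par = starts[i,], fn = f0, method = "Nelder-Mead"))
best <- fits[[which.min(sapply(fits, function(f) f$value))]] # f0 returns minus the p value
best$par # estimated (psi1, psi2, psi3)
-best$value # the attained p value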