require(geepack)
## Loading required package: geepack
\[Y_i^A - Y_i^{a=0} = \psi A_i , \psi = 2\]
We will set grid from 0 to 4, by 0.01, to search for \(\hat{\psi}\)
##################################################################
set.seed(123)
# Generate counterfactual outcome Y1 and Y0, observed data Y
n <-1000
beta <- 2.0
L<-rnorm(n,0,1)
A<- rbinom(n,1, 1/(1+exp(L)) )
Y0 <- L + L^2
Y1 <- Y0 + A * beta
Y <- Y1*(A==1) + Y0*(A==0)
data<- data.frame(A=A,Y=Y,L=L,id=1:n)
grid <- seq(from = 1.5,to = 2.5, by = 0.01) # set by = 0.001 for finer estimate j=0
Hpsi.coefs <- double(length(grid))
Hpsi.var <- double(length(grid))
j<-0
for (i in grid){
psi = i; j = j+1
data$Hpsi <- data$Y - psi * data$A
# GEE (generalized estimation equation)
# requires less assumption, more robust, costs more time)
# gee.obj <- geeglm(A ~ L + Hpsi, data = data,
# id=id, corstr="independence", family = binomial(logit))
# result<-summary(gee.obj)
# Hpsi.coefs[j] <- coef(gee.obj)["Hpsi"]
# Hpsi.var[j] <- result$cov.scaled[3,3]
lm0 <- glm(A ~ L + Hpsi, data = data,family="binomial")
lm0$coefficients["Hpsi"]
result<-summary(lm0)
Hpsi.coefs[j] <- coef(lm0)["Hpsi"]
Hpsi.var[j] <- result$cov.scaled[3,3]
}
Hpsi.p <- 1-pchisq((Hpsi.coefs)^2 /Hpsi.var,1)
plot(grid,abs(Hpsi.coefs),main="|alpha| ~ psi")
plot(grid,Hpsi.p,main="p value ~ psi")
store.results <- as.data.frame(cbind(grid,abs(Hpsi.coefs),Hpsi.p))
names(store.results) <- c("grid", "Hpsi.est","p.value")
store.results[store.results$Hpsi.est == min(store.results$Hpsi.est),]
## grid Hpsi.est p.value
## 45 1.94 0.0004991559 0.9941772
chosen<- which(store.results$p.value>0.05)
cat("psi estimation = ",store.results$grid[which.max(store.results$p.value)])
## psi estimation = 1.94
cat("95% Confidence interval = (", min(store.results$grid[chosen]),",",max(store.results$grid[chosen]),")")
## 95% Confidence interval = ( 1.81 , 2.07 )
\[Y_i^A - Y_i^{a=0} = \psi_1 A_i + \psi_2 A_i V_i, \ \psi_1 =\psi_2 = 1\]
where \(\psi_1\) is the treatment’s main causal effect, \(\psi_2\) is the interaction term between treatment and covariate V.
1, We will set two grids from 0.5 to 1.5, by 0.02, to search for \((\hat{\psi}_1,\hat{\psi}_2)\), with double for-loop.
2, When there are two values \(\psi_1\) and \(\psi_2\), there should be two separate p values, what should we do? Answer: We will perform an overall hypothesis testing of \(\alpha_1 = \alpha_2 = 0\), which give us a single p value.
# G-estimation: Checking multiple possible values of psi*/
##################################################################
set.seed(123)
n <-1000
beta1 <- 1.0
beta2 <- 1.0
L<-rnorm(n,0,1)
V<-rnorm(n,0,1)
A<- rbinom(n,1, 1/(1+exp(L)) )
Y0 <- L + L^2
Y1 <- Y0 + A * beta1 + A*V*beta2
Y <- Y1*(A==1) + Y0*(A==0)
data<- data.frame(A=A,Y=Y,L=L,V=V)
grid1 <- seq(from = 0.5,to = 1.5, by = 0.02)
grid2 <- seq(from = 0.5,to = 1.5, by = 0.02)
Hpsi.p <- matrix(0,length(grid1),length(grid2))
j<-0
for (ii in 1: length(grid1) ){
for(jj in 1: length(grid2)){
psi1 = grid1[ii]
psi2 = grid2[jj]
data$Hpsi <- data$Y - psi1 * data$A - psi2 * data$V * data$A
lm0 <- glm(A ~ L, data = data,family="binomial")
lm1 <- glm(A ~ L + Hpsi + Hpsi:V, data = data,family="binomial")
result<-summary(lm0)
# hypothesis testing whether "parameters of Hpsi and Hpsi*V = 0"
# the deviance difference follows chi square distribution, with d.f. = 2.
Hpsi.p[ii,jj] <- 1-pchisq(lm0$deviance - lm1$deviance,2)
}
}
options(rgl.useNULL = TRUE)
options(rgl.printRglwidget = TRUE)
require(rgl)
## Loading required package: rgl
x = rep(grid1,length(grid2))
y = rep(grid2,each=length(grid1))
df<-data.frame(x=x,y=y,z = as.vector(Hpsi.p),color = as.vector(Hpsi.p)>0.05)
with(df, {
plot3d(
x = x, y = y, z = z, type = "s",col=2,size=0.5
)
})
library(ggplot2)
ggplot(data=df,aes(x=x,y=y,col=color,alpha = z))+geom_point() + xlab("psi1") + ylab("psi2") + ggtitle("Confidence interval of (psi1,psi2)")
cat("(psi1,psi2) with maximal p value = (",df$x[which.max(df$z)],
",",df$y[which.max(df$z)],")")
## (psi1,psi2) with maximal p value = ( 1.04 , 1.04 )
\[Y_i^A - Y_i^{a=0} = \psi_1 A_i + \psi_2 A_i V_{1i} + \psi_3 A_i V_{2i}, \ \psi_1 =\psi_2 =\psi_3 = 1\]
where \(\psi_1\) is the treatment’s main causal effect, \(\psi_2\) and \(\psi_3\) are the interaction terms between treatment and covariates \(V_1\) and \(V_2\).
# G-estimation: Checking multiple possible values of psi*/
##################################################################
require(geepack)
set.seed(123)
n <-1000
beta1 <- 1.0
beta2 <- 1.0
beta3 <- 1.0
L<-rnorm(n,0,1)
V1<-rnorm(n,0,1)
V2<-rnorm(n,0,1)
A<- rbinom(n,1, 1/(1+exp(L)) )
Y0 <- L + L^2
Y1 <- Y0 + A * beta1 + A*V1*beta2 + A*V2*beta3
Y <- Y1*(A==1) + Y0*(A==0)
data<- data.frame(A=A,Y=Y,L=L,V1=V1,V2=V2)
f0<-function(psi){
psi1<-psi[1];psi2<-psi[2];psi3<-psi[3];
data$Hpsi <- data$Y - psi1 * data$A - psi2 * data$V1 * data$A- psi3 * data$V2 * data$A
lm0 <- glm(A ~ L, data = data,family="binomial")
lm1 <- glm(A ~ L + Hpsi + Hpsi:V1+ Hpsi:V2, data = data,family="binomial")
result<-summary(lm0)
#maximize p value
Hpsi.p <- 1-pchisq(lm0$deviance - lm1$deviance,3)
return(-Hpsi.p)
# OR
# minimize sum |alpha_i|
# Hpsi.sum <- sum(abs(lm1$coefficients[3:5]))
# return(Hpsi.sum)
}
1, We will not use triple for-loop to search, beacuse it is too time-consuming (curse of dimension, imagine 10 variables, \(grid^{10}\) iterations). Instead, we will use other optimization algorithms.
2, The optimization algorithm will generally do one thing: find the minimal point of a function. Here we want to find the minimal \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or maximal p value. Thus our function will return a value of \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or minus p value. Let’s just try the latter one!
3, The optimization algorithm requires a good “starting point”, then it will travel to the “optimal point” according to designed algorihm.
4, In this case, starting from \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)=(0,0,0)\) is too far away from the ground truth \((\psi_1,\psi_2,\psi_3)=(1,1,1)\), so it doesn’t converge.
5, Starting from \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)=(0.5,0.5,0.5), (1,1,1)\) are close to the ground truth \((\psi_1,\psi_2,\psi_3)=(1,1,1)\), so they both converge.
optim(par=c(0,0,0),fn=f0,method="Nelder-Mead")
## $par
## [1] 0 0 0
##
## $value
## [1] 0
##
## $counts
## function gradient
## 4 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
optim(par=c(0.5,0.5,0.5),fn=f0,method="Nelder-Mead")
## $par
## [1] 0.8624988 1.0298286 1.0106217
##
## $value
## [1] -1
##
## $counts
## function gradient
## 146 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
(result<-optim(par=c(1,1,1),fn=f0,method="Nelder-Mead"))
## $par
## [1] 0.8625729 1.0297417 1.0106094
##
## $value
## [1] -1
##
## $counts
## function gradient
## 70 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
Let’s check the p value of \((\psi_1,\psi_2,\psi_3)=(0,0,0),(0.5,0.5,0.5),(1,1,1)\) and the converged one/the estimated value \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)\).
-f0(c(0,0,0))
## [1] 0
-f0(c(0.5,0.5,0.5))
## [1] 7.141666e-09
-f0(c(1,1,1))
## [1] 0.2388487
-f0(result$par)
## [1] 1