We will show a real-data application of G-estimation.

Outcome (Y): weight change from 1971 to 1982.

Intervention (A): quit smoking between the first questionnaire and 1982 (1 = yes, 0 = no).

Possible predictors (L): sex, race, education, age, smoking intensity, exercise, baseline weight.

Research question: Does quitting smoking have a causal effect on weight change? Is there an interaction between the causal effect and sex or race?

# install.packages("causaldata")
# install.packages("geepack")
require(geepack)
## Loading required package: geepack
require(causaldata) 
## Loading required package: causaldata
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df<- nhefs%>%dplyr::select(wt82_71, qsmk, sex, race, education, age, smokeintensity,
                           exercise, wt71)
df<-df[rowSums(is.na(df))==0,] # complete-case analysis for now
Y<-df$wt82_71
A<-as.factor(df$qsmk)
plot(density(Y),main="Density of Y (weight change)")

table(A)
## A
##    0    1 
## 1163  403
ggplot(data = df,aes(x=Y,colour=A)) + geom_density() + ggtitle("Density of Y (weight change)")

G-estimation (univariate/no interaction)

Assume the (rank-preserving) structural model \[Y_i^{a} - Y_i^{a=0} = \psi a\] and define \[H(\psi) = Y - \psi A,\] which equals the counterfactual outcome \(Y^{a=0}\) when \(\psi\) takes its true value. Fit a logistic regression for treatment assignment: \[A \sim \alpha H(\psi) + \beta L\]

We will search for \(\hat{\psi}\) over a grid from -2 to 10, in steps of 0.01.
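
Why the grid search works: under conditional exchangeability, \(Y^{a=0}\) is independent of \(A\) given \(L\); at the true \(\psi\), \(H(\psi) = Y^{a=0}\), so the coefficient \(\alpha\) on \(H(\psi)\) in the treatment model should be zero. The g-estimate is therefore the grid value whose \(\hat{\alpha}\) is closest to zero (equivalently, whose p value for testing \(\alpha = 0\) is largest), and the grid values not rejected at the 5% level form a 95% confidence interval: \[\hat{\psi} = \arg\min_{\psi} |\hat{\alpha}(\psi)|, \qquad \text{95% CI} = \{\psi : \text{p value for } \alpha = 0 \text{ exceeds } 0.05\}.\]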

##################################################################
set.seed(123)
# G-estimation by grid search over candidate values of psi
n <- dim(df)[1]

data <- df
grid <- seq(from = -2, to = 10, by = 0.01) # set by = 0.001 for a finer estimate
Hpsi.coefs <- double(length(grid))
Hpsi.var <- double(length(grid))
j <- 0
for (i in grid){
  psi <- i; j <- j + 1
  data$Hpsi <- data$wt82_71 - psi * data$qsmk
  lm0 <- glm(qsmk ~ Hpsi + as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")
  result <- summary(lm0)
  Hpsi.coefs[j] <- coef(lm0)["Hpsi"]                # alpha-hat at this psi
  Hpsi.var[j] <- result$cov.scaled["Hpsi", "Hpsi"]  # its estimated variance
}

Hpsi.p <- 1 - pchisq(Hpsi.coefs^2 / Hpsi.var, 1)  # Wald p values for testing alpha = 0 at each psi
plot(grid,abs(Hpsi.coefs),main="|alpha| ~ psi",xlab = "grid of psi",ylab="abs(alpha)")

plot(grid,Hpsi.p,main="p value ~ psi",xlab = "grid of psi",ylab="p value")

store.results <- as.data.frame(cbind(grid,abs(Hpsi.coefs),Hpsi.p))
names(store.results) <- c("grid", "Hpsi.est","p.value")
store.results[store.results$Hpsi.est == min(store.results$Hpsi.est),]
##     grid     Hpsi.est   p.value
## 536 3.35 6.742316e-05 0.9932942
chosen<- which(store.results$p.value>0.05)
cat("psi estimation = ",store.results$grid[which.max(store.results$p.value)],"\n")
## psi estimation =  3.35
cat("95% Confidence interval = (", min(store.results$grid[chosen]),",",max(store.results$grid[chosen]),")")
## 95% Confidence interval = ( 2.49 , 4.21 )
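
As a cross-check on the grid search, the one-parameter g-estimate also has a closed form: the \(\psi\) at which \(\hat{\alpha} = 0\) also solves the estimating equation \(\sum_i (Y_i - \psi A_i)(A_i - \hat{p}_i) = 0\), where \(\hat{p}_i = \widehat{\Pr}[A_i = 1 \mid L_i]\) is the fitted propensity score from a logistic model of \(A\) on \(L\). Solving for \(\psi\) gives \(\hat{\psi} = \sum_i Y_i (A_i - \hat{p}_i) / \sum_i A_i (A_i - \hat{p}_i)\). A minimal sketch (the names ps.fit, pA, and psi.closed are introduced here for illustration; it reuses data from above):

# Closed-form one-parameter g-estimate: solve
#   sum_i (Y_i - psi * A_i) * (A_i - E[A_i | L_i]) = 0  for psi
ps.fit <- glm(qsmk ~ as.factor(sex) + as.factor(race) + as.factor(education) +
                age + smokeintensity + as.factor(exercise) + wt71,
              data = data, family = "binomial")
pA <- predict(ps.fit, type = "response")  # fitted propensity scores E[A | L]
psi.closed <- sum(data$wt82_71 * (data$qsmk - pA)) / sum(data$qsmk * (data$qsmk - pA))
psi.closed  # should essentially reproduce the grid-search estimate of 3.35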

G-estimation (bivariate/one interaction term)

\[Y_i^{a} - Y_i^{a=0} = \psi_1 a + \psi_2 a V_i\] \[H(\psi) = Y - \psi_1 A - \psi_2 A V\] \[A \sim \alpha_1 H(\psi) + \alpha_2 H(\psi)\times V + \beta L\]

where \(V\) is sex, \(\psi_1\) is the treatment's main causal effect, and \(\psi_2\) is the interaction between treatment and the covariate \(V\).

1, We will set two grids and search for \((\hat{\psi}_1,\hat{\psi}_2)\) with a double for-loop.

2, With two parameters \(\psi_1\) and \(\psi_2\) there are two coefficients \(\alpha_1\) and \(\alpha_2\), and hence two separate p values at each grid point; what should we do? Answer: we perform an overall hypothesis test of \(\alpha_1 = \alpha_2 = 0\), which gives a single p value.
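
Concretely, this overall test is a deviance (likelihood ratio) test comparing the treatment model without the \(H(\psi)\) terms (lm0 in the code below) to the model with them (lm1): under \(H_0: \alpha_1 = \alpha_2 = 0\), \[D_{\text{lm0}} - D_{\text{lm1}} \sim \chi^2_2,\] and the joint 95% confidence region for \((\psi_1,\psi_2)\) is the set of grid points whose p value exceeds 0.05.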

# G-estimation: checking multiple possible values of psi*
##################################################################


grid1 <- seq(from = 0, to = 8, by = 0.25)
grid2 <- seq(from = -3, to = 2, by = 0.1)
Hpsi.p <- matrix(0, length(grid1), length(grid2))

for (ii in 1:length(grid1)){
  for (jj in 1:length(grid2)){
    psi1 <- grid1[ii]
    psi2 <- grid2[jj]
    data$Hpsi <- data$wt82_71 - psi1 * data$qsmk - psi2 * as.numeric(data$sex) * data$qsmk
    # null model without the H(psi) terms (does not depend on psi; could be fitted once outside the loop)
    lm0 <- glm(qsmk ~ as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")
    # full model with H(psi) and its interaction with sex
    lm1 <- glm(qsmk ~ Hpsi + Hpsi:as.factor(sex) + as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")
    # overall test of "alpha_1 = alpha_2 = 0":
    # the deviance difference follows a chi-square distribution with d.f. = 2
    Hpsi.p[ii,jj] <- 1 - pchisq(lm0$deviance - lm1$deviance, 2)
  }
}


options(rgl.useNULL = TRUE)
options(rgl.printRglwidget = TRUE)
require(rgl)
## Loading required package: rgl
x = rep(grid1,length(grid2))
y = rep(grid2,each=length(grid1))
# note: this overwrites the covariate data frame `df` created earlier
df <- data.frame(x = x, y = y, z = as.vector(Hpsi.p), color = as.vector(Hpsi.p) > 0.05)
with(df, {
  plot3d(
    x = x, y = y, z = z,  type = "s",col=2,size=0.5
  )
})
library(ggplot2)
ggplot(data=df,aes(x=x,y=y,col=color,alpha = z))+geom_point() + xlab("psi1") + ylab("psi2") + ggtitle("Confidence interval of (psi1,psi2)")

cat("(psi1,psi2) with maximal p value = (",df$x[which.max(df$z)],
    ",",df$y[which.max(df$z)],")")
## (psi1,psi2) with maximal p value = ( 4 , -0.4 )
data$A_sex <- paste("A=",data$qsmk,",Sex=",data$sex,sep="")



ggplot(data = data,aes(x=wt82_71,colour=as.factor(A_sex))) + geom_density() + ggtitle("Density plot of outcome")

res<-data%>%group_by(A_sex)%>%summarise(mean = mean(wt82_71))
res <- matrix(res$mean, 2, 2)  # rows: sex 0/1, columns: treatment 0/1
rownames(res)<-c("Sex:0","Sex:1")
colnames(res)<-c("Treatment:0","Treatment:1")
knitr::kable(res)
|      | Treatment:0| Treatment:1|
|:-----|-----------:|-----------:|
|Sex:0 |    2.001335|    4.832318|
|Sex:1 |    1.969802|    4.155721|
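
For reference, the crude quit-minus-continue differences implied by this table are about 2.83 kg for Sex:0 and 2.19 kg for Sex:1; they can be computed directly from the res matrix above:

# Crude (unadjusted) mean differences in weight change, quitters minus
# non-quitters, within each sex; compare with the confounder-adjusted g-estimates.
res[, "Treatment:1"] - res[, "Treatment:0"]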

Conclusion: quitting smoking has a significant causal effect on weight change (quitters gain weight), but the interaction between the intervention and sex is not significant (the confidence region covers 0 for \(\psi_2\)).

G-estimation (two or more interaction terms)

\[Y_i^{a} - Y_i^{a=0} = \psi_1 a + \psi_2 a V_{1i} + \psi_3 a V_{2i}\] \[H(\psi) = Y - \psi_1 A - \psi_2 A V_1 - \psi_3 A V_2\] \[A \sim \alpha_1 H(\psi) + \alpha_2 H(\psi)\times V_1 + \alpha_3 H(\psi)\times V_2 + \beta L\] where \(V_1\) is sex and \(V_2\) is race. \(\psi_1\) is the treatment's main causal effect, and \(\psi_2\) and \(\psi_3\) are the interactions between treatment and the covariates \(V_1\) and \(V_2\).

# G-estimation: checking multiple possible values of psi*
##################################################################

f0 <- function(psi){
  psi1 <- psi[1]; psi2 <- psi[2]; psi3 <- psi[3]
  data$Hpsi <- data$wt82_71 - psi1 * data$qsmk -
                psi2 * as.numeric(data$sex) * data$qsmk - psi3 * as.numeric(data$race) * data$qsmk

  # null model without the H(psi) terms (does not depend on psi)
  lm0 <- glm(qsmk ~ as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")
  # full model with H(psi) and its interactions with sex and race
  lm1 <- glm(qsmk ~ Hpsi + Hpsi:as.factor(sex) + Hpsi:as.factor(race) +
               as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")

  # maximize the overall p value of "alpha_1 = alpha_2 = alpha_3 = 0"
  # (deviance difference ~ chi-square with d.f. = 3); optim minimizes, so return -p
  Hpsi.p <- 1 - pchisq(lm0$deviance - lm1$deviance, 3)
  return(-Hpsi.p)
  # OR: minimize the sum of |alpha_i| (the coefficients of Hpsi and its interactions)
  # Hpsi.sum <- sum(abs(coef(lm1)[grep("Hpsi", names(coef(lm1)))]))
  # return(Hpsi.sum)
}

1, We will not use a triple for-loop to search, because it is too time-consuming (curse of dimensionality: imagine 10 variables and \(\text{grid}^{10}\) iterations). Instead, we will use a generic optimization algorithm.

2, An optimization algorithm generally does one thing: find the minimum of a function. Here we want the minimal \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or the maximal p value, so our function returns either \(\sum_{i=1}^3 |\hat{\alpha}_i|\) or minus the p value. Let's try the latter!

3, The optimization algorithm requires a good starting point, from which it travels toward the optimum according to its update rule.

(result<-optim(par=c(3,0,0),fn=f0,method="Nelder-Mead"))
## $par
## [1]  2.1321528 -0.5086409  1.7827819
## 
## $value
## [1] -1
## 
## $counts
## function gradient 
##      106       NA 
## 
## $convergence
## [1] 0
## 
## $message
## NULL

Let's check the p values at \((\psi_1,\psi_2,\psi_3) = (0,0,0)\), at the starting point \((3,0,0)\), and at the converged estimate \((\hat{\psi}_1,\hat{\psi}_2,\hat{\psi}_3)\).

-f0(c(0,0,0))
## [1] 1.461498e-12
-f0(c(3,0,0))
## [1] 0.5623868
-f0(result$par)
## [1] 1

According to these results, the converged estimate has the highest p value and is therefore the best candidate, while \((0,0,0)\) has an extremely small p value, so the hypothesis of no causal effect is clearly rejected.
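
For completeness, here is a minimal sketch of the alternative objective from item 2 above, minimizing \(\sum_{i=1}^3 |\hat{\alpha}_i|\) instead of maximizing the p value (the function name f1 is introduced here for illustration; it reuses data from above):

# Alternative objective: minimize the sum of the absolute alpha coefficients
# (Hpsi and its two interaction terms) instead of maximizing the p value.
f1 <- function(psi){
  psi1 <- psi[1]; psi2 <- psi[2]; psi3 <- psi[3]
  data$Hpsi <- data$wt82_71 - psi1 * data$qsmk -
                psi2 * as.numeric(data$sex) * data$qsmk - psi3 * as.numeric(data$race) * data$qsmk
  lm1 <- glm(qsmk ~ Hpsi + Hpsi:as.factor(sex) + Hpsi:as.factor(race) +
               as.factor(sex) + as.factor(race) + as.factor(education) + age + smokeintensity + as.factor(exercise) + wt71, data = data, family = "binomial")
  sum(abs(coef(lm1)[grep("Hpsi", names(coef(lm1)))]))
}
optim(par = c(3, 0, 0), fn = f1, method = "Nelder-Mead")$par

Both objectives target the same condition \(\hat{\alpha}_1 = \hat{\alpha}_2 = \hat{\alpha}_3 = 0\), so they should land near the same estimate; one practical difference is that the p value can saturate at 1 near the optimum (as in the optim output above, where $value equals -1), giving a flat objective, whereas \(\sum_{i=1}^3 |\hat{\alpha}_i|\) keeps a sharper minimum.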