References

Assumptions

Load packages

library(magrittr)
library(XLConnect)

Load data

datFile <- "nhefs_book.xls"
if(!file.exists(datFile)) {
    download.file(url = "http://cdn1.sph.harvard.edu/wp-content/uploads/sites/1268/2013/01/nhefs_book.xls",
                  destfile = datFile)
}
nhefs <- readWorksheetFromFile(datFile, sheet = 1)
dim(nhefs)
## [1] 1746   61

parametric G-formula

## Fit outcome model
outcomeModel <- lm(wt82_71 ~ qsmk + sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefs)

## Untreated dataset
nhefsUntreated <- nhefs
nhefsUntreated$qsmk <- 0

## Treated dataset
nhefsTreated <- nhefs
nhefsTreated$qsmk <- 1

## Obtain E[Ya=0] and E[Ya=1]
E_Y_a_0 <- mean(predict(outcomeModel, newdata = nhefsUntreated))
E_Y_a_1 <- mean(predict(outcomeModel, newdata = nhefsTreated))

## Obtain marginal effect
E_Y_a_1 - E_Y_a_0
## [1] 2.99682

IPT(C)W

## Fit exposure model
exposureModel <- glm(qsmk ~ sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefs, family = binomial)
nhefs$ps <- predict(exposureModel, type = "response")

## Create IPTW: 1/P[A = 1 | L] if treated or 1/P[A = 0 | L] if not treated
nhefs$iptw <- 1/((nhefs$ps * nhefs$qsmk) + ((1 - nhefs$ps) * (1 - nhefs$qsmk)))
summary(nhefs$iptw)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.093   1.282   1.393   2.002   2.268   9.809
## Fit censoring model (include treatment)
censoringModel <- glm(I(as.numeric(is.na(wt82_71))) ~ qsmk + sex + poly(age,2) + race + poly(smokeyrs,2),
                      data = nhefs, family = binomial)
nhefs$censProb <- predict(censoringModel, type = "response")

## Create IPCW: 1/P[C = 0 | A, L] for noncensored, 0 for censored
nhefs$ipcw <- (1 - nhefs$censProb) * as.numeric(!is.na(nhefs$wt82_71))
summary(nhefs$ipcw)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.9576  0.9707  0.9261  0.9786  0.9968
## Fit MSM (geepack)
library(geepack)
msmModel1 <- geeglm(formula   = wt82_71 ~ qsmk,
                    family    = gaussian(link = "identity"),
                    id        = seqn,
                    data      = nhefs,
                    weights   = iptw*ipcw,
                    corstr    = "exchangeable")
summary(msmModel1)
## 
## Call:
## geeglm(formula = wt82_71 ~ qsmk, family = gaussian(link = "identity"), 
##     data = nhefs, weights = iptw * ipcw, id = seqn, corstr = "exchangeable")
## 
##  Coefficients:
##             Estimate Std.err  Wald Pr(>|W|)    
## (Intercept)   1.9926  0.2102 89.86   <2e-16 ***
## qsmk          3.0873  0.4645 44.18    3e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)    62.42   3.775
## 
## Correlation: Structure = exchangeable  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha        0       0
## Number of clusters:   1679   Maximum cluster size: 1

G-estimation

## Extract complete cases
nhefsCC <- subset(nhefs, !is.na(wt82_71))

## H(psi) function constructor: H(psi) = Y - psi*A
Hpsi <- function(outcome, treatment) {
    function(psi) {
        outcome - psi*treatment
    }
}

## Absolute value of coefficient of H(psi) function constructor
AbsCoefHpsi <- function(txConfounderFormula, data, outcome, treatment) {
    function(psi) {
        ## Model without H(psi)
        model <- glm(formula = txConfounderFormula,
                     family  = binomial(link = "logit"),
                     data    = data)

        ## Calculate H(psi)
        data$..Hpsi.. <- (Hpsi(outcome, treatment))(psi)
        
        ## Add H(psi)
        model2 <- update(model, formula = ~ . + ..Hpsi..)

        ## Coefficient for H(psi)
        tail(coef(model2), 1) %>% abs
    }
}

## Try a psi value of 3
AbsCoefHspiFun <- AbsCoefHpsi(qsmk ~ sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefsCC,
                              outcome = nhefsCC$wt82_71, treatment = nhefsCC$qsmk)
AbsCoefHspiFun(psi = 3)
##  ..Hpsi.. 
## 0.0001597
## Find a value of psi that makes the coefficient of H(psi) near-zero
optim(par = 0, fn = AbsCoefHspiFun, lower = -10, upper = 10)
## Warning in optim(par = 0, fn = AbsCoefHspiFun, lower = -10, upper = 10): bounds can only be used with method
## L-BFGS-B (or Brent)
## $par
## [1] 2.991
## 
## $value
## [1] 1.056e-10
## 
## $counts
## function gradient 
##       18       18 
## 
## $convergence
## [1] 0
## 
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"