library(magrittr)
library(XLConnect)
datFile <- "nhefs_book.xls"
if(!file.exists(datFile)) {
download.file(url = "http://cdn1.sph.harvard.edu/wp-content/uploads/sites/1268/2013/01/nhefs_book.xls",
destfile = datFile)
}
nhefs <- readWorksheetFromFile(datFile, sheet = 1)
dim(nhefs)
## [1] 1746 61
## Fit outcome model
outcomeModel <- lm(wt82_71 ~ qsmk + sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefs)
## Untreated dataset
nhefsUntreated <- nhefs
nhefsUntreated$qsmk <- 0
## Treated dataset
nhefsTreated <- nhefs
nhefsTreated$qsmk <- 1
## Obtain E[Ya=0] and E[Ya=1]
E_Y_a_0 <- mean(predict(outcomeModel, newdata = nhefsUntreated))
E_Y_a_1 <- mean(predict(outcomeModel, newdata = nhefsTreated))
## Obtain marginal effect
E_Y_a_1 - E_Y_a_0
## [1] 2.99682
Need to model confounder-exposure relationships correctly
The functional forms are not necessarily the same as in the outcome model.
## Fit exposure model
exposureModel <- glm(qsmk ~ sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefs, family = binomial)
nhefs$ps <- predict(exposureModel, type = "response")
## Create IPTW: 1/P[A = 1 | L] if treated or 1/P[A = 0 | L] if not treated
nhefs$iptw <- 1/((nhefs$ps * nhefs$qsmk) + ((1 - nhefs$ps) * (1 - nhefs$qsmk)))
summary(nhefs$iptw)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.093 1.282 1.393 2.002 2.268 9.809
## Fit censoring model (include treatment)
censoringModel <- glm(I(as.numeric(is.na(wt82_71))) ~ qsmk + sex + poly(age,2) + race + poly(smokeyrs,2),
data = nhefs, family = binomial)
nhefs$censProb <- predict(censoringModel, type = "response")
## Create IPCW: 1/P[C = 0 | A, L] for noncensored, 0 for censored
nhefs$ipcw <- (1 - nhefs$censProb) * as.numeric(!is.na(nhefs$wt82_71))
summary(nhefs$ipcw)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.9576 0.9707 0.9261 0.9786 0.9968
## Fit MSM (geepack)
library(geepack)
msmModel1 <- geeglm(formula = wt82_71 ~ qsmk,
family = gaussian(link = "identity"),
id = seqn,
data = nhefs,
weights = iptw*ipcw,
corstr = "exchangeable")
summary(msmModel1)
##
## Call:
## geeglm(formula = wt82_71 ~ qsmk, family = gaussian(link = "identity"),
## data = nhefs, weights = iptw * ipcw, id = seqn, corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 1.9926 0.2102 89.86 <2e-16 ***
## qsmk 3.0873 0.4645 44.18 3e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 62.42 3.775
##
## Correlation: Structure = exchangeable Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0 0
## Number of clusters: 1679 Maximum cluster size: 1
## Extract complete cases
nhefsCC <- subset(nhefs, !is.na(wt82_71))
## H(psi) function constructor: H(psi) = Y - psi*A
Hpsi <- function(outcome, treatment) {
function(psi) {
outcome - psi*treatment
}
}
## Absolute value of coefficient of H(psi) function constructor
AbsCoefHpsi <- function(txConfounderFormula, data, outcome, treatment) {
function(psi) {
## Model without H(psi)
model <- glm(formula = txConfounderFormula,
family = binomial(link = "logit"),
data = data)
## Calculate H(psi)
data$..Hpsi.. <- (Hpsi(outcome, treatment))(psi)
## Add H(psi)
model2 <- update(model, formula = ~ . + ..Hpsi..)
## Coefficient for H(psi)
tail(coef(model2), 1) %>% abs
}
}
## Try a psi value of 3
AbsCoefHspiFun <- AbsCoefHpsi(qsmk ~ sex + poly(age,2) + race + poly(smokeyrs,2), data = nhefsCC,
outcome = nhefsCC$wt82_71, treatment = nhefsCC$qsmk)
AbsCoefHspiFun(psi = 3)
## ..Hpsi..
## 0.0001597
## Find a value of psi that makes the coefficient of H(psi) near-zero
optim(par = 0, fn = AbsCoefHspiFun, lower = -10, upper = 10)
## Warning in optim(par = 0, fn = AbsCoefHspiFun, lower = -10, upper = 10): bounds can only be used with method
## L-BFGS-B (or Brent)
## $par
## [1] 2.991
##
## $value
## [1] 1.056e-10
##
## $counts
## function gradient
## 18 18
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"