## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## here() starts at C:/Users/hed2/Desktop
nhefs$cens <- ifelse(is.na(nhefs$wt82), 1, 0)
nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] # pro visional ignore subjects with missing values for weight in 1982
lm(wt82_71 ~ qsmk, data=nhefs.nmv) # qsmk
##
## Call:
## lm(formula = wt82_71 ~ qsmk, data = nhefs.nmv)
##
## Coefficients:
## (Intercept) qsmk
## 1.984 2.541
predict(lm(wt82_71 ~ qsmk, data=nhefs.nmv), data.frame(qsmk=1)) # Smoking cessation
## 1
## 4.525079
predict(lm(wt82_71 ~ qsmk, data=nhefs.nmv), data.frame(qsmk=0)) # No smoking cessation
## 1
## 1.984498
# Table
summary(nhefs.nmv[which(nhefs.nmv$qsmk==0),]$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.00 33.00 42.00 42.79 51.00 72.00
summary(nhefs.nmv[which(nhefs.nmv$qsmk==1),]$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 25.00 35.00 46.00 46.17 56.00 74.00
(Weighting the exposure)
# Estimation of ip weights via a logistic model
fit <- glm(qsmk ~ sex + race + age + I(age^2) +
as.factor(education) + smokeintensity +
I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
family = binomial(), data = nhefs.nmv)
summary(fit)
##
## Call:
## glm(formula = qsmk ~ sex + race + age + I(age^2) + as.factor(education) +
## smokeintensity + I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
## as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
## family = binomial(), data = nhefs.nmv)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5127 -0.7907 -0.6387 0.9832 2.3729
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.2425191 1.3808360 -1.624 0.104369
## sex -0.5274782 0.1540496 -3.424 0.000617 ***
## race -0.8392636 0.2100665 -3.995 6.46e-05 ***
## age 0.1212052 0.0512663 2.364 0.018068 *
## I(age^2) -0.0008246 0.0005361 -1.538 0.124039
## as.factor(education)2 -0.0287755 0.1983506 -0.145 0.884653
## as.factor(education)3 0.0864318 0.1780850 0.485 0.627435
## as.factor(education)4 0.0636010 0.2732108 0.233 0.815924
## as.factor(education)5 0.4759606 0.2262237 2.104 0.035384 *
## smokeintensity -0.0772704 0.0152499 -5.067 4.04e-07 ***
## I(smokeintensity^2) 0.0010451 0.0002866 3.647 0.000265 ***
## smokeyrs -0.0735966 0.0277775 -2.650 0.008061 **
## I(smokeyrs^2) 0.0008441 0.0004632 1.822 0.068398 .
## as.factor(exercise)1 0.3548405 0.1801351 1.970 0.048855 *
## as.factor(exercise)2 0.3957040 0.1872400 2.113 0.034571 *
## as.factor(active)1 0.0319445 0.1329372 0.240 0.810100
## as.factor(active)2 0.1767840 0.2149720 0.822 0.410873
## wt71 -0.0152357 0.0263161 -0.579 0.562625
## I(wt71^2) 0.0001352 0.0001632 0.829 0.407370
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786.1 on 1565 degrees of freedom
## Residual deviance: 1676.9 on 1547 degrees of freedom
## AIC: 1714.9
##
## Number of Fisher Scoring iterations: 4
p.qsmk.obs <- ifelse(nhefs.nmv$qsmk == 0, 1 - predict(fit, type = "response"),
predict(fit, type = "response"))
nhefs.nmv$w <- 1/p.qsmk.obs
summary(nhefs.nmv$w)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.054 1.230 1.373 1.996 1.990 16.700
library("geepack")
# what is id
msm.w <- geeglm(wt82_71 ~ qsmk, data=nhefs.nmv, weights=w,id = seqn,corstr="independence")
summary(msm.w)
##
## Call:
## geeglm(formula = wt82_71 ~ qsmk, data = nhefs.nmv, weights = w,
## id = seqn, corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 1.7800 0.2247 62.73 2.33e-15 ***
## qsmk 3.4405 0.5255 42.87 5.86e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 65.06 4.221
## Number of clusters: 1566 Maximum cluster size: 1
# no association between sex and qsmk in pseudo-population after weighting
xtabs(nhefs.nmv$w ~ nhefs.nmv$sex + nhefs.nmv$qsmk)
## nhefs.nmv$qsmk
## nhefs.nmv$sex 0 1
## 0 763.6 763.6
## 1 801.7 797.2
# "check" for positivity (White women)
table(nhefs.nmv$age[nhefs.nmv$race == 0 & nhefs.nmv$sex == 1],
nhefs.nmv$qsmk[nhefs.nmv$race == 0 & nhefs.nmv$sex == 1])
##
## 0 1
## 25 24 3
## 26 14 5
## 27 18 2
## 28 20 5
## 29 15 4
## 30 14 5
## 31 11 5
## 32 14 7
## 33 12 3
## 34 22 5
## 35 16 5
## 36 13 3
## 37 14 1
## 38 6 2
## 39 19 4
## 40 10 4
## 41 13 3
## 42 16 3
## 43 14 3
## 44 9 4
## 45 12 5
## 46 19 4
## 47 19 4
## 48 19 4
## 49 11 3
## 50 18 4
## 51 9 3
## 52 11 3
## 53 11 4
## 54 17 9
## 55 9 4
## 56 8 7
## 57 9 2
## 58 8 4
## 59 5 4
## 60 5 4
## 61 5 2
## 62 6 5
## 63 3 3
## 64 7 1
## 65 3 2
## 66 4 0
## 67 2 0
## 69 6 2
## 70 2 1
## 71 0 1
## 72 2 2
## 74 0 1
The inverse probability weight at this time can be: the treatment group is Pr[A=1]/f(A|L) (Or 1/f (A|L), Or 0.5/f (A|L)), and the non-treatment group is Pr[A=0]/f(A|L). WA =1/ f (A| L) is called non-stable weights, while SWA = f (A)/ f (A| L) is called stable weights.
# estimation of denominator of ip weights (same as above, conditional probability)
denom.fit <- glm(qsmk ~ as.factor(sex) + as.factor(race) + age + I(age^2) +
as.factor(education) + smokeintensity +
I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
family = binomial(), data = nhefs.nmv)
summary(denom.fit)
##
## Call:
## glm(formula = qsmk ~ as.factor(sex) + as.factor(race) + age +
## I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) +
## smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) +
## wt71 + I(wt71^2), family = binomial(), data = nhefs.nmv)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.513 -0.791 -0.639 0.983 2.373
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.242519 1.380836 -1.62 0.10437
## as.factor(sex)1 -0.527478 0.154050 -3.42 0.00062 ***
## as.factor(race)1 -0.839264 0.210067 -4.00 6.5e-05 ***
## age 0.121205 0.051266 2.36 0.01807 *
## I(age^2) -0.000825 0.000536 -1.54 0.12404
## as.factor(education)2 -0.028776 0.198351 -0.15 0.88465
## as.factor(education)3 0.086432 0.178085 0.49 0.62744
## as.factor(education)4 0.063601 0.273211 0.23 0.81592
## as.factor(education)5 0.475961 0.226224 2.10 0.03538 *
## smokeintensity -0.077270 0.015250 -5.07 4.0e-07 ***
## I(smokeintensity^2) 0.001045 0.000287 3.65 0.00027 ***
## smokeyrs -0.073597 0.027777 -2.65 0.00806 **
## I(smokeyrs^2) 0.000844 0.000463 1.82 0.06840 .
## as.factor(exercise)1 0.354841 0.180135 1.97 0.04885 *
## as.factor(exercise)2 0.395704 0.187240 2.11 0.03457 *
## as.factor(active)1 0.031944 0.132937 0.24 0.81010
## as.factor(active)2 0.176784 0.214972 0.82 0.41087
## wt71 -0.015236 0.026316 -0.58 0.56262
## I(wt71^2) 0.000135 0.000163 0.83 0.40737
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786.1 on 1565 degrees of freedom
## Residual deviance: 1676.9 on 1547 degrees of freedom
## AIC: 1715
##
## Number of Fisher Scoring iterations: 4
pd.qsmk <- predict(denom.fit, type = "response")
# Estimation of numerator of ip weights (probability (or mean) of treatment, marginal probability)
numer.fit <- glm(qsmk~1, family = binomial(), data = nhefs.nmv)
summary(numer.fit)
##
## Call:
## glm(formula = qsmk ~ 1, family = binomial(), data = nhefs.nmv)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.771 -0.771 -0.771 1.648 1.648
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0598 0.0578 -18.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786.1 on 1565 degrees of freedom
## Residual deviance: 1786.1 on 1565 degrees of freedom
## AIC: 1788
##
## Number of Fisher Scoring iterations: 4
pn.qsmk <- predict(numer.fit, type = "response")
nhefs.nmv$sw <- ifelse(nhefs.nmv$qsmk == 0, ((1-pn.qsmk)/(1-pd.qsmk)),
(pn.qsmk/pd.qsmk))
# 0.257 is the probability of a treatment
summary(nhefs.nmv$sw)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.331 0.867 0.950 0.999 1.079 4.298
summary(nhefs.nmv$w)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.05 1.23 1.37 2.00 1.99 16.70
msm.sw <- geeglm(wt82_71 ~ qsmk, data=nhefs.nmv, weights=sw, id=seqn,
corstr="independence")
summary(msm.sw)
##
## Call:
## geeglm(formula = wt82_71 ~ qsmk, data = nhefs.nmv, weights = sw,
## id = seqn, corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 1.780 0.225 62.7 2.3e-15 ***
## qsmk 3.441 0.525 42.9 5.9e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 60.7 3.71
## Number of clusters: 1566 Maximum cluster size: 1
# no association between sex and qsmk in pseudo-population
xtabs(nhefs.nmv$sw ~ nhefs.nmv$sex + nhefs.nmv$qsmk)
## nhefs.nmv$qsmk
## nhefs.nmv$sex 0 1
## 0 567 197
## 1 595 205
We assume that the probability density f (A|L) is a normal distribution, u=E(A|L), and the density f(A) in the molecule is also normally distributed. Because effect estimates are very sensitive to model choice for the conditional density f(A|L), we need to be very careful when using inverse probability weighting for continuous variables.
# Analysis restricted to subjects reporting <=25 cig/day at baseline
nhefs.nmv.s <- subset(nhefs.nmv, smokeintensity <=25)
# estimation of denominator of ip weights
den.fit.obj <- lm(smkintensity82_71 ~ as.factor(sex) +
as.factor(race) + age + I(age^2) +
as.factor(education) + smokeintensity + I(smokeintensity^2) +
smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) + wt71 +
I(wt71^2), data = nhefs.nmv.s)
p.den <- predict(den.fit.obj, type = "response")
dens.den <- dnorm(nhefs.nmv.s$smkintensity82_71, p.den, summary(den.fit.obj)$sigma) # ipw denominator
# dnorm(x, mean, sd)
# estimation of numerator of ip weights
num.fit.obj <- lm(smkintensity82_71 ~ 1, data = nhefs.nmv.s) # ipw numerator
p.num <- predict(num.fit.obj, type = "response")
dens.num <- dnorm(nhefs.nmv.s$smkintensity82_71, p.num, summary(num.fit.obj)$sigma)
nhefs.nmv.s$sw.a = dens.num/dens.den
summary(nhefs.nmv.s$sw.a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.19 0.89 0.97 1.00 1.05 5.10
msm.sw.cont <- geeglm(wt82_71 ~ smkintensity82_71 + I(smkintensity82_71*smkintensity82_71),
data=nhefs.nmv.s, weights=sw.a, id=seqn, corstr="independence")
summary(msm.sw.cont)
##
## Call:
## geeglm(formula = wt82_71 ~ smkintensity82_71 + I(smkintensity82_71 *
## smkintensity82_71), data = nhefs.nmv.s, weights = sw.a, id = seqn,
## corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 2.00452 0.29512 46.13 1.1e-11 ***
## smkintensity82_71 -0.10899 0.03154 11.94 0.00055 ***
## I(smkintensity82_71 * smkintensity82_71) 0.00269 0.00242 1.24 0.26489
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 60.5 4.5
## Number of clusters: 1162 Maximum cluster size: 1
table(nhefs.nmv$qsmk, nhefs.nmv$death) # qsmk is treatment # death is outcome
##
## 0 1
## 0 963 200
## 1 312 91
# First, estimation of stabilized weights sw.a
# Second, fit logistic model below # nhefs.nmv.s/nhefs.nmv
msm.logistic <- geeglm(death ~ qsmk, data=nhefs.nmv.s, weights=sw.a, # sw /sw.a
id=seqn, family=binomial(), corstr="independence")
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(msm.logistic)
##
## Call:
## geeglm(formula = death ~ qsmk, family = binomial(), data = nhefs.nmv.s,
## weights = sw.a, id = seqn, corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.5026 0.0981 234.80 <2e-16 ***
## qsmk 0.0369 0.1736 0.05 0.83
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 1 0.082
## Number of clusters: 1162 Maximum cluster size: 1
However, when effect modification is present (exposure and a modifier, qsmk and sex), we can put other covariates in the marginal structure model, even if these covariates are not confounding variables. So our numerator of stable weights can be f(A) or f(A|V).
table(nhefs.nmv$sex)
##
## 0 1
## 762 804
# estimation of denominator of ip weights
denom.fit <- glm(qsmk ~ as.factor(sex) + as.factor(race) + age + I(age^2) +
as.factor(education) + smokeintensity +
I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
family = binomial(), data = nhefs.nmv) # denominator
summary(denom.fit)
##
## Call:
## glm(formula = qsmk ~ as.factor(sex) + as.factor(race) + age +
## I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) +
## smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) +
## wt71 + I(wt71^2), family = binomial(), data = nhefs.nmv)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.513 -0.791 -0.639 0.983 2.373
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.242519 1.380836 -1.62 0.10437
## as.factor(sex)1 -0.527478 0.154050 -3.42 0.00062 ***
## as.factor(race)1 -0.839264 0.210067 -4.00 6.5e-05 ***
## age 0.121205 0.051266 2.36 0.01807 *
## I(age^2) -0.000825 0.000536 -1.54 0.12404
## as.factor(education)2 -0.028776 0.198351 -0.15 0.88465
## as.factor(education)3 0.086432 0.178085 0.49 0.62744
## as.factor(education)4 0.063601 0.273211 0.23 0.81592
## as.factor(education)5 0.475961 0.226224 2.10 0.03538 *
## smokeintensity -0.077270 0.015250 -5.07 4.0e-07 ***
## I(smokeintensity^2) 0.001045 0.000287 3.65 0.00027 ***
## smokeyrs -0.073597 0.027777 -2.65 0.00806 **
## I(smokeyrs^2) 0.000844 0.000463 1.82 0.06840 .
## as.factor(exercise)1 0.354841 0.180135 1.97 0.04885 *
## as.factor(exercise)2 0.395704 0.187240 2.11 0.03457 *
## as.factor(active)1 0.031944 0.132937 0.24 0.81010
## as.factor(active)2 0.176784 0.214972 0.82 0.41087
## wt71 -0.015236 0.026316 -0.58 0.56262
## I(wt71^2) 0.000135 0.000163 0.83 0.40737
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786.1 on 1565 degrees of freedom
## Residual deviance: 1676.9 on 1547 degrees of freedom
## AIC: 1715
##
## Number of Fisher Scoring iterations: 4
pd.qsmk <- predict(denom.fit, type = "response")
# estimation of numerator of ip weights
numer.fit <- glm(qsmk~as.factor(sex), family = binomial(), data = nhefs.nmv) # numerator
summary(numer.fit)
##
## Call:
## glm(formula = qsmk ~ as.factor(sex), family = binomial(), data = nhefs.nmv)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.825 -0.825 -0.719 1.576 1.720
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9016 0.0799 -11.28 <2e-16 ***
## as.factor(sex)1 -0.3202 0.1160 -2.76 0.0058 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1786.1 on 1565 degrees of freedom
## Residual deviance: 1778.4 on 1564 degrees of freedom
## AIC: 1782
##
## Number of Fisher Scoring iterations: 4
pn.qsmk <- predict(numer.fit, type = "response")
nhefs.nmv$sw.a <- ifelse(nhefs.nmv$qsmk == 0, ((1-pn.qsmk)/(1-pd.qsmk)),
(pn.qsmk/pd.qsmk))
summary(nhefs.nmv$sw.a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.29 0.88 0.96 1.00 1.08 3.80
# Estimating parameters of a marginal structural mean model
msm.emm <- geeglm(wt82_71~as.factor(qsmk) + as.factor(sex)
+ as.factor(qsmk):as.factor(sex), data=nhefs.nmv,
weights=sw.a, id=seqn, corstr="independence")
summary(msm.emm)
##
## Call:
## geeglm(formula = wt82_71 ~ as.factor(qsmk) + as.factor(sex) +
## as.factor(qsmk):as.factor(sex), data = nhefs.nmv, weights = sw.a,
## id = seqn, corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 1.78445 0.30984 33.17 8.5e-09 ***
## as.factor(qsmk)1 3.52198 0.65707 28.73 8.3e-08 ***
## as.factor(sex)1 -0.00872 0.44882 0.00 0.98
## as.factor(qsmk)1:as.factor(sex)1 -0.15948 1.04608 0.02 0.88
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 60.8 3.71
## Number of clusters: 1566 Maximum cluster size: 1
If we want to explore the interaction between two treatment variables A and B, we need to put both A and B into the marginal structure model. When calculating the inverse probability weight, the denominator will include these two joint distribution of variables.
A quick view for censoring data
table(nhefs$qsmk, nhefs$cens)
##
## 0 1
## 0 1163 38
## 1 403 25
summary(nhefs[which(nhefs$cens==0),]$wt71)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 39.6 59.5 69.2 70.8 79.8 151.7
summary(nhefs[which(nhefs$cens==1),]$wt71)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36.2 63.1 72.1 76.6 87.9 169.2
Calculate the weights for treatment and censor
# for completed data
# estimation of denominator of ip weights for A
denom.fit <- glm(qsmk ~ as.factor(sex) + as.factor(race) + age + I(age^2) +
as.factor(education) + smokeintensity +
I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
family = binomial(), data = nhefs)
summary(denom.fit)
##
## Call:
## glm(formula = qsmk ~ as.factor(sex) + as.factor(race) + age +
## I(age^2) + as.factor(education) + smokeintensity + I(smokeintensity^2) +
## smokeyrs + I(smokeyrs^2) + as.factor(exercise) + as.factor(active) +
## wt71 + I(wt71^2), family = binomial(), data = nhefs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.465 -0.804 -0.646 1.058 2.355
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.988902 1.241279 -1.60 0.10909
## as.factor(sex)1 -0.507522 0.148232 -3.42 0.00062 ***
## as.factor(race)1 -0.850231 0.205872 -4.13 3.6e-05 ***
## age 0.103013 0.048900 2.11 0.03515 *
## I(age^2) -0.000605 0.000507 -1.19 0.23297
## as.factor(education)2 -0.098320 0.190655 -0.52 0.60607
## as.factor(education)3 0.015699 0.170714 0.09 0.92673
## as.factor(education)4 -0.042526 0.264276 -0.16 0.87216
## as.factor(education)5 0.379663 0.220395 1.72 0.08495 .
## smokeintensity -0.065156 0.014759 -4.41 1.0e-05 ***
## I(smokeintensity^2) 0.000846 0.000276 3.07 0.00216 **
## smokeyrs -0.073371 0.026996 -2.72 0.00657 **
## I(smokeyrs^2) 0.000838 0.000443 1.89 0.05867 .
## as.factor(exercise)1 0.291412 0.173554 1.68 0.09314 .
## as.factor(exercise)2 0.355052 0.179929 1.97 0.04846 *
## as.factor(active)1 0.010875 0.129832 0.08 0.93324
## as.factor(active)2 0.068312 0.208727 0.33 0.74346
## wt71 -0.012848 0.022283 -0.58 0.56423
## I(wt71^2) 0.000121 0.000135 0.89 0.37096
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1876.3 on 1628 degrees of freedom
## Residual deviance: 1766.7 on 1610 degrees of freedom
## AIC: 1805
##
## Number of Fisher Scoring iterations: 4
pd.qsmk <- predict(denom.fit, type = "response")
# estimation of numerator of ip weights for A
numer.fit <- glm(qsmk~1, family = binomial(), data = nhefs)
summary(numer.fit)
##
## Call:
## glm(formula = qsmk ~ 1, family = binomial(), data = nhefs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.781 -0.781 -0.781 1.635 1.635
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0318 0.0563 -18.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1876.3 on 1628 degrees of freedom
## Residual deviance: 1876.3 on 1628 degrees of freedom
## AIC: 1878
##
## Number of Fisher Scoring iterations: 4
pn.qsmk <- predict(numer.fit, type = "response")
# for censoring data
# as.factor(qsmk) likes a effect modification
# estimation of denominator of ip weights for C
denom.cens <- glm(cens ~ as.factor(qsmk) + as.factor(sex) +
as.factor(race) + age + I(age^2) +
as.factor(education) + smokeintensity +
I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) +
as.factor(exercise) + as.factor(active) + wt71 + I(wt71^2),
family = binomial(), data = nhefs)
summary(denom.cens)
##
## Call:
## glm(formula = cens ~ as.factor(qsmk) + as.factor(sex) + as.factor(race) +
## age + I(age^2) + as.factor(education) + smokeintensity +
## I(smokeintensity^2) + smokeyrs + I(smokeyrs^2) + as.factor(exercise) +
## as.factor(active) + wt71 + I(wt71^2), family = binomial(),
## data = nhefs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.097 -0.287 -0.207 -0.157 2.996
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.014466 2.576106 1.56 0.1192
## as.factor(qsmk)1 0.516867 0.287716 1.80 0.0724 .
## as.factor(sex)1 0.057313 0.330278 0.17 0.8622
## as.factor(race)1 -0.012271 0.452489 -0.03 0.9784
## age -0.269729 0.117465 -2.30 0.0217 *
## I(age^2) 0.002884 0.001114 2.59 0.0096 **
## as.factor(education)2 -0.440788 0.419399 -1.05 0.2933
## as.factor(education)3 -0.164688 0.370547 -0.44 0.6567
## as.factor(education)4 0.138447 0.569797 0.24 0.8080
## as.factor(education)5 -0.382382 0.560181 -0.68 0.4949
## smokeintensity 0.015712 0.034732 0.45 0.6510
## I(smokeintensity^2) -0.000113 0.000606 -0.19 0.8517
## smokeyrs 0.078597 0.074958 1.05 0.2944
## I(smokeyrs^2) -0.000557 0.001032 -0.54 0.5894
## as.factor(exercise)1 -0.971471 0.387810 -2.51 0.0122 *
## as.factor(exercise)2 -0.583989 0.372313 -1.57 0.1168
## as.factor(active)1 -0.247479 0.325455 -0.76 0.4470
## as.factor(active)2 0.706583 0.396458 1.78 0.0747 .
## wt71 -0.087887 0.040012 -2.20 0.0281 *
## I(wt71^2) 0.000635 0.000226 2.81 0.0049 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 533.36 on 1628 degrees of freedom
## Residual deviance: 465.36 on 1609 degrees of freedom
## AIC: 505.4
##
## Number of Fisher Scoring iterations: 7
pd.cens <- 1-predict(denom.cens, type = "response")
# estimation of numerator of ip weights for Censor data
numer.cens <- glm(cens~ as.factor(qsmk), family = binomial(), data = nhefs)
summary(numer.cens)
##
## Call:
## glm(formula = cens ~ as.factor(qsmk), family = binomial(), data = nhefs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.347 -0.254 -0.254 -0.254 2.628
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.421 0.165 -20.75 <2e-16 ***
## as.factor(qsmk)1 0.641 0.264 2.43 0.015 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 533.36 on 1628 degrees of freedom
## Residual deviance: 527.76 on 1627 degrees of freedom
## AIC: 531.8
##
## Number of Fisher Scoring iterations: 6
pn.cens <- 1-predict(numer.cens, type = "response")
P(A|L)~P(A|L,C)*P(C|A,L)
# times a_w and censor_w
nhefs$sw.a <- ifelse(nhefs$qsmk == 0, ((1-pn.qsmk)/(1-pd.qsmk)),
(pn.qsmk/pd.qsmk))
nhefs$sw.c <- pn.cens/pd.cens
nhefs$sw <- nhefs$sw.c* nhefs$sw.a #times
summary(nhefs$sw.a)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.33 0.86 0.95 1.00 1.08 4.21
summary(nhefs$sw)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.35 0.86 0.94 1.01 1.08 12.86
msm.sw <- geeglm(wt82_71~qsmk, data=nhefs,
weights=sw, id=seqn, corstr="independence")
summary(msm.sw)
##
## Call:
## geeglm(formula = wt82_71 ~ qsmk, data = nhefs, weights = sw,
## id = seqn, corstr = "independence")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) 1.662 0.233 51.0 9.3e-13 ***
## qsmk 3.496 0.526 44.2 2.9e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = independence
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 61.8 3.83
## Number of clusters: 1566 Maximum cluster size: 1
beta <- coef(msm.sw)
SE <- coef(summary(msm.sw))[,2]
lcl <- beta-qnorm(0.975)*SE
ucl <- beta+qnorm(0.975)*SE
cbind(beta, lcl, ucl)
## beta lcl ucl
## (Intercept) 1.66 1.21 2.12
## qsmk 3.50 2.47 4.53