── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6 ✔ purrr 0.3.4
✔ tibble 3.1.8 ✔ dplyr 1.0.9
✔ tidyr 1.2.0 ✔ stringr 1.4.1
✔ readr 2.1.2 ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
Loading required package: grid
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
expand, pack, unpack
Loading required package: survival
Attaching package: 'survey'
The following object is masked from 'package:graphics':
dotchart
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
#BlackLivesMatter
Attaching package: 'psych'
The following object is masked from 'package:ggsurvfit':
%+%
The following object is masked from 'package:car':
logit
The following objects are masked from 'package:ggplot2':
%+%, alpha
hw5
In this analysis I use data from National Health Interview Survey (NHIS) linked to the National Death Index (NDI). I use a 10-year sample from 2009-2018. The mortality link up includes deaths through December 31, 2019. The outcome variable that I am interested in is death.
nhis <- read_ipums_ddi("nhis_00007.xml")
nhis_dat <- read_ipums_micro(nhis)Use of data from IPUMS NHIS is subject to conditions including that users
should cite the data appropriately. Use command `ipums_conditions()` for more
details.
nhis_dat <- haven::zap_labels(nhis_dat)nhis_dat <- nhis_dat %>%
filter(MORTELIG == 1)
nhis_dat <- nhis_dat %>%
mutate(death_age = ifelse( MORTSTAT ==1,
MORTDODY - (YEAR - AGE),
2019 - (YEAR - AGE)),
d.event = ifelse(MORTSTAT == 1, 1, 0))
nhis_dat$married <- Recode(nhis_dat$MARSTAT, recodes="00=NA; 10:13='married'; 20:40='sep'; 50='nm'; 99=NA" ,as.factor=T)
nhis_dat$male <- if_else(nhis_dat$SEX==1,1,0)
nhis_dat$age5 <- cut(nhis_dat$AGE,seq(15,85,5))
nhis_dat <- nhis_dat %>% mutate(age2 = AGE * AGE)
nhis_dat$race <- Recode(nhis_dat$RACENEW, recodes="100='wht'; 200 ='blk'; 300:617='other'; 900:990=NA", as.factor=T)
nhis_dat$college <- Recode(nhis_dat$EDUC, recodes= "000=NA; 100:202='hs or less';300:303='some coll';400:504='coll';else=NA", as.factor=T)
nhis_dat$black <- if_else(nhis_dat$race=='blk',1,0)
nhis_dat$other <- if_else(nhis_dat$race=='other',1,0)
nhis_dat$hs <- if_else(nhis_dat$college=='hs or less',1,0)
nhis_dat$col1 <- if_else(nhis_dat$college=='some coll',1,0)
nhis_dat$sep <- if_else(nhis_dat$married=='sep',1,0)
nhis_dat$nm <- if_else(nhis_dat$married=='nm',1,0)subpp <- survSplit(Surv(death_age, d.event)~ ., data = nhis_dat,
cut = seq(18,100,5), episode = "mort_5_yr")
des2 <- svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA,
data = subpp, nest = T)
#constant model
m0 <- svyglm(d.event ~ married+college,
design = des2, family = binomial (link = "cloglog"))Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#general model
m1 <- svyglm(d.event ~factor(mort_5_yr)+married+college,
design = des2, family = binomial (link = "cloglog"))Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#linear model
m2 <- svyglm(d.event~mort_5_yr+married+college,
design = des2, family = binomial (link = "cloglog"))Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#quadratic model
m3 <- svyglm(d.event~mort_5_yr+I(mort_5_yr^2)+married+college,
design = des2, family = binomial (link = "cloglog"))Warning in eval(family$initialize): non-integer #successes in a binomial glm!
newdat <- expand.grid(mort_5_yr = unique(subpp$mort_5_yr),
married = unique(subpp$married),
college = unique(subpp$college))
newdat <- newdat %>%
filter(complete.cases(.))
newdat$h0<-predict(m0, newdata=newdat, type="response")
newdat$h1<-predict(m1, newdata=newdat, type="response")
newdat$h2<-predict(m2, newdata=newdat, type="response")
newdat$h3<-predict(m3, newdata=newdat, type="response")
head(newdat) mort_5_yr married college h0 h1 h2
1 1 nm some coll 0.003949921 2.998418e-05 0.0001395741
2 2 nm some coll 0.003949921 3.518150e-04 0.0002496623
3 3 nm some coll 0.003949921 9.781186e-04 0.0004465624
4 4 nm some coll 0.003949921 1.225002e-03 0.0007986888
5 5 nm some coll 0.003949921 1.780563e-03 0.0014282779
6 6 nm some coll 0.003949921 2.865865e-03 0.0025535239
h3
1 0.0003353027
2 0.0004940912
3 0.0007432397
4 0.0011412739
5 0.0017888258
6 0.0028617065
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
library(magrittr)
Attaching package: 'magrittr'
The following object is masked from 'package:purrr':
set_names
The following object is masked from 'package:tidyr':
extract
out<-melt(setDT(newdat),
id = c("mort_5_yr", "married", "college"),
measure.vars = list(haz=c("h0", "h1","h2","h3")))
head(out, n=20) mort_5_yr married college variable value
1: 1 nm some coll h0 0.003949921
2: 2 nm some coll h0 0.003949921
3: 3 nm some coll h0 0.003949921
4: 4 nm some coll h0 0.003949921
5: 5 nm some coll h0 0.003949921
6: 6 nm some coll h0 0.003949921
7: 7 nm some coll h0 0.003949921
8: 8 nm some coll h0 0.003949921
9: 9 nm some coll h0 0.003949921
10: 10 nm some coll h0 0.003949921
11: 11 nm some coll h0 0.003949921
12: 12 nm some coll h0 0.003949921
13: 13 nm some coll h0 0.003949921
14: 14 nm some coll h0 0.003949921
15: 15 nm some coll h0 0.003949921
16: 16 nm some coll h0 0.003949921
17: 17 nm some coll h0 0.003949921
18: 1 married some coll h0 0.005188841
19: 2 married some coll h0 0.005188841
20: 3 married some coll h0 0.005188841
out%>%
dplyr::filter(college == "coll")%>%
dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
.$variable =="h1" ~ "General",
.$variable =="h2" ~ "Linear",
.$variable =="h3" ~ "Polynomial - 2"))%>%
ggplot(aes(x = mort_5_yr*5, y=value ))+
geom_line(aes(group=married, color=married) )+
labs(title = "Hazard function for adult mortality",
subtitle = "Alternative model specifications")+
xlab("Age")+ylab("S(t)")+
facet_wrap(~mod)#+ scale_y_continuous(trans = "log10")Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.
The non-married individuals have the highest hazard functions of all marital statuses among adults with college education.
AIC0 <- AIC(m0)Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1 <- AIC(m1)Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2 <- AIC(m2)Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3 <- AIC(m3)Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AIC0["AIC"],AIC1["AIC"],AIC2["AIC"],AIC3["AIC"]),
mod = factor(c("const", "general", "linear", "poly 2")) )
AICS$mod<-forcats::fct_relevel(AICS$mod,
c("general", "const" , "linear", "poly2"))Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")| mod | AIC | deltaAIC |
|---|---|---|
| const | 389990.5 | 122490.750 |
| general | 267499.7 | 0.000 |
| linear | 269624.0 | 2124.340 |
| poly 2 | 269119.8 | 1620.133 |
The general model is clearly the best.
interactmodel <- glm(d.event ~ married * college,
data=nhis_dat)
summary(interactmodel)
Call:
glm(formula = d.event ~ married * college, data = nhis_dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.17729 -0.08277 -0.05198 -0.03523 0.98078
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0352322 0.0008133 43.321 < 2e-16 ***
marriednm -0.0160126 0.0016218 -9.873 < 2e-16 ***
marriedsep 0.0582433 0.0018213 31.980 < 2e-16 ***
collegehs or less 0.0475367 0.0010976 43.308 < 2e-16 ***
collegesome coll 0.0167462 0.0012052 13.895 < 2e-16 ***
marriednm:collegehs or less -0.0293673 0.0020372 -14.415 < 2e-16 ***
marriedsep:collegehs or less 0.0362769 0.0022183 16.353 < 2e-16 ***
marriednm:collegesome coll -0.0167112 0.0021587 -7.741 9.85e-15 ***
marriedsep:collegesome coll -0.0019005 0.0024138 -0.787 0.431
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 0.0610511)
Null deviance: 34164 on 540719 degrees of freedom
Residual deviance: 33011 on 540711 degrees of freedom
(7720 observations deleted due to missingness)
AIC: 22631
Number of Fisher Scoring iterations: 2
There is interaction between marital status and education