This example will illustrate how to fit a multistate hazard model using the multinomial logit model. The outcome for the example is whether a family experiences a transition between poverty states between waves 1 and 5 of the data. The data from the ECLS-K.
#Load required libraries
library(survival)
library(car)
## Loading required package: carData
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(lattice)
First we load our data
load("~/Google Drive/classes/dem7223/dem7223_19/data/eclsk_k5.Rdata")
names(eclskk5)<-tolower(names(eclskk5))
#get out only the variables I'm going to use for this example
myvars<-c( "childid","x_chsex_r", "x_raceth_r", "x1kage_r","x4age", "x5age", "x6age", "x7age", "x2povty","x4povty_i", "x6povty_i", "x8povty_i","x12par1ed_i",'x1hparnt',"x4hparnt","x6hparnt","x1par1emp","x4par1emp_i", "x6par1emp_i", "s2_id", "w6c6p_6psu", "w6c6p_6str", "w6c6p_20")
eclskk5<-eclskk5[,myvars]
eclskk5$age1<-ifelse(eclskk5$x1kage_r==-9, NA, eclskk5$x1kage_r/12)
eclskk5$age2<-ifelse(eclskk5$x4age==-9, NA, eclskk5$x4age/12)
#for the later waves, the NCES group the ages into ranges of months, so 1= <105 months, 2=105 to 108 months. So, I fix the age at the midpoint of the interval they give, and make it into years by dividing by 12
eclskk5$age3<-ifelse(eclskk5$x5age==-9, NA, eclskk5$x5age/12)
eclskk5$pov1<-ifelse(eclskk5$x2povty==1,1,0)
eclskk5$pov2<-ifelse(eclskk5$x4povty_i==1,1,0)
eclskk5$pov3<-ifelse(eclskk5$x6povty_i==1,1,0)
#mother's work status-time varying
eclskk5$momwork1<-Recode(eclskk5$x1par1emp, recodes = "1='fulltime';2='parttime'; 3='unemployed'; 4='nilf';else =NA", as.factor = T)
eclskk5$momwork2<-Recode(eclskk5$x4par1emp_i, recodes = "1='fulltime';2='parttime'; 3='unemployed'; 4='nilf';else =NA", as.factor = T)
#HH type - parents - time varying
eclskk5$parenttype1<-recode(eclskk5$x1hparnt, recodes="1='twoparent'; 2='onebio_oneother'; 3='onebioonly';4='othernonparent'; else=NA", as.factor = T)
eclskk5$parenttype2<-recode(eclskk5$x4hparnt, recodes="1='twoparent'; 2='onebio_oneother'; 3='onebioonly';4='othernonparent'; else=NA", as.factor = T)
#Recode race with white, non Hispanic as reference using dummy vars
eclskk5$race_rec<-Recode (eclskk5$x_raceth_r, recodes="1 = 'nhwhite';2='nhblack';3:4='hispanic';5='nhasian'; 6:8='other';-9=NA", as.factor = T)
eclskk5$race_rec<-relevel(eclskk5$race_rec, ref = "nhwhite")
eclskk5$male<-Recode(eclskk5$x_chsex_r, recodes="1=1; 2=0; -9=NA")
eclskk5$mlths<-Recode(eclskk5$x12par1ed_i, recodes = "1:2=1; 3:9=0; else = NA")
eclskk5$mgths<-Recode(eclskk5$x12par1ed_i, recodes = "1:3=0; 4:9=1; else =NA")
Now I reshape the data to person-period form:
eclskk5<-subset(eclskk5, is.na(pov1)==F&is.na(pov2)==F&is.na(pov3)==F&is.na(age1)==F&is.na(age2)==F&is.na(age3)==F&is.na(eclskk5$w6c6p_6psu)==F)
head(eclskk5)
## # A tibble: 6 x 37
## childid x_chsex_r x_raceth_r x1kage_r x4age x5age x6age x7age x2povty
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 100000~ 1 1 67.8 85.9 91.7 97.5 107. 3
## 2 100000~ 2 5 68.4 88.6 93.4 100. 111. 3
## 3 100000~ 2 8 68.6 87.7 93.0 99.2 111. 3
## 4 100000~ 2 1 69.4 86.9 92.7 99.3 110. 2
## 5 100000~ 1 2 76.2 93.3 99.6 106. 115. 2
## 6 100000~ 2 5 64.3 82.8 88.2 94.8 106. 3
## # ... with 28 more variables: x4povty_i <dbl>, x6povty_i <dbl>,
## # x8povty_i <dbl>, x12par1ed_i <dbl>, x1hparnt <dbl>, x4hparnt <dbl>,
## # x6hparnt <dbl>, x1par1emp <dbl>, x4par1emp_i <dbl>, x6par1emp_i <dbl>,
## # s2_id <chr>, w6c6p_6psu <dbl>, w6c6p_6str <dbl>, w6c6p_20 <dbl>,
## # age1 <dbl>, age2 <dbl>, age3 <dbl>, pov1 <dbl>, pov2 <dbl>, pov3 <dbl>,
## # momwork1 <fct>, momwork2 <fct>, parenttype1 <fct>, parenttype2 <fct>,
## # race_rec <fct>, male <dbl>, mlths <dbl>, mgths <dbl>
names(eclskk5)
## [1] "childid" "x_chsex_r" "x_raceth_r" "x1kage_r" "x4age"
## [6] "x5age" "x6age" "x7age" "x2povty" "x4povty_i"
## [11] "x6povty_i" "x8povty_i" "x12par1ed_i" "x1hparnt" "x4hparnt"
## [16] "x6hparnt" "x1par1emp" "x4par1emp_i" "x6par1emp_i" "s2_id"
## [21] "w6c6p_6psu" "w6c6p_6str" "w6c6p_20" "age1" "age2"
## [26] "age3" "pov1" "pov2" "pov3" "momwork1"
## [31] "momwork2" "parenttype1" "parenttype2" "race_rec" "male"
## [36] "mlths" "mgths"
e.long<-reshape(data.frame(eclskk5), idvar="childid", varying=list( momwork=c("momwork1","momwork2"), partype=c("parenttype1", "parenttype2")), times=1:2, direction="long" , v.names = c("momwork", "partype"))
e.long<-e.long[order(e.long$childid, e.long$time),]
head(e.long)
## childid x_chsex_r x_raceth_r x1kage_r x4age x5age x6age x7age
## 10000014.1 10000014 1 1 67.82 85.94 91.73 97.51 106.85
## 10000014.2 10000014 1 1 67.82 85.94 91.73 97.51 106.85
## 10000020.1 10000020 2 5 68.38 88.57 93.37 100.34 111.12
## 10000020.2 10000020 2 5 68.38 88.57 93.37 100.34 111.12
## 10000022.1 10000022 2 8 68.61 87.68 92.98 99.19 110.99
## 10000022.2 10000022 2 8 68.61 87.68 92.98 99.19 110.99
## x2povty x4povty_i x6povty_i x8povty_i x12par1ed_i x1hparnt x4hparnt
## 10000014.1 3 3 3 3 3 1 1
## 10000014.2 3 3 3 3 3 1 1
## 10000020.1 3 3 3 3 3 NA 1
## 10000020.2 3 3 3 3 3 NA 1
## 10000022.1 3 3 3 3 6 1 1
## 10000022.2 3 3 3 3 6 1 1
## x6hparnt x1par1emp x4par1emp_i x6par1emp_i s2_id w6c6p_6psu
## 10000014.1 1 2 2 2 1433 2
## 10000014.2 1 2 2 2 1433 2
## 10000020.1 1 NA 1 1 1365 2
## 10000020.2 1 NA 1 1 1365 2
## 10000022.1 1 4 2 2 1405 1
## 10000022.2 1 4 2 2 1405 1
## w6c6p_6str w6c6p_20 age1 age2 age3 pov1 pov2 pov3
## 10000014.1 39 328.0577 5.651667 7.161667 7.644167 0 0 0
## 10000014.2 39 328.0577 5.651667 7.161667 7.644167 0 0 0
## 10000020.1 53 136.5265 5.698333 7.380833 7.780833 0 0 0
## 10000020.2 53 136.5265 5.698333 7.380833 7.780833 0 0 0
## 10000022.1 35 163.1234 5.717500 7.306667 7.748333 0 0 0
## 10000022.2 35 163.1234 5.717500 7.306667 7.748333 0 0 0
## race_rec male mlths mgths time momwork partype
## 10000014.1 nhwhite 1 0 0 1 parttime twoparent
## 10000014.2 nhwhite 1 0 0 2 parttime twoparent
## 10000020.1 nhasian 0 0 0 1 <NA> <NA>
## 10000020.2 nhasian 0 0 0 2 fulltime twoparent
## 10000022.1 other 0 0 1 1 nilf twoparent
## 10000022.2 other 0 0 1 2 parttime twoparent
###Create states for poverty transitions
Now, I need to form the transition variable, this is my event variable, and in this case it will be categorical. It is categorical becuase we have the following state-space in our outcome.
State space
#find which kids failed in the first time period and remove them from the second risk period risk set
#binary outcomes for particular transition
e.long$povtran<-NA #transition into poverty
e.long$povtran[e.long$pov1==0&e.long$pov2==1&e.long$time==1]<-1
e.long$povtran[e.long$pov2==0&e.long$pov3==1&e.long$time==2]<-1
e.long$povtran[e.long$pov1==0&e.long$pov2==0&e.long$time==1]<-0
e.long$povtran[e.long$pov2==0&e.long$pov3==0&e.long$time==2]<-0
e.long$povtran2<-NA #transition out of poverty
e.long$povtran2[e.long$pov1==1&e.long$pov2==1&e.long$time==1]<-0
e.long$povtran2[e.long$pov2==1&e.long$pov3==1&e.long$time==2]<-0
e.long$povtran2[e.long$pov1==1&e.long$pov2==0&e.long$time==1]<-1
e.long$povtran2[e.long$pov2==1&e.long$pov3==0&e.long$time==2]<-1
#
#Categorical outcome
e.long$povcat<-NA
e.long$povcat[e.long$pov1==0&e.long$pov2==0&e.long$time==1]<-"staynotinpov"
e.long$povcat[e.long$pov1==1&e.long$pov2==1&e.long$time==1]<-"stayinpov"
e.long$povcat[e.long$pov1==0&e.long$pov2==1&e.long$time==1]<-"notpov_pov"
e.long$povcat[e.long$pov1==1&e.long$pov2==0&e.long$time==1]<-"pov_notpov"
e.long$povcat[e.long$pov2==0&e.long$pov3==0&e.long$time==2]<-"staynotinpov"
e.long$povcat[e.long$pov2==1&e.long$pov3==1&e.long$time==2]<-"stayinpov"
e.long$povcat[e.long$pov2==0&e.long$pov3==1&e.long$time==2]<-"notpov_pov"
e.long$povcat[e.long$pov2==1&e.long$pov3==0&e.long$time==2]<-"pov_notpov"
e.long$povcat<-as.factor(e.long$povcat)
e.long$povcat<-relevel(e.long$povcat, ref = "staynotinpov")
knitr::kable(head(e.long[, c("childid", "pov1", "pov2", "pov3","time", "povcat")], n=20))
childid | pov1 | pov2 | pov3 | time | povcat | |
---|---|---|---|---|---|---|
10000014.1 | 10000014 | 0 | 0 | 0 | 1 | staynotinpov |
10000014.2 | 10000014 | 0 | 0 | 0 | 2 | staynotinpov |
10000020.1 | 10000020 | 0 | 0 | 0 | 1 | staynotinpov |
10000020.2 | 10000020 | 0 | 0 | 0 | 2 | staynotinpov |
10000022.1 | 10000022 | 0 | 0 | 0 | 1 | staynotinpov |
10000022.2 | 10000022 | 0 | 0 | 0 | 2 | staynotinpov |
10000029.1 | 10000029 | 0 | 0 | 0 | 1 | staynotinpov |
10000029.2 | 10000029 | 0 | 0 | 0 | 2 | staynotinpov |
10000034.1 | 10000034 | 0 | 0 | 1 | 1 | staynotinpov |
10000034.2 | 10000034 | 0 | 0 | 1 | 2 | notpov_pov |
10000040.1 | 10000040 | 0 | 0 | 0 | 1 | staynotinpov |
10000040.2 | 10000040 | 0 | 0 | 0 | 2 | staynotinpov |
10000046.1 | 10000046 | 0 | 0 | 0 | 1 | staynotinpov |
10000046.2 | 10000046 | 0 | 0 | 0 | 2 | staynotinpov |
10000047.1 | 10000047 | 0 | 0 | 0 | 1 | staynotinpov |
10000047.2 | 10000047 | 0 | 0 | 0 | 2 | staynotinpov |
10000049.1 | 10000049 | 1 | 1 | 1 | 1 | stayinpov |
10000049.2 | 10000049 | 1 | 1 | 1 | 2 | stayinpov |
10000052.1 | 10000052 | 1 | 1 | 1 | 1 | stayinpov |
10000052.2 | 10000052 | 1 | 1 | 1 | 2 | stayinpov |
table(e.long$time, e.long$povcat)
staynotinpov notpov_pov pov_notpov stayinpov
1 1900 147 99 555 2 1896 103 141 561
We will use the multinomial logit model for this outcome, as it is specified for modeling a categorical outcome. The nnet
package has the multinom()
function to do this, it accepts weights but not sample design. Also, its summary functions do not return test statistics or p-values, so we will need to create those.
library(nnet)
multnm<-multinom(factor(povcat)~(time)+mlths+mgths+momwork+partype+race_rec,
data=e.long,
weights=w6c6p_20/mean(w6c6p_20, na.rm=T),
maxit=500)
## # weights: 60 (42 variable)
## initial value 7110.128870
## iter 10 value 4630.937239
## iter 20 value 3672.386533
## iter 30 value 3400.135554
## iter 40 value 3372.137711
## iter 50 value 3370.480351
## final value 3370.478988
## converged
sums<-summary(multnm) #store model
#create test statistics
sums$z<-sums$coefficients/sums$standard.errors
#create p values
sums$pval<-2*pnorm(abs(sums$z), lower.tail=FALSE)
round(sums$pval,3)
## (Intercept) time mlths mgths momworknilf momworkparttime
## notpov_pov 0 0.012 0.109 0 0 0.039
## pov_notpov 0 0.004 0.006 0 0 0.861
## stayinpov 0 0.684 0.000 0 0 0.000
## momworkunemployed partypeonebioonly partypeothernonparent
## notpov_pov 0.012 0.752 0.267
## pov_notpov 0.000 0.190 0.266
## stayinpov 0.000 0.000 0.521
## partypetwoparent race_rechispanic race_recnhasian race_recnhblack
## notpov_pov 0 0 0.588 0
## pov_notpov 0 0 0.660 0
## stayinpov 0 0 0.000 0
## race_recother
## notpov_pov 0.704
## pov_notpov 0.817
## stayinpov 0.047
#odds ratios
round(exp(sums$coefficients),3)
## (Intercept) time mlths mgths momworknilf momworkparttime
## notpov_pov 0.224 0.706 1.406 0.282 2.086 1.499
## pov_notpov 0.068 1.488 1.793 0.398 1.860 1.037
## stayinpov 0.115 1.037 3.475 0.318 3.291 2.212
## momworkunemployed partypeonebioonly partypeothernonparent
## notpov_pov 2.223 1.094 0.573
## pov_notpov 3.767 1.409 0.583
## stayinpov 7.812 2.169 1.217
## partypetwoparent race_rechispanic race_recnhasian race_recnhblack
## notpov_pov 0.360 2.684 1.262 2.305
## pov_notpov 0.287 2.856 1.214 2.293
## stayinpov 0.378 6.371 2.851 4.103
## race_recother
## notpov_pov 1.125
## pov_notpov 0.929
## stayinpov 1.504
This can be a lot to interpret, so we will also visualize the model output by estimating the transition probabilities themselves and graphing them.
Now we get estimates of the transition probabilities from the model to visualize them
dat1<-expand.grid(time=1:2,mlths=c(0,1), mgths=c(0,1), momwork=levels(e.long$momwork), partype=levels(e.long$partype), race_rec=levels(e.long$race_rec))
test1<-predict(multnm, newdata=dat1, type="probs")
dat1$pstaynotpov<-test1[,1]
dat1$pnotpov_pov<-test1[,2]
dat1$ppov_notpov<-test1[,3]
dat1$pstayinpov<-test1[,4]
#Reshape the data
library(data.table)
preddat<-melt(setDT(dat1),
id.vars=c("time", "mlths", "mgths", "momwork","partype", "race_rec"),
measure.vars=c("pstaynotpov", "pnotpov_pov", "ppov_notpov", "pstayinpov"))
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
preddat%>%
filter(race_rec%in%c("nhwhite", "hispanic", "nhblack"))%>%
filter(mlths==0, mgths==0)%>%
filter(momwork=="fulltime")%>%
ggplot()+geom_bar(aes(y=value, x=time, fill=variable), stat="identity")+facet_wrap(~race_rec+partype)+ggtitle(label = "Poverty transition probabilities by race and parent type", subtitle = "Full time working parent")
preddat%>%
filter(race_rec%in%c("nhwhite", "hispanic", "nhblack"))%>%
filter(mlths==0, mgths==0)%>%
#filter(momwork=="fulltime")%>%
filter(partype=="twoparent")%>%
ggplot()+geom_bar(aes(y=value, x=time, fill=variable), stat="identity")+facet_wrap(~race_rec+momwork)+ggtitle(label = "Poverty transition probabilities by race and work status", subtitle = "Two parent households")