DEM 7223 - Event History Analysis - Example of Multi-state event history analysis

This example will illustrate how to fit a multistate hazard model using the multinomial logit model. The outcome for the example is whether a family experiences a transition between poverty states between waves 1 and 5 of the data. The data from the ECLS-K.

#Load required libraries
library(survival)
library(car)

## Loading required package: carData

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(lattice)

First we load our data

load("~/Google Drive/classes/dem7223/dem7223_19/data/eclsk_k5.Rdata")
names(eclskk5)<-tolower(names(eclskk5))
#get out only the variables I'm going to use for this example
myvars<-c( "childid","x_chsex_r", "x_raceth_r", "x1kage_r","x4age", "x5age", "x6age", "x7age", "x2povty","x4povty_i", "x6povty_i", "x8povty_i","x12par1ed_i",'x1hparnt',"x4hparnt","x6hparnt","x1par1emp","x4par1emp_i", "x6par1emp_i", "s2_id", "w6c6p_6psu", "w6c6p_6str", "w6c6p_20")
eclskk5<-eclskk5[,myvars]


eclskk5$age1<-ifelse(eclskk5$x1kage_r==-9, NA, eclskk5$x1kage_r/12)
eclskk5$age2<-ifelse(eclskk5$x4age==-9, NA, eclskk5$x4age/12)
#for the later waves, the NCES group the ages into ranges of months, so 1= <105 months, 2=105 to 108 months. So, I fix the age at the midpoint of the interval they give, and make it into years by dividing by 12
eclskk5$age3<-ifelse(eclskk5$x5age==-9, NA, eclskk5$x5age/12)

eclskk5$pov1<-ifelse(eclskk5$x2povty==1,1,0)
eclskk5$pov2<-ifelse(eclskk5$x4povty_i==1,1,0)
eclskk5$pov3<-ifelse(eclskk5$x6povty_i==1,1,0)



#mother's work status-time varying
eclskk5$momwork1<-Recode(eclskk5$x1par1emp, recodes = "1='fulltime';2='parttime'; 3='unemployed'; 4='nilf';else =NA", as.factor = T) 
eclskk5$momwork2<-Recode(eclskk5$x4par1emp_i, recodes = "1='fulltime';2='parttime'; 3='unemployed'; 4='nilf';else =NA", as.factor = T) 


#HH type - parents - time varying
eclskk5$parenttype1<-recode(eclskk5$x1hparnt, recodes="1='twoparent'; 2='onebio_oneother'; 3='onebioonly';4='othernonparent'; else=NA", as.factor = T)
eclskk5$parenttype2<-recode(eclskk5$x4hparnt, recodes="1='twoparent'; 2='onebio_oneother'; 3='onebioonly';4='othernonparent'; else=NA", as.factor = T)

#Recode race with white, non Hispanic as reference using dummy vars
eclskk5$race_rec<-Recode (eclskk5$x_raceth_r, recodes="1 = 'nhwhite';2='nhblack';3:4='hispanic';5='nhasian'; 6:8='other';-9=NA", as.factor = T)
eclskk5$race_rec<-relevel(eclskk5$race_rec, ref = "nhwhite")
eclskk5$male<-Recode(eclskk5$x_chsex_r, recodes="1=1; 2=0; -9=NA")
eclskk5$mlths<-Recode(eclskk5$x12par1ed_i, recodes = "1:2=1; 3:9=0; else = NA")
eclskk5$mgths<-Recode(eclskk5$x12par1ed_i, recodes = "1:3=0; 4:9=1; else =NA")

Now I reshape the data to person-period form:

eclskk5<-subset(eclskk5, is.na(pov1)==F&is.na(pov2)==F&is.na(pov3)==F&is.na(age1)==F&is.na(age2)==F&is.na(age3)==F&is.na(eclskk5$w6c6p_6psu)==F)
head(eclskk5)

## # A tibble: 6 x 37
##   childid x_chsex_r x_raceth_r x1kage_r x4age x5age x6age x7age x2povty
##   <chr>       <dbl>      <dbl>    <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 100000~         1          1     67.8  85.9  91.7  97.5  107.       3
## 2 100000~         2          5     68.4  88.6  93.4 100.   111.       3
## 3 100000~         2          8     68.6  87.7  93.0  99.2  111.       3
## 4 100000~         2          1     69.4  86.9  92.7  99.3  110.       2
## 5 100000~         1          2     76.2  93.3  99.6 106.   115.       2
## 6 100000~         2          5     64.3  82.8  88.2  94.8  106.       3
## # ... with 28 more variables: x4povty_i <dbl>, x6povty_i <dbl>,
## #   x8povty_i <dbl>, x12par1ed_i <dbl>, x1hparnt <dbl>, x4hparnt <dbl>,
## #   x6hparnt <dbl>, x1par1emp <dbl>, x4par1emp_i <dbl>, x6par1emp_i <dbl>,
## #   s2_id <chr>, w6c6p_6psu <dbl>, w6c6p_6str <dbl>, w6c6p_20 <dbl>,
## #   age1 <dbl>, age2 <dbl>, age3 <dbl>, pov1 <dbl>, pov2 <dbl>, pov3 <dbl>,
## #   momwork1 <fct>, momwork2 <fct>, parenttype1 <fct>, parenttype2 <fct>,
## #   race_rec <fct>, male <dbl>, mlths <dbl>, mgths <dbl>

names(eclskk5)

##  [1] "childid"     "x_chsex_r"   "x_raceth_r"  "x1kage_r"    "x4age"      
##  [6] "x5age"       "x6age"       "x7age"       "x2povty"     "x4povty_i"  
## [11] "x6povty_i"   "x8povty_i"   "x12par1ed_i" "x1hparnt"    "x4hparnt"   
## [16] "x6hparnt"    "x1par1emp"   "x4par1emp_i" "x6par1emp_i" "s2_id"      
## [21] "w6c6p_6psu"  "w6c6p_6str"  "w6c6p_20"    "age1"        "age2"       
## [26] "age3"        "pov1"        "pov2"        "pov3"        "momwork1"   
## [31] "momwork2"    "parenttype1" "parenttype2" "race_rec"    "male"       
## [36] "mlths"       "mgths"

e.long<-reshape(data.frame(eclskk5), idvar="childid", varying=list( momwork=c("momwork1","momwork2"), partype=c("parenttype1", "parenttype2")), times=1:2, direction="long" , v.names = c("momwork", "partype"))
e.long<-e.long[order(e.long$childid, e.long$time),]
head(e.long)

##             childid x_chsex_r x_raceth_r x1kage_r x4age x5age  x6age  x7age
## 10000014.1 10000014         1          1    67.82 85.94 91.73  97.51 106.85
## 10000014.2 10000014         1          1    67.82 85.94 91.73  97.51 106.85
## 10000020.1 10000020         2          5    68.38 88.57 93.37 100.34 111.12
## 10000020.2 10000020         2          5    68.38 88.57 93.37 100.34 111.12
## 10000022.1 10000022         2          8    68.61 87.68 92.98  99.19 110.99
## 10000022.2 10000022         2          8    68.61 87.68 92.98  99.19 110.99
##            x2povty x4povty_i x6povty_i x8povty_i x12par1ed_i x1hparnt x4hparnt
## 10000014.1       3         3         3         3           3        1        1
## 10000014.2       3         3         3         3           3        1        1
## 10000020.1       3         3         3         3           3       NA        1
## 10000020.2       3         3         3         3           3       NA        1
## 10000022.1       3         3         3         3           6        1        1
## 10000022.2       3         3         3         3           6        1        1
##            x6hparnt x1par1emp x4par1emp_i x6par1emp_i s2_id w6c6p_6psu
## 10000014.1        1         2           2           2  1433          2
## 10000014.2        1         2           2           2  1433          2
## 10000020.1        1        NA           1           1  1365          2
## 10000020.2        1        NA           1           1  1365          2
## 10000022.1        1         4           2           2  1405          1
## 10000022.2        1         4           2           2  1405          1
##            w6c6p_6str w6c6p_20     age1     age2     age3 pov1 pov2 pov3
## 10000014.1         39 328.0577 5.651667 7.161667 7.644167    0    0    0
## 10000014.2         39 328.0577 5.651667 7.161667 7.644167    0    0    0
## 10000020.1         53 136.5265 5.698333 7.380833 7.780833    0    0    0
## 10000020.2         53 136.5265 5.698333 7.380833 7.780833    0    0    0
## 10000022.1         35 163.1234 5.717500 7.306667 7.748333    0    0    0
## 10000022.2         35 163.1234 5.717500 7.306667 7.748333    0    0    0
##            race_rec male mlths mgths time  momwork   partype
## 10000014.1  nhwhite    1     0     0    1 parttime twoparent
## 10000014.2  nhwhite    1     0     0    2 parttime twoparent
## 10000020.1  nhasian    0     0     0    1     <NA>      <NA>
## 10000020.2  nhasian    0     0     0    2 fulltime twoparent
## 10000022.1    other    0     0     1    1     nilf twoparent
## 10000022.2    other    0     0     1    2 parttime twoparent

###Create states for poverty transitions

Now, I need to form the transition variable, this is my event variable, and in this case it will be categorical. It is categorical becuase we have the following state-space in our outcome.

State space

#find which kids failed in the first time period and remove them from the second risk period risk set
#binary outcomes for particular transition
e.long$povtran<-NA #transition into poverty
e.long$povtran[e.long$pov1==0&e.long$pov2==1&e.long$time==1]<-1
e.long$povtran[e.long$pov2==0&e.long$pov3==1&e.long$time==2]<-1
e.long$povtran[e.long$pov1==0&e.long$pov2==0&e.long$time==1]<-0
e.long$povtran[e.long$pov2==0&e.long$pov3==0&e.long$time==2]<-0

e.long$povtran2<-NA #transition out of poverty
e.long$povtran2[e.long$pov1==1&e.long$pov2==1&e.long$time==1]<-0
e.long$povtran2[e.long$pov2==1&e.long$pov3==1&e.long$time==2]<-0
e.long$povtran2[e.long$pov1==1&e.long$pov2==0&e.long$time==1]<-1
e.long$povtran2[e.long$pov2==1&e.long$pov3==0&e.long$time==2]<-1
#

#Categorical outcome
e.long$povcat<-NA
e.long$povcat[e.long$pov1==0&e.long$pov2==0&e.long$time==1]<-"staynotinpov"
e.long$povcat[e.long$pov1==1&e.long$pov2==1&e.long$time==1]<-"stayinpov"
e.long$povcat[e.long$pov1==0&e.long$pov2==1&e.long$time==1]<-"notpov_pov"
e.long$povcat[e.long$pov1==1&e.long$pov2==0&e.long$time==1]<-"pov_notpov"

e.long$povcat[e.long$pov2==0&e.long$pov3==0&e.long$time==2]<-"staynotinpov"
e.long$povcat[e.long$pov2==1&e.long$pov3==1&e.long$time==2]<-"stayinpov"
e.long$povcat[e.long$pov2==0&e.long$pov3==1&e.long$time==2]<-"notpov_pov"
e.long$povcat[e.long$pov2==1&e.long$pov3==0&e.long$time==2]<-"pov_notpov"

e.long$povcat<-as.factor(e.long$povcat)
e.long$povcat<-relevel(e.long$povcat, ref = "staynotinpov")

knitr::kable(head(e.long[, c("childid", "pov1", "pov2", "pov3","time",  "povcat")], n=20))

	childid	pov1	pov2	pov3	time	povcat
10000014.1	10000014	0	0	0	1	staynotinpov
10000014.2	10000014	0	0	0	2	staynotinpov
10000020.1	10000020	0	0	0	1	staynotinpov
10000020.2	10000020	0	0	0	2	staynotinpov
10000022.1	10000022	0	0	0	1	staynotinpov
10000022.2	10000022	0	0	0	2	staynotinpov
10000029.1	10000029	0	0	0	1	staynotinpov
10000029.2	10000029	0	0	0	2	staynotinpov
10000034.1	10000034	0	0	1	1	staynotinpov
10000034.2	10000034	0	0	1	2	notpov_pov
10000040.1	10000040	0	0	0	1	staynotinpov
10000040.2	10000040	0	0	0	2	staynotinpov
10000046.1	10000046	0	0	0	1	staynotinpov
10000046.2	10000046	0	0	0	2	staynotinpov
10000047.1	10000047	0	0	0	1	staynotinpov
10000047.2	10000047	0	0	0	2	staynotinpov
10000049.1	10000049	1	1	1	1	stayinpov
10000049.2	10000049	1	1	1	2	stayinpov
10000052.1	10000052	1	1	1	1	stayinpov
10000052.2	10000052	1	1	1	2	stayinpov

table(e.long$time, e.long$povcat)

staynotinpov notpov_pov pov_notpov stayinpov

1 1900 147 99 555 2 1896 103 141 561

Model for categorical outcome

We will use the multinomial logit model for this outcome, as it is specified for modeling a categorical outcome. The nnet package has the multinom() function to do this, it accepts weights but not sample design. Also, its summary functions do not return test statistics or p-values, so we will need to create those.

library(nnet)
multnm<-multinom(factor(povcat)~(time)+mlths+mgths+momwork+partype+race_rec,
                 data=e.long,
                 weights=w6c6p_20/mean(w6c6p_20, na.rm=T),
                 maxit=500)

## # weights:  60 (42 variable)
## initial  value 7110.128870 
## iter  10 value 4630.937239
## iter  20 value 3672.386533
## iter  30 value 3400.135554
## iter  40 value 3372.137711
## iter  50 value 3370.480351
## final  value 3370.478988 
## converged

sums<-summary(multnm) #store model 

#create test statistics
sums$z<-sums$coefficients/sums$standard.errors

#create p values
sums$pval<-2*pnorm(abs(sums$z), lower.tail=FALSE)
round(sums$pval,3)

##            (Intercept)  time mlths mgths momworknilf momworkparttime
## notpov_pov           0 0.012 0.109     0           0           0.039
## pov_notpov           0 0.004 0.006     0           0           0.861
## stayinpov            0 0.684 0.000     0           0           0.000
##            momworkunemployed partypeonebioonly partypeothernonparent
## notpov_pov             0.012             0.752                 0.267
## pov_notpov             0.000             0.190                 0.266
## stayinpov              0.000             0.000                 0.521
##            partypetwoparent race_rechispanic race_recnhasian race_recnhblack
## notpov_pov                0                0           0.588               0
## pov_notpov                0                0           0.660               0
## stayinpov                 0                0           0.000               0
##            race_recother
## notpov_pov         0.704
## pov_notpov         0.817
## stayinpov          0.047

#odds ratios
round(exp(sums$coefficients),3)

##            (Intercept)  time mlths mgths momworknilf momworkparttime
## notpov_pov       0.224 0.706 1.406 0.282       2.086           1.499
## pov_notpov       0.068 1.488 1.793 0.398       1.860           1.037
## stayinpov        0.115 1.037 3.475 0.318       3.291           2.212
##            momworkunemployed partypeonebioonly partypeothernonparent
## notpov_pov             2.223             1.094                 0.573
## pov_notpov             3.767             1.409                 0.583
## stayinpov              7.812             2.169                 1.217
##            partypetwoparent race_rechispanic race_recnhasian race_recnhblack
## notpov_pov            0.360            2.684           1.262           2.305
## pov_notpov            0.287            2.856           1.214           2.293
## stayinpov             0.378            6.371           2.851           4.103
##            race_recother
## notpov_pov         1.125
## pov_notpov         0.929
## stayinpov          1.504

This can be a lot to interpret, so we will also visualize the model output by estimating the transition probabilities themselves and graphing them.

Now we get estimates of the transition probabilities from the model to visualize them

dat1<-expand.grid(time=1:2,mlths=c(0,1), mgths=c(0,1), momwork=levels(e.long$momwork), partype=levels(e.long$partype), race_rec=levels(e.long$race_rec))

test1<-predict(multnm, newdata=dat1, type="probs")

dat1$pstaynotpov<-test1[,1]
dat1$pnotpov_pov<-test1[,2]
dat1$ppov_notpov<-test1[,3]
dat1$pstayinpov<-test1[,4]

#Reshape the data
library(data.table)
preddat<-melt(setDT(dat1),
              id.vars=c("time", "mlths", "mgths", "momwork","partype", "race_rec"),
              measure.vars=c("pstaynotpov", "pnotpov_pov", "ppov_notpov", "pstayinpov"))

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

preddat%>%
  filter(race_rec%in%c("nhwhite", "hispanic", "nhblack"))%>%
  filter(mlths==0, mgths==0)%>%
  filter(momwork=="fulltime")%>%
  ggplot()+geom_bar(aes(y=value, x=time, fill=variable), stat="identity")+facet_wrap(~race_rec+partype)+ggtitle(label = "Poverty transition probabilities by race and parent type", subtitle = "Full time working parent")

preddat%>%
  filter(race_rec%in%c("nhwhite", "hispanic", "nhblack"))%>%
  filter(mlths==0, mgths==0)%>%
  #filter(momwork=="fulltime")%>%
  filter(partype=="twoparent")%>%
  ggplot()+geom_bar(aes(y=value, x=time, fill=variable), stat="identity")+facet_wrap(~race_rec+momwork)+ggtitle(label = "Poverty transition probabilities by race and work status", subtitle = "Two parent households")

DEM 7223 - Event History Analysis - Example of Multi-state event history analysis

Corey Sparks, PhD

December 3, 2019

Model for categorical outcome