DEM 7903 Week 5: Longitudinal Models for Change

In this example, we will use hierarchical models to do some longitudinal modeling of data from the ECLS-K 2011. Specifically, we will model changes in a student’s standardized math score from fall kindergarten to spring, 1st grade.

Longitudinal data are collected in waves, representing the sample at different time points For example, currently, the ECLS-K 2011 has four waves, intending to capture the children as they progress through school, and will eventuall have more. We really want to measure “similar” items at each time point so we can understand what makes people change over their lives. When it comes to data we need to introduce a different data structure to accomodate this.

In this example, I will focus on the use of the linear mixed model for a continuous outcome

We start simple then move into growth curve models. This follows the presentation in Singer and Willett (2003) Chapters 3-6.

Data and recodes

First we load our data

load("~/Google Drive/dem7903_App_Hier/data/eclsk_11.Rdata")
names(eclsk11)<-tolower(names(eclsk11))
library (car)
library(arm)
library(lmerTest)
#get out only the variables I'm going to use for this example

myvars<-c("childid", "x_chsex_r", "x1locale", "x_raceth_r", "x2povty", "x12par1ed_i","p1curmar", "x1htotal", "x1mscalk1", "x2mscalk1", "x3mscalk1", "x4mscalk1", "p1hscale", "p2hscale", "p4hscale","x2fsstat2","x4fsstat2", "x12sesl","x4sesl_i", "p2parct1", "p2parct2", "s1_id", "p2safepl","x2krceth","p1o2near", "x_distpov" , "w1c0", "w1p0", "w2p0", "w1c0str", "w1p0str",
        "w4c4p_40","w4c4p_4str", "w4c4p_4psu",  "w1c0psu", "w1p0psu", "x1height","x2height","x3height","x4height", "x1kage_r","x2kage_r","x3age","x4age")

#subset the data
eclsk.sub<-eclsk11[,myvars]

rm(eclsk11); gc()

##           used (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells 1665255 89.0    2637877  140.9   2164898  115.7
## Vcells 2968870 22.7  219933776 1678.0 182884454 1395.3

Next, I do some recoding of variables. I have to make the repeated measures of each of my longitudinal variables.

#Longitudinal variables
#recode our outcomes, the  first is the child's math standardized test score  in Kindergarten
eclsk.sub$math1<-ifelse(eclsk.sub$x1mscalk1<0, NA, eclsk.sub$x1mscalk1)
eclsk.sub$math2<-ifelse(eclsk.sub$x2mscalk1<0, NA, eclsk.sub$x2mscalk1)
#eclsk.sub$math3<-ifelse(eclsk.sub$x3mscalk1<0, NA, eclsk.sub$x3mscalk1)
eclsk.sub$math4<-ifelse(eclsk.sub$x4mscalk1<0, NA, eclsk.sub$x4mscalk1)

#Second outcome is child's height for age, continuous outcome
eclsk.sub$height1<-ifelse(eclsk.sub$x1height<=-7, NA, eclsk.sub$x1height)
eclsk.sub$height2<-ifelse(eclsk.sub$x2height<=-7, NA, eclsk.sub$x2height)
#eclsk.sub$height3<-ifelse(eclsk.sub$x3height<=-7, NA, eclsk.sub$x3height)
eclsk.sub$height4<-ifelse(eclsk.sub$x4height<=-7, NA, eclsk.sub$x4height)

#Age at each wave
eclsk.sub$age_yrs1<-ifelse(eclsk.sub$x1kage_r<0, NA, eclsk.sub$x1kage_r/12)
eclsk.sub$age_yrs2<-ifelse(eclsk.sub$x2kage_r<0, NA, eclsk.sub$x2kage_r/12)
#eclsk.sub$age_yrs3<-ifelse(eclsk.sub$x3age<0, NA, eclsk.sub$x3age/12)
eclsk.sub$age_yrs4<-ifelse(eclsk.sub$x4age<0, NA, eclsk.sub$x4age/12)

eclsk.sub<- eclsk.sub[is.na(eclsk.sub$age_yrs1)==F, ]

#Height for age z score standardized by sex and age
eclsk.sub$height_z1<-ave(eclsk.sub$height1, as.factor(paste(round(eclsk.sub$age_yrs1, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z2<-ave(eclsk.sub$height2, as.factor(paste(round(eclsk.sub$age_yrs2, 1.5), eclsk.sub$male)), FUN=scale)
#eclsk.sub$height_z3<-ave(eclsk.sub$height3, as.factor(paste(round(eclsk.sub$age_yrs3, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z4<-ave(eclsk.sub$height4, as.factor(paste(round(eclsk.sub$age_yrs4, 1.5), eclsk.sub$male)), FUN=scale)


#Household food insecurity, dichotomous outcome
#This outcome is only present at two waves
eclsk.sub$foodinsec1<-recode(eclsk.sub$x2fsstat2, recodes="2:3=1; 1=0; else=NA")
eclsk.sub$foodinsec2<-recode(eclsk.sub$x4fsstat2, recodes="2:3=1; 1=0; else=NA")


#Child health assessment Excellent to poor , ordinal outcome
eclsk.sub$chhealth1<-ifelse(eclsk.sub$p1hscale<0, NA, eclsk.sub$p1hscale)
eclsk.sub$chhealth2<-ifelse(eclsk.sub$p2hscale<0, NA, eclsk.sub$p2hscale)
eclsk.sub$chhealth4<-ifelse(eclsk.sub$p4hscale<0, NA, eclsk.sub$p4hscale)

#SES
eclsk.sub$hhses1<-ifelse(eclsk.sub$x12sesl==-9, NA, scale(eclsk.sub$x12sesl))
eclsk.sub$hhses4<-ifelse(eclsk.sub$x4sesl_i==-9, NA, scale(eclsk.sub$x4sesl_i))

That does it for our time varying variables. We now code some variables from the kindergarten wave to use as predictors:

#Non time varying variables
#First we recode some Child characteristics
#Child's sex: recode as male =1
eclsk.sub$male<-recode(eclsk.sub$x_chsex_r, recodes="1=1; 2=0; -9=NA")

#Recode race with white, non Hispanic as reference using dummy vars
eclsk.sub$hisp<-recode (eclsk.sub$x_raceth_r, recodes="3:4=1;-9=NA; else=0")
eclsk.sub$black<-recode (eclsk.sub$x_raceth_r, recodes="2=1;-9=NA; else=0")
eclsk.sub$asian<-recode (eclsk.sub$x_raceth_r, recodes="5=1;-9=NA; else=0")
eclsk.sub$nahn<-recode (eclsk.sub$x_raceth_r, recodes="6:7=1;-9=NA; else=0")
eclsk.sub$other<-recode (eclsk.sub$x_raceth_r, recodes="8=1;-9=NA; else=0")


#Then we recode some parent/mother characteristics
#Mother's education, recode as 2 dummys with HS = reference
eclsk.sub$lths<-recode(eclsk.sub$x12par1ed_i, recodes = "0:2=1; 3:8=0; else = NA")
eclsk.sub$gths<-recode(eclsk.sub$x12par1ed_i, recodes = "1:3=0; 4:8=1; else =NA") 

#marital status, recode as 2 dummys, ref= married
eclsk.sub$single<-recode(eclsk.sub$p1curmar, recodes="4=1; -7:-9=NA; else=0")
eclsk.sub$notmar<-recode(eclsk.sub$p1curmar, recodes="2:3=1; -7:-9=NA; else=0")


#Then we do some household level variables

#Urban school location = 1
eclsk.sub$urban<-recode(eclsk.sub$x1locale, recodes = "1:3=1; 4=0; -1:-9=NA")

#poverty level in poverty = 1
eclsk.sub$pov<-recode(eclsk.sub$x2povty , recodes ="1:2=1; 3=0; -9=NA")

#Household size
eclsk.sub$hhsize<-eclsk.sub$x1htotal

#school % minority student body
eclsk.sub$minorsch<-ifelse(eclsk.sub$x2krceth <0, NA, eclsk.sub$x2krceth/10)

#Unsafe neighborhood
eclsk.sub$unsafe<-recode(eclsk.sub$p2safepl , recodes = "1:2='unsafe'; 3='safe'; else=NA", as.factor.result = T)

#school district poverty
eclsk.sub$dist_pov<-ifelse(eclsk.sub$x_distpov==-9, NA, scale(eclsk.sub$x_distpov))

Reshaping data into longitudinal format

To analyze data longitudinally, we must reshape the data fron its current “wide” format, where each repeated measure is a column, into the “long” format, where there is a single column for a particular variable, and we account for the repeated measurements of each person. In this case, I’m going to use three waves of data, so each child can contribute up to three lines to the data.

This is just a little example using the first 10 kids, just to illustrate this process.

test<-eclsk.sub[1:10,]
#look at the data in "wide" format
head(test)

##    childid x_chsex_r x1locale x_raceth_r x2povty x12par1ed_i p1curmar
## 1 10000001         1        4          1       2           5        1
## 2 10000002         2        2          1       3           5        1
## 4 10000004         1        4          1       3           5        1
## 5 10000005         2        2          3       2           4        1
## 6 10000006         2        4          1       2           3        3
## 7 10000007         2        3          1       1           4        1
##   x1htotal x1mscalk1 x2mscalk1 x3mscalk1 x4mscalk1 p1hscale p2hscale
## 1        6   44.9695   61.5056        NA   79.5010        1        2
## 2        4   30.1857   50.0694        NA   71.0421        1        1
## 4        4   13.8410   31.0148        NA   53.9906        1        1
## 5        5   27.6587   45.5457        NA        NA        1        1
## 6        4   22.2918   27.8530        NA   42.6581        1        2
## 7        5   26.1178   36.7276        NA   59.3014        2        2
##   p4hscale x2fsstat2 x4fsstat2 x12sesl x4sesl_i p2parct1 p2parct2 s1_id
## 1        3         1         1   -0.13    -0.07        1        1  1861
## 2        1         1         1    0.27     0.23        1        1  2113
## 4       -9         1        -9   -0.26    -0.29        1        1  2070
## 5       NA         1        NA    0.28       NA        1        3  1079
## 6        1         1         1   -0.69    -0.71        1       -1  1210
## 7       -9         1        -9   -0.58    -0.60        1        1  1282
##   p2safepl x2krceth p1o2near x_distpov      w1c0     w1p0     w2p0 w1c0str
## 1        2     8.00        2        45 188.09105 211.4320 215.1901      70
## 2        3    30.00       -1         7 237.88083 274.5976 327.8694      45
## 4        2    18.00       -1        17  96.36698 103.5068 111.8929      51
## 5        3    90.48        3        -1 178.54929 237.3439 267.5328      30
## 6        3     2.00       -1        11 497.90703 525.4855 636.1168      65
## 7        3     3.00        0         9 447.97361 543.0198 584.5155      65
##   w1p0str w4c4p_40 w4c4p_4str w4c4p_4psu w1c0psu w1p0psu x1height x2height
## 1      70 322.0055         69          3       3       3    44.50    45.75
## 2      45 474.8417         43          2       2       2    45.63    47.00
## 4      51 171.4766         49          2       2       2    46.00    48.00
## 5      30   0.0000         NA         NA       3       3    40.80    42.00
## 6      65 957.6496         64          2       2       2    46.00    47.00
## 7      65 535.6196         64          1       1       1    45.25    46.00
##   x3height x4height x1kage_r x2kage_r x3age x4age   math1   math2   math4
## 1       NA    48.00    72.39    79.76    NA 91.66 44.9695 61.5056 79.5010
## 2       NA    49.50    71.41    77.52    NA 89.56 30.1857 50.0694 71.0421
## 4       NA    50.00    72.39    78.84    NA 89.23 13.8410 31.0148 53.9906
## 5       NA       NA    59.51    65.06    NA    NA 27.6587 45.5457      NA
## 6       NA    49.75    75.78    82.62    NA 94.82 22.2918 27.8530 42.6581
## 7       NA    48.00    63.95    69.90    NA 80.98 26.1178 36.7276 59.3014
##   height1 height2 height4 age_yrs1 age_yrs2 age_yrs4   height_z1
## 1   44.50   45.75   48.00 6.032500 6.646667 7.638333 -0.74750120
## 2   45.63   47.00   49.50 5.950833 6.460000 7.463333  0.01182955
## 4   46.00   48.00   50.00 6.032500 6.570000 7.435833  0.03840209
## 5   40.80   42.00      NA 4.959167 5.421667       NA -1.45613917
## 6   46.00   47.00   49.75 6.315000 6.885000 7.901667 -0.62911889
## 7   45.25   46.00   48.00 5.329167 5.825000 6.748333  0.49810428
##    height_z2   height_z4 foodinsec1 foodinsec2 chhealth1 chhealth2
## 1 -0.7144538 -0.69549851          0          0         1         2
## 2  0.1659170  0.01627224          0          0         1         1
## 4  0.4210688  0.41691644          0         NA         1         1
## 5 -1.0816114          NA          0         NA         1         1
## 6 -0.2994689 -0.52126128          0          0         1         2
## 7  0.3141864  0.16071039          0         NA         2         2
##   chhealth4      hhses1      hhses4 male hisp black asian nahn other lths
## 1         3 -0.05343635 -0.04820708    1    0     0     0    0     0    0
## 2         1  0.36092641  0.32296225    0    0     0     0    0     0    0
## 4        NA -0.18810425 -0.32039792    1    0     0     0    0     0    0
## 5        NA  0.37128548          NA    0    1     0     0    0     0    0
## 6         1 -0.63354422 -0.84003498    0    0     0     0    0     0    0
## 7        NA -0.51959446 -0.70393956    0    0     0     0    0     0    0
##   gths single notmar urban pov hhsize minorsch unsafe    dist_pov
## 1    1      0      0     0   1      6    0.800 unsafe  2.22748174
## 2    1      0      0     1   0      4    3.000   safe -0.67176163
## 4    1      0      0     0   0      4    1.800 unsafe  0.09119715
## 5    1      0      0     1   1      5    9.048   safe -1.28212866
## 6    0      0      1     0   1      4    0.200   safe -0.36657812
## 7    1      0      0     1   1      5    0.300   safe -0.51916987

e.longt<-reshape(test, idvar="childid", varying=list(math = c("math1", "math2",  "math4"),
                                         age = c("age_yrs1", "age_yrs2",  "age_yrs4"),
                                         height= c("height_z1", "height_z2", "height_z4"),
                                         health=c("chhealth1", "chhealth2", "chhealth4"), 
                                         ses = c("hhses1", "hhses1", "hhses4")),
                                          v.names = c("math", "age", "height", "health"),
                                         timevar="wave",
                                         times=c(1,2,4),direction="long",  
                                          drop = names(test)[c(2:20, 22:25)])

#here we look at the new data set, in the "long" format

head(e.longt[order(e.longt$childid, e.longt$wave),c(1, 40:44)], n=20)

##             childid minorsch unsafe    dist_pov wave    math
## 10000001.1 10000001    0.800 unsafe  2.22748174    1 44.9695
## 10000001.2 10000001    0.800 unsafe  2.22748174    2 61.5056
## 10000001.4 10000001    0.800 unsafe  2.22748174    4 79.5010
## 10000002.1 10000002    3.000   safe -0.67176163    1 30.1857
## 10000002.2 10000002    3.000   safe -0.67176163    2 50.0694
## 10000002.4 10000002    3.000   safe -0.67176163    4 71.0421
## 10000004.1 10000004    1.800 unsafe  0.09119715    1 13.8410
## 10000004.2 10000004    1.800 unsafe  0.09119715    2 31.0148
## 10000004.4 10000004    1.800 unsafe  0.09119715    4 53.9906
## 10000005.1 10000005    9.048   safe -1.28212866    1 27.6587
## 10000005.2 10000005    9.048   safe -1.28212866    2 45.5457
## 10000005.4 10000005    9.048   safe -1.28212866    4      NA
## 10000006.1 10000006    0.200   safe -0.36657812    1 22.2918
## 10000006.2 10000006    0.200   safe -0.36657812    2 27.8530
## 10000006.4 10000006    0.200   safe -0.36657812    4 42.6581
## 10000007.1 10000007    0.300   safe -0.51916987    1 26.1178
## 10000007.2 10000007    0.300   safe -0.51916987    2 36.7276
## 10000007.4 10000007    0.300   safe -0.51916987    4 59.3014
## 10000008.1 10000008    2.600   safe  0.39638067    1 30.9004
## 10000008.2 10000008    2.600   safe  0.39638067    2 54.5561

So, we can see the children that are repeated three times, but some children are missing for some variables at some waves, which is common in longitudinal surveys.

To illustrate these data, we can plot the children’s math scores and other variables over time to see trends within child. Some kids are missing at all times, others don’t have complete data for all times, but you can see the trends within child.

require(lattice)
xyplot(math~age|childid, data=e.longt,
       panel=function(x,y){
       panel.xyplot(x,y)
     panel.lmline(x,y,)}, main="Math Scores over time for each child", xlab= "Age", ylab="Math Score")

xyplot(height~age|childid, data=e.longt,
       panel=function(x,y){
       panel.xyplot(x,y)
     panel.lmline(x,y,)}, main="Height over time for each child", xlab= "Age", ylab="Height")

xyplot(health~age|childid, data=e.longt,
       panel=function(x,y){
       panel.xyplot(x,y)
     panel.lmline(x,y,)}, main="Self Rated Health over time for each child", xlab= "Age", ylab="SRH")

So, these plots actually fit a linear regression line to each child’s data, which is what we are effectively trying to do, but doing this within the traditional linear model framework is inefficient. Instead we will use the linear mixed model to analyze the overall trajectory of change over time, but allowing each child to deviate from the overall pattern.

That was a cute little example, now we do the entire data set:

e.long<-reshape(eclsk.sub, idvar="childid", varying=list(math = c("math1", "math2",  "math4"),
                                         age = c("age_yrs1", "age_yrs2",  "age_yrs4"),
                                         height= c("height_z1", "height_z2", "height_z4"),
                                         health=c("chhealth1", "chhealth2", "chhealth4"), 
                                         ses = c("hhses1", "hhses1", "hhses4")),
                                        v.names = c("math", "age", "height", "health", "ses"),
                                         timevar="wave",times=c(1,2,4),direction="long",  
                                          drop = names(eclsk.sub)[c(2:20, 22:25)])

e.long<-e.long[order(e.long$childid, e.long$wave),]
head(e.long, n=10)

##             childid p2parct2 x_distpov      w1c0     w1p0     w2p0 w1c0str
## 10000001.1 10000001        1        45 188.09105 211.4320 215.1901      70
## 10000001.2 10000001        1        45 188.09105 211.4320 215.1901      70
## 10000001.4 10000001        1        45 188.09105 211.4320 215.1901      70
## 10000002.1 10000002        1         7 237.88083 274.5976 327.8694      45
## 10000002.2 10000002        1         7 237.88083 274.5976 327.8694      45
## 10000002.4 10000002        1         7 237.88083 274.5976 327.8694      45
## 10000004.1 10000004        1        17  96.36698 103.5068 111.8929      51
## 10000004.2 10000004        1        17  96.36698 103.5068 111.8929      51
## 10000004.4 10000004        1        17  96.36698 103.5068 111.8929      51
## 10000005.1 10000005        3        -1 178.54929 237.3439 267.5328      30
##            w1p0str w4c4p_40 w4c4p_4str w4c4p_4psu w1c0psu w1p0psu x1height
## 10000001.1      70 322.0055         69          3       3       3    44.50
## 10000001.2      70 322.0055         69          3       3       3    44.50
## 10000001.4      70 322.0055         69          3       3       3    44.50
## 10000002.1      45 474.8417         43          2       2       2    45.63
## 10000002.2      45 474.8417         43          2       2       2    45.63
## 10000002.4      45 474.8417         43          2       2       2    45.63
## 10000004.1      51 171.4766         49          2       2       2    46.00
## 10000004.2      51 171.4766         49          2       2       2    46.00
## 10000004.4      51 171.4766         49          2       2       2    46.00
## 10000005.1      30   0.0000         NA         NA       3       3    40.80
##            x2height x3height x4height x1kage_r x2kage_r x3age x4age
## 10000001.1    45.75       NA     48.0    72.39    79.76    NA 91.66
## 10000001.2    45.75       NA     48.0    72.39    79.76    NA 91.66
## 10000001.4    45.75       NA     48.0    72.39    79.76    NA 91.66
## 10000002.1    47.00       NA     49.5    71.41    77.52    NA 89.56
## 10000002.2    47.00       NA     49.5    71.41    77.52    NA 89.56
## 10000002.4    47.00       NA     49.5    71.41    77.52    NA 89.56
## 10000004.1    48.00       NA     50.0    72.39    78.84    NA 89.23
## 10000004.2    48.00       NA     50.0    72.39    78.84    NA 89.23
## 10000004.4    48.00       NA     50.0    72.39    78.84    NA 89.23
## 10000005.1    42.00       NA       NA    59.51    65.06    NA    NA
##            height1 height2 height4 foodinsec1 foodinsec2 male hisp black
## 10000001.1   44.50   45.75    48.0          0          0    1    0     0
## 10000001.2   44.50   45.75    48.0          0          0    1    0     0
## 10000001.4   44.50   45.75    48.0          0          0    1    0     0
## 10000002.1   45.63   47.00    49.5          0          0    0    0     0
## 10000002.2   45.63   47.00    49.5          0          0    0    0     0
## 10000002.4   45.63   47.00    49.5          0          0    0    0     0
## 10000004.1   46.00   48.00    50.0          0         NA    1    0     0
## 10000004.2   46.00   48.00    50.0          0         NA    1    0     0
## 10000004.4   46.00   48.00    50.0          0         NA    1    0     0
## 10000005.1   40.80   42.00      NA          0         NA    0    1     0
##            asian nahn other lths gths single notmar urban pov hhsize
## 10000001.1     0    0     0    0    1      0      0     0   1      6
## 10000001.2     0    0     0    0    1      0      0     0   1      6
## 10000001.4     0    0     0    0    1      0      0     0   1      6
## 10000002.1     0    0     0    0    1      0      0     1   0      4
## 10000002.2     0    0     0    0    1      0      0     1   0      4
## 10000002.4     0    0     0    0    1      0      0     1   0      4
## 10000004.1     0    0     0    0    1      0      0     0   0      4
## 10000004.2     0    0     0    0    1      0      0     0   0      4
## 10000004.4     0    0     0    0    1      0      0     0   0      4
## 10000005.1     0    0     0    0    1      0      0     1   1      5
##            minorsch unsafe    dist_pov wave    math      age      height
## 10000001.1    0.800 unsafe  2.22748174    1 44.9695 6.032500 -0.74750120
## 10000001.2    0.800 unsafe  2.22748174    2 61.5056 6.646667 -0.71445383
## 10000001.4    0.800 unsafe  2.22748174    4 79.5010 7.638333 -0.69549851
## 10000002.1    3.000   safe -0.67176163    1 30.1857 5.950833  0.01182955
## 10000002.2    3.000   safe -0.67176163    2 50.0694 6.460000  0.16591695
## 10000002.4    3.000   safe -0.67176163    4 71.0421 7.463333  0.01627224
## 10000004.1    1.800 unsafe  0.09119715    1 13.8410 6.032500  0.03840209
## 10000004.2    1.800 unsafe  0.09119715    2 31.0148 6.570000  0.42106884
## 10000004.4    1.800 unsafe  0.09119715    4 53.9906 7.435833  0.41691644
## 10000005.1    9.048   safe -1.28212866    1 27.6587 4.959167 -1.45613917
##            health         ses
## 10000001.1      1 -0.05343635
## 10000001.2      2 -0.05343635
## 10000001.4      3 -0.04820708
## 10000002.1      1  0.36092641
## 10000002.2      1  0.36092641
## 10000002.4      1  0.32296225
## 10000004.1      1 -0.18810425
## 10000004.2      1 -0.18810425
## 10000004.4     NA -0.32039792
## 10000005.1      1  0.37128548

Modeling

Longitudinal Models

Now we fit models to the longitudinal data. We start simple then move into “growth curve”" models. This follows the presentation in Singer and Willett (2003) Chapters 3-6. I also incorporate survey design elements into the analysis as much as I can. Specifically, I include the mean centered sampling weight, and in the models (except for the first one), I include a second random effect for the survey stratum

#basic linear model
fit.1<-glm(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses, data=e.long[e.long$w4c4p_40>0,], weights=w4c4p_40/mean(w4c4p_40))
summary(fit.1)

## 
## Call:
## glm(formula = scale(math) ~ scale(age) + male + black + hisp + 
##     asian + nahn + other + ses, data = e.long[e.long$w4c4p_40 > 
##     0, ], weights = w4c4p_40/mean(w4c4p_40))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -5.6180  -0.3775   0.0171   0.4002   3.1039  
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.024267   0.006566   3.696 0.000219 ***
## scale(age)   0.702677   0.003697 190.042  < 2e-16 ***
## male        -0.018122   0.007435  -2.437 0.014799 *  
## black       -0.230601   0.011626 -19.835  < 2e-16 ***
## hisp        -0.127924   0.009665 -13.236  < 2e-16 ***
## asian        0.192175   0.019204  10.007  < 2e-16 ***
## nahn        -0.148831   0.027280  -5.456 4.92e-08 ***
## other       -0.011359   0.019194  -0.592 0.554001    
## ses          0.278515   0.004742  58.735  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.4241632)
## 
##     Null deviance: 30551  on 30782  degrees of freedom
## Residual deviance: 13053  on 30774  degrees of freedom
##   (195 observations deleted due to missingness)
## AIC: 66269
## 
## Number of Fisher Scoring iterations: 2

#random intercept model for individual student differences
fit.2<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+(1|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.2)

## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
##   approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +  
##     other + ses + (1 | childid) + (1 | w4c4p_4str/w4c4p_4psu)
##    Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
## 
##      AIC      BIC   logLik deviance df.resid 
##  43799.3  43907.7 -21886.7  43773.3    30770 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.8116 -0.4862  0.0269  0.5107  4.2858 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev.
##  childid               (Intercept) 0.324389 0.56955 
##  w4c4p_4psu:w4c4p_4str (Intercept) 0.013637 0.11678 
##  w4c4p_4str            (Intercept) 0.002553 0.05053 
##  Residual                          0.094640 0.30764 
## Number of obs: 30783, groups:  
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  6.899e-02  1.615e-02  1.270e+02   4.271 3.78e-05 ***
## scale(age)   8.494e-01  2.023e-03  2.208e+04 419.903  < 2e-16 ***
## male        -2.757e-02  1.201e-02  1.009e+04  -2.295   0.0217 *  
## black       -2.597e-01  2.243e-02  7.439e+03 -11.577  < 2e-16 ***
## hisp        -1.734e-01  1.780e-02  5.618e+03  -9.739  < 2e-16 ***
## asian        1.907e-01  2.669e-02  9.013e+03   7.145 9.71e-13 ***
## nahn        -1.508e-01  5.854e-02  9.231e+03  -2.575   0.0100 *  
## other       -3.394e-03  2.927e-02  1.048e+04  -0.116   0.9077    
## ses          1.928e-01  6.458e-03  1.742e+04  29.850  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) scl(g) male   black  hisp   asian  nahn   other 
## scale(age) -0.020                                                 
## male       -0.381 -0.012                                          
## black      -0.277  0.031  0.007                                   
## hisp       -0.390  0.053  0.002  0.285                            
## asian      -0.223  0.005  0.027  0.151  0.218                     
## nahn       -0.101  0.006  0.016  0.064  0.105  0.064              
## other      -0.170  0.004  0.005  0.132  0.168  0.119  0.064       
## ses        -0.159  0.144  0.000  0.174  0.296 -0.015  0.049  0.022

#individual trajectory model with random slope for time
fit.3<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+(scale(age)|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.3)

## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
##   approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +  
##     other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
##    Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
## 
##      AIC      BIC   logLik deviance df.resid 
##  40892.8  41017.8 -20431.4  40862.8    30768 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.1237 -0.4726 -0.0048  0.4585  4.6716 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev. Corr
##  childid               (Intercept) 0.329290 0.57384      
##                        scale(age)  0.025719 0.16037  0.46
##  w4c4p_4psu:w4c4p_4str (Intercept) 0.013833 0.11761      
##  w4c4p_4str            (Intercept) 0.003937 0.06274      
##  Residual                          0.063939 0.25286      
## Number of obs: 30783, groups:  
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  7.726e-02  1.651e-02  1.110e+02   4.680 8.12e-06 ***
## scale(age)   8.559e-01  2.392e-03  1.084e+04 357.849  < 2e-16 ***
## male        -3.793e-02  1.138e-02  9.850e+03  -3.334 0.000858 ***
## black       -1.834e-01  2.128e-02  7.740e+03  -8.621  < 2e-16 ***
## hisp        -1.578e-01  1.694e-02  6.629e+03  -9.313  < 2e-16 ***
## asian        2.031e-01  2.529e-02  9.145e+03   8.032 1.11e-15 ***
## nahn        -1.429e-01  5.525e-02  8.872e+03  -2.586 0.009730 ** 
## other        2.548e-02  2.778e-02  1.030e+04   0.917 0.359124    
## ses          2.055e-01  6.618e-03  1.612e+04  31.053  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) scl(g) male   black  hisp   asian  nahn   other 
## scale(age)  0.092                                                 
## male       -0.351 -0.007                                          
## black      -0.257  0.030  0.007                                   
## hisp       -0.362  0.042  0.002  0.285                            
## asian      -0.209 -0.016  0.026  0.151  0.220                     
## nahn       -0.093  0.013  0.015  0.063  0.106  0.065              
## other      -0.159 -0.002  0.004  0.133  0.168  0.121  0.065       
## ses        -0.150  0.106  0.003  0.167  0.292 -0.023  0.043  0.018

anova(fit.3, fit.2)

## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn + 
## ..1:     other + ses + (1 | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) + male + black + hisp + asian + nahn + 
## object:     other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
##        Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ..1    13 43799 43908 -21887    43773                             
## object 15 40893 41018 -20431    40863 2910.5      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#curvilinear trajectory model with random nonlinear time
fit.4<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+I(scale(age^2))+(I(scale(age^2))|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F, lmerControl(optimizer = "Nelder_Mead"))
summary(fit.4)

## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
##   approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +  
##     other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) +  
##     (1 | w4c4p_4str/w4c4p_4psu)
##    Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
## Control: lmerControl(optimizer = "Nelder_Mead")
## 
##      AIC      BIC   logLik deviance df.resid 
##  40681.1  40814.4 -20324.5  40649.1    30767 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2573 -0.4627  0.0009  0.4586  4.5761 
## 
## Random effects:
##  Groups                Name            Variance Std.Dev. Corr
##  childid               (Intercept)     0.33502  0.57881      
##                        I(scale(age^2)) 0.02626  0.16205  0.34
##  w4c4p_4psu:w4c4p_4str (Intercept)     0.01402  0.11842      
##  w4c4p_4str            (Intercept)     0.00399  0.06317      
##  Residual                              0.06077  0.24651      
## Number of obs: 30783, groups:  
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
## 
## Fixed effects:
##                   Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)      7.052e-02  1.673e-02  1.130e+02   4.214 5.06e-05 ***
## scale(age)       1.447e+00  3.278e-02  1.780e+04  44.143  < 2e-16 ***
## male            -3.938e-02  1.167e-02  9.532e+03  -3.374 0.000743 ***
## black           -2.003e-01  2.188e-02  7.491e+03  -9.153  < 2e-16 ***
## hisp            -1.575e-01  1.740e-02  6.413e+03  -9.053  < 2e-16 ***
## asian            2.053e-01  2.583e-02  8.795e+03   7.950 2.22e-15 ***
## nahn            -1.482e-01  5.698e-02  8.670e+03  -2.601 0.009309 ** 
## other            2.171e-02  2.844e-02  9.945e+03   0.763 0.445290    
## ses              2.020e-01  6.704e-03  1.581e+04  30.135  < 2e-16 ***
## I(scale(age^2)) -5.906e-01  3.288e-02  1.771e+04 -17.963  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(g) male   black  hisp   asian  nahn   other  ses   
## scale(age)  -0.015                                                        
## male        -0.355 -0.016                                                 
## black       -0.261  0.002  0.007                                          
## hisp        -0.366  0.005  0.002  0.285                                   
## asian       -0.211  0.012  0.026  0.151  0.221                            
## nahn        -0.094 -0.005  0.015  0.063  0.105  0.065                     
## other       -0.160  0.003  0.004  0.133  0.168  0.121  0.064              
## ses         -0.153 -0.027  0.003  0.169  0.295 -0.022  0.044  0.019       
## I(scl(g^2))  0.020 -0.997  0.016  0.000 -0.002 -0.013  0.006 -0.003  0.035

anova(fit.4, fit.3)

## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn + 
## ..1:     other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) + male + black + hisp + asian + nahn + 
## object:     other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) + 
## object:     (1 | w4c4p_4str/w4c4p_4psu)
##        Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## ..1    15 40893 41018 -20431    40863                             
## object 16 40681 40814 -20324    40649 213.74      1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#individual trajectory model with fixed effects for race and different population
#trajectories for each race
fit.5<-lmer(scale(math)~scale(age)*(male+black+hisp+asian+nahn+other)+ses+(scale(age)|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.5)

## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
##   approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) * (male + black + hisp + asian + nahn +  
##     other) + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
##    Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
## 
##      AIC      BIC   logLik deviance df.resid 
##  40744.5  40919.6 -20351.3  40702.5    30762 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.1255 -0.4746 -0.0059  0.4629  4.6324 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev. Corr
##  childid               (Intercept) 0.329576 0.57409      
##                        scale(age)  0.024896 0.15779  0.46
##  w4c4p_4psu:w4c4p_4str (Intercept) 0.014036 0.11848      
##  w4c4p_4str            (Intercept) 0.004213 0.06491      
##  Residual                          0.063801 0.25259      
## Number of obs: 30783, groups:  
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
## 
## Fixed effects:
##                    Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)       8.813e-02  1.689e-02  1.150e+02   5.218 8.08e-07 ***
## scale(age)        8.673e-01  4.078e-03  1.084e+04 212.666  < 2e-16 ***
## male             -2.635e-02  1.192e-02  1.004e+04  -2.211 0.027069 *  
## black            -2.618e-01  2.227e-02  8.330e+03 -11.756  < 2e-16 ***
## hisp             -1.803e-01  1.771e-02  7.348e+03 -10.176  < 2e-16 ***
## asian             1.880e-01  2.623e-02  9.508e+03   7.165 8.36e-13 ***
## nahn             -1.555e-01  5.826e-02  9.513e+03  -2.669 0.007615 ** 
## other            -1.757e-03  2.897e-02  1.036e+04  -0.061 0.951630    
## ses               1.982e-01  6.655e-03  1.606e+04  29.783  < 2e-16 ***
## scale(age):male   1.563e-02  4.713e-03  1.056e+04   3.317 0.000914 ***
## scale(age):black -9.066e-02  7.493e-03  9.230e+03 -12.098  < 2e-16 ***
## scale(age):hisp  -2.296e-02  5.803e-03  1.048e+04  -3.956 7.68e-05 ***
## scale(age):asian -2.164e-02  1.091e-02  1.630e+04  -1.983 0.047381 *  
## scale(age):nahn  -1.496e-02  1.988e-02  6.800e+03  -0.752 0.451875    
## scale(age):other -3.745e-02  1.172e-02  1.187e+04  -3.196 0.001395 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) scl(g) male   black  hisp   asian  nahn   other  ses   
## scale(age)   0.180                                                        
## male        -0.361 -0.183                                                 
## black       -0.263 -0.087  0.007                                          
## hisp        -0.366 -0.108  0.003  0.287                                   
## asian       -0.210 -0.079  0.027  0.150  0.216                            
## nahn        -0.095 -0.035  0.015  0.064  0.105  0.064                     
## other       -0.161 -0.067  0.005  0.132  0.168  0.119  0.063              
## ses         -0.158  0.006  0.002  0.181  0.307 -0.019  0.050  0.022       
## scale(g):ml -0.112 -0.601  0.296  0.002  0.002  0.009  0.005  0.003 -0.001
## scl(g):blck -0.074 -0.352  0.003  0.291  0.082  0.038  0.022  0.038  0.074
## scal(g):hsp -0.094 -0.451  0.003  0.080  0.288  0.048  0.028  0.048  0.100
## scale(g):sn -0.045 -0.255  0.007  0.035  0.043  0.261  0.013  0.025  0.010
## scal(g):nhn -0.030 -0.141  0.006  0.023  0.032  0.015  0.314  0.014  0.031
## scal(g):thr -0.041 -0.224  0.003  0.032  0.041  0.024  0.013  0.280  0.015
##             scl(g):m scl(g):b scl(g):h scl(g):s scl(g):n
## scale(age)                                              
## male                                                    
## black                                                   
## hisp                                                    
## asian                                                   
## nahn                                                    
## other                                                   
## ses                                                     
## scale(g):ml                                             
## scl(g):blck  0.009                                      
## scal(g):hsp  0.004    0.252                             
## scale(g):sn  0.027    0.131    0.169                    
## scal(g):nhn  0.018    0.074    0.095    0.050           
## scal(g):thr  0.003    0.122    0.158    0.083    0.046

anova(fit.5, fit.4)

## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn + 
## ..1:     other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) + 
## ..1:     (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) * (male + black + hisp + asian + nahn + 
## object:     other) + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
##        Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1    16 40681 40814 -20324    40649                        
## object 21 40745 40920 -20351    40703     0      5          1

#Compare models using AIC
AIC(fit.1)

## [1] 66268.54

AIC(fit.2)

## [1] 43799.32

AIC(fit.3)

## [1] 40892.79

AIC(fit.4) #Best Fit

## [1] 40681.05

AIC(fit.5)

## [1] 40744.54

Binary response longitudinal model

Here we use GEE’s instead of GLMMs.

library(geepack)
e.long.comp<-e.long[complete.cases(e.long[, c("black", "age", "health", "w1p0str")]),]
#basic linear model
fit.1b<-geeglm(I(health>2)~age+male+black+hisp+asian+nahn+other+ses, waves = wave,id=childid , family=binomial, data=e.long.comp, weights=w4c4p_40/mean(w4c4p_40))

## Warning: non-integer #successes in a binomial glm!

summary(fit.1b)

## 
## Call:
## geeglm(formula = I(health > 2) ~ age + male + black + hisp + 
##     asian + nahn + other + ses, family = binomial, data = e.long.comp, 
##     weights = w4c4p_40/mean(w4c4p_40), id = childid, waves = wave)
## 
##  Coefficients:
##             Estimate  Std.err    Wald Pr(>|W|)    
## (Intercept) -2.17084  0.20735 109.610  < 2e-16 ***
## age         -0.01942  0.03102   0.392  0.53131    
## male         0.15487  0.05719   7.334  0.00677 ** 
## black        0.39578  0.09007  19.309 1.11e-05 ***
## hisp         0.54363  0.07257  56.111 6.85e-14 ***
## asian        0.73280  0.11578  40.056 2.47e-10 ***
## nahn         0.18050  0.20574   0.770  0.38031    
## other        0.10770  0.14020   0.590  0.44237    
## ses         -0.57995  0.03821 230.340  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)    1.008 0.07269
## 
## Correlation: Structure = independenceNumber of clusters:   12975   Maximum cluster size: 3

fit.2b<-geeglm(I(health>2)~age+male+black+hisp+asian+nahn+other+ses, waves = wave,id=childid, family=binomial, data=e.long.comp, corstr="ar1", weights=w4c4p_40/mean(w4c4p_40))

## Warning: non-integer #successes in a binomial glm!

summary(fit.2b)

## 
## Call:
## geeglm(formula = I(health > 2) ~ age + male + black + hisp + 
##     asian + nahn + other + ses, family = binomial, data = e.long.comp, 
##     weights = w4c4p_40/mean(w4c4p_40), id = childid, waves = wave, 
##     corstr = "ar1")
## 
##  Coefficients:
##             Estimate Std.err   Wald Pr(>|W|)    
## (Intercept)  -2.0342  0.1947 109.14  < 2e-16 ***
## age          -0.0459  0.0289   2.52   0.1125    
## male          0.1732  0.0571   9.19   0.0024 ** 
## black         0.4307  0.0898  23.03  1.6e-06 ***
## hisp          0.5713  0.0721  62.84  2.2e-15 ***
## asian         0.7802  0.1149  46.10  1.1e-11 ***
## nahn          0.2182  0.2101   1.08   0.2991    
## other         0.1439  0.1420   1.03   0.3107    
## ses          -0.5655  0.0379 222.50  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Estimated Scale Parameters:
##             Estimate Std.err
## (Intercept)     1.01  0.0744
## 
## Correlation: Structure = ar1  Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.423  0.0373
## Number of clusters:   12975   Maximum cluster size: 3