In this example, we will use hierarchical models to do some longitudinal modeling of data from the ECLS-K 2011. Specifically, we will model changes in a student’s standardized math score from fall kindergarten to spring, 1st grade.
Longitudinal data are collected in waves, representing the sample at different time points For example, currently, the ECLS-K 2011 has four waves, intending to capture the children as they progress through school, and will eventuall have more. We really want to measure “similar” items at each time point so we can understand what makes people change over their lives. When it comes to data we need to introduce a different data structure to accomodate this.
In this example, I will focus on the use of the linear mixed model for a continuous outcome
We start simple then move into growth curve models. This follows the presentation in Singer and Willett (2003) Chapters 3-6.
First we load our data
load("~/Google Drive/dem7903_App_Hier/data/eclsk_11.Rdata")
names(eclsk11)<-tolower(names(eclsk11))
library (car)
library(arm)
library(lmerTest)
#get out only the variables I'm going to use for this example
myvars<-c("childid", "x_chsex_r", "x1locale", "x_raceth_r", "x2povty", "x12par1ed_i","p1curmar", "x1htotal", "x1mscalk1", "x2mscalk1", "x3mscalk1", "x4mscalk1", "p1hscale", "p2hscale", "p4hscale","x2fsstat2","x4fsstat2", "x12sesl","x4sesl_i", "p2parct1", "p2parct2", "s1_id", "p2safepl","x2krceth","p1o2near", "x_distpov" , "w1c0", "w1p0", "w2p0", "w1c0str", "w1p0str",
"w4c4p_40","w4c4p_4str", "w4c4p_4psu", "w1c0psu", "w1p0psu", "x1height","x2height","x3height","x4height", "x1kage_r","x2kage_r","x3age","x4age")
#subset the data
eclsk.sub<-eclsk11[,myvars]
rm(eclsk11); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 1665255 89.0 2637877 140.9 2164898 115.7
## Vcells 2968870 22.7 219933776 1678.0 182884454 1395.3
Next, I do some recoding of variables. I have to make the repeated measures of each of my longitudinal variables.
#Longitudinal variables
#recode our outcomes, the first is the child's math standardized test score in Kindergarten
eclsk.sub$math1<-ifelse(eclsk.sub$x1mscalk1<0, NA, eclsk.sub$x1mscalk1)
eclsk.sub$math2<-ifelse(eclsk.sub$x2mscalk1<0, NA, eclsk.sub$x2mscalk1)
#eclsk.sub$math3<-ifelse(eclsk.sub$x3mscalk1<0, NA, eclsk.sub$x3mscalk1)
eclsk.sub$math4<-ifelse(eclsk.sub$x4mscalk1<0, NA, eclsk.sub$x4mscalk1)
#Second outcome is child's height for age, continuous outcome
eclsk.sub$height1<-ifelse(eclsk.sub$x1height<=-7, NA, eclsk.sub$x1height)
eclsk.sub$height2<-ifelse(eclsk.sub$x2height<=-7, NA, eclsk.sub$x2height)
#eclsk.sub$height3<-ifelse(eclsk.sub$x3height<=-7, NA, eclsk.sub$x3height)
eclsk.sub$height4<-ifelse(eclsk.sub$x4height<=-7, NA, eclsk.sub$x4height)
#Age at each wave
eclsk.sub$age_yrs1<-ifelse(eclsk.sub$x1kage_r<0, NA, eclsk.sub$x1kage_r/12)
eclsk.sub$age_yrs2<-ifelse(eclsk.sub$x2kage_r<0, NA, eclsk.sub$x2kage_r/12)
#eclsk.sub$age_yrs3<-ifelse(eclsk.sub$x3age<0, NA, eclsk.sub$x3age/12)
eclsk.sub$age_yrs4<-ifelse(eclsk.sub$x4age<0, NA, eclsk.sub$x4age/12)
eclsk.sub<- eclsk.sub[is.na(eclsk.sub$age_yrs1)==F, ]
#Height for age z score standardized by sex and age
eclsk.sub$height_z1<-ave(eclsk.sub$height1, as.factor(paste(round(eclsk.sub$age_yrs1, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z2<-ave(eclsk.sub$height2, as.factor(paste(round(eclsk.sub$age_yrs2, 1.5), eclsk.sub$male)), FUN=scale)
#eclsk.sub$height_z3<-ave(eclsk.sub$height3, as.factor(paste(round(eclsk.sub$age_yrs3, 1.5), eclsk.sub$male)), FUN=scale)
eclsk.sub$height_z4<-ave(eclsk.sub$height4, as.factor(paste(round(eclsk.sub$age_yrs4, 1.5), eclsk.sub$male)), FUN=scale)
#Household food insecurity, dichotomous outcome
#This outcome is only present at two waves
eclsk.sub$foodinsec1<-recode(eclsk.sub$x2fsstat2, recodes="2:3=1; 1=0; else=NA")
eclsk.sub$foodinsec2<-recode(eclsk.sub$x4fsstat2, recodes="2:3=1; 1=0; else=NA")
#Child health assessment Excellent to poor , ordinal outcome
eclsk.sub$chhealth1<-ifelse(eclsk.sub$p1hscale<0, NA, eclsk.sub$p1hscale)
eclsk.sub$chhealth2<-ifelse(eclsk.sub$p2hscale<0, NA, eclsk.sub$p2hscale)
eclsk.sub$chhealth4<-ifelse(eclsk.sub$p4hscale<0, NA, eclsk.sub$p4hscale)
#SES
eclsk.sub$hhses1<-ifelse(eclsk.sub$x12sesl==-9, NA, scale(eclsk.sub$x12sesl))
eclsk.sub$hhses4<-ifelse(eclsk.sub$x4sesl_i==-9, NA, scale(eclsk.sub$x4sesl_i))
That does it for our time varying variables. We now code some variables from the kindergarten wave to use as predictors:
#Non time varying variables
#First we recode some Child characteristics
#Child's sex: recode as male =1
eclsk.sub$male<-recode(eclsk.sub$x_chsex_r, recodes="1=1; 2=0; -9=NA")
#Recode race with white, non Hispanic as reference using dummy vars
eclsk.sub$hisp<-recode (eclsk.sub$x_raceth_r, recodes="3:4=1;-9=NA; else=0")
eclsk.sub$black<-recode (eclsk.sub$x_raceth_r, recodes="2=1;-9=NA; else=0")
eclsk.sub$asian<-recode (eclsk.sub$x_raceth_r, recodes="5=1;-9=NA; else=0")
eclsk.sub$nahn<-recode (eclsk.sub$x_raceth_r, recodes="6:7=1;-9=NA; else=0")
eclsk.sub$other<-recode (eclsk.sub$x_raceth_r, recodes="8=1;-9=NA; else=0")
#Then we recode some parent/mother characteristics
#Mother's education, recode as 2 dummys with HS = reference
eclsk.sub$lths<-recode(eclsk.sub$x12par1ed_i, recodes = "0:2=1; 3:8=0; else = NA")
eclsk.sub$gths<-recode(eclsk.sub$x12par1ed_i, recodes = "1:3=0; 4:8=1; else =NA")
#marital status, recode as 2 dummys, ref= married
eclsk.sub$single<-recode(eclsk.sub$p1curmar, recodes="4=1; -7:-9=NA; else=0")
eclsk.sub$notmar<-recode(eclsk.sub$p1curmar, recodes="2:3=1; -7:-9=NA; else=0")
#Then we do some household level variables
#Urban school location = 1
eclsk.sub$urban<-recode(eclsk.sub$x1locale, recodes = "1:3=1; 4=0; -1:-9=NA")
#poverty level in poverty = 1
eclsk.sub$pov<-recode(eclsk.sub$x2povty , recodes ="1:2=1; 3=0; -9=NA")
#Household size
eclsk.sub$hhsize<-eclsk.sub$x1htotal
#school % minority student body
eclsk.sub$minorsch<-ifelse(eclsk.sub$x2krceth <0, NA, eclsk.sub$x2krceth/10)
#Unsafe neighborhood
eclsk.sub$unsafe<-recode(eclsk.sub$p2safepl , recodes = "1:2='unsafe'; 3='safe'; else=NA", as.factor.result = T)
#school district poverty
eclsk.sub$dist_pov<-ifelse(eclsk.sub$x_distpov==-9, NA, scale(eclsk.sub$x_distpov))
To analyze data longitudinally, we must reshape the data fron its current “wide” format, where each repeated measure is a column, into the “long” format, where there is a single column for a particular variable, and we account for the repeated measurements of each person. In this case, I’m going to use three waves of data, so each child can contribute up to three lines to the data.
This is just a little example using the first 10 kids, just to illustrate this process.
test<-eclsk.sub[1:10,]
#look at the data in "wide" format
head(test)
## childid x_chsex_r x1locale x_raceth_r x2povty x12par1ed_i p1curmar
## 1 10000001 1 4 1 2 5 1
## 2 10000002 2 2 1 3 5 1
## 4 10000004 1 4 1 3 5 1
## 5 10000005 2 2 3 2 4 1
## 6 10000006 2 4 1 2 3 3
## 7 10000007 2 3 1 1 4 1
## x1htotal x1mscalk1 x2mscalk1 x3mscalk1 x4mscalk1 p1hscale p2hscale
## 1 6 44.9695 61.5056 NA 79.5010 1 2
## 2 4 30.1857 50.0694 NA 71.0421 1 1
## 4 4 13.8410 31.0148 NA 53.9906 1 1
## 5 5 27.6587 45.5457 NA NA 1 1
## 6 4 22.2918 27.8530 NA 42.6581 1 2
## 7 5 26.1178 36.7276 NA 59.3014 2 2
## p4hscale x2fsstat2 x4fsstat2 x12sesl x4sesl_i p2parct1 p2parct2 s1_id
## 1 3 1 1 -0.13 -0.07 1 1 1861
## 2 1 1 1 0.27 0.23 1 1 2113
## 4 -9 1 -9 -0.26 -0.29 1 1 2070
## 5 NA 1 NA 0.28 NA 1 3 1079
## 6 1 1 1 -0.69 -0.71 1 -1 1210
## 7 -9 1 -9 -0.58 -0.60 1 1 1282
## p2safepl x2krceth p1o2near x_distpov w1c0 w1p0 w2p0 w1c0str
## 1 2 8.00 2 45 188.09105 211.4320 215.1901 70
## 2 3 30.00 -1 7 237.88083 274.5976 327.8694 45
## 4 2 18.00 -1 17 96.36698 103.5068 111.8929 51
## 5 3 90.48 3 -1 178.54929 237.3439 267.5328 30
## 6 3 2.00 -1 11 497.90703 525.4855 636.1168 65
## 7 3 3.00 0 9 447.97361 543.0198 584.5155 65
## w1p0str w4c4p_40 w4c4p_4str w4c4p_4psu w1c0psu w1p0psu x1height x2height
## 1 70 322.0055 69 3 3 3 44.50 45.75
## 2 45 474.8417 43 2 2 2 45.63 47.00
## 4 51 171.4766 49 2 2 2 46.00 48.00
## 5 30 0.0000 NA NA 3 3 40.80 42.00
## 6 65 957.6496 64 2 2 2 46.00 47.00
## 7 65 535.6196 64 1 1 1 45.25 46.00
## x3height x4height x1kage_r x2kage_r x3age x4age math1 math2 math4
## 1 NA 48.00 72.39 79.76 NA 91.66 44.9695 61.5056 79.5010
## 2 NA 49.50 71.41 77.52 NA 89.56 30.1857 50.0694 71.0421
## 4 NA 50.00 72.39 78.84 NA 89.23 13.8410 31.0148 53.9906
## 5 NA NA 59.51 65.06 NA NA 27.6587 45.5457 NA
## 6 NA 49.75 75.78 82.62 NA 94.82 22.2918 27.8530 42.6581
## 7 NA 48.00 63.95 69.90 NA 80.98 26.1178 36.7276 59.3014
## height1 height2 height4 age_yrs1 age_yrs2 age_yrs4 height_z1
## 1 44.50 45.75 48.00 6.032500 6.646667 7.638333 -0.74750120
## 2 45.63 47.00 49.50 5.950833 6.460000 7.463333 0.01182955
## 4 46.00 48.00 50.00 6.032500 6.570000 7.435833 0.03840209
## 5 40.80 42.00 NA 4.959167 5.421667 NA -1.45613917
## 6 46.00 47.00 49.75 6.315000 6.885000 7.901667 -0.62911889
## 7 45.25 46.00 48.00 5.329167 5.825000 6.748333 0.49810428
## height_z2 height_z4 foodinsec1 foodinsec2 chhealth1 chhealth2
## 1 -0.7144538 -0.69549851 0 0 1 2
## 2 0.1659170 0.01627224 0 0 1 1
## 4 0.4210688 0.41691644 0 NA 1 1
## 5 -1.0816114 NA 0 NA 1 1
## 6 -0.2994689 -0.52126128 0 0 1 2
## 7 0.3141864 0.16071039 0 NA 2 2
## chhealth4 hhses1 hhses4 male hisp black asian nahn other lths
## 1 3 -0.05343635 -0.04820708 1 0 0 0 0 0 0
## 2 1 0.36092641 0.32296225 0 0 0 0 0 0 0
## 4 NA -0.18810425 -0.32039792 1 0 0 0 0 0 0
## 5 NA 0.37128548 NA 0 1 0 0 0 0 0
## 6 1 -0.63354422 -0.84003498 0 0 0 0 0 0 0
## 7 NA -0.51959446 -0.70393956 0 0 0 0 0 0 0
## gths single notmar urban pov hhsize minorsch unsafe dist_pov
## 1 1 0 0 0 1 6 0.800 unsafe 2.22748174
## 2 1 0 0 1 0 4 3.000 safe -0.67176163
## 4 1 0 0 0 0 4 1.800 unsafe 0.09119715
## 5 1 0 0 1 1 5 9.048 safe -1.28212866
## 6 0 0 1 0 1 4 0.200 safe -0.36657812
## 7 1 0 0 1 1 5 0.300 safe -0.51916987
e.longt<-reshape(test, idvar="childid", varying=list(math = c("math1", "math2", "math4"),
age = c("age_yrs1", "age_yrs2", "age_yrs4"),
height= c("height_z1", "height_z2", "height_z4"),
health=c("chhealth1", "chhealth2", "chhealth4"),
ses = c("hhses1", "hhses1", "hhses4")),
v.names = c("math", "age", "height", "health"),
timevar="wave",
times=c(1,2,4),direction="long",
drop = names(test)[c(2:20, 22:25)])
#here we look at the new data set, in the "long" format
head(e.longt[order(e.longt$childid, e.longt$wave),c(1, 40:44)], n=20)
## childid minorsch unsafe dist_pov wave math
## 10000001.1 10000001 0.800 unsafe 2.22748174 1 44.9695
## 10000001.2 10000001 0.800 unsafe 2.22748174 2 61.5056
## 10000001.4 10000001 0.800 unsafe 2.22748174 4 79.5010
## 10000002.1 10000002 3.000 safe -0.67176163 1 30.1857
## 10000002.2 10000002 3.000 safe -0.67176163 2 50.0694
## 10000002.4 10000002 3.000 safe -0.67176163 4 71.0421
## 10000004.1 10000004 1.800 unsafe 0.09119715 1 13.8410
## 10000004.2 10000004 1.800 unsafe 0.09119715 2 31.0148
## 10000004.4 10000004 1.800 unsafe 0.09119715 4 53.9906
## 10000005.1 10000005 9.048 safe -1.28212866 1 27.6587
## 10000005.2 10000005 9.048 safe -1.28212866 2 45.5457
## 10000005.4 10000005 9.048 safe -1.28212866 4 NA
## 10000006.1 10000006 0.200 safe -0.36657812 1 22.2918
## 10000006.2 10000006 0.200 safe -0.36657812 2 27.8530
## 10000006.4 10000006 0.200 safe -0.36657812 4 42.6581
## 10000007.1 10000007 0.300 safe -0.51916987 1 26.1178
## 10000007.2 10000007 0.300 safe -0.51916987 2 36.7276
## 10000007.4 10000007 0.300 safe -0.51916987 4 59.3014
## 10000008.1 10000008 2.600 safe 0.39638067 1 30.9004
## 10000008.2 10000008 2.600 safe 0.39638067 2 54.5561
So, we can see the children that are repeated three times, but some children are missing for some variables at some waves, which is common in longitudinal surveys.
To illustrate these data, we can plot the children’s math scores and other variables over time to see trends within child. Some kids are missing at all times, others don’t have complete data for all times, but you can see the trends within child.
require(lattice)
xyplot(math~age|childid, data=e.longt,
panel=function(x,y){
panel.xyplot(x,y)
panel.lmline(x,y,)}, main="Math Scores over time for each child", xlab= "Age", ylab="Math Score")
xyplot(height~age|childid, data=e.longt,
panel=function(x,y){
panel.xyplot(x,y)
panel.lmline(x,y,)}, main="Height over time for each child", xlab= "Age", ylab="Height")
xyplot(health~age|childid, data=e.longt,
panel=function(x,y){
panel.xyplot(x,y)
panel.lmline(x,y,)}, main="Self Rated Health over time for each child", xlab= "Age", ylab="SRH")
So, these plots actually fit a linear regression line to each child’s data, which is what we are effectively trying to do, but doing this within the traditional linear model framework is inefficient. Instead we will use the linear mixed model to analyze the overall trajectory of change over time, but allowing each child to deviate from the overall pattern.
That was a cute little example, now we do the entire data set:
e.long<-reshape(eclsk.sub, idvar="childid", varying=list(math = c("math1", "math2", "math4"),
age = c("age_yrs1", "age_yrs2", "age_yrs4"),
height= c("height_z1", "height_z2", "height_z4"),
health=c("chhealth1", "chhealth2", "chhealth4"),
ses = c("hhses1", "hhses1", "hhses4")),
v.names = c("math", "age", "height", "health", "ses"),
timevar="wave",times=c(1,2,4),direction="long",
drop = names(eclsk.sub)[c(2:20, 22:25)])
e.long<-e.long[order(e.long$childid, e.long$wave),]
head(e.long, n=10)
## childid p2parct2 x_distpov w1c0 w1p0 w2p0 w1c0str
## 10000001.1 10000001 1 45 188.09105 211.4320 215.1901 70
## 10000001.2 10000001 1 45 188.09105 211.4320 215.1901 70
## 10000001.4 10000001 1 45 188.09105 211.4320 215.1901 70
## 10000002.1 10000002 1 7 237.88083 274.5976 327.8694 45
## 10000002.2 10000002 1 7 237.88083 274.5976 327.8694 45
## 10000002.4 10000002 1 7 237.88083 274.5976 327.8694 45
## 10000004.1 10000004 1 17 96.36698 103.5068 111.8929 51
## 10000004.2 10000004 1 17 96.36698 103.5068 111.8929 51
## 10000004.4 10000004 1 17 96.36698 103.5068 111.8929 51
## 10000005.1 10000005 3 -1 178.54929 237.3439 267.5328 30
## w1p0str w4c4p_40 w4c4p_4str w4c4p_4psu w1c0psu w1p0psu x1height
## 10000001.1 70 322.0055 69 3 3 3 44.50
## 10000001.2 70 322.0055 69 3 3 3 44.50
## 10000001.4 70 322.0055 69 3 3 3 44.50
## 10000002.1 45 474.8417 43 2 2 2 45.63
## 10000002.2 45 474.8417 43 2 2 2 45.63
## 10000002.4 45 474.8417 43 2 2 2 45.63
## 10000004.1 51 171.4766 49 2 2 2 46.00
## 10000004.2 51 171.4766 49 2 2 2 46.00
## 10000004.4 51 171.4766 49 2 2 2 46.00
## 10000005.1 30 0.0000 NA NA 3 3 40.80
## x2height x3height x4height x1kage_r x2kage_r x3age x4age
## 10000001.1 45.75 NA 48.0 72.39 79.76 NA 91.66
## 10000001.2 45.75 NA 48.0 72.39 79.76 NA 91.66
## 10000001.4 45.75 NA 48.0 72.39 79.76 NA 91.66
## 10000002.1 47.00 NA 49.5 71.41 77.52 NA 89.56
## 10000002.2 47.00 NA 49.5 71.41 77.52 NA 89.56
## 10000002.4 47.00 NA 49.5 71.41 77.52 NA 89.56
## 10000004.1 48.00 NA 50.0 72.39 78.84 NA 89.23
## 10000004.2 48.00 NA 50.0 72.39 78.84 NA 89.23
## 10000004.4 48.00 NA 50.0 72.39 78.84 NA 89.23
## 10000005.1 42.00 NA NA 59.51 65.06 NA NA
## height1 height2 height4 foodinsec1 foodinsec2 male hisp black
## 10000001.1 44.50 45.75 48.0 0 0 1 0 0
## 10000001.2 44.50 45.75 48.0 0 0 1 0 0
## 10000001.4 44.50 45.75 48.0 0 0 1 0 0
## 10000002.1 45.63 47.00 49.5 0 0 0 0 0
## 10000002.2 45.63 47.00 49.5 0 0 0 0 0
## 10000002.4 45.63 47.00 49.5 0 0 0 0 0
## 10000004.1 46.00 48.00 50.0 0 NA 1 0 0
## 10000004.2 46.00 48.00 50.0 0 NA 1 0 0
## 10000004.4 46.00 48.00 50.0 0 NA 1 0 0
## 10000005.1 40.80 42.00 NA 0 NA 0 1 0
## asian nahn other lths gths single notmar urban pov hhsize
## 10000001.1 0 0 0 0 1 0 0 0 1 6
## 10000001.2 0 0 0 0 1 0 0 0 1 6
## 10000001.4 0 0 0 0 1 0 0 0 1 6
## 10000002.1 0 0 0 0 1 0 0 1 0 4
## 10000002.2 0 0 0 0 1 0 0 1 0 4
## 10000002.4 0 0 0 0 1 0 0 1 0 4
## 10000004.1 0 0 0 0 1 0 0 0 0 4
## 10000004.2 0 0 0 0 1 0 0 0 0 4
## 10000004.4 0 0 0 0 1 0 0 0 0 4
## 10000005.1 0 0 0 0 1 0 0 1 1 5
## minorsch unsafe dist_pov wave math age height
## 10000001.1 0.800 unsafe 2.22748174 1 44.9695 6.032500 -0.74750120
## 10000001.2 0.800 unsafe 2.22748174 2 61.5056 6.646667 -0.71445383
## 10000001.4 0.800 unsafe 2.22748174 4 79.5010 7.638333 -0.69549851
## 10000002.1 3.000 safe -0.67176163 1 30.1857 5.950833 0.01182955
## 10000002.2 3.000 safe -0.67176163 2 50.0694 6.460000 0.16591695
## 10000002.4 3.000 safe -0.67176163 4 71.0421 7.463333 0.01627224
## 10000004.1 1.800 unsafe 0.09119715 1 13.8410 6.032500 0.03840209
## 10000004.2 1.800 unsafe 0.09119715 2 31.0148 6.570000 0.42106884
## 10000004.4 1.800 unsafe 0.09119715 4 53.9906 7.435833 0.41691644
## 10000005.1 9.048 safe -1.28212866 1 27.6587 4.959167 -1.45613917
## health ses
## 10000001.1 1 -0.05343635
## 10000001.2 2 -0.05343635
## 10000001.4 3 -0.04820708
## 10000002.1 1 0.36092641
## 10000002.2 1 0.36092641
## 10000002.4 1 0.32296225
## 10000004.1 1 -0.18810425
## 10000004.2 1 -0.18810425
## 10000004.4 NA -0.32039792
## 10000005.1 1 0.37128548
Now we fit models to the longitudinal data. We start simple then move into “growth curve”" models. This follows the presentation in Singer and Willett (2003) Chapters 3-6. I also incorporate survey design elements into the analysis as much as I can. Specifically, I include the mean centered sampling weight, and in the models (except for the first one), I include a second random effect for the survey stratum
#basic linear model
fit.1<-glm(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses, data=e.long[e.long$w4c4p_40>0,], weights=w4c4p_40/mean(w4c4p_40))
summary(fit.1)
##
## Call:
## glm(formula = scale(math) ~ scale(age) + male + black + hisp +
## asian + nahn + other + ses, data = e.long[e.long$w4c4p_40 >
## 0, ], weights = w4c4p_40/mean(w4c4p_40))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -5.6180 -0.3775 0.0171 0.4002 3.1039
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.024267 0.006566 3.696 0.000219 ***
## scale(age) 0.702677 0.003697 190.042 < 2e-16 ***
## male -0.018122 0.007435 -2.437 0.014799 *
## black -0.230601 0.011626 -19.835 < 2e-16 ***
## hisp -0.127924 0.009665 -13.236 < 2e-16 ***
## asian 0.192175 0.019204 10.007 < 2e-16 ***
## nahn -0.148831 0.027280 -5.456 4.92e-08 ***
## other -0.011359 0.019194 -0.592 0.554001
## ses 0.278515 0.004742 58.735 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.4241632)
##
## Null deviance: 30551 on 30782 degrees of freedom
## Residual deviance: 13053 on 30774 degrees of freedom
## (195 observations deleted due to missingness)
## AIC: 66269
##
## Number of Fisher Scoring iterations: 2
#random intercept model for individual student differences
fit.2<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+(1|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.2)
## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## other + ses + (1 | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
##
## AIC BIC logLik deviance df.resid
## 43799.3 43907.7 -21886.7 43773.3 30770
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8116 -0.4862 0.0269 0.5107 4.2858
##
## Random effects:
## Groups Name Variance Std.Dev.
## childid (Intercept) 0.324389 0.56955
## w4c4p_4psu:w4c4p_4str (Intercept) 0.013637 0.11678
## w4c4p_4str (Intercept) 0.002553 0.05053
## Residual 0.094640 0.30764
## Number of obs: 30783, groups:
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 6.899e-02 1.615e-02 1.270e+02 4.271 3.78e-05 ***
## scale(age) 8.494e-01 2.023e-03 2.208e+04 419.903 < 2e-16 ***
## male -2.757e-02 1.201e-02 1.009e+04 -2.295 0.0217 *
## black -2.597e-01 2.243e-02 7.439e+03 -11.577 < 2e-16 ***
## hisp -1.734e-01 1.780e-02 5.618e+03 -9.739 < 2e-16 ***
## asian 1.907e-01 2.669e-02 9.013e+03 7.145 9.71e-13 ***
## nahn -1.508e-01 5.854e-02 9.231e+03 -2.575 0.0100 *
## other -3.394e-03 2.927e-02 1.048e+04 -0.116 0.9077
## ses 1.928e-01 6.458e-03 1.742e+04 29.850 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(g) male black hisp asian nahn other
## scale(age) -0.020
## male -0.381 -0.012
## black -0.277 0.031 0.007
## hisp -0.390 0.053 0.002 0.285
## asian -0.223 0.005 0.027 0.151 0.218
## nahn -0.101 0.006 0.016 0.064 0.105 0.064
## other -0.170 0.004 0.005 0.132 0.168 0.119 0.064
## ses -0.159 0.144 0.000 0.174 0.296 -0.015 0.049 0.022
#individual trajectory model with random slope for time
fit.3<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+(scale(age)|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.3)
## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
##
## AIC BIC logLik deviance df.resid
## 40892.8 41017.8 -20431.4 40862.8 30768
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1237 -0.4726 -0.0048 0.4585 4.6716
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 0.329290 0.57384
## scale(age) 0.025719 0.16037 0.46
## w4c4p_4psu:w4c4p_4str (Intercept) 0.013833 0.11761
## w4c4p_4str (Intercept) 0.003937 0.06274
## Residual 0.063939 0.25286
## Number of obs: 30783, groups:
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.726e-02 1.651e-02 1.110e+02 4.680 8.12e-06 ***
## scale(age) 8.559e-01 2.392e-03 1.084e+04 357.849 < 2e-16 ***
## male -3.793e-02 1.138e-02 9.850e+03 -3.334 0.000858 ***
## black -1.834e-01 2.128e-02 7.740e+03 -8.621 < 2e-16 ***
## hisp -1.578e-01 1.694e-02 6.629e+03 -9.313 < 2e-16 ***
## asian 2.031e-01 2.529e-02 9.145e+03 8.032 1.11e-15 ***
## nahn -1.429e-01 5.525e-02 8.872e+03 -2.586 0.009730 **
## other 2.548e-02 2.778e-02 1.030e+04 0.917 0.359124
## ses 2.055e-01 6.618e-03 1.612e+04 31.053 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(g) male black hisp asian nahn other
## scale(age) 0.092
## male -0.351 -0.007
## black -0.257 0.030 0.007
## hisp -0.362 0.042 0.002 0.285
## asian -0.209 -0.016 0.026 0.151 0.220
## nahn -0.093 0.013 0.015 0.063 0.106 0.065
## other -0.159 -0.002 0.004 0.133 0.168 0.121 0.065
## ses -0.150 0.106 0.003 0.167 0.292 -0.023 0.043 0.018
anova(fit.3, fit.2)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## ..1: other + ses + (1 | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## object: other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 13 43799 43908 -21887 43773
## object 15 40893 41018 -20431 40863 2910.5 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#curvilinear trajectory model with random nonlinear time
fit.4<-lmer(scale(math)~scale(age)+male+black+hisp+asian+nahn+other+ses+I(scale(age^2))+(I(scale(age^2))|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F, lmerControl(optimizer = "Nelder_Mead"))
summary(fit.4)
## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) +
## (1 | w4c4p_4str/w4c4p_4psu)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## AIC BIC logLik deviance df.resid
## 40681.1 40814.4 -20324.5 40649.1 30767
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2573 -0.4627 0.0009 0.4586 4.5761
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 0.33502 0.57881
## I(scale(age^2)) 0.02626 0.16205 0.34
## w4c4p_4psu:w4c4p_4str (Intercept) 0.01402 0.11842
## w4c4p_4str (Intercept) 0.00399 0.06317
## Residual 0.06077 0.24651
## Number of obs: 30783, groups:
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.052e-02 1.673e-02 1.130e+02 4.214 5.06e-05 ***
## scale(age) 1.447e+00 3.278e-02 1.780e+04 44.143 < 2e-16 ***
## male -3.938e-02 1.167e-02 9.532e+03 -3.374 0.000743 ***
## black -2.003e-01 2.188e-02 7.491e+03 -9.153 < 2e-16 ***
## hisp -1.575e-01 1.740e-02 6.413e+03 -9.053 < 2e-16 ***
## asian 2.053e-01 2.583e-02 8.795e+03 7.950 2.22e-15 ***
## nahn -1.482e-01 5.698e-02 8.670e+03 -2.601 0.009309 **
## other 2.171e-02 2.844e-02 9.945e+03 0.763 0.445290
## ses 2.020e-01 6.704e-03 1.581e+04 30.135 < 2e-16 ***
## I(scale(age^2)) -5.906e-01 3.288e-02 1.771e+04 -17.963 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(g) male black hisp asian nahn other ses
## scale(age) -0.015
## male -0.355 -0.016
## black -0.261 0.002 0.007
## hisp -0.366 0.005 0.002 0.285
## asian -0.211 0.012 0.026 0.151 0.221
## nahn -0.094 -0.005 0.015 0.063 0.105 0.065
## other -0.160 0.003 0.004 0.133 0.168 0.121 0.064
## ses -0.153 -0.027 0.003 0.169 0.295 -0.022 0.044 0.019
## I(scl(g^2)) 0.020 -0.997 0.016 0.000 -0.002 -0.013 0.006 -0.003 0.035
anova(fit.4, fit.3)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## ..1: other + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## object: other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) +
## object: (1 | w4c4p_4str/w4c4p_4psu)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 15 40893 41018 -20431 40863
## object 16 40681 40814 -20324 40649 213.74 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#individual trajectory model with fixed effects for race and different population
#trajectories for each race
fit.5<-lmer(scale(math)~scale(age)*(male+black+hisp+asian+nahn+other)+ses+(scale(age)|childid)+(1|w4c4p_4str/w4c4p_4psu),weights=w4c4p_40/mean(w4c4p_40), data=e.long[e.long$w4c4p_40>0,], REML=F)
summary(fit.5)
## Linear mixed model fit by maximum likelihood t-tests use Satterthwaite
## approximations to degrees of freedom [lmerMod]
## Formula: scale(math) ~ scale(age) * (male + black + hisp + asian + nahn +
## other) + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Weights: w4c4p_40/mean(w4c4p_40)
##
## AIC BIC logLik deviance df.resid
## 40744.5 40919.6 -20351.3 40702.5 30762
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1255 -0.4746 -0.0059 0.4629 4.6324
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 0.329576 0.57409
## scale(age) 0.024896 0.15779 0.46
## w4c4p_4psu:w4c4p_4str (Intercept) 0.014036 0.11848
## w4c4p_4str (Intercept) 0.004213 0.06491
## Residual 0.063801 0.25259
## Number of obs: 30783, groups:
## childid, 10316; w4c4p_4psu:w4c4p_4str, 232; w4c4p_4str, 69
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 8.813e-02 1.689e-02 1.150e+02 5.218 8.08e-07 ***
## scale(age) 8.673e-01 4.078e-03 1.084e+04 212.666 < 2e-16 ***
## male -2.635e-02 1.192e-02 1.004e+04 -2.211 0.027069 *
## black -2.618e-01 2.227e-02 8.330e+03 -11.756 < 2e-16 ***
## hisp -1.803e-01 1.771e-02 7.348e+03 -10.176 < 2e-16 ***
## asian 1.880e-01 2.623e-02 9.508e+03 7.165 8.36e-13 ***
## nahn -1.555e-01 5.826e-02 9.513e+03 -2.669 0.007615 **
## other -1.757e-03 2.897e-02 1.036e+04 -0.061 0.951630
## ses 1.982e-01 6.655e-03 1.606e+04 29.783 < 2e-16 ***
## scale(age):male 1.563e-02 4.713e-03 1.056e+04 3.317 0.000914 ***
## scale(age):black -9.066e-02 7.493e-03 9.230e+03 -12.098 < 2e-16 ***
## scale(age):hisp -2.296e-02 5.803e-03 1.048e+04 -3.956 7.68e-05 ***
## scale(age):asian -2.164e-02 1.091e-02 1.630e+04 -1.983 0.047381 *
## scale(age):nahn -1.496e-02 1.988e-02 6.800e+03 -0.752 0.451875
## scale(age):other -3.745e-02 1.172e-02 1.187e+04 -3.196 0.001395 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) scl(g) male black hisp asian nahn other ses
## scale(age) 0.180
## male -0.361 -0.183
## black -0.263 -0.087 0.007
## hisp -0.366 -0.108 0.003 0.287
## asian -0.210 -0.079 0.027 0.150 0.216
## nahn -0.095 -0.035 0.015 0.064 0.105 0.064
## other -0.161 -0.067 0.005 0.132 0.168 0.119 0.063
## ses -0.158 0.006 0.002 0.181 0.307 -0.019 0.050 0.022
## scale(g):ml -0.112 -0.601 0.296 0.002 0.002 0.009 0.005 0.003 -0.001
## scl(g):blck -0.074 -0.352 0.003 0.291 0.082 0.038 0.022 0.038 0.074
## scal(g):hsp -0.094 -0.451 0.003 0.080 0.288 0.048 0.028 0.048 0.100
## scale(g):sn -0.045 -0.255 0.007 0.035 0.043 0.261 0.013 0.025 0.010
## scal(g):nhn -0.030 -0.141 0.006 0.023 0.032 0.015 0.314 0.014 0.031
## scal(g):thr -0.041 -0.224 0.003 0.032 0.041 0.024 0.013 0.280 0.015
## scl(g):m scl(g):b scl(g):h scl(g):s scl(g):n
## scale(age)
## male
## black
## hisp
## asian
## nahn
## other
## ses
## scale(g):ml
## scl(g):blck 0.009
## scal(g):hsp 0.004 0.252
## scale(g):sn 0.027 0.131 0.169
## scal(g):nhn 0.018 0.074 0.095 0.050
## scal(g):thr 0.003 0.122 0.158 0.083 0.046
anova(fit.5, fit.4)
## Data: e.long[e.long$w4c4p_40 > 0, ]
## Models:
## ..1: scale(math) ~ scale(age) + male + black + hisp + asian + nahn +
## ..1: other + ses + I(scale(age^2)) + (I(scale(age^2)) | childid) +
## ..1: (1 | w4c4p_4str/w4c4p_4psu)
## object: scale(math) ~ scale(age) * (male + black + hisp + asian + nahn +
## object: other) + ses + (scale(age) | childid) + (1 | w4c4p_4str/w4c4p_4psu)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## ..1 16 40681 40814 -20324 40649
## object 21 40745 40920 -20351 40703 0 5 1
#Compare models using AIC
AIC(fit.1)
## [1] 66268.54
AIC(fit.2)
## [1] 43799.32
AIC(fit.3)
## [1] 40892.79
AIC(fit.4) #Best Fit
## [1] 40681.05
AIC(fit.5)
## [1] 40744.54
Here we use GEE’s instead of GLMMs.
library(geepack)
e.long.comp<-e.long[complete.cases(e.long[, c("black", "age", "health", "w1p0str")]),]
#basic linear model
fit.1b<-geeglm(I(health>2)~age+male+black+hisp+asian+nahn+other+ses, waves = wave,id=childid , family=binomial, data=e.long.comp, weights=w4c4p_40/mean(w4c4p_40))
## Warning: non-integer #successes in a binomial glm!
summary(fit.1b)
##
## Call:
## geeglm(formula = I(health > 2) ~ age + male + black + hisp +
## asian + nahn + other + ses, family = binomial, data = e.long.comp,
## weights = w4c4p_40/mean(w4c4p_40), id = childid, waves = wave)
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.17084 0.20735 109.610 < 2e-16 ***
## age -0.01942 0.03102 0.392 0.53131
## male 0.15487 0.05719 7.334 0.00677 **
## black 0.39578 0.09007 19.309 1.11e-05 ***
## hisp 0.54363 0.07257 56.111 6.85e-14 ***
## asian 0.73280 0.11578 40.056 2.47e-10 ***
## nahn 0.18050 0.20574 0.770 0.38031
## other 0.10770 0.14020 0.590 0.44237
## ses -0.57995 0.03821 230.340 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 1.008 0.07269
##
## Correlation: Structure = independenceNumber of clusters: 12975 Maximum cluster size: 3
fit.2b<-geeglm(I(health>2)~age+male+black+hisp+asian+nahn+other+ses, waves = wave,id=childid, family=binomial, data=e.long.comp, corstr="ar1", weights=w4c4p_40/mean(w4c4p_40))
## Warning: non-integer #successes in a binomial glm!
summary(fit.2b)
##
## Call:
## geeglm(formula = I(health > 2) ~ age + male + black + hisp +
## asian + nahn + other + ses, family = binomial, data = e.long.comp,
## weights = w4c4p_40/mean(w4c4p_40), id = childid, waves = wave,
## corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.0342 0.1947 109.14 < 2e-16 ***
## age -0.0459 0.0289 2.52 0.1125
## male 0.1732 0.0571 9.19 0.0024 **
## black 0.4307 0.0898 23.03 1.6e-06 ***
## hisp 0.5713 0.0721 62.84 2.2e-15 ***
## asian 0.7802 0.1149 46.10 1.1e-11 ***
## nahn 0.2182 0.2101 1.08 0.2991
## other 0.1439 0.1420 1.03 0.3107
## ses -0.5655 0.0379 222.50 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Estimated Scale Parameters:
## Estimate Std.err
## (Intercept) 1.01 0.0744
##
## Correlation: Structure = ar1 Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.423 0.0373
## Number of clusters: 12975 Maximum cluster size: 3