In this example, we will use hierarchical models to do some longitudinal modeling of data from the ECLS-K. Specifically, we will model changes in a student’s standardized math score from kindergarten to 8th grade.
First we load our data
load("~/Google Drive/dem7903_App_Hier/data/eclsk.Rdata")
names(eclsk)<-tolower(names(eclsk))
library (car)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lattice)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
#get out only the variables I'm going to use for this example
myvars<-c( "childid","gender", "race", "r1_kage","r4age", "r5age", "r6age", "r7age","c1r4mtsc", "c4r4mtsc", "c5r4mtsc", "c6r4mtsc", "c7r4mtsc", "w1povrty","w1povrty","w3povrty", "w5povrty", "w8povrty","wkmomed", "s2_id")
eclsk<-eclsk[,myvars]
#recode outcome, math score at each of the 4 waves
eclsk$math1<-ifelse(eclsk$c1r4mtsc==-9,NA, eclsk$c1r4mtsc)
eclsk$math2<-ifelse(eclsk$c4r4mtsc==-9,NA, eclsk$c4r4mtsc)
eclsk$math3<-ifelse(eclsk$c5r4mtsc==-9,NA, eclsk$c5r4mtsc)
eclsk$math4<-ifelse(eclsk$c6r4mtsc==-9,NA, eclsk$c6r4mtsc)
eclsk$math5<-ifelse(eclsk$c7r4mtsc==-9,NA, eclsk$c7r4mtsc)
eclsk$age1<-ifelse(eclsk$r1_kage==-9, NA, eclsk$r1_kage/12)
eclsk$age2<-ifelse(eclsk$r4age==-9, NA, eclsk$r4age/12)
#for the later waves, the NCES group the ages into ranges of months, so 1= <105 months, 2=105 to 108 months. So, I fix the age at the midpoint of the interval they give, and make it into years by dividing by 12
eclsk$age3<-recode(eclsk$r5age,recodes="1=105; 2=107; 3=109; 4=112; 5=115; 6=117; -9=NA")/12
eclsk$age4<-recode(eclsk$r6age,recodes="1=118; 2=129; 3=135; 4=141; 5=155; -9=NA")/12
eclsk$age5<-recode(eclsk$r7age,recodes="1=155; 2=166; 3=172; 4=178; 5=192; -9=NA")/12
eclsk$pov1<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov2<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov3<-ifelse(eclsk$w3povrty==1,1,0)
eclsk$pov4<-ifelse(eclsk$w5povrty==1,1,0)
eclsk$pov5<-ifelse(eclsk$w5povrty==1,1,0)
#Recode race with white, non Hispanic as reference using dummy vars
eclsk$hisp<-recode (eclsk$race, recodes="3:4=1;-9=NA; else=0")
eclsk$black<-recode (eclsk$race, recodes="2=1;-9=NA; else=0")
eclsk$asian<-recode (eclsk$race, recodes="5=1;-9=NA; else=0")
eclsk$nahn<-recode (eclsk$race, recodes="6:7=1;-9=NA; else=0")
eclsk$other<-recode (eclsk$race, recodes="8=1;-9=NA; else=0")
eclsk$male<-recode(eclsk$gender, recodes="1=1; 2=0; -9=NA")
eclsk$mlths<-recode(eclsk$wkmomed, recodes = "1:2=1; 3:9=0; else = NA")
eclsk$mgths<-recode(eclsk$wkmomed, recodes = "1:3=0; 4:9=1; else =NA")
To analyze data longitudinally, we need to reshape the data from the current “wide” format (repeated measures in columns) to a “long” format (repeated observations in rows). The reshape() function allows us to do this easily. It allows us to specify our repeated measures, time varying covariates as well as time-constant covariates.
This is just a little example using the first 20 kids, just to illustrate this process.
test<-eclsk[1:20,]
#look at the data in "wide" format
head(test)
## childid gender race r1_kage r4age r5age r6age r7age c1r4mtsc c4r4mtsc
## 1 0001001C 2 1 69.07 87.07 NA NA NA 64.73 65.21
## 2 0001002C 2 1 77.20 94.73 6 4 4 68.32 58.74
## 3 0001003C 2 1 64.10 NA NA NA NA 55.17 NA
## 4 0001004C 1 1 65.93 83.93 2 NA NA 53.06 59.85
## 5 0001005C 2 1 74.53 93.07 5 NA NA 65.75 53.72
## 6 0001006C 1 1 63.00 81.00 1 NA NA 49.38 42.68
## c5r4mtsc c6r4mtsc c7r4mtsc w1povrty w1povrty.1 w3povrty w5povrty
## 1 NA NA NA 2 2 NA NA
## 2 60.39 62.59 66.48 2 2 2 2
## 3 NA NA NA 2 2 2 NA
## 4 56.42 NA NA 2 2 2 NA
## 5 53.17 NA NA 2 2 NA NA
## 6 55.86 NA NA 2 2 2 NA
## w8povrty wkmomed s2_id math1 math2 math3 math4 math5 age1 age2 age3
## 1 NA 6 0001 64.73 65.21 NA NA NA 5.756 7.256 NA
## 2 2 3 0001 68.32 58.74 60.39 62.59 66.48 6.433 7.894 9.750
## 3 NA 9 0001 55.17 NA NA NA NA 5.342 NA NA
## 4 NA 6 0001 53.06 59.85 56.42 NA NA 5.494 6.994 8.917
## 5 NA 6 0001 65.75 53.72 53.17 NA NA 6.211 7.756 9.583
## 6 NA 8 0001 49.38 42.68 55.86 NA NA 5.250 6.750 8.750
## age4 age5 pov1 pov2 pov3 pov4 pov5 hisp black asian nahn other male
## 1 NA NA 0 0 NA NA NA 0 0 0 0 0 0
## 2 11.75 14.83 0 0 0 0 0 0 0 0 0 0 0
## 3 NA NA 0 0 0 NA NA 0 0 0 0 0 0
## 4 NA NA 0 0 0 NA NA 0 0 0 0 0 1
## 5 NA NA 0 0 NA NA NA 0 0 0 0 0 0
## 6 NA NA 0 0 0 NA NA 0 0 0 0 0 1
## mlths mgths
## 1 0 1
## 2 0 0
## 3 0 1
## 4 0 1
## 5 0 1
## 6 0 1
e.longt<-reshape(test, idvar="childid", varying=list(math = c("math1", "math2", "math3", "math4","math5"),
age = c("age1", "age2", "age3", "age4", "age5"),
pov= c("pov1", "pov2", "pov3", "pov4", "pov5")),
times=1:5,direction="long",
drop = names(test)[3:15])
#here we look at the new data set, in the "long" format
head(e.longt[order(e.longt$childid, e.longt$time),], n=25)
## childid gender w3povrty w5povrty w8povrty wkmomed s2_id hisp
## 0001001C.1 0001001C 2 NA NA NA 6 0001 0
## 0001001C.2 0001001C 2 NA NA NA 6 0001 0
## 0001001C.3 0001001C 2 NA NA NA 6 0001 0
## 0001001C.4 0001001C 2 NA NA NA 6 0001 0
## 0001001C.5 0001001C 2 NA NA NA 6 0001 0
## 0001002C.1 0001002C 2 2 2 2 3 0001 0
## 0001002C.2 0001002C 2 2 2 2 3 0001 0
## 0001002C.3 0001002C 2 2 2 2 3 0001 0
## 0001002C.4 0001002C 2 2 2 2 3 0001 0
## 0001002C.5 0001002C 2 2 2 2 3 0001 0
## 0001003C.1 0001003C 2 2 NA NA 9 0001 0
## 0001003C.2 0001003C 2 2 NA NA 9 0001 0
## 0001003C.3 0001003C 2 2 NA NA 9 0001 0
## 0001003C.4 0001003C 2 2 NA NA 9 0001 0
## 0001003C.5 0001003C 2 2 NA NA 9 0001 0
## 0001004C.1 0001004C 1 2 NA NA 6 0001 0
## 0001004C.2 0001004C 1 2 NA NA 6 0001 0
## 0001004C.3 0001004C 1 2 NA NA 6 0001 0
## 0001004C.4 0001004C 1 2 NA NA 6 0001 0
## 0001004C.5 0001004C 1 2 NA NA 6 0001 0
## 0001005C.1 0001005C 2 NA NA NA 6 0001 0
## 0001005C.2 0001005C 2 NA NA NA 6 0001 0
## 0001005C.3 0001005C 2 NA NA NA 6 0001 0
## 0001005C.4 0001005C 2 NA NA NA 6 0001 0
## 0001005C.5 0001005C 2 NA NA NA 6 0001 0
## black asian nahn other male mlths mgths time math1 age1 pov1
## 0001001C.1 0 0 0 0 0 0 1 1 64.73 5.756 0
## 0001001C.2 0 0 0 0 0 0 1 2 65.21 7.256 0
## 0001001C.3 0 0 0 0 0 0 1 3 NA NA NA
## 0001001C.4 0 0 0 0 0 0 1 4 NA NA NA
## 0001001C.5 0 0 0 0 0 0 1 5 NA NA NA
## 0001002C.1 0 0 0 0 0 0 0 1 68.32 6.433 0
## 0001002C.2 0 0 0 0 0 0 0 2 58.74 7.894 0
## 0001002C.3 0 0 0 0 0 0 0 3 60.39 9.750 0
## 0001002C.4 0 0 0 0 0 0 0 4 62.59 11.750 0
## 0001002C.5 0 0 0 0 0 0 0 5 66.48 14.833 0
## 0001003C.1 0 0 0 0 0 0 1 1 55.17 5.342 0
## 0001003C.2 0 0 0 0 0 0 1 2 NA NA 0
## 0001003C.3 0 0 0 0 0 0 1 3 NA NA 0
## 0001003C.4 0 0 0 0 0 0 1 4 NA NA NA
## 0001003C.5 0 0 0 0 0 0 1 5 NA NA NA
## 0001004C.1 0 0 0 0 1 0 1 1 53.06 5.494 0
## 0001004C.2 0 0 0 0 1 0 1 2 59.85 6.994 0
## 0001004C.3 0 0 0 0 1 0 1 3 56.42 8.917 0
## 0001004C.4 0 0 0 0 1 0 1 4 NA NA NA
## 0001004C.5 0 0 0 0 1 0 1 5 NA NA NA
## 0001005C.1 0 0 0 0 0 0 1 1 65.75 6.211 0
## 0001005C.2 0 0 0 0 0 0 1 2 53.72 7.756 0
## 0001005C.3 0 0 0 0 0 0 1 3 53.17 9.583 NA
## 0001005C.4 0 0 0 0 0 0 1 4 NA NA NA
## 0001005C.5 0 0 0 0 0 0 1 5 NA NA NA
We can plot the children’s scores over time to see trends within child. Some kids are missing at all times, others don’t have complete data for all times, but you can see the trends within child.
xyplot(math1~age1|childid, data=e.longt,
panel=function(x,y){
panel.xyplot(x,y)
panel.lmline(x,y,)})
Now we do the entire data set
e.long<-reshape(eclsk, idvar="childid", varying=list(math = c("math1", "math2", "math3", "math4","math5"),
age = c("age1", "age2", "age3", "age4", "age5"),
pov= c("pov1", "pov2", "pov3", "pov4", "pov5")),
times=1:5,direction="long",
drop = names(test)[3:15])
e.long<-e.long[order(e.long$childid, e.long$time),]
head(e.long, n=20)
## childid gender w3povrty w5povrty w8povrty wkmomed s2_id hisp
## 0001001C.1 0001001C 2 NA NA NA 6 0001 0
## 0001001C.2 0001001C 2 NA NA NA 6 0001 0
## 0001001C.3 0001001C 2 NA NA NA 6 0001 0
## 0001001C.4 0001001C 2 NA NA NA 6 0001 0
## 0001001C.5 0001001C 2 NA NA NA 6 0001 0
## 0001002C.1 0001002C 2 2 2 2 3 0001 0
## 0001002C.2 0001002C 2 2 2 2 3 0001 0
## 0001002C.3 0001002C 2 2 2 2 3 0001 0
## 0001002C.4 0001002C 2 2 2 2 3 0001 0
## 0001002C.5 0001002C 2 2 2 2 3 0001 0
## 0001003C.1 0001003C 2 2 NA NA 9 0001 0
## 0001003C.2 0001003C 2 2 NA NA 9 0001 0
## 0001003C.3 0001003C 2 2 NA NA 9 0001 0
## 0001003C.4 0001003C 2 2 NA NA 9 0001 0
## 0001003C.5 0001003C 2 2 NA NA 9 0001 0
## 0001004C.1 0001004C 1 2 NA NA 6 0001 0
## 0001004C.2 0001004C 1 2 NA NA 6 0001 0
## 0001004C.3 0001004C 1 2 NA NA 6 0001 0
## 0001004C.4 0001004C 1 2 NA NA 6 0001 0
## 0001004C.5 0001004C 1 2 NA NA 6 0001 0
## black asian nahn other male mlths mgths time math1 age1 pov1
## 0001001C.1 0 0 0 0 0 0 1 1 64.73 5.756 0
## 0001001C.2 0 0 0 0 0 0 1 2 65.21 7.256 0
## 0001001C.3 0 0 0 0 0 0 1 3 NA NA NA
## 0001001C.4 0 0 0 0 0 0 1 4 NA NA NA
## 0001001C.5 0 0 0 0 0 0 1 5 NA NA NA
## 0001002C.1 0 0 0 0 0 0 0 1 68.32 6.433 0
## 0001002C.2 0 0 0 0 0 0 0 2 58.74 7.894 0
## 0001002C.3 0 0 0 0 0 0 0 3 60.39 9.750 0
## 0001002C.4 0 0 0 0 0 0 0 4 62.59 11.750 0
## 0001002C.5 0 0 0 0 0 0 0 5 66.48 14.833 0
## 0001003C.1 0 0 0 0 0 0 1 1 55.17 5.342 0
## 0001003C.2 0 0 0 0 0 0 1 2 NA NA 0
## 0001003C.3 0 0 0 0 0 0 1 3 NA NA 0
## 0001003C.4 0 0 0 0 0 0 1 4 NA NA NA
## 0001003C.5 0 0 0 0 0 0 1 5 NA NA NA
## 0001004C.1 0 0 0 0 1 0 1 1 53.06 5.494 0
## 0001004C.2 0 0 0 0 1 0 1 2 59.85 6.994 0
## 0001004C.3 0 0 0 0 1 0 1 3 56.42 8.917 0
## 0001004C.4 0 0 0 0 1 0 1 4 NA NA NA
## 0001004C.5 0 0 0 0 1 0 1 5 NA NA NA
Now we fit models to the longitudinal data. We start simple then move into growth curve models. This follows the presentation in Singer and Willett (2003) Chapters 3-6.
#basic linear model
fit.1<-glm(math1~time+male+black+hisp+asian+nahn+other, data=e.long)
summary(fit.1)
##
## Call:
## glm(formula = math1 ~ time + male + black + hisp + asian + nahn +
## other, data = e.long)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -53.72 -5.69 0.57 6.43 44.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.4641 0.0982 524.3 <2e-16 ***
## time 0.4396 0.0277 15.8 <2e-16 ***
## male 0.8208 0.0759 10.8 <2e-16 ***
## black -7.5194 0.1161 -64.8 <2e-16 ***
## hisp -5.9896 0.1029 -58.2 <2e-16 ***
## asian -4.1116 0.1577 -26.1 <2e-16 ***
## nahn -7.6179 0.2247 -33.9 <2e-16 ***
## other -2.1624 0.2429 -8.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 101.5)
##
## Null deviance: 7917813 on 70547 degrees of freedom
## Residual deviance: 7162995 on 70540 degrees of freedom
## (36497 observations deleted due to missingness)
## AIC: 526184
##
## Number of Fisher Scoring iterations: 2
#random intercept model for individual student differences
fit.2<-lmer(math1~time+male+black+hisp+asian+nahn+other+(1|childid), data=e.long)
summary(fit.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math1 ~ time + male + black + hisp + asian + nahn + other + (1 |
## childid)
## Data: e.long
##
## REML criterion at convergence: 492254
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.140 -0.447 0.016 0.485 6.190
##
## Random effects:
## Groups Name Variance Std.Dev.
## childid (Intercept) 71.2 8.44
## Residual 35.3 5.94
## Number of obs: 70548, groups: childid, 20724
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 51.7552 0.1166 444
## time 0.2437 0.0179 14
## male 0.5864 0.1279 5
## black -7.1026 0.1878 -38
## hisp -6.0797 0.1739 -35
## asian -5.1231 0.2668 -19
## nahn -7.5625 0.3855 -20
## other -2.3584 0.4085 -6
##
## Correlation of Fixed Effects:
## (Intr) time male black hisp asian nahn
## time -0.375
## male -0.565 0.002
## black -0.350 0.024 0.009
## hisp -0.367 0.004 0.005 0.225
## asian -0.242 0.003 0.007 0.147 0.159
## nahn -0.165 -0.001 0.003 0.102 0.110 0.072
## other -0.154 0.005 -0.003 0.096 0.104 0.068 0.047
#individual trajectory model with random slope for time
fit.3<-lmer(math1~time+male+black+hisp+asian+nahn+other+(time|childid), data=e.long)
summary(fit.3)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## math1 ~ time + male + black + hisp + asian + nahn + other + (time |
## childid)
## Data: e.long
##
## REML criterion at convergence: 487615
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.554 -0.423 0.011 0.446 6.140
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 129.25 11.4
## time 5.27 2.3 -0.70
## Residual 24.99 5.0
## Number of obs: 70548, groups: childid, 20724
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 51.4133 0.1254 410
## time 0.3099 0.0239 13
## male 0.8061 0.1259 6
## black -7.3892 0.1863 -40
## hisp -5.8155 0.1709 -34
## asian -3.3523 0.2620 -13
## nahn -7.3589 0.3778 -19
## other -2.2312 0.4024 -6
##
## Correlation of Fixed Effects:
## (Intr) time male black hisp asian nahn
## time -0.532
## male -0.517 0.003
## black -0.325 0.032 0.008
## hisp -0.335 0.003 0.004 0.223
## asian -0.221 0.002 0.008 0.145 0.159
## nahn -0.150 -0.002 0.003 0.101 0.110 0.072
## other -0.142 0.004 -0.002 0.095 0.103 0.067 0.047
anova(fit.3, fit.2)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.2: math1 ~ time + male + black + hisp + asian + nahn + other + (1 |
## fit.2: childid)
## fit.3: math1 ~ time + male + black + hisp + asian + nahn + other + (time |
## fit.3: childid)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.2 10 492257 492349 -246119 492237
## fit.3 12 487623 487733 -243800 487599 4638 2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#curvilinear trajectory model with random nonlinear time
fit.4<-lmer(math1~time+male+black+hisp+asian+nahn+other+I(time^2)+(time+I(time^2)|childid), data=e.long)
## Warning: Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
summary(fit.4)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) +
## (time + I(time^2) | childid)
## Data: e.long
##
## REML criterion at convergence: 482316
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.233 -0.404 0.013 0.419 5.577
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 289.65 17.02
## time 82.90 9.11 -0.86
## I(time^2) 1.46 1.21 0.83 -0.99
## Residual 18.44 4.29
## Number of obs: 70548, groups: childid, 20724
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 50.4950 0.1676 301.3
## time 1.2297 0.0924 13.3
## male 0.7636 0.1261 6.1
## black -7.4019 0.1863 -39.7
## hisp -5.8262 0.1714 -34.0
## asian -3.5118 0.2626 -13.4
## nahn -7.3835 0.3783 -19.5
## other -2.2827 0.4023 -5.7
## I(time^2) -0.1684 0.0140 -12.1
##
## Correlation of Fixed Effects:
## (Intr) time male black hisp asian nahn other
## time -0.752
## male -0.387 -0.001
## black -0.243 0.009 0.009
## hisp -0.252 0.002 0.004 0.223
## asian -0.164 -0.002 0.008 0.146 0.158
## nahn -0.112 -0.001 0.002 0.101 0.110 0.072
## other -0.105 -0.001 -0.002 0.095 0.103 0.067 0.047
## I(time^2) 0.694 -0.975 0.002 -0.004 -0.001 0.003 0.001 0.002
anova(fit.4, fit.3)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.3: math1 ~ time + male + black + hisp + asian + nahn + other + (time |
## fit.3: childid)
## fit.4: math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) +
## fit.4: (time + I(time^2) | childid)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.3 12 487623 487733 -243800 487599
## fit.4 16 482325 482472 -241147 482293 5306 4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#individual trajectory model with fixed effects for race and different population
#trajectories for each race
fit.5<-lmer(math1~time*(male+black+hisp+asian+nahn+other)+(time|childid), data=e.long)
summary(fit.5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math1 ~ time * (male + black + hisp + asian + nahn + other) +
## (time | childid)
## Data: e.long
##
## REML criterion at convergence: 485341
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.251 -0.436 0.009 0.455 6.011
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## childid (Intercept) 120.31 10.97
## time 4.14 2.03 -0.67
## Residual 24.88 4.99
## Number of obs: 70548, groups: childid, 20724
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 53.0607 0.1488 357
## time -0.2915 0.0373 -8
## male -0.5919 0.1763 -3
## black -5.6565 0.2598 -22
## hisp -7.7903 0.2395 -33
## asian -14.7664 0.3681 -40
## nahn -9.1958 0.5295 -17
## other -3.0150 0.5620 -5
## time:male 0.5138 0.0447 11
## time:black -0.7138 0.0700 -10
## time:hisp 0.7118 0.0603 12
## time:asian 4.1164 0.0929 44
## time:nahn 0.6574 0.1307 5
## time:other 0.2837 0.1438 2
##
## Correlation of Fixed Effects:
## (Intr) time male black hisp asian nahn other tim:ml
## time -0.703
## male -0.608 0.431
## black -0.366 0.256 0.008
## hisp -0.395 0.276 0.005 0.224
## asian -0.259 0.182 0.007 0.146 0.158
## nahn -0.180 0.126 0.004 0.101 0.110 0.072
## other -0.165 0.116 -0.003 0.096 0.104 0.067 0.047
## time:male 0.426 -0.611 -0.703 -0.005 -0.003 -0.006 -0.003 0.001
## time:black 0.238 -0.337 -0.004 -0.700 -0.146 -0.095 -0.066 -0.062 0.005
## time:hisp 0.275 -0.389 -0.003 -0.157 -0.703 -0.111 -0.077 -0.072 0.003
## time:asian 0.181 -0.258 -0.006 -0.102 -0.110 -0.705 -0.050 -0.047 0.012
## time:nahn 0.128 -0.181 -0.003 -0.072 -0.078 -0.051 -0.703 -0.033 0.004
## time:other 0.114 -0.163 0.001 -0.066 -0.071 -0.046 -0.032 -0.700 0.002
## tm:blc tm:hsp tim:sn tm:nhn
## time
## male
## black
## hisp
## asian
## nahn
## other
## time:male
## time:black
## time:hisp 0.206
## time:asian 0.134 0.156
## time:nahn 0.095 0.111 0.072
## time:other 0.086 0.101 0.065 0.046
anova(fit.4, fit.5)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.4: math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) +
## fit.4: (time + I(time^2) | childid)
## fit.5: math1 ~ time * (male + black + hisp + asian + nahn + other) +
## fit.5: (time | childid)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.4 16 482325 482472 -241147 482293
## fit.5 18 485342 485507 -242653 485306 0 2 1
AIC(fit.1)
## [1] 526184
AIC(fit.2)
## [1] 492273
AIC(fit.3)
## [1] 487639
AIC(fit.4) #Best Fit
## [1] 482348
AIC(fit.5)
## [1] 485377
Often, we may be interested in modeling the correlation among students over time. These models typically assume some form of structure to the covariance matrix of either the residuals or the random effects themselves. R’s basic functions won’t fit correlated random effect models, but we CAN do these in BUGS/JAGS (next time)
If we want to model correlations among the residuals for individuals over time, we can use the functions in the nlme library, but only if your outcome is continuous. NO GLMMs ALLOWED!
library(nlme)
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:lme4':
##
## lmList
#here I fit the AR(1) covariance structure and compare it to model 3 from above
fit.6<-lme(math1~time+male+black+hisp+asian+nahn+other, random=~time|childid, correlation=corAR1(,form=~time|childid), data=e.long, na.action="na.omit")
summary(fit.6)
## Linear mixed-effects model fit by REML
## Data: e.long
## AIC BIC logLik
## 487245 487364 -243610
##
## Random effects:
## Formula: ~time | childid
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 10.913 (Intr)
## time 2.028 -0.708
## Residual 5.669
##
## Correlation Structure: ARMA(1,0)
## Formula: ~time | childid
## Parameter estimate(s):
## Phi1
## 0.247
## Fixed effects: math1 ~ time + male + black + hisp + asian + nahn + other
## Value Std.Error DF t-value p-value
## (Intercept) 51.38 0.1264 49823 406.5 0
## time 0.32 0.0240 49823 13.3 0
## male 0.79 0.1259 20717 6.3 0
## black -7.38 0.1867 20717 -39.5 0
## hisp -5.82 0.1707 20717 -34.1 0
## asian -3.41 0.2620 20717 -13.0 0
## nahn -7.36 0.3773 20717 -19.5 0
## other -2.20 0.4027 20717 -5.5 0
## Correlation:
## (Intr) time male black hisp asian nahn
## time -0.543
## male -0.513 0.003
## black -0.323 0.035 0.008
## hisp -0.333 0.005 0.004 0.222
## asian -0.220 0.003 0.008 0.145 0.158
## nahn -0.149 -0.002 0.003 0.100 0.110 0.072
## other -0.141 0.005 -0.002 0.094 0.103 0.067 0.047
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -6.58701 -0.40326 0.02063 0.44827 5.46964
##
## Number of Observations: 70548
## Number of Groups: 20724
fit.3l<-lme(math1~time+male+black+hisp+asian+nahn+other, random=~time|childid, data=e.long, na.action="na.omit")
summary(fit.3l)
## Linear mixed-effects model fit by REML
## Data: e.long
## AIC BIC logLik
## 487639 487749 -243807
##
## Random effects:
## Formula: ~time | childid
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 11.369 (Intr)
## time 2.297 -0.699
## Residual 4.999
##
## Fixed effects: math1 ~ time + male + black + hisp + asian + nahn + other
## Value Std.Error DF t-value p-value
## (Intercept) 51.41 0.1254 49823 409.9 0
## time 0.31 0.0239 49823 12.9 0
## male 0.81 0.1259 20717 6.4 0
## black -7.39 0.1863 20717 -39.7 0
## hisp -5.82 0.1709 20717 -34.0 0
## asian -3.35 0.2620 20717 -12.8 0
## nahn -7.36 0.3778 20717 -19.5 0
## other -2.23 0.4024 20717 -5.5 0
## Correlation:
## (Intr) time male black hisp asian nahn
## time -0.532
## male -0.517 0.003
## black -0.325 0.032 0.008
## hisp -0.335 0.003 0.004 0.223
## asian -0.221 0.002 0.008 0.145 0.159
## nahn -0.150 -0.002 0.003 0.101 0.110 0.072
## other -0.142 0.004 -0.002 0.095 0.103 0.067 0.047
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -7.55435 -0.42300 0.01061 0.44565 6.14023
##
## Number of Observations: 70548
## Number of Groups: 20724
anova(fit.6, fit.3l)
## Model df AIC BIC logLik Test L.Ratio p-value
## fit.6 1 13 487245 487364 -243610
## fit.3l 2 12 487639 487749 -243807 1 vs 2 395.8 <.0001