In this example, we will use hierarchical models to do some longitudinal modeling of data from the ECLS-K. Specifically, we will model changes in a student’s standardized math score from kindergarten to 8th grade.

First we load our data

load("~/Google Drive/dem7903_App_Hier/data/eclsk.Rdata")
names(eclsk)<-tolower(names(eclsk))
library (car)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lattice)
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
#get out only the variables I'm going to use for this example
myvars<-c( "childid","gender", "race", "r1_kage","r4age", "r5age", "r6age", "r7age","c1r4mtsc", "c4r4mtsc", "c5r4mtsc", "c6r4mtsc", "c7r4mtsc", "w1povrty","w1povrty","w3povrty", "w5povrty", "w8povrty","wkmomed", "s2_id")
eclsk<-eclsk[,myvars]

#recode outcome, math score at each of the 4 waves
eclsk$math1<-ifelse(eclsk$c1r4mtsc==-9,NA, eclsk$c1r4mtsc)
eclsk$math2<-ifelse(eclsk$c4r4mtsc==-9,NA, eclsk$c4r4mtsc)
eclsk$math3<-ifelse(eclsk$c5r4mtsc==-9,NA, eclsk$c5r4mtsc)
eclsk$math4<-ifelse(eclsk$c6r4mtsc==-9,NA, eclsk$c6r4mtsc)
eclsk$math5<-ifelse(eclsk$c7r4mtsc==-9,NA, eclsk$c7r4mtsc)

eclsk$age1<-ifelse(eclsk$r1_kage==-9, NA, eclsk$r1_kage/12)
eclsk$age2<-ifelse(eclsk$r4age==-9, NA, eclsk$r4age/12)
#for the later waves, the NCES group the ages into ranges of months, so 1= <105 months, 2=105 to 108 months. So, I fix the age at the midpoint of the interval they give, and make it into years by dividing by 12
eclsk$age3<-recode(eclsk$r5age,recodes="1=105; 2=107; 3=109; 4=112; 5=115; 6=117; -9=NA")/12
eclsk$age4<-recode(eclsk$r6age,recodes="1=118; 2=129; 3=135; 4=141; 5=155; -9=NA")/12
eclsk$age5<-recode(eclsk$r7age,recodes="1=155; 2=166; 3=172; 4=178; 5=192; -9=NA")/12

eclsk$pov1<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov2<-ifelse(eclsk$w1povrty==1,1,0)
eclsk$pov3<-ifelse(eclsk$w3povrty==1,1,0)
eclsk$pov4<-ifelse(eclsk$w5povrty==1,1,0)
eclsk$pov5<-ifelse(eclsk$w5povrty==1,1,0)

#Recode race with white, non Hispanic as reference using dummy vars
eclsk$hisp<-recode (eclsk$race, recodes="3:4=1;-9=NA; else=0")
eclsk$black<-recode (eclsk$race, recodes="2=1;-9=NA; else=0")
eclsk$asian<-recode (eclsk$race, recodes="5=1;-9=NA; else=0")
eclsk$nahn<-recode (eclsk$race, recodes="6:7=1;-9=NA; else=0")
eclsk$other<-recode (eclsk$race, recodes="8=1;-9=NA; else=0")
eclsk$male<-recode(eclsk$gender, recodes="1=1; 2=0; -9=NA")
eclsk$mlths<-recode(eclsk$wkmomed, recodes = "1:2=1; 3:9=0; else = NA")
eclsk$mgths<-recode(eclsk$wkmomed, recodes = "1:3=0; 4:9=1; else =NA") 

To analyze data longitudinally, we need to reshape the data from the current “wide” format (repeated measures in columns) to a “long” format (repeated observations in rows). The reshape() function allows us to do this easily. It allows us to specify our repeated measures, time varying covariates as well as time-constant covariates.

This is just a little example using the first 20 kids, just to illustrate this process.

test<-eclsk[1:20,]
#look at the data in "wide" format
head(test)
##    childid gender race r1_kage r4age r5age r6age r7age c1r4mtsc c4r4mtsc
## 1 0001001C      2    1   69.07 87.07    NA    NA    NA    64.73    65.21
## 2 0001002C      2    1   77.20 94.73     6     4     4    68.32    58.74
## 3 0001003C      2    1   64.10    NA    NA    NA    NA    55.17       NA
## 4 0001004C      1    1   65.93 83.93     2    NA    NA    53.06    59.85
## 5 0001005C      2    1   74.53 93.07     5    NA    NA    65.75    53.72
## 6 0001006C      1    1   63.00 81.00     1    NA    NA    49.38    42.68
##   c5r4mtsc c6r4mtsc c7r4mtsc w1povrty w1povrty.1 w3povrty w5povrty
## 1       NA       NA       NA        2          2       NA       NA
## 2    60.39    62.59    66.48        2          2        2        2
## 3       NA       NA       NA        2          2        2       NA
## 4    56.42       NA       NA        2          2        2       NA
## 5    53.17       NA       NA        2          2       NA       NA
## 6    55.86       NA       NA        2          2        2       NA
##   w8povrty wkmomed s2_id math1 math2 math3 math4 math5  age1  age2  age3
## 1       NA       6  0001 64.73 65.21    NA    NA    NA 5.756 7.256    NA
## 2        2       3  0001 68.32 58.74 60.39 62.59 66.48 6.433 7.894 9.750
## 3       NA       9  0001 55.17    NA    NA    NA    NA 5.342    NA    NA
## 4       NA       6  0001 53.06 59.85 56.42    NA    NA 5.494 6.994 8.917
## 5       NA       6  0001 65.75 53.72 53.17    NA    NA 6.211 7.756 9.583
## 6       NA       8  0001 49.38 42.68 55.86    NA    NA 5.250 6.750 8.750
##    age4  age5 pov1 pov2 pov3 pov4 pov5 hisp black asian nahn other male
## 1    NA    NA    0    0   NA   NA   NA    0     0     0    0     0    0
## 2 11.75 14.83    0    0    0    0    0    0     0     0    0     0    0
## 3    NA    NA    0    0    0   NA   NA    0     0     0    0     0    0
## 4    NA    NA    0    0    0   NA   NA    0     0     0    0     0    1
## 5    NA    NA    0    0   NA   NA   NA    0     0     0    0     0    0
## 6    NA    NA    0    0    0   NA   NA    0     0     0    0     0    1
##   mlths mgths
## 1     0     1
## 2     0     0
## 3     0     1
## 4     0     1
## 5     0     1
## 6     0     1
e.longt<-reshape(test, idvar="childid", varying=list(math = c("math1", "math2", "math3", "math4","math5"),
                                         age = c("age1", "age2", "age3", "age4", "age5"),
                                         pov= c("pov1", "pov2", "pov3", "pov4", "pov5")),
                                         times=1:5,direction="long",  
                                          drop = names(test)[3:15])

#here we look at the new data set, in the "long" format

head(e.longt[order(e.longt$childid, e.longt$time),], n=25)
##             childid gender w3povrty w5povrty w8povrty wkmomed s2_id hisp
## 0001001C.1 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.2 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.3 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.4 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.5 0001001C      2       NA       NA       NA       6  0001    0
## 0001002C.1 0001002C      2        2        2        2       3  0001    0
## 0001002C.2 0001002C      2        2        2        2       3  0001    0
## 0001002C.3 0001002C      2        2        2        2       3  0001    0
## 0001002C.4 0001002C      2        2        2        2       3  0001    0
## 0001002C.5 0001002C      2        2        2        2       3  0001    0
## 0001003C.1 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.2 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.3 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.4 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.5 0001003C      2        2       NA       NA       9  0001    0
## 0001004C.1 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.2 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.3 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.4 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.5 0001004C      1        2       NA       NA       6  0001    0
## 0001005C.1 0001005C      2       NA       NA       NA       6  0001    0
## 0001005C.2 0001005C      2       NA       NA       NA       6  0001    0
## 0001005C.3 0001005C      2       NA       NA       NA       6  0001    0
## 0001005C.4 0001005C      2       NA       NA       NA       6  0001    0
## 0001005C.5 0001005C      2       NA       NA       NA       6  0001    0
##            black asian nahn other male mlths mgths time math1   age1 pov1
## 0001001C.1     0     0    0     0    0     0     1    1 64.73  5.756    0
## 0001001C.2     0     0    0     0    0     0     1    2 65.21  7.256    0
## 0001001C.3     0     0    0     0    0     0     1    3    NA     NA   NA
## 0001001C.4     0     0    0     0    0     0     1    4    NA     NA   NA
## 0001001C.5     0     0    0     0    0     0     1    5    NA     NA   NA
## 0001002C.1     0     0    0     0    0     0     0    1 68.32  6.433    0
## 0001002C.2     0     0    0     0    0     0     0    2 58.74  7.894    0
## 0001002C.3     0     0    0     0    0     0     0    3 60.39  9.750    0
## 0001002C.4     0     0    0     0    0     0     0    4 62.59 11.750    0
## 0001002C.5     0     0    0     0    0     0     0    5 66.48 14.833    0
## 0001003C.1     0     0    0     0    0     0     1    1 55.17  5.342    0
## 0001003C.2     0     0    0     0    0     0     1    2    NA     NA    0
## 0001003C.3     0     0    0     0    0     0     1    3    NA     NA    0
## 0001003C.4     0     0    0     0    0     0     1    4    NA     NA   NA
## 0001003C.5     0     0    0     0    0     0     1    5    NA     NA   NA
## 0001004C.1     0     0    0     0    1     0     1    1 53.06  5.494    0
## 0001004C.2     0     0    0     0    1     0     1    2 59.85  6.994    0
## 0001004C.3     0     0    0     0    1     0     1    3 56.42  8.917    0
## 0001004C.4     0     0    0     0    1     0     1    4    NA     NA   NA
## 0001004C.5     0     0    0     0    1     0     1    5    NA     NA   NA
## 0001005C.1     0     0    0     0    0     0     1    1 65.75  6.211    0
## 0001005C.2     0     0    0     0    0     0     1    2 53.72  7.756    0
## 0001005C.3     0     0    0     0    0     0     1    3 53.17  9.583   NA
## 0001005C.4     0     0    0     0    0     0     1    4    NA     NA   NA
## 0001005C.5     0     0    0     0    0     0     1    5    NA     NA   NA

We can plot the children’s scores over time to see trends within child. Some kids are missing at all times, others don’t have complete data for all times, but you can see the trends within child.

xyplot(math1~age1|childid, data=e.longt,
       panel=function(x,y){
       panel.xyplot(x,y)
     panel.lmline(x,y,)})

plot of chunk unnamed-chunk-3

Now we do the entire data set

e.long<-reshape(eclsk, idvar="childid",  varying=list(math = c("math1", "math2", "math3", "math4","math5"),
                                         age = c("age1", "age2", "age3", "age4", "age5"),
                                         pov= c("pov1", "pov2", "pov3", "pov4", "pov5")),
                                         times=1:5,direction="long",  
                                          drop = names(test)[3:15])
e.long<-e.long[order(e.long$childid, e.long$time),]
head(e.long, n=20)
##             childid gender w3povrty w5povrty w8povrty wkmomed s2_id hisp
## 0001001C.1 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.2 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.3 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.4 0001001C      2       NA       NA       NA       6  0001    0
## 0001001C.5 0001001C      2       NA       NA       NA       6  0001    0
## 0001002C.1 0001002C      2        2        2        2       3  0001    0
## 0001002C.2 0001002C      2        2        2        2       3  0001    0
## 0001002C.3 0001002C      2        2        2        2       3  0001    0
## 0001002C.4 0001002C      2        2        2        2       3  0001    0
## 0001002C.5 0001002C      2        2        2        2       3  0001    0
## 0001003C.1 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.2 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.3 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.4 0001003C      2        2       NA       NA       9  0001    0
## 0001003C.5 0001003C      2        2       NA       NA       9  0001    0
## 0001004C.1 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.2 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.3 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.4 0001004C      1        2       NA       NA       6  0001    0
## 0001004C.5 0001004C      1        2       NA       NA       6  0001    0
##            black asian nahn other male mlths mgths time math1   age1 pov1
## 0001001C.1     0     0    0     0    0     0     1    1 64.73  5.756    0
## 0001001C.2     0     0    0     0    0     0     1    2 65.21  7.256    0
## 0001001C.3     0     0    0     0    0     0     1    3    NA     NA   NA
## 0001001C.4     0     0    0     0    0     0     1    4    NA     NA   NA
## 0001001C.5     0     0    0     0    0     0     1    5    NA     NA   NA
## 0001002C.1     0     0    0     0    0     0     0    1 68.32  6.433    0
## 0001002C.2     0     0    0     0    0     0     0    2 58.74  7.894    0
## 0001002C.3     0     0    0     0    0     0     0    3 60.39  9.750    0
## 0001002C.4     0     0    0     0    0     0     0    4 62.59 11.750    0
## 0001002C.5     0     0    0     0    0     0     0    5 66.48 14.833    0
## 0001003C.1     0     0    0     0    0     0     1    1 55.17  5.342    0
## 0001003C.2     0     0    0     0    0     0     1    2    NA     NA    0
## 0001003C.3     0     0    0     0    0     0     1    3    NA     NA    0
## 0001003C.4     0     0    0     0    0     0     1    4    NA     NA   NA
## 0001003C.5     0     0    0     0    0     0     1    5    NA     NA   NA
## 0001004C.1     0     0    0     0    1     0     1    1 53.06  5.494    0
## 0001004C.2     0     0    0     0    1     0     1    2 59.85  6.994    0
## 0001004C.3     0     0    0     0    1     0     1    3 56.42  8.917    0
## 0001004C.4     0     0    0     0    1     0     1    4    NA     NA   NA
## 0001004C.5     0     0    0     0    1     0     1    5    NA     NA   NA

Models

Now we fit models to the longitudinal data. We start simple then move into growth curve models. This follows the presentation in Singer and Willett (2003) Chapters 3-6.

#basic linear model
fit.1<-glm(math1~time+male+black+hisp+asian+nahn+other, data=e.long)
summary(fit.1)
## 
## Call:
## glm(formula = math1 ~ time + male + black + hisp + asian + nahn + 
##     other, data = e.long)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -53.72   -5.69    0.57    6.43   44.76  
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  51.4641     0.0982   524.3   <2e-16 ***
## time          0.4396     0.0277    15.8   <2e-16 ***
## male          0.8208     0.0759    10.8   <2e-16 ***
## black        -7.5194     0.1161   -64.8   <2e-16 ***
## hisp         -5.9896     0.1029   -58.2   <2e-16 ***
## asian        -4.1116     0.1577   -26.1   <2e-16 ***
## nahn         -7.6179     0.2247   -33.9   <2e-16 ***
## other        -2.1624     0.2429    -8.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 101.5)
## 
##     Null deviance: 7917813  on 70547  degrees of freedom
## Residual deviance: 7162995  on 70540  degrees of freedom
##   (36497 observations deleted due to missingness)
## AIC: 526184
## 
## Number of Fisher Scoring iterations: 2
#random intercept model for individual student differences
fit.2<-lmer(math1~time+male+black+hisp+asian+nahn+other+(1|childid), data=e.long)
summary(fit.2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math1 ~ time + male + black + hisp + asian + nahn + other + (1 |  
##     childid)
##    Data: e.long
## 
## REML criterion at convergence: 492254
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -9.140 -0.447  0.016  0.485  6.190 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  childid  (Intercept) 71.2     8.44    
##  Residual             35.3     5.94    
## Number of obs: 70548, groups:  childid, 20724
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  51.7552     0.1166     444
## time          0.2437     0.0179      14
## male          0.5864     0.1279       5
## black        -7.1026     0.1878     -38
## hisp         -6.0797     0.1739     -35
## asian        -5.1231     0.2668     -19
## nahn         -7.5625     0.3855     -20
## other        -2.3584     0.4085      -6
## 
## Correlation of Fixed Effects:
##       (Intr) time   male   black  hisp   asian  nahn  
## time  -0.375                                          
## male  -0.565  0.002                                   
## black -0.350  0.024  0.009                            
## hisp  -0.367  0.004  0.005  0.225                     
## asian -0.242  0.003  0.007  0.147  0.159              
## nahn  -0.165 -0.001  0.003  0.102  0.110  0.072       
## other -0.154  0.005 -0.003  0.096  0.104  0.068  0.047
#individual trajectory model with random slope for time
fit.3<-lmer(math1~time+male+black+hisp+asian+nahn+other+(time|childid), data=e.long)
summary(fit.3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## math1 ~ time + male + black + hisp + asian + nahn + other + (time |  
##     childid)
##    Data: e.long
## 
## REML criterion at convergence: 487615
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -7.554 -0.423  0.011  0.446  6.140 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  childid  (Intercept) 129.25   11.4          
##           time          5.27    2.3     -0.70
##  Residual              24.99    5.0          
## Number of obs: 70548, groups:  childid, 20724
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  51.4133     0.1254     410
## time          0.3099     0.0239      13
## male          0.8061     0.1259       6
## black        -7.3892     0.1863     -40
## hisp         -5.8155     0.1709     -34
## asian        -3.3523     0.2620     -13
## nahn         -7.3589     0.3778     -19
## other        -2.2312     0.4024      -6
## 
## Correlation of Fixed Effects:
##       (Intr) time   male   black  hisp   asian  nahn  
## time  -0.532                                          
## male  -0.517  0.003                                   
## black -0.325  0.032  0.008                            
## hisp  -0.335  0.003  0.004  0.223                     
## asian -0.221  0.002  0.008  0.145  0.159              
## nahn  -0.150 -0.002  0.003  0.101  0.110  0.072       
## other -0.142  0.004 -0.002  0.095  0.103  0.067  0.047
anova(fit.3, fit.2)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.2: math1 ~ time + male + black + hisp + asian + nahn + other + (1 | 
## fit.2:     childid)
## fit.3: math1 ~ time + male + black + hisp + asian + nahn + other + (time | 
## fit.3:     childid)
##       Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## fit.2 10 492257 492349 -246119   492237                            
## fit.3 12 487623 487733 -243800   487599  4638      2     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#curvilinear trajectory model with random nonlinear time
fit.4<-lmer(math1~time+male+black+hisp+asian+nahn+other+I(time^2)+(time+I(time^2)|childid), data=e.long)
## Warning: Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables?
summary(fit.4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) +  
##     (time + I(time^2) | childid)
##    Data: e.long
## 
## REML criterion at convergence: 482316
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -8.233 -0.404  0.013  0.419  5.577 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr       
##  childid  (Intercept) 289.65   17.02               
##           time         82.90    9.11    -0.86      
##           I(time^2)     1.46    1.21     0.83 -0.99
##  Residual              18.44    4.29               
## Number of obs: 70548, groups:  childid, 20724
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  50.4950     0.1676   301.3
## time          1.2297     0.0924    13.3
## male          0.7636     0.1261     6.1
## black        -7.4019     0.1863   -39.7
## hisp         -5.8262     0.1714   -34.0
## asian        -3.5118     0.2626   -13.4
## nahn         -7.3835     0.3783   -19.5
## other        -2.2827     0.4023    -5.7
## I(time^2)    -0.1684     0.0140   -12.1
## 
## Correlation of Fixed Effects:
##           (Intr) time   male   black  hisp   asian  nahn   other 
## time      -0.752                                                 
## male      -0.387 -0.001                                          
## black     -0.243  0.009  0.009                                   
## hisp      -0.252  0.002  0.004  0.223                            
## asian     -0.164 -0.002  0.008  0.146  0.158                     
## nahn      -0.112 -0.001  0.002  0.101  0.110  0.072              
## other     -0.105 -0.001 -0.002  0.095  0.103  0.067  0.047       
## I(time^2)  0.694 -0.975  0.002 -0.004 -0.001  0.003  0.001  0.002
anova(fit.4, fit.3)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.3: math1 ~ time + male + black + hisp + asian + nahn + other + (time | 
## fit.3:     childid)
## fit.4: math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) + 
## fit.4:     (time + I(time^2) | childid)
##       Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## fit.3 12 487623 487733 -243800   487599                            
## fit.4 16 482325 482472 -241147   482293  5306      4     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#individual trajectory model with fixed effects for race and different population
#trajectories for each race
fit.5<-lmer(math1~time*(male+black+hisp+asian+nahn+other)+(time|childid), data=e.long)
summary(fit.5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math1 ~ time * (male + black + hisp + asian + nahn + other) +  
##     (time | childid)
##    Data: e.long
## 
## REML criterion at convergence: 485341
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -7.251 -0.436  0.009  0.455  6.011 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr 
##  childid  (Intercept) 120.31   10.97         
##           time          4.14    2.03    -0.67
##  Residual              24.88    4.99         
## Number of obs: 70548, groups:  childid, 20724
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  53.0607     0.1488     357
## time         -0.2915     0.0373      -8
## male         -0.5919     0.1763      -3
## black        -5.6565     0.2598     -22
## hisp         -7.7903     0.2395     -33
## asian       -14.7664     0.3681     -40
## nahn         -9.1958     0.5295     -17
## other        -3.0150     0.5620      -5
## time:male     0.5138     0.0447      11
## time:black   -0.7138     0.0700     -10
## time:hisp     0.7118     0.0603      12
## time:asian    4.1164     0.0929      44
## time:nahn     0.6574     0.1307       5
## time:other    0.2837     0.1438       2
## 
## Correlation of Fixed Effects:
##            (Intr) time   male   black  hisp   asian  nahn   other  tim:ml
## time       -0.703                                                        
## male       -0.608  0.431                                                 
## black      -0.366  0.256  0.008                                          
## hisp       -0.395  0.276  0.005  0.224                                   
## asian      -0.259  0.182  0.007  0.146  0.158                            
## nahn       -0.180  0.126  0.004  0.101  0.110  0.072                     
## other      -0.165  0.116 -0.003  0.096  0.104  0.067  0.047              
## time:male   0.426 -0.611 -0.703 -0.005 -0.003 -0.006 -0.003  0.001       
## time:black  0.238 -0.337 -0.004 -0.700 -0.146 -0.095 -0.066 -0.062  0.005
## time:hisp   0.275 -0.389 -0.003 -0.157 -0.703 -0.111 -0.077 -0.072  0.003
## time:asian  0.181 -0.258 -0.006 -0.102 -0.110 -0.705 -0.050 -0.047  0.012
## time:nahn   0.128 -0.181 -0.003 -0.072 -0.078 -0.051 -0.703 -0.033  0.004
## time:other  0.114 -0.163  0.001 -0.066 -0.071 -0.046 -0.032 -0.700  0.002
##            tm:blc tm:hsp tim:sn tm:nhn
## time                                  
## male                                  
## black                                 
## hisp                                  
## asian                                 
## nahn                                  
## other                                 
## time:male                             
## time:black                            
## time:hisp   0.206                     
## time:asian  0.134  0.156              
## time:nahn   0.095  0.111  0.072       
## time:other  0.086  0.101  0.065  0.046
anova(fit.4, fit.5)
## refitting model(s) with ML (instead of REML)
## Data: e.long
## Models:
## fit.4: math1 ~ time + male + black + hisp + asian + nahn + other + I(time^2) + 
## fit.4:     (time + I(time^2) | childid)
## fit.5: math1 ~ time * (male + black + hisp + asian + nahn + other) + 
## fit.5:     (time | childid)
##       Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.4 16 482325 482472 -241147   482293                        
## fit.5 18 485342 485507 -242653   485306     0      2          1
AIC(fit.1)
## [1] 526184
AIC(fit.2)
## [1] 492273
AIC(fit.3)
## [1] 487639
AIC(fit.4) #Best Fit
## [1] 482348
AIC(fit.5)
## [1] 485377

Covariance structure models

Often, we may be interested in modeling the correlation among students over time. These models typically assume some form of structure to the covariance matrix of either the residuals or the random effects themselves. R’s basic functions won’t fit correlated random effect models, but we CAN do these in BUGS/JAGS (next time)

If we want to model correlations among the residuals for individuals over time, we can use the functions in the nlme library, but only if your outcome is continuous. NO GLMMs ALLOWED!

library(nlme)
## 
## Attaching package: 'nlme'
## 
## The following object is masked from 'package:lme4':
## 
##     lmList
#here I fit the AR(1) covariance structure and compare it to model 3 from above
fit.6<-lme(math1~time+male+black+hisp+asian+nahn+other, random=~time|childid, correlation=corAR1(,form=~time|childid), data=e.long, na.action="na.omit")
summary(fit.6)
## Linear mixed-effects model fit by REML
##  Data: e.long 
##      AIC    BIC  logLik
##   487245 487364 -243610
## 
## Random effects:
##  Formula: ~time | childid
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 10.913 (Intr)
## time         2.028 -0.708
## Residual     5.669       
## 
## Correlation Structure: ARMA(1,0)
##  Formula: ~time | childid 
##  Parameter estimate(s):
##  Phi1 
## 0.247 
## Fixed effects: math1 ~ time + male + black + hisp + asian + nahn + other 
##             Value Std.Error    DF t-value p-value
## (Intercept) 51.38    0.1264 49823   406.5       0
## time         0.32    0.0240 49823    13.3       0
## male         0.79    0.1259 20717     6.3       0
## black       -7.38    0.1867 20717   -39.5       0
## hisp        -5.82    0.1707 20717   -34.1       0
## asian       -3.41    0.2620 20717   -13.0       0
## nahn        -7.36    0.3773 20717   -19.5       0
## other       -2.20    0.4027 20717    -5.5       0
##  Correlation: 
##       (Intr) time   male   black  hisp   asian  nahn  
## time  -0.543                                          
## male  -0.513  0.003                                   
## black -0.323  0.035  0.008                            
## hisp  -0.333  0.005  0.004  0.222                     
## asian -0.220  0.003  0.008  0.145  0.158              
## nahn  -0.149 -0.002  0.003  0.100  0.110  0.072       
## other -0.141  0.005 -0.002  0.094  0.103  0.067  0.047
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -6.58701 -0.40326  0.02063  0.44827  5.46964 
## 
## Number of Observations: 70548
## Number of Groups: 20724
fit.3l<-lme(math1~time+male+black+hisp+asian+nahn+other, random=~time|childid, data=e.long, na.action="na.omit")
summary(fit.3l)
## Linear mixed-effects model fit by REML
##  Data: e.long 
##      AIC    BIC  logLik
##   487639 487749 -243807
## 
## Random effects:
##  Formula: ~time | childid
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev Corr  
## (Intercept) 11.369 (Intr)
## time         2.297 -0.699
## Residual     4.999       
## 
## Fixed effects: math1 ~ time + male + black + hisp + asian + nahn + other 
##             Value Std.Error    DF t-value p-value
## (Intercept) 51.41    0.1254 49823   409.9       0
## time         0.31    0.0239 49823    12.9       0
## male         0.81    0.1259 20717     6.4       0
## black       -7.39    0.1863 20717   -39.7       0
## hisp        -5.82    0.1709 20717   -34.0       0
## asian       -3.35    0.2620 20717   -12.8       0
## nahn        -7.36    0.3778 20717   -19.5       0
## other       -2.23    0.4024 20717    -5.5       0
##  Correlation: 
##       (Intr) time   male   black  hisp   asian  nahn  
## time  -0.532                                          
## male  -0.517  0.003                                   
## black -0.325  0.032  0.008                            
## hisp  -0.335  0.003  0.004  0.223                     
## asian -0.221  0.002  0.008  0.145  0.159              
## nahn  -0.150 -0.002  0.003  0.101  0.110  0.072       
## other -0.142  0.004 -0.002  0.095  0.103  0.067  0.047
## 
## Standardized Within-Group Residuals:
##      Min       Q1      Med       Q3      Max 
## -7.55435 -0.42300  0.01061  0.44565  6.14023 
## 
## Number of Observations: 70548
## Number of Groups: 20724
anova(fit.6, fit.3l)
##        Model df    AIC    BIC  logLik   Test L.Ratio p-value
## fit.6      1 13 487245 487364 -243610                       
## fit.3l     2 12 487639 487749 -243807 1 vs 2   395.8  <.0001