In this data set, the variable year is nested within students (the variable cid), and students are nested within schools (the variable school). There are 60 schools.
The focus here is to examine the annual changes in math scores (math) of students within schools.
pacman::p_load(foreign, tidyverse, lme4, merTools)fLoc <- “http://www.ats.ucla.edu/stat/spss/code/eg_hlm.sav”
# file location
fLoc <- "C:/Users/Ching-Fang Wu/Documents/data/eg_hlm.sav"dta <- read.spss(fLoc) %>% as.data.framestr(dta)## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...theme_set(theme_bw())ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))#(1 | school/cid)意思是intercept varying among school and cid within school.## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031m0 <- lme4::lmer(math ~ year + (1 | school/cid), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE)| math | ||
|---|---|---|
| Predictors | Estimates | CI | 
| (Intercept) | -0.78 | -0.90 – -0.66 | 
| year | 0.75 | 0.74 – 0.76 | 
| Random Effects | ||
| σ2 | 0.35 | |
| τ00 cid:school | 0.67 | |
| τ00 school | 0.19 | |
| ICC | 0.71 | |
| N cid | 1721 | |
| N school | 60 | |
| Observations | 7230 | |
#lmer(math ~ year + (1 | school) +( 1 | school:cid) , data = dta)) #
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64128  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92678  0.9627       
##             year        0.60804  0.7798   0.92
##  Residual               0.30114  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64549    0.05462  -30.13anova(m1, m2)## refitting model(s) with ML (instead of REML)## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1m2_re <- REsim(m2)plotREsim(m2_re)plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)