1 data input

# load them
pacman::p_load(foreign, tidyverse, lme4, merTools)
# read spss file58
dta <- read.spss("eg_hlm.sav", header = T) %>% as.data.frame
# check data structure1
str(dta)
## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...

2 plot

ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

3 model 1-fix effect of year

summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031

4 model 2-random effect of year

# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64127  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92436  0.9614       
##             year        0.60793  0.7797   0.92
##  Residual               0.30115  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64424    0.05462  -30.11
## convergence code: 0
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)

5 comparison

# compare models
anova(m1, m2)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#plot random effect

# estimate random-effects parameters by simulation
m2_re <- REsim(m2)
# plot for random effects
plotREsim(m2_re)

6 Residual plot

6.1 m1

# residual plots of m1
plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

6.2 m2

# residual plots of m2
plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)