pacman::p_load(foreign, tidyverse, lme4, merTools)

1 Input Data

# file location
fLoc <- "C:/Users/HANK/Desktop/HOMEWORK/eg_hlm.sav"
# read spss format and covert it to data frame 
dta <- read.spss(fLoc) %>% as.data.frame
# check data structure
str(dta)
## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...
# set black-and-white theme
theme_set(theme_bw())

2 Plot

ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

3 LMER

# nested specification with random intercepts only
# fixed-effect for slope
summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031

fixed-effect的斜率

library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64128  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92678  0.9627       
##             year        0.60804  0.7798   0.92
##  Residual               0.30114  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept) -1.64549    0.05462 59.88455  -30.13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

random effect的斜率

# compare models
anova(m1, m2)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# merTools
# estimate random-effects parameters by simulation
m2_re <- REsim(m2)

4 Plot for random effects

plotREsim(m2_re)

5 Residual plots

#m1
plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

#m2
plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

6 The end