In this data set, the variable year is nested within students (the variable cid), and students are nested within schools (the variable school). There are 60 schools.

The focus here is to examine the annual changes in math scores (math) of students within schools.

1 load them

pacman::p_load(foreign, tidyverse, lme4, merTools)

2 file location

fLoc <- “http://www.ats.ucla.edu/stat/spss/code/eg_hlm.sav”

# file location
fLoc <- "C:/Users/Ching-Fang Wu/Documents/data/eg_hlm.sav"

3 read spss format and covert it to data frame

dta <- read.spss(fLoc) %>% as.data.frame

4 check data structure

str(dta)

## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...

5 set black-and-white theme

theme_set(theme_bw())

6

ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")

## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

7 nested specification with random intercepts only

8 fixed-effect for slope

summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))#(1 | school/cid)意思是intercept varying among school and cid within school.

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031

m0 <- lme4::lmer(math ~ year + (1 | school/cid), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE)

	math
Predictors	Estimates	CI
(Intercept)	-0.78	-0.90 – -0.66
year	0.75	0.74 – 0.76
Random Effects
σ²	0.35
τ₀₀ _cid:school	0.67
τ₀₀ _school	0.19
ICC	0.71
N _cid	1721
N _school	60
Observations	7230

9 same as

#lmer(math ~ year + (1 | school) +( 1 | school:cid) , data = dta)) #

10 random slopes

summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64128  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92678  0.9627       
##             year        0.60804  0.7798   0.92
##  Residual               0.30114  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64549    0.05462  -30.13

11 compare models

anova(m1, m2)

## refitting model(s) with ML (instead of REML)

## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

12 merTools

13 estimate random-effects parameters by simulation

m2_re <- REsim(m2)

14 plot for random effects

plotREsim(m2_re)

15 residual plots

plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

16

plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

Week5 Homework 2

Ching-Fang Wu

Mon Oct 12 12:20:42 2020