# load them
pacman::p_load(foreign, tidyverse, lme4, merTools)
# read spss file58
dta <- read.spss("eg_hlm.sav", header = T) %>% as.data.frame
# check data structure1
str(dta)
## 'data.frame': 7230 obs. of 12 variables:
## $ size : num 380 380 380 380 380 380 380 380 380 380 ...
## $ lowinc : num 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
## $ mobility: num 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
## $ female : num 0 0 0 0 0 0 0 0 0 0 ...
## $ black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ hispanic: num 1 1 1 0 0 0 0 0 1 1 ...
## $ year : num 0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
## $ grade : num 2 3 4 0 1 2 3 4 0 1 ...
## $ math : num 1.146 1.134 2.3 -1.303 0.439 ...
## $ retained: num 0 0 0 0 0 0 0 0 0 0 ...
## $ school : Factor w/ 60 levels " 2020",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ cid : Factor w/ 1721 levels " 101480302",..: 244 244 244 248 248 248 248 248 253 253 ...
ggplot(dta, aes(year, math, group = cid)) +
geom_point(alpha = .1) +
geom_line(alpha = .1) +
stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
labs(x = "Year of age (centered)", y = "Math score")
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
## Data: dta
##
## REML criterion at convergence: 16759.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8093 -0.5831 -0.0270 0.5566 6.0867
##
## Random effects:
## Groups Name Variance Std.Dev.
## cid:school (Intercept) 0.6699 0.8185
## school (Intercept) 0.1869 0.4324
## Residual 0.3470 0.5891
## Number of obs: 7230, groups: cid:school, 1721; school, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.780482 0.061088 -12.78
## year 0.746123 0.005396 138.26
##
## Correlation of Fixed Effects:
## (Intr)
## year -0.031
# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
## Data: dta
##
## REML criterion at convergence: 16553.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2231 -0.5611 -0.0308 0.5312 5.1579
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## cid:school (Intercept) 0.64127 0.8008
## year 0.01132 0.1064 0.55
## school (Intercept) 0.92436 0.9614
## year 0.60793 0.7797 0.92
## Residual 0.30115 0.5488
## Number of obs: 7230, groups: cid:school, 1721; school, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -1.64424 0.05462 -30.11
## convergence code: 0
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)
# compare models
anova(m1, m2)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 5 16757 16792 -8373.5 16747
## m2 8 16566 16621 -8275.0 16550 197.1 3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#plot random effect
# estimate random-effects parameters by simulation
m2_re <- REsim(m2)
# plot for random effects
plotREsim(m2_re)
# residual plots of m1
plot(m1, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
# residual plots of m2
plot(m2, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)