Kreft and de Leeuw (1998) obtained a sub-sample of students in eighth grade from the National Education Longitudinal Study of 1988 (NELS–88) collected by the National Center for Educational Statistics of the U.S. Department of Education.
The students are nested in schools.
Here, we consider the following subset of the variables:
schid: school identifier
math: continuous measure of achievement in mathematics (standardized to have a mean of 50 and a standard deviation of 10)
homework: number of hours of homework done per week
library(tidyverse)
library(haven)
fL <- "https://stats.idre.ucla.edu/stat/examples/imm/imm10.dta"
dta <- read_dta(fL)
glimpse(dta)
Rows: 260
Columns: 19
$ schid <dbl> 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472...
$ stuid <dbl> 3, 8, 13, 17, 27, 28, 30, 36, 37, 42, 52, 53, 61, 64, 72, ...
$ ses <dbl> -0.13, -0.39, -0.80, -0.72, -0.74, -0.58, -0.83, -0.51, -0...
$ meanses <dbl> -0.48261, -0.48261, -0.48261, -0.48261, -0.48261, -0.48261...
$ homework <dbl> 1, 0, 0, 1, 2, 1, 5, 1, 1, 2, 1, 1, 1, 2, 1, 4, 1, 2, 1, 1...
$ white <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ parented <dbl> 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 1, 2, 3, 3, 1, 3, 3...
$ public <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ ratio <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19...
$ percmin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ math <dbl> 48, 48, 53, 42, 43, 57, 33, 64, 36, 56, 48, 48, 44, 35, 50...
$ sex <dbl> 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1...
$ race <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ sctype <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ cstr <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ scsize <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
$ urban <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ region <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ schnum <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
# make schid a factor type
dta$schid <- factor(dta$schid)
coef(m0 <- nlme::lmList(math ~ homework | schid, data=dta))
(Intercept) homework
7472 50.684 -3.5538
7829 49.012 -2.9201
7930 38.750 7.9091
24725 34.394 5.5927
25456 53.939 -4.7184
25642 49.259 -2.4861
62821 59.210 1.0946
68448 36.055 6.4963
68493 38.520 5.8600
72292 37.714 6.3351
ggplot(data=dta, aes(homework, math, color=schid)) +
geom_point(alpha=0.5) +
stat_smooth(aes(group=1), method="lm", formula=y~x) +
stat_smooth(method="lm", formula=y ~ x, se=F) +
labs(x="Homework (hours per week)",
y="Math score") +
guides(color=guide_legend(ncol=2))+
theme_minimal() +
theme(legend.position=c(.9,.3))
library(lme4)
summary(m1 <- lmer(math ~ homework + (homework | schid), data=dta))
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ homework + (homework | schid)
Data: dta
REML criterion at convergence: 1764
Scaled residuals:
Min 1Q Median 3Q Max
-2.5110 -0.5357 0.0175 0.6121 2.5708
Random effects:
Groups Name Variance Std.Dev. Corr
schid (Intercept) 69.3 8.33
homework 22.5 4.74 -0.81
Residual 43.1 6.56
Number of obs: 260, groups: schid, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 44.77 2.74 16.32
homework 2.04 1.55 1.31
Correlation of Fixed Effects:
(Intr)
homework -0.804
library(lattice)
qqmath(ranef(m1))
$schid
plot(m1, resid(., scaled=TRUE) ~ fitted(.) | schid,
xlab="Fitted values", ylab= "Standardized residuals",
abline=0, lty=3)
qqmath(m1, grid=TRUE)