Kreft and de Leeuw (1998) obtained a sub-sample of students in eighth grade from the National Education Longitudinal Study of 1988 (NELS��88) collected by the National Center for Educational Statistics of the U.S. Department of Education.
The students are nested in schools.
Here, we consider the following subset of the variables:
schid: school identifier
math: continuous measure of achievement in mathematics (standardized to have a mean of 50 and a standard deviation of 10)
homework: number of hours of homework done per week
library(tidyverse)
library(haven)
fL <- "https://stats.idre.ucla.edu/stat/examples/imm/imm10.dta"
dta <- read_dta(fL)
glimpse(dta)
Rows: 260
Columns: 19
$ schid <dbl> 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472...
$ stuid <dbl> 3, 8, 13, 17, 27, 28, 30, 36, 37, 42, 52, 53, 61, 64, 72, ...
$ ses <dbl> -0.13, -0.39, -0.80, -0.72, -0.74, -0.58, -0.83, -0.51, -0...
$ meanses <dbl> -0.48261, -0.48261, -0.48261, -0.48261, -0.48261, -0.48261...
$ homework <dbl> 1, 0, 0, 1, 2, 1, 5, 1, 1, 2, 1, 1, 1, 2, 1, 4, 1, 2, 1, 1...
$ white <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ parented <dbl> 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 1, 2, 3, 3, 1, 3, 3...
$ public <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ ratio <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19...
$ percmin <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ math <dbl> 48, 48, 53, 42, 43, 57, 33, 64, 36, 56, 48, 48, 44, 35, 50...
$ sex <dbl> 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1...
$ race <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ sctype <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ cstr <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ scsize <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
$ urban <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ region <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ schnum <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
# make schid a factor type
dta$schid <- factor(dta$schid)
coef(m0 <- nlme::lmList(math ~ homework | schid, data=dta))
(Intercept) homework
7472 50.684 -3.5538
7829 49.012 -2.9201
7930 38.750 7.9091
24725 34.394 5.5927
25456 53.939 -4.7184
25642 49.259 -2.4861
62821 59.210 1.0946
68448 36.055 6.4963
68493 38.520 5.8600
72292 37.714 6.3351
ggplot(data=dta, aes(homework, math, color=schid)) +
geom_point(alpha=0.5) +
stat_smooth(aes(group=1), method="lm", formula=y~x) +
stat_smooth(method="lm", formula=y ~ x, se=F) +
labs(x="Homework (hours per week)",
y="Math score") +
guides(color=guide_legend(ncol=2))+
theme_minimal() +
theme(legend.position=c(.9,.3))
library(lme4)
summary(m1 <- lmer(math ~ homework + (homework | schid), data=dta))
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ homework + (homework | schid)
Data: dta
REML criterion at convergence: 1764
Scaled residuals:
Min 1Q Median 3Q Max
-2.5110 -0.5357 0.0175 0.6121 2.5708
Random effects:
Groups Name Variance Std.Dev. Corr
schid (Intercept) 69.3 8.33
homework 22.5 4.74 -0.81
Residual 43.1 6.56
Number of obs: 260, groups: schid, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 44.77 2.74 16.32
homework 2.04 1.55 1.31
Correlation of Fixed Effects:
(Intr)
homework -0.804
library(lattice)
qqmath(ranef(m1))
$schid
plot(m1, resid(., scaled=TRUE) ~ fitted(.) | schid,
xlab="Fitted values", ylab= "Standardized residuals",
abline=0, lty=3)
qqmath(m1, grid=TRUE)