Introduction

Kreft and de Leeuw (1998) obtained a sub-sample of students in eighth grade from the National Education Longitudinal Study of 1988 (NELS��88) collected by the National Center for Educational Statistics of the U.S. Department of Education.

The students are nested in schools.

Here, we consider the following subset of the variables:

Data management

library(tidyverse)
library(haven)
fL <- "https://stats.idre.ucla.edu/stat/examples/imm/imm10.dta"
dta <- read_dta(fL)
glimpse(dta)
Rows: 260
Columns: 19
$ schid    <dbl> 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472...
$ stuid    <dbl> 3, 8, 13, 17, 27, 28, 30, 36, 37, 42, 52, 53, 61, 64, 72, ...
$ ses      <dbl> -0.13, -0.39, -0.80, -0.72, -0.74, -0.58, -0.83, -0.51, -0...
$ meanses  <dbl> -0.48261, -0.48261, -0.48261, -0.48261, -0.48261, -0.48261...
$ homework <dbl> 1, 0, 0, 1, 2, 1, 5, 1, 1, 2, 1, 1, 1, 2, 1, 4, 1, 2, 1, 1...
$ white    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ parented <dbl> 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 1, 2, 3, 3, 1, 3, 3...
$ public   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ ratio    <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19...
$ percmin  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ math     <dbl> 48, 48, 53, 42, 43, 57, 33, 64, 36, 56, 48, 48, 44, 35, 50...
$ sex      <dbl> 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1...
$ race     <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ sctype   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ cstr     <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ scsize   <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
$ urban    <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ region   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ schnum   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
# make schid a factor type 
dta$schid <- factor(dta$schid)

OLS regression lines over 10 schools

coef(m0 <- nlme::lmList(math ~ homework | schid, data=dta))
      (Intercept) homework
7472       50.684  -3.5538
7829       49.012  -2.9201
7930       38.750   7.9091
24725      34.394   5.5927
25456      53.939  -4.7184
25642      49.259  -2.4861
62821      59.210   1.0946
68448      36.055   6.4963
68493      38.520   5.8600
72292      37.714   6.3351

Visualization

ggplot(data=dta, aes(homework, math, color=schid)) +
  geom_point(alpha=0.5) +
  stat_smooth(aes(group=1), method="lm", formula=y~x) +
  stat_smooth(method="lm", formula=y ~ x, se=F) +
  labs(x="Homework (hours per week)",
       y="Math score") +
  guides(color=guide_legend(ncol=2))+
  theme_minimal() +
  theme(legend.position=c(.9,.3))

A random intercepts and random slopes model

library(lme4)
summary(m1 <- lmer(math ~ homework + (homework | schid), data=dta))
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ homework + (homework | schid)
   Data: dta

REML criterion at convergence: 1764

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.5110 -0.5357  0.0175  0.6121  2.5708 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 schid    (Intercept) 69.3     8.33          
          homework    22.5     4.74     -0.81
 Residual             43.1     6.56          
Number of obs: 260, groups:  schid, 10

Fixed effects:
            Estimate Std. Error t value
(Intercept)    44.77       2.74   16.32
homework        2.04       1.55    1.31

Correlation of Fixed Effects:
         (Intr)
homework -0.804

Random effects

library(lattice)
qqmath(ranef(m1))
$schid

Residual plot

plot(m1, resid(., scaled=TRUE) ~ fitted(.) | schid, 
     xlab="Fitted values", ylab= "Standardized residuals",
     abline=0, lty=3)

Normality QQ-plot

qqmath(m1, grid=TRUE)

The end