In-class exercise3:Reproduce the results in homework example using the rmarkdown file and publish it with a web link on the course moodle page.

Introduction

Kreft and de Leeuw (1998) obtained a sub-sample of students in eighth grade from the National Education Longitudinal Study of 1988 (NELS–88) collected by the National Center for Educational Statistics of the U.S. Department of Education.

The students are nested in schools.

Here, we consider the following subset of the variables:

Data management

library(tidyverse)
library(haven)
fL <- "https://stats.idre.ucla.edu/stat/examples/imm/imm10.dta"
dta <- read_dta(fL)
glimpse(dta)
Rows: 260
Columns: 19
$ schid    <dbl> 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472, 7472...
$ stuid    <dbl> 3, 8, 13, 17, 27, 28, 30, 36, 37, 42, 52, 53, 61, 64, 72, ...
$ ses      <dbl> -0.13, -0.39, -0.80, -0.72, -0.74, -0.58, -0.83, -0.51, -0...
$ meanses  <dbl> -0.48261, -0.48261, -0.48261, -0.48261, -0.48261, -0.48261...
$ homework <dbl> 1, 0, 0, 1, 2, 1, 5, 1, 1, 2, 1, 1, 1, 2, 1, 4, 1, 2, 1, 1...
$ white    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ parented <dbl> 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 2, 1, 2, 3, 3, 1, 3, 3...
$ public   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ ratio    <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19...
$ percmin  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
$ math     <dbl> 48, 48, 53, 42, 43, 57, 33, 64, 36, 56, 48, 48, 44, 35, 50...
$ sex      <dbl> 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1...
$ race     <dbl> 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4...
$ sctype   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ cstr     <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ scsize   <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3...
$ urban    <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ region   <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ schnum   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
# make schid a factor type 
dta$schid <- factor(dta$schid)

OLS regression lines over 10 schools

coef(m0 <- nlme::lmList(math ~ homework | schid, data=dta))
      (Intercept) homework
7472       50.684  -3.5538
7829       49.012  -2.9201
7930       38.750   7.9091
24725      34.394   5.5927
25456      53.939  -4.7184
25642      49.259  -2.4861
62821      59.210   1.0946
68448      36.055   6.4963
68493      38.520   5.8600
72292      37.714   6.3351

Visualization

ggplot(data=dta, aes(homework, math, color=schid)) +
  geom_point(alpha=0.5) +
  stat_smooth(aes(group=1), method="lm", formula=y~x) +
  stat_smooth(method="lm", formula=y ~ x, se=F) +
  labs(x="Homework (hours per week)",
       y="Math score") +
  guides(color=guide_legend(ncol=2))+
  theme_minimal() +
  theme(legend.position=c(.9,.3))

A random intercepts and random slopes model

library(lme4)
summary(m1 <- lmer(math ~ homework + (homework | schid), data=dta))
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ homework + (homework | schid)
   Data: dta

REML criterion at convergence: 1764

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.5110 -0.5357  0.0175  0.6121  2.5708 

Random effects:
 Groups   Name        Variance Std.Dev. Corr 
 schid    (Intercept) 69.3     8.33          
          homework    22.5     4.74     -0.81
 Residual             43.1     6.56          
Number of obs: 260, groups:  schid, 10

Fixed effects:
            Estimate Std. Error t value
(Intercept)    44.77       2.74   16.32
homework        2.04       1.55    1.31

Correlation of Fixed Effects:
         (Intr)
homework -0.804

Random effects

library(lattice)
qqmath(ranef(m1))
$schid

Residual plot

plot(m1, resid(., scaled=TRUE) ~ fitted(.) | schid, 
     xlab="Fitted values", ylab= "Standardized residuals",
     abline=0, lty=3)

Normality QQ-plot

qqmath(m1, grid=TRUE)

The end