# Understand the meaning of predictors at different levels
# as well as the effect of the centering the teacher level
# predictor against the respective school-level means in a
# school-teacher-pupil three-level example

0.1 load package

# package management
library(pacman)

# load them
pacman::p_load(mlmRev, tidyverse, lme4, merTools)

0.2 data input and management

# input data
dta <- read.csv("pts.csv")

#
head(dta)
##   School Teacher Pupil      Read_0 Read_1
## 1      1      11    18 -0.16407000 4.2426
## 2      1      11    19 -0.98126000 1.0000
## 3      1      11    20 -1.25370000 2.2361
## 4      1      11    15 -0.87230000 3.1623
## 5      1      11    17 -0.00063104 3.4641
## 6      1      11    16 -0.92678000 4.0000
# coerce variables to factor type and
# compute mean Read_0 by school
dta <- dta %>%
  mutate(School = factor(School), Teacher = factor(Teacher),
         Pupil = factor(Pupil)) %>%
  group_by( School ) %>%
  mutate(msRead_0 = mean(Read_0))


# compute mean Read_0 by teacher and
# centered teacher mean of Read_0 (from respective school means)
#
dta <- dta %>%
  group_by( Teacher ) %>%
  mutate(mtRead_0 = mean(Read_0), ctRead_0 = mean(Read_0) - msRead_0 )
#
str(dta)
## tibble [777 x 8] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ School  : Factor w/ 20 levels "1","3","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Teacher : Factor w/ 46 levels "11","12","31",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil   : Factor w/ 777 levels "1","2","3","4",..: 17 18 19 14 16 15 20 21 22 4 ...
##  $ Read_0  : num [1:777] -0.16407 -0.98126 -1.2537 -0.8723 -0.000631 ...
##  $ Read_1  : num [1:777] 4.24 1 2.24 3.16 3.46 ...
##  $ msRead_0: num [1:777] -0.314 -0.314 -0.314 -0.314 -0.314 ...
##  $ mtRead_0: num [1:777] -0.256 -0.256 -0.256 -0.256 -0.256 ...
##  $ ctRead_0: num [1:777] 0.0585 0.0585 0.0585 0.0585 0.0585 ...
##  - attr(*, "groups")= tibble [46 x 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Teacher: Factor w/ 46 levels "11","12","31",..: 1 2 3 4 5 6 7 8 9 10 ...
##   ..$ .rows  : list<int> [1:46] 
##   .. ..$ : int [1:22] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:27] 23 24 25 26 27 28 29 30 31 32 ...
##   .. ..$ : int [1:19] 50 51 52 53 54 55 56 57 58 59 ...
##   .. ..$ : int [1:18] 69 70 71 72 73 74 75 76 77 78 ...
##   .. ..$ : int [1:19] 87 88 89 90 91 92 93 94 95 96 ...
##   .. ..$ : int [1:23] 106 107 108 109 110 111 112 113 114 115 ...
##   .. ..$ : int [1:9] 129 130 131 132 133 134 135 136 137
##   .. ..$ : int [1:17] 138 139 140 141 142 143 144 145 146 147 ...
##   .. ..$ : int [1:21] 155 156 157 158 159 160 161 162 163 164 ...
##   .. ..$ : int [1:23] 176 177 178 179 180 181 182 183 184 185 ...
##   .. ..$ : int [1:21] 199 200 201 202 203 204 205 206 207 208 ...
##   .. ..$ : int [1:22] 220 221 222 223 224 225 226 227 228 229 ...
##   .. ..$ : int [1:19] 242 243 244 245 246 247 248 249 250 251 ...
##   .. ..$ : int [1:12] 261 262 263 264 265 266 267 268 269 270 ...
##   .. ..$ : int [1:13] 273 274 275 276 277 278 279 280 281 282 ...
##   .. ..$ : int [1:4] 286 287 288 289
##   .. ..$ : int [1:5] 290 291 292 293 294
##   .. ..$ : int [1:13] 295 296 297 298 299 300 301 302 303 304 ...
##   .. ..$ : int [1:12] 308 309 310 311 312 313 314 315 316 317 ...
##   .. ..$ : int [1:23] 320 321 322 323 324 325 326 327 328 329 ...
##   .. ..$ : int [1:28] 343 344 345 346 347 348 349 350 351 352 ...
##   .. ..$ : int [1:15] 371 372 373 374 375 376 377 378 379 380 ...
##   .. ..$ : int [1:18] 386 387 388 389 390 391 392 393 394 395 ...
##   .. ..$ : int [1:6] 404 405 406 407 408 409
##   .. ..$ : int [1:11] 410 411 412 413 414 415 416 417 418 419 ...
##   .. ..$ : int [1:7] 421 422 423 424 425 426 427
##   .. ..$ : int [1:8] 428 429 430 431 432 433 434 435
##   .. ..$ : int [1:19] 436 437 438 439 440 441 442 443 444 445 ...
##   .. ..$ : int [1:26] 455 456 457 458 459 460 461 462 463 464 ...
##   .. ..$ : int [1:25] 481 482 483 484 485 486 487 488 489 490 ...
##   .. ..$ : int [1:21] 506 507 508 509 510 511 512 513 514 515 ...
##   .. ..$ : int [1:21] 527 528 529 530 531 532 533 534 535 536 ...
##   .. ..$ : int [1:24] 548 549 550 551 552 553 554 555 556 557 ...
##   .. ..$ : int [1:21] 572 573 574 575 576 577 578 579 580 581 ...
##   .. ..$ : int [1:22] 593 594 595 596 597 598 599 600 601 602 ...
##   .. ..$ : int [1:24] 615 616 617 618 619 620 621 622 623 624 ...
##   .. ..$ : int [1:20] 639 640 641 642 643 644 645 646 647 648 ...
##   .. ..$ : int [1:22] 659 660 661 662 663 664 665 666 667 668 ...
##   .. ..$ : int [1:15] 681 682 683 684 685 686 687 688 689 690 ...
##   .. ..$ : int [1:11] 696 697 698 699 700 701 702 703 704 705 ...
##   .. ..$ : int [1:12] 707 708 709 710 711 712 713 714 715 716 ...
##   .. ..$ : int [1:9] 719 720 721 722 723 724 725 726 727
##   .. ..$ : int [1:11] 728 729 730 731 732 733 734 735 736 737 ...
##   .. ..$ : int [1:2] 739 740
##   .. ..$ : int [1:18] 741 742 743 744 745 746 747 748 749 750 ...
##   .. ..$ : int [1:19] 759 760 761 762 763 764 765 766 767 768 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE

0.3 school-teacher level model

##
# no predictor
summary(m0 <- lmer(Read_1 ~  (1 | School) + (1 | Teacher), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher)
##    Data: dta
## 
## REML criterion at convergence: 2486.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05438 -0.65686  0.06497  0.62523  2.47233 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1370   0.3701  
##  School   (Intercept) 0.1277   0.3574  
##  Residual             1.3250   1.1511  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3755     0.1065   31.69
# add a school-level predictor
summary(m0_s <- update(m0, . ~ . + msRead_0))
## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + msRead_0
##    Data: dta
## 
## REML criterion at convergence: 2467.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06110 -0.65597  0.06822  0.62772  2.45391 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1234   0.3513  
##  School   (Intercept) 0.0000   0.0000  
##  Residual             1.3224   1.1500  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.39822    0.06808  49.918
## msRead_0     1.00159    0.17807   5.625
## 
## Correlation of Fixed Effects:
##          (Intr)
## msRead_0 0.086 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# add a teacher-level predictor
summary(m0_t <- update(m0, . ~ . + mtRead_0))
## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + mtRead_0
##    Data: dta
## 
## REML criterion at convergence: 2434.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.07539 -0.65979  0.06707  0.63259  2.49973 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 3.296e-15 5.741e-08
##  School   (Intercept) 4.471e-02 2.114e-01
##  Residual             1.309e+00 1.144e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.40649    0.06307  54.010
## mtRead_0     1.07319    0.11495   9.336
## 
## Correlation of Fixed Effects:
##          (Intr)
## mtRead_0 0.024 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# add a teacher-level predictor away from respective school means
summary(m0_ct <- update(m0, . ~ . + ctRead_0))
## boundary (singular) fit: see ?isSingular
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + ctRead_0
##    Data: dta
## 
## REML criterion at convergence: 2454.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1804 -0.6461  0.0791  0.6420  2.5272 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 2.462e-10 1.569e-05
##  School   (Intercept) 1.764e-01 4.200e-01
##  Residual             1.311e+00 1.145e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3896     0.1029  32.932
## ctRead_0      1.1742     0.1581   7.427
## 
## Correlation of Fixed Effects:
##          (Intr)
## ctRead_0 0.000 
## convergence code: 0
## boundary (singular) fit: see ?isSingular

0.4 create teacher ID nested in School

dta <- dta %>% 
        group_by(School) %>% 
        mutate(T_in_S = factor(match(Teacher, unique(Teacher)))) %>% 
        as.data.frame
str(dta)
## 'data.frame':    777 obs. of  9 variables:
##  $ School  : Factor w/ 20 levels "1","3","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Teacher : Factor w/ 46 levels "11","12","31",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil   : Factor w/ 777 levels "1","2","3","4",..: 17 18 19 14 16 15 20 21 22 4 ...
##  $ Read_0  : num  -0.16407 -0.98126 -1.2537 -0.8723 -0.000631 ...
##  $ Read_1  : num  4.24 1 2.24 3.16 3.46 ...
##  $ msRead_0: num  -0.314 -0.314 -0.314 -0.314 -0.314 ...
##  $ mtRead_0: num  -0.256 -0.256 -0.256 -0.256 -0.256 ...
##  $ ctRead_0: num  0.0585 0.0585 0.0585 0.0585 0.0585 ...
##  $ T_in_S  : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...
# 
theme_set(theme_bw())
# plot
ggplot(dta, aes(Read_0, Read_1, group = T_in_S, color = T_in_S)) +
 geom_smooth(method = "lm", se = F, formula= y ~ poly(x, 2)) +
 geom_point(cex = 0.6, alpha = .3) +
 facet_wrap(~ School, nrow = 2) +
 labs(x = "Reading attainment at reception", 
      y = "Reading attainment at year 1") +
 theme(legend.position = "NONE")
## Warning: Computation failed in `stat_smooth()`:
## 'degree' must be less than number of unique points

0.5 random teacher and random school effects

summary(m3 <- lmer(Read_1 ~ Read_0 + I(Read_0*Read_0) + 
                  (1 | School) + (1 | Teacher), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) + (1 | Teacher)
##    Data: dta
## 
## REML criterion at convergence: 1906.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2824 -0.5059 -0.0041  0.6268  3.4686 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.02331  0.1527  
##  School   (Intercept) 0.04335  0.2082  
##  Residual             0.63769  0.7986  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##                    Estimate Std. Error t value
## (Intercept)         3.47337    0.06740  51.534
## Read_0              0.95479    0.03592  26.578
## I(Read_0 * Read_0) -0.06688    0.03062  -2.184
## 
## Correlation of Fixed Effects:
##             (Intr) Read_0
## Read_0       0.250       
## I(Rd_0*R_0) -0.462 -0.521
# residual plot
plot(m3, pch = 20, cex = 0.5, col = "steelblue", 
     xlab = "Fitted values", ylab = "Pearson residuals")

# quick summary of model parameter estimates
fastdisp(m3)
## lmer(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) + 
##     (1 | Teacher), data = dta)
##                    coef.est coef.se
## (Intercept)         3.47     0.07  
## Read_0              0.95     0.04  
## I(Read_0 * Read_0) -0.07     0.03  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  Teacher  (Intercept) 0.15    
##  School   (Intercept) 0.21    
##  Residual             0.80    
## ---
## number of obs: 777, groups: Teacher, 46; School, 20
## AIC = 1918.9

0.6 plot fix and random effect

# estimate fixed-effects parameters by simulation
m3_fe <- FEsim(m3, 1000)

# plot for fixed effects
plotFEsim(m3_fe) + 
 labs(title = "Coefficient Plot", x = "Median Effect Estimate")

# estimate random-effects parameters by simulation
m3_re <- REsim(m3)

# normality plot for random effects
plotREsim(m3_re)

0.7 ordinary regression residual plot

summary(m0 <- lm(Read_1 ~ Read_0 + I(Read_0*Read_0), data = dta))
## 
## Call:
## lm(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0), data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0675 -0.4619  0.0146  0.5319  2.8194 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.46873    0.04323  80.242   <2e-16 ***
## Read_0              0.96445    0.03541  27.240   <2e-16 ***
## I(Read_0 * Read_0) -0.07293    0.03104  -2.349   0.0191 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8397 on 774 degrees of freedom
## Multiple R-squared:  0.5481, Adjusted R-squared:  0.5469 
## F-statistic: 469.3 on 2 and 774 DF,  p-value: < 2.2e-16
# residual plot
plot(rstandard(m0) ~ fitted(m0), pch = 20, cex = 0.5, col = "steelblue", 
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
grid()