0.1 data input

library(WWGbook)
data("classroom")
head(classroom)
##   sex minority mathkind mathgain   ses yearstea mathknow housepov mathprep
## 1   1        1      448       32  0.46        1       NA    0.082     2.00
## 2   0        1      460      109 -0.27        1       NA    0.082     2.00
## 3   1        1      511       56 -0.03        1       NA    0.082     2.00
## 4   0        1      449       83 -0.38        2    -0.11    0.082     3.25
## 5   0        1      425       53 -0.03        2    -0.11    0.082     3.25
## 6   1        1      450       65  0.76        2    -0.11    0.082     3.25
##   classid schoolid childid
## 1     160        1       1
## 2     160        1       2
## 3     160        1       3
## 4     217        1       4
## 5     217        1       5
## 6     217        1       6

0.2 data management

dta <- classroom[,c(12,10,11,1,2,5,3,4)]
dta[,1:4] <- apply(dta[,1:4], 2, as.factor)
dta$minority <- as.factor(ifelse(dta$minority==0, "N", "Y"))
names(dta) <- c("Child","Class","School","Sex","Minor","Ses","Pre","Gain")
str(dta)
## 'data.frame':    1190 obs. of  8 variables:
##  $ Child : chr  "1" "2" "3" "4" ...
##  $ Class : chr  "160" "160" "160" "217" ...
##  $ School: chr  "1" "1" "1" "1" ...
##  $ Sex   : chr  "1" "0" "1" "0" ...
##  $ Minor : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Ses   : num  0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
##  $ Pre   : int  448 460 511 449 425 450 452 443 422 480 ...
##  $ Gain  : int  32 109 56 83 53 65 51 66 88 -7 ...
head(dta)
##   Child Class School Sex Minor   Ses Pre Gain
## 1     1   160      1   1     Y  0.46 448   32
## 2     2   160      1   0     Y -0.27 460  109
## 3     3   160      1   1     Y -0.03 511   56
## 4     4   217      1   0     Y -0.38 449   83
## 5     5   217      1   0     Y -0.03 425   53
## 6     6   217      1   1     Y  0.76 450   65

0.3 Gain scores by school

library(ggplot2)
ggplot(data=dta, 
       aes(x=reorder(School, Gain, median), 
           y=Gain, 
           group=School)) + 
 geom_point(size=rel(.5), 
            alpha=.5) + 
 geom_hline(yintercept=quantile(dta$Gain, 
                                probs=c(.25,.75)), 
            linetype="dotted",
            col=2) +
 labs(y="Gain score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.x=element_text(size=rel(.5),
                                angle=90, 
                                hjust=1), 
       legend.position="NONE")

0.4 School gain score means and CIs

ggplot(data=dta, 
       aes(x=reorder(School, Gain, mean), 
           y=Gain, 
           group=School)) + 
 stat_summary(fun.data="mean_cl_boot", 
              size=rel(.1))+
 geom_hline(yintercept=quantile(dta$Gain, 
                                probs=c(.25,.5,.75)), 
            linetype="dotted",
            col=2) +
 coord_flip()+
 labs(y="Gain score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.y=element_text(size=rel(.5),
                                angle=0, 
                                vjust=1), 
       legend.position="NONE")

0.5 Distribution of school averages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
sdta <- dta %>% 
  group_by(School) %>%
  dplyr::summarize(gain_ave=mean(Gain), 
                   gain_sd2=var(Gain), n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
#
ggplot(data=sdta, 
       aes(gain_ave)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$gain_ave),
          alpha=0.3, fill=8,
          col=1) +
 geom_vline(xintercept=mean(dta$Gain), 
            linetype="dashed",
            col=2) +
 geom_vline(xintercept=mean(sdta$gain_ave), 
            linetype="dotted",
            col=3) +
 labs(y="Count", 
      x="Average score by school") +
 theme_minimal()

0.6 Distributions of squared deviatons

sdta <- sdta %>% 
  mutate(dev2=(gain_ave-mean(dta$Gain))^2)
t(apply(sdta[,c(5,3)], 2, quantile, probs=c(.025, .5, .975), na.rm=T))
##                 2.5%       50%    97.5%
## dev2       0.6871764  96.69995 1084.239
## gain_sd2 231.4041667 891.00000 4828.663
#
ggplot(data=sdta, 
       aes(dev2)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$dev2),
          alpha=0.3, fill='white', col=4) +
 stat_bin(aes(gain_sd2), 
          bins=grDevices::nclass.FD(sdta$gain_sd2),
          alpha=0.3, fill=8, col=1) +
 labs(y="Count", 
      x="Deviation estimates", 
      title="within- and between-school") +
 theme_minimal()

0.7 Average of school averages

knitr::kable(t(with(sdta, c(weighted.mean(gain_ave, n), mean(gain_ave)))))
## Warning in kable_pipe(x = structure(c("57.56639", "57.01803"), .Dim =
## 1:2, .Dimnames = list(: The table should have a header (column names)
57.56639 57.01803

0.8 Parameter estimates

library(sjPlot)
m0 <- lme4::lmer(Gain ~ (1 | School), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE)
  Gain
Predictors Estimates CI
(Intercept) 57.58 54.76 – 60.41
Random Effects
σ2 1094.00
τ00 School 109.24
ICC 0.09
N School 107
Observations 1190
sdta <- data.frame(School=row.names(coef(m0)$School), 
                   yhat=coef(m0)$School[,1]) %>%
        inner_join(., sdta, by="School")
str(sdta)
## 'data.frame':    107 obs. of  6 variables:
##  $ School  : chr  "1" "10" "100" "101" ...
##  $ yhat    : num  58.7 68.7 59.9 61.6 55.4 ...
##  $ gain_ave: num  59.6 74.9 61.7 64.1 53.5 ...
##  $ gain_sd2: num  919 353 809 1146 1275 ...
##  $ n       : int  11 18 13 16 11 8 6 10 2 10 ...
##  $ dev2    : num  4.28 302 17.02 42.2 16.91 ...
library(lme4)
## Loading required package: Matrix
ggplot(sdta, 
       aes(x=as.integer(School), 
           xend=as.integer(School), 
           y=gain_ave, 
           yend=yhat)) + 
 geom_point(aes(x=as.integer(School),
                y=gain_ave), 
            pch=1, size=rel(1))+
 geom_segment(arrow=arrow(length=unit(0.1, "cm")))+
 geom_hline(yintercept=fixef(m0)[[1]], 
            linetype="dashed", col=2) +
 coord_flip()+
 labs(x="School", 
      y="School averages") +
 theme_minimal()

0.9 model 0

#
m0 <- lme4::lmer(Gain ~ (1 | Class) + (1 | School), data=dta)
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Gain ~ (1 | Class) + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 11768.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6441 -0.5984 -0.0336  0.5334  5.6335 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Class    (Intercept)   99.22   9.961  
##  School   (Intercept)   77.50   8.804  
##  Residual             1028.23  32.066  
## Number of obs: 1190, groups:  Class, 312; School, 107
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   57.427      1.443   39.79
#
sjPlot::tab_model(m0, 
          show.intercept = TRUE,
          show.est = TRUE,
          show.se = TRUE,
          show.df = TRUE,
          show.stat = TRUE,
          show.p = TRUE,
          show.aic = TRUE,
          show.aicc = TRUE)
  Gain
Predictors Estimates std. Error CI Statistic p df
(Intercept) 57.43 1.44 54.60 – 60.26 39.79 <0.001 1186.00
Random Effects
σ2 1028.23
τ00 Class 99.22
τ00 School 77.50
ICC 0.15
N Class 312
N School 107
Observations 1190
Marginal R2 / Conditional R2 0.000 / 0.147
AIC 11776.799

0.10 model 1

m1 <- lme4::lmer(Gain ~ Pre + Sex + Minor + Ses + (1 | Class) + (1 | School), data=dta)
sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  Gain Gain
Predictors Estimates std. Error Estimates std. Error
(Intercept) 57.43 1.44 282.79 10.85
Pre -0.47 0.02
Sex [1] -1.25 1.66
Minor [Y] -8.26 2.34
Ses 5.35 1.24
Random Effects
σ2 1028.23 734.57
τ00 99.22 Class 83.28 Class
77.50 School 75.20 School
ICC 0.15 0.18