1005class3

0.1 data input

library(WWGbook)
data("classroom")
head(classroom)

##   sex minority mathkind mathgain   ses yearstea mathknow housepov mathprep
## 1   1        1      448       32  0.46        1       NA    0.082     2.00
## 2   0        1      460      109 -0.27        1       NA    0.082     2.00
## 3   1        1      511       56 -0.03        1       NA    0.082     2.00
## 4   0        1      449       83 -0.38        2    -0.11    0.082     3.25
## 5   0        1      425       53 -0.03        2    -0.11    0.082     3.25
## 6   1        1      450       65  0.76        2    -0.11    0.082     3.25
##   classid schoolid childid
## 1     160        1       1
## 2     160        1       2
## 3     160        1       3
## 4     217        1       4
## 5     217        1       5
## 6     217        1       6

0.2 data management

dta <- classroom[,c(12,10,11,1,2,5,3,4)]
dta[,1:4] <- apply(dta[,1:4], 2, as.factor)
dta$minority <- as.factor(ifelse(dta$minority==0, "N", "Y"))
names(dta) <- c("Child","Class","School","Sex","Minor","Ses","Pre","Gain")
str(dta)

## 'data.frame':    1190 obs. of  8 variables:
##  $ Child : chr  "1" "2" "3" "4" ...
##  $ Class : chr  "160" "160" "160" "217" ...
##  $ School: chr  "1" "1" "1" "1" ...
##  $ Sex   : chr  "1" "0" "1" "0" ...
##  $ Minor : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Ses   : num  0.46 -0.27 -0.03 -0.38 -0.03 0.76 -0.03 0.2 0.64 0.13 ...
##  $ Pre   : int  448 460 511 449 425 450 452 443 422 480 ...
##  $ Gain  : int  32 109 56 83 53 65 51 66 88 -7 ...

head(dta)

##   Child Class School Sex Minor   Ses Pre Gain
## 1     1   160      1   1     Y  0.46 448   32
## 2     2   160      1   0     Y -0.27 460  109
## 3     3   160      1   1     Y -0.03 511   56
## 4     4   217      1   0     Y -0.38 449   83
## 5     5   217      1   0     Y -0.03 425   53
## 6     6   217      1   1     Y  0.76 450   65

0.3 Gain scores by school

library(ggplot2)
ggplot(data=dta, 
       aes(x=reorder(School, Gain, median), 
           y=Gain, 
           group=School)) + 
 geom_point(size=rel(.5), 
            alpha=.5) + 
 geom_hline(yintercept=quantile(dta$Gain, 
                                probs=c(.25,.75)), 
            linetype="dotted",
            col=2) +
 labs(y="Gain score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.x=element_text(size=rel(.5),
                                angle=90, 
                                hjust=1), 
       legend.position="NONE")

0.4 School gain score means and CIs

ggplot(data=dta, 
       aes(x=reorder(School, Gain, mean), 
           y=Gain, 
           group=School)) + 
 stat_summary(fun.data="mean_cl_boot", 
              size=rel(.1))+
 geom_hline(yintercept=quantile(dta$Gain, 
                                probs=c(.25,.5,.75)), 
            linetype="dotted",
            col=2) +
 coord_flip()+
 labs(y="Gain score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.y=element_text(size=rel(.5),
                                angle=0, 
                                vjust=1), 
       legend.position="NONE")

0.5 Distribution of school averages

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

sdta <- dta %>% 
  group_by(School) %>%
  dplyr::summarize(gain_ave=mean(Gain), 
                   gain_sd2=var(Gain), n = n())

## `summarise()` ungrouping output (override with `.groups` argument)

#
ggplot(data=sdta, 
       aes(gain_ave)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$gain_ave),
          alpha=0.3, fill=8,
          col=1) +
 geom_vline(xintercept=mean(dta$Gain), 
            linetype="dashed",
            col=2) +
 geom_vline(xintercept=mean(sdta$gain_ave), 
            linetype="dotted",
            col=3) +
 labs(y="Count", 
      x="Average score by school") +
 theme_minimal()

0.6 Distributions of squared deviatons

sdta <- sdta %>% 
  mutate(dev2=(gain_ave-mean(dta$Gain))^2)
t(apply(sdta[,c(5,3)], 2, quantile, probs=c(.025, .5, .975), na.rm=T))

##                 2.5%       50%    97.5%
## dev2       0.6871764  96.69995 1084.239
## gain_sd2 231.4041667 891.00000 4828.663

#
ggplot(data=sdta, 
       aes(dev2)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$dev2),
          alpha=0.3, fill='white', col=4) +
 stat_bin(aes(gain_sd2), 
          bins=grDevices::nclass.FD(sdta$gain_sd2),
          alpha=0.3, fill=8, col=1) +
 labs(y="Count", 
      x="Deviation estimates", 
      title="within- and between-school") +
 theme_minimal()

0.7 Average of school averages

knitr::kable(t(with(sdta, c(weighted.mean(gain_ave, n), mean(gain_ave)))))

## Warning in kable_pipe(x = structure(c("57.56639", "57.01803"), .Dim =
## 1:2, .Dimnames = list(: The table should have a header (column names)

57.56639

57.01803

0.8 Parameter estimates

library(sjPlot)
m0 <- lme4::lmer(Gain ~ (1 | School), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE)

	Gain
Predictors	Estimates	CI
(Intercept)	57.58	54.76 – 60.41
Random Effects
σ²	1094.00
τ₀₀ _School	109.24
ICC	0.09
N _School	107
Observations	1190

sdta <- data.frame(School=row.names(coef(m0)$School), 
                   yhat=coef(m0)$School[,1]) %>%
        inner_join(., sdta, by="School")
str(sdta)

## 'data.frame':    107 obs. of  6 variables:
##  $ School  : chr  "1" "10" "100" "101" ...
##  $ yhat    : num  58.7 68.7 59.9 61.6 55.4 ...
##  $ gain_ave: num  59.6 74.9 61.7 64.1 53.5 ...
##  $ gain_sd2: num  919 353 809 1146 1275 ...
##  $ n       : int  11 18 13 16 11 8 6 10 2 10 ...
##  $ dev2    : num  4.28 302 17.02 42.2 16.91 ...

library(lme4)

## Loading required package: Matrix

ggplot(sdta, 
       aes(x=as.integer(School), 
           xend=as.integer(School), 
           y=gain_ave, 
           yend=yhat)) + 
 geom_point(aes(x=as.integer(School),
                y=gain_ave), 
            pch=1, size=rel(1))+
 geom_segment(arrow=arrow(length=unit(0.1, "cm")))+
 geom_hline(yintercept=fixef(m0)[[1]], 
            linetype="dashed", col=2) +
 coord_flip()+
 labs(x="School", 
      y="School averages") +
 theme_minimal()

0.9 model 0

#
m0 <- lme4::lmer(Gain ~ (1 | Class) + (1 | School), data=dta)
summary(m0)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Gain ~ (1 | Class) + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 11768.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6441 -0.5984 -0.0336  0.5334  5.6335 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Class    (Intercept)   99.22   9.961  
##  School   (Intercept)   77.50   8.804  
##  Residual             1028.23  32.066  
## Number of obs: 1190, groups:  Class, 312; School, 107
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   57.427      1.443   39.79

#
sjPlot::tab_model(m0, 
          show.intercept = TRUE,
          show.est = TRUE,
          show.se = TRUE,
          show.df = TRUE,
          show.stat = TRUE,
          show.p = TRUE,
          show.aic = TRUE,
          show.aicc = TRUE)

	Gain
Predictors	Estimates	std. Error	CI	Statistic	p	df
(Intercept)	57.43	1.44	54.60 – 60.26	39.79	<0.001	1186.00
Random Effects
σ²	1028.23
τ₀₀ _Class	99.22
τ₀₀ _School	77.50
ICC	0.15
N _Class	312
N _School	107
Observations	1190
Marginal R² / Conditional R²	0.000 / 0.147
AIC	11776.799

0.10 model 1

m1 <- lme4::lmer(Gain ~ Pre + Sex + Minor + Ses + (1 | Class) + (1 | School), data=dta)
sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)

	Gain		Gain
Predictors	Estimates	std. Error	Estimates	std. Error
(Intercept)	57.43	1.44	282.79	10.85
Pre			-0.47	0.02
Sex [1]			-1.25	1.66
Minor [Y]			-8.26	2.34
Ses			5.35	1.24
Random Effects
σ²	1028.23		734.57
τ₀₀	99.22 _Class		83.28 _Class
	77.50 _School		75.20 _School
ICC	0.15		0.18

1005class3

jenny hsu

0.1 data input

0.2 data management

0.3 Gain scores by school

0.4 School gain score means and CIs

0.5 Distribution of school averages

0.6 Distributions of squared deviatons

0.7 Average of school averages

0.8 Parameter estimates

0.9 model 0

0.10 model 1