library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)

dta <- read.csv("D:/sheu/df_output_cleaned.csv", header=T)
dta<-dta[-1]
head(dta)
##   studid sector urban gender classtype schoolID classID mathassesment1
## 1      1      0     2      2         1     t009   t0489              2
## 2      4      0     3      1         1     t304   t0129              3
## 3      6      0     2      2         2     t173   t1004              3
## 4      7      0     3      2         1     t218   t0414              2
## 5      8      0     3      2         1     t136   t0930              3
## 6     11      0     3      1         1     t205   t1071              3
##   mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1          2         2             1       7        5              2          3
## 2          3         2             2      10        5              2          2
## 3          3         1             3      10        1              2          3
## 4          2         2             1       7        6              2          3
## 5          3         1             1       8        6              3          3
## 6          2         1             3       9        2              3          3
##   mathwork2 mathquestion2 mathjr3
## 1         1             1       7
## 2         3             1       8
## 3         1             1       7
## 4         1             2       8
## 5         1             2       9
## 6         2             3      11
str(dta)
## 'data.frame':    7059 obs. of  18 variables:
##  $ studid        : int  1 4 6 7 8 11 12 13 23 27 ...
##  $ sector        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ urban         : int  2 3 2 3 3 3 1 2 3 2 ...
##  $ gender        : int  2 1 2 2 2 1 2 2 1 2 ...
##  $ classtype     : int  1 1 2 1 1 1 1 3 1 1 ...
##  $ schoolID      : chr  "t009" "t304" "t173" "t218" ...
##  $ classID       : chr  "t0489" "t0129" "t1004" "t0414" ...
##  $ mathassesment1: int  2 3 3 2 3 3 2 3 3 2 ...
##  $ mathstudy1    : int  2 3 3 2 3 2 2 3 3 2 ...
##  $ mathwork1     : int  2 2 1 2 1 1 2 2 1 2 ...
##  $ mathquestion1 : int  1 2 3 1 1 3 1 1 2 1 ...
##  $ premath       : int  7 10 10 7 8 9 7 9 9 7 ...
##  $ yearstea      : int  5 5 1 6 6 2 4 3 6 5 ...
##  $ mathassesment2: int  2 2 2 2 3 3 3 2 2 1 ...
##  $ mathstudy2    : int  3 2 3 3 3 3 2 2 2 1 ...
##  $ mathwork2     : int  1 3 1 1 1 2 2 3 2 2 ...
##  $ mathquestion2 : int  1 1 1 2 2 3 3 1 1 1 ...
##  $ mathjr3       : int  7 8 7 8 9 11 10 8 7 5 ...
dta[,1:7] <- apply(dta[,1:7], 2, as.factor)
str(dta)
## 'data.frame':    7059 obs. of  18 variables:
##  $ studid        : chr  "    1" "    4" "    6" "    7" ...
##  $ sector        : chr  "0" "0" "0" "0" ...
##  $ urban         : chr  "2" "3" "2" "3" ...
##  $ gender        : chr  "2" "1" "2" "2" ...
##  $ classtype     : chr  " 1" " 1" " 2" " 1" ...
##  $ schoolID      : chr  "t009" "t304" "t173" "t218" ...
##  $ classID       : chr  "t0489" "t0129" "t1004" "t0414" ...
##  $ mathassesment1: int  2 3 3 2 3 3 2 3 3 2 ...
##  $ mathstudy1    : int  2 3 3 2 3 2 2 3 3 2 ...
##  $ mathwork1     : int  2 2 1 2 1 1 2 2 1 2 ...
##  $ mathquestion1 : int  1 2 3 1 1 3 1 1 2 1 ...
##  $ premath       : int  7 10 10 7 8 9 7 9 9 7 ...
##  $ yearstea      : int  5 5 1 6 6 2 4 3 6 5 ...
##  $ mathassesment2: int  2 2 2 2 3 3 3 2 2 1 ...
##  $ mathstudy2    : int  3 2 3 3 3 3 2 2 2 1 ...
##  $ mathwork2     : int  1 3 1 1 1 2 2 3 2 2 ...
##  $ mathquestion2 : int  1 1 1 2 2 3 3 1 1 1 ...
##  $ mathjr3       : int  7 8 7 8 9 11 10 8 7 5 ...
dta$sector <- as.factor(ifelse(dta$sector==0, "Public", "Private"))
head(dta)
##   studid sector urban gender classtype schoolID classID mathassesment1
## 1      1 Public     2      2         1     t009   t0489              2
## 2      4 Public     3      1         1     t304   t0129              3
## 3      6 Public     2      2         2     t173   t1004              3
## 4      7 Public     3      2         1     t218   t0414              2
## 5      8 Public     3      2         1     t136   t0930              3
## 6     11 Public     3      1         1     t205   t1071              3
##   mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1          2         2             1       7        5              2          3
## 2          3         2             2      10        5              2          2
## 3          3         1             3      10        1              2          3
## 4          2         2             1       7        6              2          3
## 5          3         1             1       8        6              3          3
## 6          2         1             3       9        2              3          3
##   mathwork2 mathquestion2 mathjr3
## 1         1             1       7
## 2         3             1       8
## 3         1             1       7
## 4         1             2       8
## 5         1             2       9
## 6         2             3      11
dta$gender <- as.factor(ifelse(dta$gender==1, "M", "F"))

head(dta)
##   studid sector urban gender classtype schoolID classID mathassesment1
## 1      1 Public     2      F         1     t009   t0489              2
## 2      4 Public     3      M         1     t304   t0129              3
## 3      6 Public     2      F         2     t173   t1004              3
## 4      7 Public     3      F         1     t218   t0414              2
## 5      8 Public     3      F         1     t136   t0930              3
## 6     11 Public     3      M         1     t205   t1071              3
##   mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1          2         2             1       7        5              2          3
## 2          3         2             2      10        5              2          2
## 3          3         1             3      10        1              2          3
## 4          2         2             1       7        6              2          3
## 5          3         1             1       8        6              3          3
## 6          2         1             3       9        2              3          3
##   mathwork2 mathquestion2 mathjr3
## 1         1             1       7
## 2         3             1       8
## 3         1             1       7
## 4         1             2       8
## 5         1             2       9
## 6         2             3      11
dta$classtype <- gsub("1", "N", dta$classtype)
dta$classtype <- gsub("2", "G", dta$classtype)
dta$urban <- gsub("1", "village", dta$urban)
dta$urban <- gsub("2", "town", dta$urban)
dta$urban <- gsub("3", "city", dta$urban)

dta$yearstea <- gsub("3", "5", dta$yearstea)
dta$yearstea <- gsub("4", "8", dta$yearstea)
dta$yearstea <- gsub("5", "15", dta$yearstea)
dta$yearstea <- gsub("6", "20", dta$yearstea)

dta$yearstea <-as.numeric(dta$yearstea)
dta$classtype <- gsub("3", "A", dta$classtype)
dta$classtype <- str_replace(dta$classtype,"97","F") 
dta$classtype <- str_replace(dta$classtype,"99","F") 
dta$classtype <-as.character(dta$classtype)

head(dta)
##   studid sector urban gender classtype schoolID classID mathassesment1
## 1      1 Public  town      F         N     t009   t0489              2
## 2      4 Public  city      M         N     t304   t0129              3
## 3      6 Public  town      F         G     t173   t1004              3
## 4      7 Public  city      F         N     t218   t0414              2
## 5      8 Public  city      F         N     t136   t0930              3
## 6     11 Public  city      M         N     t205   t1071              3
##   mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1          2         2             1       7       15              2          3
## 2          3         2             2      10       15              2          2
## 3          3         1             3      10        1              2          3
## 4          2         2             1       7       20              2          3
## 5          3         1             1       8       20              3          3
## 6          2         1             3       9        2              3          3
##   mathwork2 mathquestion2 mathjr3
## 1         1             1       7
## 2         3             1       8
## 3         1             1       7
## 4         1             2       8
## 5         1             2       9
## 6         2             3      11
str(dta)
## 'data.frame':    7059 obs. of  18 variables:
##  $ studid        : chr  "    1" "    4" "    6" "    7" ...
##  $ sector        : Factor w/ 2 levels "Private","Public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ urban         : chr  "town" "city" "town" "city" ...
##  $ gender        : Factor w/ 2 levels "F","M": 1 2 1 1 1 2 1 1 2 1 ...
##  $ classtype     : chr  " N" " N" " G" " N" ...
##  $ schoolID      : chr  "t009" "t304" "t173" "t218" ...
##  $ classID       : chr  "t0489" "t0129" "t1004" "t0414" ...
##  $ mathassesment1: int  2 3 3 2 3 3 2 3 3 2 ...
##  $ mathstudy1    : int  2 3 3 2 3 2 2 3 3 2 ...
##  $ mathwork1     : int  2 2 1 2 1 1 2 2 1 2 ...
##  $ mathquestion1 : int  1 2 3 1 1 3 1 1 2 1 ...
##  $ premath       : int  7 10 10 7 8 9 7 9 9 7 ...
##  $ yearstea      : num  15 15 1 20 20 2 8 15 20 15 ...
##  $ mathassesment2: int  2 2 2 2 3 3 3 2 2 1 ...
##  $ mathstudy2    : int  3 2 3 3 3 3 2 2 2 1 ...
##  $ mathwork2     : int  1 3 1 1 1 2 2 3 2 2 ...
##  $ mathquestion2 : int  1 1 1 2 2 3 3 1 1 1 ...
##  $ mathjr3       : int  7 8 7 8 9 11 10 8 7 5 ...
str(dta)
## 'data.frame':    7059 obs. of  18 variables:
##  $ studid        : chr  "    1" "    4" "    6" "    7" ...
##  $ sector        : Factor w/ 2 levels "Private","Public": 2 2 2 2 2 2 2 2 2 2 ...
##  $ urban         : chr  "town" "city" "town" "city" ...
##  $ gender        : Factor w/ 2 levels "F","M": 1 2 1 1 1 2 1 1 2 1 ...
##  $ classtype     : chr  " N" " N" " G" " N" ...
##  $ schoolID      : chr  "t009" "t304" "t173" "t218" ...
##  $ classID       : chr  "t0489" "t0129" "t1004" "t0414" ...
##  $ mathassesment1: int  2 3 3 2 3 3 2 3 3 2 ...
##  $ mathstudy1    : int  2 3 3 2 3 2 2 3 3 2 ...
##  $ mathwork1     : int  2 2 1 2 1 1 2 2 1 2 ...
##  $ mathquestion1 : int  1 2 3 1 1 3 1 1 2 1 ...
##  $ premath       : int  7 10 10 7 8 9 7 9 9 7 ...
##  $ yearstea      : num  15 15 1 20 20 2 8 15 20 15 ...
##  $ mathassesment2: int  2 2 2 2 3 3 3 2 2 1 ...
##  $ mathstudy2    : int  3 2 3 3 3 3 2 2 2 1 ...
##  $ mathwork2     : int  1 3 1 1 1 2 2 3 2 2 ...
##  $ mathquestion2 : int  1 1 1 2 2 3 3 1 1 1 ...
##  $ mathjr3       : int  7 8 7 8 9 11 10 8 7 5 ...
dta %>% 
  furniture::table1(premath, mathjr3, yearstea,   
                    splitby= ~ gender,       
                    test=FALSE )
## 
## ────────────────────────────────
##                 gender 
##           F          M         
##           n = 3379   n = 3680  
##  premath                       
##           8.7 (1.6)  8.7 (1.7) 
##  mathjr3                       
##           8.5 (1.9)  8.4 (2.0) 
##  yearstea                      
##           12.4 (6.0) 12.0 (6.2)
## ────────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3, yearstea,   
                    splitby= ~ urban,       
                    test=FALSE )
## 
## ───────────────────────────────────────────
##                       urban 
##           city       town       village   
##           n = 4052   n = 2608   n = 399   
##  premath                                  
##           8.8 (1.7)  8.7 (1.7)  8.3 (1.8) 
##  mathjr3                                  
##           8.6 (2.0)  8.3 (1.9)  8.0 (1.9) 
##  yearstea                                 
##           12.8 (6.0) 11.6 (6.1) 10.3 (6.0)
## ───────────────────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3, yearstea,   
                    splitby= ~ sector,       
                    test=FALSE )
## 
## ────────────────────────────────
##                 sector 
##           Private    Public    
##           n = 837    n = 6222  
##  premath                       
##           9.2 (1.5)  8.7 (1.7) 
##  mathjr3                       
##           8.9 (1.8)  8.4 (2.0) 
##  yearstea                      
##           12.1 (6.0) 12.2 (6.1)
## ────────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3,   
                    splitby= ~ gender,       
                    test=FALSE )
## 
## ─────────────────────────────
##               gender 
##          F         M        
##          n = 3379  n = 3680 
##  premath                    
##          8.7 (1.6) 8.7 (1.7)
##  mathjr3                    
##          8.5 (1.9) 8.4 (2.0)
## ─────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3,   
                    splitby= ~ urban,       
                    test=FALSE )
## 
## ───────────────────────────────────────
##                    urban 
##          city      town      village  
##          n = 4052  n = 2608  n = 399  
##  premath                              
##          8.8 (1.7) 8.7 (1.7) 8.3 (1.8)
##  mathjr3                              
##          8.6 (2.0) 8.3 (1.9) 8.0 (1.9)
## ───────────────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3,   
                    splitby= ~ sector,       
                    test=FALSE )
## 
## ─────────────────────────────
##               sector 
##          Private   Public   
##          n = 837   n = 6222 
##  premath                    
##          9.2 (1.5) 8.7 (1.7)
##  mathjr3                    
##          8.9 (1.8) 8.4 (2.0)
## ─────────────────────────────
dta %>% 
  furniture::table1(premath, mathjr3,   
                    splitby= ~ yearstea,       
                    test=FALSE )
## 
## ───────────────────────────────────────────────────────────
##                             yearstea 
##          1         2         8         15        20       
##          n = 320   n = 978   n = 1212  n = 3353  n = 1196 
##  premath                                                  
##          8.7 (1.7) 8.7 (1.7) 8.7 (1.6) 8.8 (1.7) 8.7 (1.7)
##  mathjr3                                                  
##          8.1 (1.9) 8.3 (1.9) 8.5 (1.9) 8.5 (2.0) 8.4 (2.0)
## ───────────────────────────────────────────────────────────
ggplot(data=dta, 
       aes(x=reorder(schoolID, mathjr3, median), 
           y=mathjr3, 
           group=schoolID)) + 
 geom_point(size=rel(.5), 
            alpha=.5) + 
 geom_hline(yintercept=quantile(dta$mathjr3, 
                                probs=c(.25,.75)), 
            linetype="dotted",
            col=2) +
 labs(y="9th score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.x=element_text(size=rel(.5),
                                angle=90, 
                                hjust=1), 
       legend.position="NONE")

ggplot(data=dta, 
       aes(x=reorder(schoolID, mathjr3, mean), 
           y=mathjr3, 
           group=schoolID)) + 
 stat_summary(fun.data="mean_cl_boot", 
              size=rel(.1))+
 geom_hline(yintercept=quantile(dta$mathjr3, 
                                probs=c(.25,.5,.75)), 
            linetype="dotted",
            col=2) +
 coord_flip()+
 labs(y="9th score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.y=element_text(size=rel(.5),
                                angle=0, 
                                vjust=1), 
       legend.position="NONE")

ggplot(data=dta, 
       aes(x=reorder(schoolID, mathjr3, mean), 
           y=mathjr3, 
           group=schoolID)) + 
 stat_summary(fun.data="mean_cl_boot", 
              size=rel(.1)) +
 coord_flip()+
 labs(y="9th score", 
      x="School ID") +
 theme_minimal() +
 theme(axis.text.y=element_text(size=rel(.5),
                                angle=0, 
                                vjust=1), 
       legend.position="NONE")

sdta <- dta %>% 
  group_by(schoolID) %>%
  dplyr::summarize(jr3_ave=mean(mathjr3), 
                   jr3_sd2=var(mathjr3),
                   n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
ggplot(data=sdta, 
       aes(jr3_ave)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$jr3_ave),
          alpha=0.3, fill=8,
          col=1) +
 geom_vline(xintercept=mean(dta$mathjr3), 
            linetype="dashed",
            col=2) +
 geom_vline(xintercept=mean(sdta$jr3_ave), 
            linetype="dotted",
            col=3) +
 labs(y="Count", 
      x="Average score by school") +
 theme_minimal()

head(sdta)
## # A tibble: 6 x 4
##   schoolID jr3_ave jr3_sd2     n
##   <chr>      <dbl>   <dbl> <int>
## 1 t001        8.79    4.17    33
## 2 t002        8.57    4.81    37
## 3 t004        8.78    3.92    32
## 4 t005        8.46    2.85    61
## 5 t006        8.17    3.35    41
## 6 t009        8.45    4.71    62
sdta <- sdta %>% 
  mutate(dev2=(jr3_ave-mean(dta$mathjr3))^2)
t(apply(sdta[,c(5,3)], 2, quantile, probs=c(.025, .5, .975), na.rm=T))
##                 2.5%        50%    97.5%
## dev2    0.0004679167 0.03589969 0.545860
## jr3_sd2 2.3978407557 3.84366327 5.221569
ggplot(data=sdta, 
       aes(dev2)) + 
 stat_bin(bins=grDevices::nclass.FD(sdta$dev2),
          alpha=0.3, fill='white', col=4) +
 stat_bin(aes(jr3_sd2), 
          bins=grDevices::nclass.FD(sdta$jr3_sd2),
          alpha=0.3, fill=8, col=1) +
 labs(y="Count", 
      x="Deviation estimates", 
      title="within- and between-school") +
 theme_minimal()

m0 <- lme4::lmer(mathjr3 ~ (1 | schoolID), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE,digits.re = 3)
  mathjr3
Predictors Estimates CI
(Intercept) 8.42 8.38 – 8.47
Random Effects
σ2 3.807
τ00 schoolID 0.001
ICC 0.000
N schoolID 167
Observations 7059
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ (1 | schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29476.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2720 -0.7296  0.2915  0.8066  3.3754 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolID (Intercept) 0.001197 0.03459 
##  Residual             3.807165 1.95120 
## Number of obs: 7059, groups:  schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   8.4232     0.0234     360

the estimated mean of 9th scores for all schools is 8.42

The variation between student within the same school is estimated to be 3.81

The estimated between school variance in 9th math score by school is 0.001.

The correlation between 9th scores of student attending the same school is 0.00 .

dta <- dta %>% 
  mutate(premath_a = mean(premath)) %>%
  group_by(schoolID) %>% 
  mutate(premath_sa=mean(premath) - premath_a, #各校平均-整體平均
         premath_c=premath - mean(premath)) #各校平均
head(dta)[1:5]
## # A tibble: 6 x 5
##   studid  sector urban gender classtype
##   <chr>   <fct>  <chr> <fct>  <chr>    
## 1 "    1" Public town  F      " N"     
## 2 "    4" Public city  M      " N"     
## 3 "    6" Public town  F      " G"     
## 4 "    7" Public city  F      " N"     
## 5 "    8" Public city  F      " N"     
## 6 "   11" Public city  M      " N"
head(dta)[10:13]
## # A tibble: 6 x 4
##   mathwork1 mathquestion1 premath yearstea
##       <int>         <int>   <int>    <dbl>
## 1         2             1       7       15
## 2         2             2      10       15
## 3         1             3      10        1
## 4         2             1       7       20
## 5         1             1       8       20
## 6         1             3       9        2
head(dta)[18:21]
## # A tibble: 6 x 4
##   mathjr3 premath_a premath_sa premath_c
##     <int>     <dbl>      <dbl>     <dbl>
## 1       7      8.72     0.0527    -1.77 
## 2       8      8.72     0.0214     1.26 
## 3       7      8.72     0.450      0.829
## 4       8      8.72    -0.0944    -1.63 
## 5       9      8.72     0.167     -0.889
## 6      11      8.72    -0.235      0.514
ggplot(dta, 
       aes(premath_c, mathjr3, 
           group=schoolID)) +
  geom_point(alpha=.2)+
  stat_smooth(method="lm", formula=y~x, 
              se=FALSE, size=rel(.5), col=8)+
  stat_smooth(aes(x=premath_sa, y=mathjr3, 
                  group=1),
              method="lm", formula=y~x, 
              se=FALSE, size=rel(.5), col=5)+
  labs(x="˙7th score (school-centered)",
       y="9th score")+
  theme_minimal()

m1 <- lme4::lmer(mathjr3 ~ premath_c + (1 | schoolID), data=dta)

sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.42 0.02
premath_c 0.57 0.01
Random Effects
σ2 3.81 2.86
τ00 0.00 schoolID 0.02 schoolID
ICC 0.00 0.01
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_c + (1 | schoolID)
##    Data: dta
## 
## REML criterion at convergence: 27500.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8223 -0.6672  0.0080  0.6785  3.7219 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolID (Intercept) 0.02101  0.145   
##  Residual             2.85781  1.691   
## Number of obs: 7059, groups:  schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  8.41982    0.02327  361.84
## premath_c    0.57473    0.01199   47.93
## 
## Correlation of Fixed Effects:
##           (Intr)
## premath_c 0.000

The estimated mean of 9th scores for all students is 8.42 with an SD of √2.86+0.02=1.69.

For one point increase in 7th score, 9th score increase by 0.57 point, on average.

Adjusting for individual 7th scores, correlation between 9th scores among children attending the same school is 0.01.

About 24.93% (= 3.81−2.86/3.81) of variance in 9th scores can be attributed to differences in 7th scores among children attending different schools

m1g <- lme4::lmer(mathjr3 ~ gender+premath_c + (1 | schoolID), data=dta)
summary(m1g)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ gender + premath_c + (1 | schoolID)
##    Data: dta
## 
## REML criterion at convergence: 27495.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7883 -0.6718  0.0075  0.6817  3.7606 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolID (Intercept) 0.02094  0.1447  
##  Residual             2.85444  1.6895  
## Number of obs: 7059, groups:  schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  8.48438    0.03136 270.549
## genderM     -0.12378    0.04035  -3.068
## premath_c    0.57473    0.01198  47.954
## 
## Correlation of Fixed Effects:
##           (Intr) gendrM
## genderM   -0.671       
## premath_c  0.000  0.000
m2 <- lme4::lmer(mathjr3 ~ premath_sa + (1 | schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_sa + (1 | schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29426
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4837 -0.7398  0.1428  0.7603  3.5544 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolID (Intercept) 0.00     0.000   
##  Residual             3.78     1.944   
## Number of obs: 7059, groups:  schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  8.42343    0.02314 364.001
## premath_sa   0.69738    0.09530   7.318
## 
## Correlation of Fixed Effects:
##            (Intr)
## premath_sa 0.000 
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m0, m2, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE,digits.re = 3)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.42 0.02
premath_sa 0.70 0.10
Random Effects
σ2 3.807 3.780
τ00 0.001 schoolID 0.000 schoolID
ICC 0.000  

The estimated mean of 9th scores for all schools is 8.42. On average, there is a increase of 0.70 point on 9th score from an increase of one point in 7th average score.

m3 <- lme4::lmer(mathjr3 ~ premath_c + premath_sa + (1 | schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_c + premath_sa + (1 | schoolID)
##    Data: dta
## 
## REML criterion at convergence: 27439.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7422 -0.6717 -0.0027  0.6875  3.8353 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolID (Intercept) 0.00     0.000   
##  Residual             2.85     1.688   
## Number of obs: 7059, groups:  schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  8.42343    0.02009 419.183
## premath_c    0.57473    0.01198  47.988
## premath_sa   0.69738    0.08275   8.427
## 
## Correlation of Fixed Effects:
##            (Intr) prmth_c
## premath_c  0.000         
## premath_sa 0.000  0.000  
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m0, m3, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.42 0.02
premath_c 0.57 0.01
premath_sa 0.70 0.08
Random Effects
σ2 3.81 2.85
τ00 0.00 schoolID 0.00 schoolID
ICC 0.00  

It is significant.

#加入班級
m3L0 <- lme4::lmer(mathjr3 ~ 1 + (1 | schoolID/classID), data=dta)
summary(m3L0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ 1 + (1 | schoolID/classID)
##    Data: dta
## 
## REML criterion at convergence: 29476.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2861 -0.7291  0.2851  0.8049  3.3655 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  classID:schoolID (Intercept) 5.211e-03 0.0721899
##  schoolID         (Intercept) 4.159e-08 0.0002039
##  Residual                     3.803e+00 1.9501666
## Number of obs: 7059, groups:  classID:schoolID, 612; schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  8.42310    0.02341   359.8
sjPlot::tab_model(m0, m3L0, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE , digits.re = 3)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.42 0.02
Random Effects
σ2 3.807 3.803
τ00 0.001 schoolID 0.005 classID:schoolID
  0.000 schoolID
ICC 0.000 0.001
# class level 的effect明顯是由個人來的

9th scores of any two children from the same class within the same school has a correlation of 0.013 (= 0.005+0/3.808)

9th scores of any two children from the same school has a correlation of 0

ggplot(dta, 
       aes(yearstea, 
           mathjr3)) +
  geom_point(alpha=.3)+
  stat_smooth(method="lm", 
              formula=y~x, se=FALSE,
              size=rel(.3), col="purple")+
  labs(x="Teaching experience (in years)",
       y="9th score")+
  theme_minimal()

老師教學年限與學生成績無關

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
m3L1 <- lmer(mathjr3 ~ yearstea + (1 | schoolID) + (1| classID:schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m3L0, m3L1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE, show.re.var =FALSE, show.icc = FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.33 0.05
yearstea 0.01 0.00
summary(m3L1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ yearstea + (1 | schoolID) + (1 | classID:schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29481.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3065 -0.7393  0.2604  0.7939  3.3557 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  classID:schoolID (Intercept) 0.004509 0.06715 
##  schoolID         (Intercept) 0.000000 0.00000 
##  Residual                     3.802093 1.94990 
## Number of obs: 7059, groups:  classID:schoolID, 612; schoolID, 167
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 8.327338   0.052057  159.97
## yearstea    0.007846   0.003809    2.06
## 
## Correlation of Fixed Effects:
##          (Intr)
## yearstea -0.893
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular

在一次驗證了老師教學年限與成績無關

m3L3 <- lmer(mathjr3 ~ premath_c + premath_sa + sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m3L0, m3L3, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.67 0.06
premath_c 0.57 0.01
premath_sa 0.69 0.08
sector [Public] -0.28 0.06
Random Effects
σ2 3.80 2.84
τ00 0.01 classID:schoolID 0.00 classID:schoolID
0.00 schoolID 0.00 schoolID
ICC 0.00  
ggplot(dta, aes(premath_c, mathjr3, color = sector)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered 7th score", y = "9th Math score") +
 theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'

The effect of premath_c on 9th score for student in public school is similar than the student in private school.

ggplot(dta, aes(premath_c, mathjr3, color = classtype)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered 7th score", y = "9th Math score") +
 theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'

ggplot(dta, aes(premath_c, mathjr3, color = urban)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered 7th score", y = "9th Math score") +
 theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'

City is higher than town and village.

m3L4 <- lmer(mathjr3 ~ urban + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban + (1 | schoolID) + (1 | classID:schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29434.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3481 -0.7934  0.2243  0.7439  3.6070 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  classID:schoolID (Intercept) 0.0005004 0.02237 
##  schoolID         (Intercept) 0.0017061 0.04131 
##  Residual                     3.7808432 1.94444 
## Number of obs: 7059, groups:  classID:schoolID, 612; schoolID, 167
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   8.55148    0.03075 278.106
## urbantown    -0.26277    0.04883  -5.382
## urbanvillage -0.55441    0.10205  -5.433
## 
## Correlation of Fixed Effects:
##             (Intr) urbntw
## urbantown   -0.622       
## urbanvillag -0.297  0.187
sjPlot::tab_model(m3L0, m3L4, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 8.55 0.03
urban [town] -0.26 0.05
urban [village] -0.55 0.10
Random Effects
σ2 3.80 3.78
τ00 0.01 classID:schoolID 0.00 classID:schoolID
0.00 schoolID 0.00 schoolID
ICC 0.00 0.00
plot(m3L3, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

m3L5 <- lmer(mathjr3 ~ urban+ sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban + sector + (1 | schoolID) + (1 | classID:schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29379.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5939 -0.7614 -0.0118  0.7828  3.6215 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  classID:schoolID (Intercept) 0.000000 0.00000 
##  schoolID         (Intercept) 0.001514 0.03891 
##  Residual                     3.750990 1.93675 
## Number of obs: 7059, groups:  classID:schoolID, 612; schoolID, 167
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   9.02672    0.06929 130.280
## urbantown    -0.25391    0.04864  -5.220
## urbanvillage -0.48216    0.10208  -4.723
## sectorPublic -0.54746    0.07163  -7.643
## 
## Correlation of Fixed Effects:
##             (Intr) urbntw urbnvl
## urbantown   -0.253              
## urbanvillag -0.048  0.189       
## sectorPublc -0.897 -0.024 -0.093
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
m3L6 <- lmer(mathjr3 ~ urban*sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L6)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban * sector + (1 | schoolID) + (1 | classID:schoolID)
##    Data: dta
## 
## REML criterion at convergence: 29381.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5887 -0.7623 -0.0068  0.7821  3.6209 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  classID:schoolID (Intercept) 0.00000  0.00000 
##  schoolID         (Intercept) 0.00143  0.03781 
##  Residual                     3.75158  1.93690 
## Number of obs: 7059, groups:  classID:schoolID, 612; schoolID, 167
## 
## Fixed effects:
##                        Estimate Std. Error t value
## (Intercept)             9.01683    0.08381 107.590
## urbantown              -0.22645    0.13943  -1.624
## urbanvillage           -0.48366    0.10234  -4.726
## sectorPublic           -0.53605    0.08990  -5.963
## urbantown:sectorPublic -0.03127    0.14878  -0.210
## 
## Correlation of Fixed Effects:
##             (Intr) urbntw urbnvl sctrPb
## urbantown   -0.600                     
## urbanvillag  0.000  0.000              
## sectorPublc -0.931  0.560 -0.116       
## urbntwn:scP  0.563 -0.937  0.070 -0.604
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(m3L0,m3L4,m3L5, m3L6)
## Data: dta
## Models:
## m3L0: mathjr3 ~ 1 + (1 | schoolID/classID)
## m3L4: mathjr3 ~ urban + (1 | schoolID) + (1 | classID:schoolID)
## m3L5: mathjr3 ~ urban + sector + (1 | schoolID) + (1 | classID:schoolID)
## m3L6: mathjr3 ~ urban * sector + (1 | schoolID) + (1 | classID:schoolID)
##      npar   AIC   BIC logLik deviance   Chisq Df Pr(>Chisq)    
## m3L0    4 29479 29506 -14735    29471                          
## m3L4    6 29434 29475 -14711    29422 49.0380  2  2.247e-11 ***
## m3L5    7 29378 29426 -14682    29364 58.2160  1  2.349e-14 ***
## m3L6    8 29380 29434 -14682    29364  0.0451  1     0.8318    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::tab_model(m3L0, m3L6, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  mathjr3 mathjr3
Predictors Estimates std. Error Estimates std. Error
(Intercept) 8.42 0.02 9.02 0.08
urban [town] -0.23 0.14
urban [village] -0.48 0.10
sector [Public] -0.54 0.09
urban [town] * sector
[Public]
-0.03 0.15
Random Effects
σ2 3.80 3.75
τ00 0.01 classID:schoolID 0.00 classID:schoolID
0.00 schoolID 0.00 schoolID
ICC 0.00  
library(lattice)
qqmath(m3L5, grid=TRUE)

ggplot(dta, aes(premath, mathjr3, color = sector)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered 7th score", y = "9th Math score") +
 theme(legend.position = c(.1, .8))