1 first

if (!require("pacman")) install.packages("pacman", repos="cloud-r.project.org")
## Loading required package: pacman
pacman::p_load(mlmRev, gridExtra, tidyverse, coda, reshape,lme4)

2 data 整理

library(mlmRev)
data(Chem97)
str(Chem97)
## 'data.frame':    31022 obs. of  8 variables:
##  $ lea      : Factor w/ 131 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ school   : Factor w/ 2410 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ student  : Factor w/ 31022 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ score    : num  4 10 10 10 8 10 6 8 4 10 ...
##  $ gender   : Factor w/ 2 levels "M","F": 2 2 2 2 2 2 2 2 2 2 ...
##  $ age      : num  3 -3 -4 -2 -1 4 1 4 3 0 ...
##  $ gcsescore: num  6.62 7.62 7.25 7.5 6.44 ...
##  $ gcsecnt  : num  0.339 1.339 0.964 1.214 0.158 ...
head(Chem97)
##   lea school student score gender age gcsescore   gcsecnt
## 1   1      1       1     4      F   3     6.625 0.3393157
## 2   1      1       2    10      F  -3     7.625 1.3393157
## 3   1      1       3    10      F  -4     7.250 0.9643157
## 4   1      1       4    10      F  -2     7.500 1.2143157
## 5   1      1       5     8      F  -1     6.444 0.1583157
## 6   1      1       6    10      F   4     7.750 1.4643157

#描述統計

dta <- Chem97[, -c(6, 8)]
names(dta) <- c("區域", "學校", "學生", "化學", "性別", "總成績")
sapply(dta[, c('化學', '總成績')], summary)
##              化學   總成績
## Min.     0.000000 0.000000
## 1st Qu.  4.000000 5.750000
## Median   6.000000 6.375000
## Mean     5.812585 6.285684
## 3rd Qu.  8.000000 6.900000
## Max.    10.000000 8.000000
sapply(dta[, c('化學', '總成績')], sd)
##      化學    總成績 
## 3.3191673 0.8734512
dta_m <- aggregate(cbind(化學, 總成績, as.numeric(性別) - 1) ~ 學校, data = dta, mean)
names(dta_m) <- c('學校', '化學平均', '總成績平均', '女性比率')

3 繪圖

library(lattice)
p1 <- histogram(~ 總成績, data = dta, type = 'density', 
                xlab = '總成績', ylab = '機率密度')
p2 <- histogram(~ 化學, data = dta, type = 'density', 
                xlab = '化學分數', ylab = '機率密度')
p3 <- histogram(~ 總成績平均, data = dta_m, type = 'density', 
                xlab = '各校總成績平均', ylab = '機率密度')
p4 <- histogram(~ 化學平均, data = dta_m, type = 'density', 
                xlab = '各校化學平均分數', ylab = '機率密度')
dta_sd <- aggregate(cbind(化學, 總成績) ~ 學校, data = dta, sd)
names(dta_sd)[2:3] <- c('化學標準差', '總成績標準差')
p5 <- histogram(~ 總成績標準差, data = dta_sd, type = 'density',
                xlab = '各校總成績標準差', ylab = '機率密度')
p6 <- histogram(~ 化學標準差, data = dta_sd, type = 'density',
                xlab = '各校化學分數標準差', ylab = '機率密度')
library(gridExtra)
grid.arrange(p6, p5, p4, p3, p2, p1, as.table = T)

cor(dta[, c('化學', '總成績')])
##             化學    總成績
## 化學   1.0000000 0.6622476
## 總成績 0.6622476 1.0000000
cor(dta_m[, -1])
##              化學平均 總成績平均   女性比率
## 化學平均   1.00000000  0.6980651 0.06625497
## 總成績平均 0.69806507  1.0000000 0.24520916
## 女性比率   0.06625497  0.2452092 1.00000000
plot(dta[, 4], dta[, 6], type = 'n', xlab ='化學分數', ylab = '總成績', 
     asp = 1)
grid()
#學生
points(dta[, 4], dta[, 6], pch = '.', cex = 2)
abline(lm(dta[,6] ~ dta[, 4]))
#學校
points(dta_m[, 2], dta_m[, 3], cex = 0.5, col = 'grey') 
abline(lm(dta_m[,3] ~ dta_m[,2]), col = 'grey')
#對角線
abline(0, 1, lty = 3, col = 'blue')

library(ggplot2)
ggplot(data = dta, aes(x = 化學, y = 總成績, group = 學校))+
  stat_smooth(method = 'lm', formula = y ~ x, se = F, color = 'lightgray') +
  geom_point(size = 1) +
  stat_smooth(aes(group = 1), method= 'lm', se = F, color = 'black') +
  labs(x = '化學分數', y = '總成績', title = '學校')
## `geom_smooth()` using formula 'y ~ x'

ggplot(data = dta, aes(x = 化學, y = 總成績, group = 區域))+
  stat_smooth(method = 'lm', formula = y ~ x, se = F, color = 'lightgray') +
  geom_point(size = 1) +
  stat_smooth(aes(group = 1), method = 'lm', se = F, color = 'black') +
  labs(x = '化學分數', y = '總成績' , title = '區域') 
## `geom_smooth()` using formula 'y ~ x'

dta$化學置中 <- scale(dta$化學, scale = F)
set.seed(1225)
ns25 <- sample(levels(dta$學校), 25)
set.seed(1225)
nr25 <- sample(levels(dta$區域), 25)
ggplot(data = dta[dta$學校 %in% ns25, ], aes(x = 化學置中, y = 總成績, color = 性別))+
  geom_point(size = 1) +
  stat_smooth(method = 'lm', formula = y ~ x, se = F) +
  facet_wrap( ~ 學校 )

ggplot(data = dta[dta$區域 %in% nr25, ], aes(x = 化學置中, y = 總成績, color = 性別))+
  geom_point(size = 1) +
  stat_smooth(method = 'lm', formula = y ~ x, se = F) +
  facet_wrap( ~ 區域 )

4 multilevel

library(lme4)
dta$化學置中 <- scale(dta$化學, scale = F)
summary(m0 <- lmer(總成績 ~ 化學置中 + 性別 + 化學置中:性別 + 
                     ( 1 | 學校 ) + ( 1 | 區域 ), data = dta ) )
## Linear mixed model fit by REML ['lmerMod']
## Formula: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 |  
##     學校) + (1 | 區域)
##    Data: dta
## 
## REML criterion at convergence: 55708.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.2306 -0.5671  0.0638  0.6527  4.0788 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  學校     (Intercept) 0.08763  0.2960  
##  區域     (Intercept) 0.01064  0.1031  
##  Residual             0.31811  0.5640  
## Number of obs: 31022, groups:  學校, 2410; 區域, 131
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)     6.080254   0.013263 458.434
## 化學置中        0.168078   0.001408 119.333
## 性別F           0.334091   0.007518  44.440
## 化學置中:性別F -0.012266   0.002091  -5.866
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
##                (Intr) 化學置中 性別F 
## 化學置中        0.051                
## 性別F          -0.265 -0.065         
## 化學置中:性別F -0.023 -0.630    0.067
m1 <- update(m0, . ~ . - ( 1 | 區域 ) )
anova(m0,m1)
## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: 總成績 ~ 化學置中 + 性別 + (1 | 學校) + 化學置中:性別
## m0: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 | 學校) + (1 | 區域)
##    npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## m1    6 55743 55793 -27866    55731                         
## m0    7 55685 55744 -27836    55671 59.744  1   1.08e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(m0, test = 'Chisq')
## Single term deletions
## 
## Model:
## 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 | 
##     學校) + (1 | 區域)
##               npar   AIC    LRT   Pr(Chi)    
## <none>             55685                     
## 化學置中:性別    1 55718 34.388 4.515e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 |  
##     學校) + (1 | 區域)
##    Data: dta
## 
## REML criterion at convergence: 55708.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.2306 -0.5671  0.0638  0.6527  4.0788 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  學校     (Intercept) 0.08763  0.2960  
##  區域     (Intercept) 0.01064  0.1031  
##  Residual             0.31811  0.5640  
## Number of obs: 31022, groups:  學校, 2410; 區域, 131
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)     6.080254   0.013263 458.434
## 化學置中        0.168078   0.001408 119.333
## 性別F           0.334091   0.007518  44.440
## 化學置中:性別F -0.012266   0.002091  -5.866
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
##                (Intr) 化學置中 性別F 
## 化學置中        0.051                
## 性別F          -0.265 -0.065         
## 化學置中:性別F -0.023 -0.630    0.067

5 fixed effect

if (!require("coefplot2")) install.packages("coefplot2",
                                            repos="http://www.math.mcmaster.ca/bolker/R")
## Loading required package: coefplot2
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'coefplot2'
## Warning: unable to access index for repository http://www.math.mcmaster.ca/bolker/R/bin/macosx/contrib/4.0:
##   無法開啟 URL 'http://www.math.mcmaster.ca/bolker/R/bin/macosx/contrib/4.0/PACKAGES'
## installing the source package 'coefplot2'
library(coefplot2)
coefplot2(m0)

print(vc <- lme4::VarCorr(m0), comp = 'Variance' )
##  Groups   Name        Variance
##  學校     (Intercept) 0.087635
##  區域     (Intercept) 0.010637
##  Residual             0.318108
vc <- as.data.frame(vc)
vc[vc$grp=='學校', 'vcov']/ sum(vc$vcov)
## [1] 0.2104676
vc[vc$grp=='區域', 'vcov']/ sum(vc$vcov)
## [1] 0.02554733
1 - (vc[vc$grp=='Residual', 'vcov']/ sum(vc$vcov))
## [1] 0.2360149

6 examine model

qq_r21 <- qqmath(~ ranef(m0)$學校, type = c('p', 'g', 'r'), pch = '.',
                 xlab = '常態分位數', ylab = '學校隨機效果分位數')
qq_r22 <- qqmath(~ ranef(m0)$區域, type = c('p', 'g', 'r'), pch = '.',
                 xlab = '常態分位數', ylab = '區域隨機效果分位數')
qq_r0 <- qqmath(~ resid(m0, scale = T), type = c('p', 'g', 'r'), pch = '.',
                xlab = '標準化常態分位數', ylab = '標準化殘差')
m0_f <-  data.frame(yhat=fitted(m0), zres=resid(m0, scaled=TRUE))
r_m0 <- ggplot(data = m0_f, aes(x = yhat, y = zres)) + 
  geom_point(pch = '.') +
  stat_smooth(method = 'gam', formula = y ~ s(x, bs='cs'),
              se = FALSE) +
  labs( x = '預測值', y = '標準化殘差')
library(gridExtra)
grid.arrange(qq_r21, qq_r22, qq_r0, r_m0, nrow = 2, ncol = 2)

7 complex model

dta <- merge(dta, dta_m, by="學校") 
rslt <- lmer(總成績 ~ 化學置中 + 性別 + 化學置中:性別 +
               女性比率 + 女性比率:性別+
               (1+化學置中+性別|學校) + (1|區域), data=dta)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00683933 (tol = 0.002, component 1)
summary(rslt)
## Linear mixed model fit by REML ['lmerMod']
## Formula: 
## 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + 女性比率 +  
##     女性比率:性別 + (1 + 化學置中 + 性別 | 學校) +  
##     (1 | 區域)
##    Data: dta
## 
## REML criterion at convergence: 55393
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.3496 -0.5612  0.0682  0.6500  4.1231 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr       
##  學校     (Intercept) 0.0981171 0.31324             
##           化學置中    0.0005485 0.02342  -0.56      
##           性別F       0.0180773 0.13445  -0.63  0.07
##  區域     (Intercept) 0.0106268 0.10309             
##  Residual             0.3103917 0.55713             
## Number of obs: 31022, groups:  學校, 2410; 區域, 131
## 
## Fixed effects:
##                 Estimate Std. Error t value
## (Intercept)     6.140239   0.019252 318.944
## 化學置中        0.169861   0.001557 109.110
## 性別F           0.146319   0.022258   6.574
## 女性比率       -0.222862   0.040019  -5.569
## 化學置中:性別F -0.011096   0.002156  -5.147
## 性別F:女性比率  0.430826   0.048234   8.932
## 
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
##                (Intr) 化學置中 性別F  女性比率 化學置中:性
## 化學置中       -0.095                                     
## 性別F          -0.482 -0.004                              
## 女性比率       -0.705  0.069    0.645                     
## 化學置中:性別F  0.066 -0.608    0.004 -0.101              
## 性別F:女性比率  0.494 -0.026   -0.915 -0.797    0.030     
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00683933 (tol = 0.002, component 1)

8 end

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS  10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] coefplot2_0.1.3.2 lattice_0.20-41   reshape_0.8.9     coda_0.19-4      
##  [5] forcats_0.5.1     stringr_1.4.0     dplyr_1.0.10      purrr_0.3.4      
##  [9] readr_2.1.2       tidyr_1.2.1       tibble_3.1.8      ggplot2_3.3.6    
## [13] tidyverse_1.3.2   gridExtra_2.3     mlmRev_1.0-8      lme4_1.1-28      
## [17] Matrix_1.4-1      pacman_0.5.1     
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.2          sass_0.4.1          jsonlite_1.8.0     
##  [4] splines_4.0.2       modelr_0.1.9        bslib_0.3.1        
##  [7] assertthat_0.2.1    highr_0.8           googlesheets4_1.0.1
## [10] cellranger_1.1.0    yaml_2.2.1          pillar_1.8.1       
## [13] backports_1.1.10    glue_1.6.2          digest_0.6.25      
## [16] rvest_1.0.3         minqa_1.2.4         colorspace_1.4-1   
## [19] plyr_1.8.6          htmltools_0.5.2     pkgconfig_2.0.3    
## [22] broom_1.0.1         haven_2.5.0         scales_1.2.1       
## [25] tzdb_0.3.0          googledrive_2.0.0   mgcv_1.8-31        
## [28] farver_2.0.3        generics_0.0.2      ellipsis_0.3.2     
## [31] withr_2.5.0         cli_3.3.0           crayon_1.5.1       
## [34] magrittr_2.0.3      readxl_1.3.1        evaluate_0.16      
## [37] fs_1.5.2            fansi_0.4.1         nlme_3.1-148       
## [40] MASS_7.3-51.6       xml2_1.3.3          tools_4.0.2        
## [43] hms_1.1.2           gargle_1.2.1        lifecycle_1.0.2    
## [46] munsell_0.5.0       reprex_2.0.2        compiler_4.0.2     
## [49] jquerylib_0.1.4     rlang_1.0.5         grid_4.0.2         
## [52] nloptr_1.2.2.2      rstudioapi_0.14     labeling_0.3       
## [55] rmarkdown_2.16      boot_1.3-25         gtable_0.3.0       
## [58] DBI_1.1.3           R6_2.4.1            lubridate_1.8.0    
## [61] knitr_1.40          fastmap_1.1.0       utf8_1.1.4         
## [64] stringi_1.5.3       Rcpp_1.0.8.3        vctrs_0.4.1        
## [67] dbplyr_2.2.1        tidyselect_1.1.2    xfun_0.33