if (!require("pacman")) install.packages("pacman", repos="cloud-r.project.org")## Loading required package: pacman
pacman::p_load(mlmRev, gridExtra, tidyverse, coda, reshape,lme4)library(mlmRev)
data(Chem97)
str(Chem97)## 'data.frame': 31022 obs. of 8 variables:
## $ lea : Factor w/ 131 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ school : Factor w/ 2410 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ student : Factor w/ 31022 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ score : num 4 10 10 10 8 10 6 8 4 10 ...
## $ gender : Factor w/ 2 levels "M","F": 2 2 2 2 2 2 2 2 2 2 ...
## $ age : num 3 -3 -4 -2 -1 4 1 4 3 0 ...
## $ gcsescore: num 6.62 7.62 7.25 7.5 6.44 ...
## $ gcsecnt : num 0.339 1.339 0.964 1.214 0.158 ...
head(Chem97)## lea school student score gender age gcsescore gcsecnt
## 1 1 1 1 4 F 3 6.625 0.3393157
## 2 1 1 2 10 F -3 7.625 1.3393157
## 3 1 1 3 10 F -4 7.250 0.9643157
## 4 1 1 4 10 F -2 7.500 1.2143157
## 5 1 1 5 8 F -1 6.444 0.1583157
## 6 1 1 6 10 F 4 7.750 1.4643157
#描述統計
dta <- Chem97[, -c(6, 8)]
names(dta) <- c("區域", "學校", "學生", "化學", "性別", "總成績")
sapply(dta[, c('化學', '總成績')], summary)## 化學 總成績
## Min. 0.000000 0.000000
## 1st Qu. 4.000000 5.750000
## Median 6.000000 6.375000
## Mean 5.812585 6.285684
## 3rd Qu. 8.000000 6.900000
## Max. 10.000000 8.000000
sapply(dta[, c('化學', '總成績')], sd)## 化學 總成績
## 3.3191673 0.8734512
dta_m <- aggregate(cbind(化學, 總成績, as.numeric(性別) - 1) ~ 學校, data = dta, mean)
names(dta_m) <- c('學校', '化學平均', '總成績平均', '女性比率')library(lattice)
p1 <- histogram(~ 總成績, data = dta, type = 'density',
xlab = '總成績', ylab = '機率密度')
p2 <- histogram(~ 化學, data = dta, type = 'density',
xlab = '化學分數', ylab = '機率密度')
p3 <- histogram(~ 總成績平均, data = dta_m, type = 'density',
xlab = '各校總成績平均', ylab = '機率密度')
p4 <- histogram(~ 化學平均, data = dta_m, type = 'density',
xlab = '各校化學平均分數', ylab = '機率密度')
dta_sd <- aggregate(cbind(化學, 總成績) ~ 學校, data = dta, sd)
names(dta_sd)[2:3] <- c('化學標準差', '總成績標準差')
p5 <- histogram(~ 總成績標準差, data = dta_sd, type = 'density',
xlab = '各校總成績標準差', ylab = '機率密度')
p6 <- histogram(~ 化學標準差, data = dta_sd, type = 'density',
xlab = '各校化學分數標準差', ylab = '機率密度')
library(gridExtra)
grid.arrange(p6, p5, p4, p3, p2, p1, as.table = T)cor(dta[, c('化學', '總成績')])## 化學 總成績
## 化學 1.0000000 0.6622476
## 總成績 0.6622476 1.0000000
cor(dta_m[, -1])## 化學平均 總成績平均 女性比率
## 化學平均 1.00000000 0.6980651 0.06625497
## 總成績平均 0.69806507 1.0000000 0.24520916
## 女性比率 0.06625497 0.2452092 1.00000000
plot(dta[, 4], dta[, 6], type = 'n', xlab ='化學分數', ylab = '總成績',
asp = 1)
grid()
#學生
points(dta[, 4], dta[, 6], pch = '.', cex = 2)
abline(lm(dta[,6] ~ dta[, 4]))
#學校
points(dta_m[, 2], dta_m[, 3], cex = 0.5, col = 'grey')
abline(lm(dta_m[,3] ~ dta_m[,2]), col = 'grey')
#對角線
abline(0, 1, lty = 3, col = 'blue')library(ggplot2)
ggplot(data = dta, aes(x = 化學, y = 總成績, group = 學校))+
stat_smooth(method = 'lm', formula = y ~ x, se = F, color = 'lightgray') +
geom_point(size = 1) +
stat_smooth(aes(group = 1), method= 'lm', se = F, color = 'black') +
labs(x = '化學分數', y = '總成績', title = '學校')## `geom_smooth()` using formula 'y ~ x'
ggplot(data = dta, aes(x = 化學, y = 總成績, group = 區域))+
stat_smooth(method = 'lm', formula = y ~ x, se = F, color = 'lightgray') +
geom_point(size = 1) +
stat_smooth(aes(group = 1), method = 'lm', se = F, color = 'black') +
labs(x = '化學分數', y = '總成績' , title = '區域') ## `geom_smooth()` using formula 'y ~ x'
dta$化學置中 <- scale(dta$化學, scale = F)
set.seed(1225)
ns25 <- sample(levels(dta$學校), 25)
set.seed(1225)
nr25 <- sample(levels(dta$區域), 25)
ggplot(data = dta[dta$學校 %in% ns25, ], aes(x = 化學置中, y = 總成績, color = 性別))+
geom_point(size = 1) +
stat_smooth(method = 'lm', formula = y ~ x, se = F) +
facet_wrap( ~ 學校 )ggplot(data = dta[dta$區域 %in% nr25, ], aes(x = 化學置中, y = 總成績, color = 性別))+
geom_point(size = 1) +
stat_smooth(method = 'lm', formula = y ~ x, se = F) +
facet_wrap( ~ 區域 )library(lme4)
dta$化學置中 <- scale(dta$化學, scale = F)
summary(m0 <- lmer(總成績 ~ 化學置中 + 性別 + 化學置中:性別 +
( 1 | 學校 ) + ( 1 | 區域 ), data = dta ) )## Linear mixed model fit by REML ['lmerMod']
## Formula: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 |
## 學校) + (1 | 區域)
## Data: dta
##
## REML criterion at convergence: 55708.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.2306 -0.5671 0.0638 0.6527 4.0788
##
## Random effects:
## Groups Name Variance Std.Dev.
## 學校 (Intercept) 0.08763 0.2960
## 區域 (Intercept) 0.01064 0.1031
## Residual 0.31811 0.5640
## Number of obs: 31022, groups: 學校, 2410; 區域, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.080254 0.013263 458.434
## 化學置中 0.168078 0.001408 119.333
## 性別F 0.334091 0.007518 44.440
## 化學置中:性別F -0.012266 0.002091 -5.866
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
## (Intr) 化學置中 性別F
## 化學置中 0.051
## 性別F -0.265 -0.065
## 化學置中:性別F -0.023 -0.630 0.067
m1 <- update(m0, . ~ . - ( 1 | 區域 ) )
anova(m0,m1)## refitting model(s) with ML (instead of REML)
## Data: dta
## Models:
## m1: 總成績 ~ 化學置中 + 性別 + (1 | 學校) + 化學置中:性別
## m0: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 | 學校) + (1 | 區域)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m1 6 55743 55793 -27866 55731
## m0 7 55685 55744 -27836 55671 59.744 1 1.08e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(m0, test = 'Chisq')## Single term deletions
##
## Model:
## 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 |
## 學校) + (1 | 區域)
## npar AIC LRT Pr(Chi)
## <none> 55685
## 化學置中:性別 1 55718 34.388 4.515e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(m0)## Linear mixed model fit by REML ['lmerMod']
## Formula: 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + (1 |
## 學校) + (1 | 區域)
## Data: dta
##
## REML criterion at convergence: 55708.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.2306 -0.5671 0.0638 0.6527 4.0788
##
## Random effects:
## Groups Name Variance Std.Dev.
## 學校 (Intercept) 0.08763 0.2960
## 區域 (Intercept) 0.01064 0.1031
## Residual 0.31811 0.5640
## Number of obs: 31022, groups: 學校, 2410; 區域, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.080254 0.013263 458.434
## 化學置中 0.168078 0.001408 119.333
## 性別F 0.334091 0.007518 44.440
## 化學置中:性別F -0.012266 0.002091 -5.866
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
## (Intr) 化學置中 性別F
## 化學置中 0.051
## 性別F -0.265 -0.065
## 化學置中:性別F -0.023 -0.630 0.067
if (!require("coefplot2")) install.packages("coefplot2",
repos="http://www.math.mcmaster.ca/bolker/R")## Loading required package: coefplot2
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'coefplot2'
## Warning: unable to access index for repository http://www.math.mcmaster.ca/bolker/R/bin/macosx/contrib/4.0:
## 無法開啟 URL 'http://www.math.mcmaster.ca/bolker/R/bin/macosx/contrib/4.0/PACKAGES'
## installing the source package 'coefplot2'
library(coefplot2)
coefplot2(m0)print(vc <- lme4::VarCorr(m0), comp = 'Variance' )## Groups Name Variance
## 學校 (Intercept) 0.087635
## 區域 (Intercept) 0.010637
## Residual 0.318108
vc <- as.data.frame(vc)
vc[vc$grp=='學校', 'vcov']/ sum(vc$vcov)## [1] 0.2104676
vc[vc$grp=='區域', 'vcov']/ sum(vc$vcov)## [1] 0.02554733
1 - (vc[vc$grp=='Residual', 'vcov']/ sum(vc$vcov))## [1] 0.2360149
qq_r21 <- qqmath(~ ranef(m0)$學校, type = c('p', 'g', 'r'), pch = '.',
xlab = '常態分位數', ylab = '學校隨機效果分位數')
qq_r22 <- qqmath(~ ranef(m0)$區域, type = c('p', 'g', 'r'), pch = '.',
xlab = '常態分位數', ylab = '區域隨機效果分位數')
qq_r0 <- qqmath(~ resid(m0, scale = T), type = c('p', 'g', 'r'), pch = '.',
xlab = '標準化常態分位數', ylab = '標準化殘差')
m0_f <- data.frame(yhat=fitted(m0), zres=resid(m0, scaled=TRUE))
r_m0 <- ggplot(data = m0_f, aes(x = yhat, y = zres)) +
geom_point(pch = '.') +
stat_smooth(method = 'gam', formula = y ~ s(x, bs='cs'),
se = FALSE) +
labs( x = '預測值', y = '標準化殘差')
library(gridExtra)
grid.arrange(qq_r21, qq_r22, qq_r0, r_m0, nrow = 2, ncol = 2)dta <- merge(dta, dta_m, by="學校")
rslt <- lmer(總成績 ~ 化學置中 + 性別 + 化學置中:性別 +
女性比率 + 女性比率:性別+
(1+化學置中+性別|學校) + (1|區域), data=dta)## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00683933 (tol = 0.002, component 1)
summary(rslt)## Linear mixed model fit by REML ['lmerMod']
## Formula:
## 總成績 ~ 化學置中 + 性別 + 化學置中:性別 + 女性比率 +
## 女性比率:性別 + (1 + 化學置中 + 性別 | 學校) +
## (1 | 區域)
## Data: dta
##
## REML criterion at convergence: 55393
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.3496 -0.5612 0.0682 0.6500 4.1231
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## 學校 (Intercept) 0.0981171 0.31324
## 化學置中 0.0005485 0.02342 -0.56
## 性別F 0.0180773 0.13445 -0.63 0.07
## 區域 (Intercept) 0.0106268 0.10309
## Residual 0.3103917 0.55713
## Number of obs: 31022, groups: 學校, 2410; 區域, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.140239 0.019252 318.944
## 化學置中 0.169861 0.001557 109.110
## 性別F 0.146319 0.022258 6.574
## 女性比率 -0.222862 0.040019 -5.569
## 化學置中:性別F -0.011096 0.002156 -5.147
## 性別F:女性比率 0.430826 0.048234 8.932
##
## Correlation of Fixed Effects:
## Warning in abbreviate(rn, minlength = 6): abbreviate 不適用於非 ASCII 字元
## (Intr) 化學置中 性別F 女性比率 化學置中:性
## 化學置中 -0.095
## 性別F -0.482 -0.004
## 女性比率 -0.705 0.069 0.645
## 化學置中:性別F 0.066 -0.608 0.004 -0.101
## 性別F:女性比率 0.494 -0.026 -0.915 -0.797 0.030
## optimizer (nloptwrap) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00683933 (tol = 0.002, component 1)
sessionInfo()## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] zh_TW.UTF-8/zh_TW.UTF-8/zh_TW.UTF-8/C/zh_TW.UTF-8/zh_TW.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] coefplot2_0.1.3.2 lattice_0.20-41 reshape_0.8.9 coda_0.19-4
## [5] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.10 purrr_0.3.4
## [9] readr_2.1.2 tidyr_1.2.1 tibble_3.1.8 ggplot2_3.3.6
## [13] tidyverse_1.3.2 gridExtra_2.3 mlmRev_1.0-8 lme4_1.1-28
## [17] Matrix_1.4-1 pacman_0.5.1
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.2 sass_0.4.1 jsonlite_1.8.0
## [4] splines_4.0.2 modelr_0.1.9 bslib_0.3.1
## [7] assertthat_0.2.1 highr_0.8 googlesheets4_1.0.1
## [10] cellranger_1.1.0 yaml_2.2.1 pillar_1.8.1
## [13] backports_1.1.10 glue_1.6.2 digest_0.6.25
## [16] rvest_1.0.3 minqa_1.2.4 colorspace_1.4-1
## [19] plyr_1.8.6 htmltools_0.5.2 pkgconfig_2.0.3
## [22] broom_1.0.1 haven_2.5.0 scales_1.2.1
## [25] tzdb_0.3.0 googledrive_2.0.0 mgcv_1.8-31
## [28] farver_2.0.3 generics_0.0.2 ellipsis_0.3.2
## [31] withr_2.5.0 cli_3.3.0 crayon_1.5.1
## [34] magrittr_2.0.3 readxl_1.3.1 evaluate_0.16
## [37] fs_1.5.2 fansi_0.4.1 nlme_3.1-148
## [40] MASS_7.3-51.6 xml2_1.3.3 tools_4.0.2
## [43] hms_1.1.2 gargle_1.2.1 lifecycle_1.0.2
## [46] munsell_0.5.0 reprex_2.0.2 compiler_4.0.2
## [49] jquerylib_0.1.4 rlang_1.0.5 grid_4.0.2
## [52] nloptr_1.2.2.2 rstudioapi_0.14 labeling_0.3
## [55] rmarkdown_2.16 boot_1.3-25 gtable_0.3.0
## [58] DBI_1.1.3 R6_2.4.1 lubridate_1.8.0
## [61] knitr_1.40 fastmap_1.1.0 utf8_1.1.4
## [64] stringi_1.5.3 Rcpp_1.0.8.3 vctrs_0.4.1
## [67] dbplyr_2.2.1 tidyselect_1.1.2 xfun_0.33