#
pacman::p_load(mlmRev, tidyverse, lme4)
# data from mlmRev package
data(Hsb82, "mlmRev")
## Warning in data(Hsb82, "mlmRev"): data set 'mlmRev' not found
str(Hsb82)
## 'data.frame': 7185 obs. of 8 variables:
## $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
## $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ sx : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
## $ ses : num -1.528 -0.588 -0.528 -0.668 -0.158 ...
## $ mAch : num 5.88 19.71 20.35 8.78 17.9 ...
## $ meanses: num -0.434 -0.434 -0.434 -0.434 -0.434 ...
## $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
## $ cses : num -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
# make a copy of data to the dta object
dta <- Hsb82
# use sensible variable names
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
"Ave_SES", "Sector", "C_SES")
#重新命名變數名稱
head(dta)
## School Minority Gender SES Math Ave_SES Sector C_SES
## 1 1224 No Female -1.528 5.876 -0.434383 Public -1.09361702
## 2 1224 No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3 1224 No Male -0.528 20.349 -0.434383 Public -0.09361702
## 4 1224 No Male -0.668 8.781 -0.434383 Public -0.23361702
## 5 1224 No Male -0.158 17.898 -0.434383 Public 0.27638298
## 6 1224 No Male 0.022 4.583 -0.434383 Public 0.45638298
##設定背景主題
ot <- theme_set(theme_bw())
# freedman-diaconis rule for binwidth
bw <- 2*IQR(dta$Math)/(dim(dta)[1]^(1/3))
# histograms大小設定
ggplot(dta, aes(Math)) +
stat_bin(aes(fill = ..count..), binwidth=bw) +
labs(x="Math achievement scores")
set.seed(20160923)
#隨機抽樣
# pick 60 out of 160
n <- sample(160, 60)
#從160個中重複抽取樣本60次。
ggplot(filter(dta, School %in% levels(School)[n]),
aes(reorder(School, Math, mean), Math, color=Sector)) +
geom_boxplot() +
geom_point(size= rel(0.6)) +
labs(x="School ID", y="Math achievement score")+
theme(axis.text.x=element_text(size=6))+
ggtitle("A sample of 60 schools")
依各校分數低到高繪圖,color=Sector表示不同部門是不同顏色,紅色是公立學校,藍色是catholic學校。圖顯示紅色處於較低分區域,亦即公立學校均分較低。
library(dplyr)
# math mean by school(學校的數學平均分數)
dta_mmath <- dta %>%
group_by(School) %>%
mutate( ave_math = mean(Math)) %>%
select( School, ave_math ) %>%
unique %>%
as.data.frame
summary(dta_mmath$ave_math)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.24 10.47 12.90 12.62 14.65 19.72
#呈現ave_math概要
#抽取的60所學校的平均數是12.62
sd(dta_mmath$ave_math)
## [1] 3.117651
#抽取的60所學校的標準差
summary(dta$Math)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.832 7.275 13.131 12.748 18.317 24.993
所有學校的分數的資料分析 平均數是12.748
# sd for overall Math scores
sd(dta$Math)
## [1] 6.878246
所有學校的分數的標準差
bw <- 2*IQR(dta_mmath$ave_math)/(160^(1/3))
#Plot
ggplot(dta_mmath, aes(ave_math)) +
geom_histogram(aes(y= ..density.., alpha= .2), binwidth=bw) +
geom_density(fill="NA") +
geom_vline(xintercept=mean(dta_mmath$ave_math), linetype="longdash") +
geom_text(aes(7, 0.17, label= "ave = 12.62, sd = 3.12")) +
labs(x="Math achievement school means", y="Density") +
theme(legend.position="NONE")
summary(m0 <- lmer(Math ~ (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
## Data: dta
##
## REML criterion at convergence: 47116.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0631 -0.7539 0.0267 0.7606 2.7426
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 8.614 2.935
## Residual 39.148 6.257
## Number of obs: 7185, groups: School, 160
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 12.6370 0.2444 51.71
confint(m0, method="boot")
## Computing bootstrap confidence intervals ...
## 2.5 % 97.5 %
## .sig01 2.596281 3.333500
## .sigma 6.151525 6.360499
## (Intercept) 12.128943 13.114645
# default residual plot
plot(m0, xlab="Fitted values", ylab="Pearson residuals",
pch=20, cex=.5, type = c("p","g"))