pacman::p_load(mlmRev, tidyverse, lme4, merTools)
#資料路徑、顯示結果
dta <- read.csv("C:/Users/HANK/Desktop/HOMEWORK/pts.csv")
#
head(dta)
## School Teacher Pupil Read_0 Read_1
## 1 1 11 18 -0.16407000 4.2426
## 2 1 11 19 -0.98126000 1.0000
## 3 1 11 20 -1.25370000 2.2361
## 4 1 11 15 -0.87230000 3.1623
## 5 1 11 17 -0.00063104 3.4641
## 6 1 11 16 -0.92678000 4.0000
library(dplyr)
# coerce variables to factor type
#將資料中的變項轉換成factor
dta1 <- dta %>% mutate( School = factor(School),
Teacher = factor(Teacher),
Pupil = factor(Pupil))
#
str(dta1)
## 'data.frame': 777 obs. of 5 variables:
## $ School : Factor w/ 20 levels "1","3","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Teacher: Factor w/ 46 levels "11","12","31",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Pupil : Factor w/ 777 levels "1","2","3","4",..: 17 18 19 14 16 15 20 21 22 4 ...
## $ Read_0 : num -0.16407 -0.98126 -1.2537 -0.8723 -0.000631 ...
## $ Read_1 : num 4.24 1 2.24 3.16 3.46 ...
#調用packages
library(themetagenomics)
## Loading required package: Rcpp
##
## Attaching package: 'themetagenomics'
## The following object is masked from 'package:tidyr':
##
## extract
library(ggplot2)
# create teacher ID nested in School
dta2 <- dta1 %>%
group_by(School) %>%
mutate(T_in_S = factor(match(Teacher, unique(Teacher)))) %>%
as.data.frame
#
theme_set(theme_bw())
ggplot(dta2, aes(Read_0, Read_1, group = T_in_S, color = T_in_S)) +
geom_smooth(method = "lm", se = F, formula= y ~ poly(x, 2)) +
geom_point(cex = 0.6, alpha = .3) +
facet_wrap(~ School, nrow = 2) +
labs(x = "Reading attainment at reception",
y = "Reading attainment at year 1") +
theme(legend.position = "NONE")
## Warning: Computation failed in `stat_smooth()`:
## 'degree' must be less than number of unique points
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(merTools)
# random teacher and random school effects
summary(m3 <- lmer(Read_1 ~ Read_0 + I(Read_0*Read_0) +
(1 | School) + (1 | Teacher), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) + (1 | Teacher)
## Data: dta
##
## REML criterion at convergence: 1906.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2824 -0.5059 -0.0041 0.6268 3.4686
##
## Random effects:
## Groups Name Variance Std.Dev.
## Teacher (Intercept) 0.02331 0.1527
## School (Intercept) 0.04335 0.2082
## Residual 0.63769 0.7986
## Number of obs: 777, groups: Teacher, 46; School, 20
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.47337 0.06740 30.87194 51.534 <2e-16 ***
## Read_0 0.95479 0.03592 760.04306 26.578 <2e-16 ***
## I(Read_0 * Read_0) -0.06688 0.03062 768.87963 -2.184 0.0292 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Read_0
## Read_0 0.250
## I(Rd_0*R_0) -0.462 -0.521
(1 | school)與(1 | Teacher) 代表截距在不同學校間與不同教師間的隨機效果
# residual plot
plot(m3, pch = 20, cex = 0.5, col = "steelblue",
xlab = "Fitted values", ylab = "Pearson residuals")
# quick summary of model parameter estimates
fastdisp(m3)
## lmer(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) +
## (1 | Teacher), data = dta)
## coef.est coef.se
## (Intercept) 3.47 0.07
## Read_0 0.95 0.04
## I(Read_0 * Read_0) -0.07 0.03
##
## Error terms:
## Groups Name Std.Dev.
## Teacher (Intercept) 0.15
## School (Intercept) 0.21
## Residual 0.80
## ---
## number of obs: 777, groups: Teacher, 46; School, 20
## AIC = 1918.9
# estimate fixed-effects parameters by simulation
m3_fe <- FEsim(m3, 1000)
summary(m3_fe)
## term mean median sd
## Length:3 Min. :-0.06705 Min. :-0.06675 Min. :0.02985
## Class :character 1st Qu.: 0.44381 1st Qu.: 0.44412 1st Qu.:0.03327
## Mode :character Median : 0.95468 Median : 0.95499 Median :0.03669
## Mean : 1.45464 Mean : 1.45579 Mean :0.04459
## 3rd Qu.: 2.21549 3rd Qu.: 2.21707 3rd Qu.:0.05196
## Max. : 3.47630 Max. : 3.47915 Max. :0.06723
# plot for fixed effects
plotFEsim(m3_fe) +
labs(title = "Coefficient Plot", x = "Median Effect Estimate")
估計固定效應參數與畫圖
# estimate random-effects parameters by simulation
m3_re <- REsim(m3)
summary(m3_re)
## groupFctr groupID term mean
## Length:66 Length:66 Length:66 Min. :-0.4328004
## Class :character Class :character Class :character 1st Qu.:-0.0493361
## Mode :character Mode :character Mode :character Median : 0.0030559
## Mean :-0.0003546
## 3rd Qu.: 0.0433472
## Max. : 0.2521494
## median sd
## Min. :-0.426516 Min. :0.0913
## 1st Qu.:-0.047221 1st Qu.:0.1178
## Median : 0.002572 Median :0.1331
## Mean :-0.001425 Mean :0.1363
## 3rd Qu.: 0.049216 3rd Qu.:0.1453
## Max. : 0.248680 Max. :0.2027
# normality plot for random effects
plotREsim(m3_re)
估計隨機效應參數與畫圖
# ordinary regression
summary(m0 <- lm(Read_1 ~ Read_0 + I(Read_0*Read_0), data = dta))
##
## Call:
## lm(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0), data = dta)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.0675 -0.4619 0.0146 0.5319 2.8194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.46873 0.04323 80.242 <2e-16 ***
## Read_0 0.96445 0.03541 27.240 <2e-16 ***
## I(Read_0 * Read_0) -0.07293 0.03104 -2.349 0.0191 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8397 on 774 degrees of freedom
## Multiple R-squared: 0.5481, Adjusted R-squared: 0.5469
## F-statistic: 469.3 on 2 and 774 DF, p-value: < 2.2e-16
普通迴歸
# residual plot
plot(rstandard(m0) ~ fitted(m0), pch = 20, cex = 0.5, col = "steelblue",
xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
grid()
殘差圖