1 load them

pacman::p_load(mlmRev, tidyverse, lme4, merTools)

2 input data

#資料路徑、顯示結果
dta <- read.csv("C:/Users/HANK/Desktop/HOMEWORK/pts.csv")
#
head(dta)
##   School Teacher Pupil      Read_0 Read_1
## 1      1      11    18 -0.16407000 4.2426
## 2      1      11    19 -0.98126000 1.0000
## 3      1      11    20 -1.25370000 2.2361
## 4      1      11    15 -0.87230000 3.1623
## 5      1      11    17 -0.00063104 3.4641
## 6      1      11    16 -0.92678000 4.0000
library(dplyr)
# coerce variables to factor type
#將資料中的變項轉換成factor
dta1 <- dta %>% mutate( School = factor(School), 
                       Teacher = factor(Teacher),
                       Pupil = factor(Pupil))
#
str(dta1)
## 'data.frame':    777 obs. of  5 variables:
##  $ School : Factor w/ 20 levels "1","3","4","5",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Teacher: Factor w/ 46 levels "11","12","31",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil  : Factor w/ 777 levels "1","2","3","4",..: 17 18 19 14 16 15 20 21 22 4 ...
##  $ Read_0 : num  -0.16407 -0.98126 -1.2537 -0.8723 -0.000631 ...
##  $ Read_1 : num  4.24 1 2.24 3.16 3.46 ...
#調用packages
library(themetagenomics)
## Loading required package: Rcpp
## 
## Attaching package: 'themetagenomics'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(ggplot2)
# create teacher ID nested in School
dta2 <- dta1 %>% 
        group_by(School) %>% 
        mutate(T_in_S = factor(match(Teacher, unique(Teacher)))) %>% 
        as.data.frame
#
theme_set(theme_bw())

3 plot

ggplot(dta2, aes(Read_0, Read_1, group = T_in_S, color = T_in_S)) +
 geom_smooth(method = "lm", se = F, formula= y ~ poly(x, 2)) +
 geom_point(cex = 0.6, alpha = .3) +
 facet_wrap(~ School, nrow = 2) +
 labs(x = "Reading attainment at reception", 
      y = "Reading attainment at year 1") +
 theme(legend.position = "NONE")
## Warning: Computation failed in `stat_smooth()`:
## 'degree' must be less than number of unique points

library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(merTools)
# random teacher and random school effects
summary(m3 <- lmer(Read_1 ~ Read_0 + I(Read_0*Read_0) + 
                  (1 | School) + (1 | Teacher), data = dta))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) + (1 | Teacher)
##    Data: dta
## 
## REML criterion at convergence: 1906.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2824 -0.5059 -0.0041  0.6268  3.4686 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.02331  0.1527  
##  School   (Intercept) 0.04335  0.2082  
##  Residual             0.63769  0.7986  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##                     Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          3.47337    0.06740  30.87194  51.534   <2e-16 ***
## Read_0               0.95479    0.03592 760.04306  26.578   <2e-16 ***
## I(Read_0 * Read_0)  -0.06688    0.03062 768.87963  -2.184   0.0292 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Read_0
## Read_0       0.250       
## I(Rd_0*R_0) -0.462 -0.521

(1 | school)與(1 | Teacher) 代表截距在不同學校間與不同教師間的隨機效果

# residual plot
plot(m3, pch = 20, cex = 0.5, col = "steelblue", 
     xlab = "Fitted values", ylab = "Pearson residuals")

# quick summary of model parameter estimates
fastdisp(m3)
## lmer(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0) + (1 | School) + 
##     (1 | Teacher), data = dta)
##                    coef.est coef.se
## (Intercept)         3.47     0.07  
## Read_0              0.95     0.04  
## I(Read_0 * Read_0) -0.07     0.03  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  Teacher  (Intercept) 0.15    
##  School   (Intercept) 0.21    
##  Residual             0.80    
## ---
## number of obs: 777, groups: Teacher, 46; School, 20
## AIC = 1918.9
# estimate fixed-effects parameters by simulation
m3_fe <- FEsim(m3, 1000)
summary(m3_fe)
##      term                mean              median               sd         
##  Length:3           Min.   :-0.06705   Min.   :-0.06675   Min.   :0.02985  
##  Class :character   1st Qu.: 0.44381   1st Qu.: 0.44412   1st Qu.:0.03327  
##  Mode  :character   Median : 0.95468   Median : 0.95499   Median :0.03669  
##                     Mean   : 1.45464   Mean   : 1.45579   Mean   :0.04459  
##                     3rd Qu.: 2.21549   3rd Qu.: 2.21707   3rd Qu.:0.05196  
##                     Max.   : 3.47630   Max.   : 3.47915   Max.   :0.06723
# plot for fixed effects
plotFEsim(m3_fe) + 
 labs(title = "Coefficient Plot", x = "Median Effect Estimate")

估計固定效應參數與畫圖

# estimate random-effects parameters by simulation
m3_re <- REsim(m3)
summary(m3_re)
##   groupFctr           groupID              term                mean           
##  Length:66          Length:66          Length:66          Min.   :-0.4328004  
##  Class :character   Class :character   Class :character   1st Qu.:-0.0493361  
##  Mode  :character   Mode  :character   Mode  :character   Median : 0.0030559  
##                                                           Mean   :-0.0003546  
##                                                           3rd Qu.: 0.0433472  
##                                                           Max.   : 0.2521494  
##      median                sd        
##  Min.   :-0.426516   Min.   :0.0913  
##  1st Qu.:-0.047221   1st Qu.:0.1178  
##  Median : 0.002572   Median :0.1331  
##  Mean   :-0.001425   Mean   :0.1363  
##  3rd Qu.: 0.049216   3rd Qu.:0.1453  
##  Max.   : 0.248680   Max.   :0.2027
# normality plot for random effects
plotREsim(m3_re)

估計隨機效應參數與畫圖

# ordinary regression
summary(m0 <- lm(Read_1 ~ Read_0 + I(Read_0*Read_0), data = dta))
## 
## Call:
## lm(formula = Read_1 ~ Read_0 + I(Read_0 * Read_0), data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.0675 -0.4619  0.0146  0.5319  2.8194 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         3.46873    0.04323  80.242   <2e-16 ***
## Read_0              0.96445    0.03541  27.240   <2e-16 ***
## I(Read_0 * Read_0) -0.07293    0.03104  -2.349   0.0191 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8397 on 774 degrees of freedom
## Multiple R-squared:  0.5481, Adjusted R-squared:  0.5469 
## F-statistic: 469.3 on 2 and 774 DF,  p-value: < 2.2e-16

普通迴歸

# residual plot
plot(rstandard(m0) ~ fitted(m0), pch = 20, cex = 0.5, col = "steelblue", 
     xlab = "Fitted values", ylab = "Standardized residuals")
abline(h = 0, lty = 2)
grid()

殘差圖

4 The end