library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(stringr)
dta <- read.csv("D:/sheu/df_output_cleaned.csv", header=T)
dta<-dta[-1]
head(dta)
## studid sector urban gender classtype schoolID classID mathassesment1
## 1 1 0 2 2 1 t009 t0489 2
## 2 4 0 3 1 1 t304 t0129 3
## 3 6 0 2 2 2 t173 t1004 3
## 4 7 0 3 2 1 t218 t0414 2
## 5 8 0 3 2 1 t136 t0930 3
## 6 11 0 3 1 1 t205 t1071 3
## mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1 2 2 1 7 5 2 3
## 2 3 2 2 10 5 2 2
## 3 3 1 3 10 1 2 3
## 4 2 2 1 7 6 2 3
## 5 3 1 1 8 6 3 3
## 6 2 1 3 9 2 3 3
## mathwork2 mathquestion2 mathjr3
## 1 1 1 7
## 2 3 1 8
## 3 1 1 7
## 4 1 2 8
## 5 1 2 9
## 6 2 3 11
str(dta)
## 'data.frame': 7059 obs. of 18 variables:
## $ studid : int 1 4 6 7 8 11 12 13 23 27 ...
## $ sector : int 0 0 0 0 0 0 0 0 0 0 ...
## $ urban : int 2 3 2 3 3 3 1 2 3 2 ...
## $ gender : int 2 1 2 2 2 1 2 2 1 2 ...
## $ classtype : int 1 1 2 1 1 1 1 3 1 1 ...
## $ schoolID : chr "t009" "t304" "t173" "t218" ...
## $ classID : chr "t0489" "t0129" "t1004" "t0414" ...
## $ mathassesment1: int 2 3 3 2 3 3 2 3 3 2 ...
## $ mathstudy1 : int 2 3 3 2 3 2 2 3 3 2 ...
## $ mathwork1 : int 2 2 1 2 1 1 2 2 1 2 ...
## $ mathquestion1 : int 1 2 3 1 1 3 1 1 2 1 ...
## $ premath : int 7 10 10 7 8 9 7 9 9 7 ...
## $ yearstea : int 5 5 1 6 6 2 4 3 6 5 ...
## $ mathassesment2: int 2 2 2 2 3 3 3 2 2 1 ...
## $ mathstudy2 : int 3 2 3 3 3 3 2 2 2 1 ...
## $ mathwork2 : int 1 3 1 1 1 2 2 3 2 2 ...
## $ mathquestion2 : int 1 1 1 2 2 3 3 1 1 1 ...
## $ mathjr3 : int 7 8 7 8 9 11 10 8 7 5 ...
dta[,1:7] <- apply(dta[,1:7], 2, as.factor)
str(dta)
## 'data.frame': 7059 obs. of 18 variables:
## $ studid : chr " 1" " 4" " 6" " 7" ...
## $ sector : chr "0" "0" "0" "0" ...
## $ urban : chr "2" "3" "2" "3" ...
## $ gender : chr "2" "1" "2" "2" ...
## $ classtype : chr " 1" " 1" " 2" " 1" ...
## $ schoolID : chr "t009" "t304" "t173" "t218" ...
## $ classID : chr "t0489" "t0129" "t1004" "t0414" ...
## $ mathassesment1: int 2 3 3 2 3 3 2 3 3 2 ...
## $ mathstudy1 : int 2 3 3 2 3 2 2 3 3 2 ...
## $ mathwork1 : int 2 2 1 2 1 1 2 2 1 2 ...
## $ mathquestion1 : int 1 2 3 1 1 3 1 1 2 1 ...
## $ premath : int 7 10 10 7 8 9 7 9 9 7 ...
## $ yearstea : int 5 5 1 6 6 2 4 3 6 5 ...
## $ mathassesment2: int 2 2 2 2 3 3 3 2 2 1 ...
## $ mathstudy2 : int 3 2 3 3 3 3 2 2 2 1 ...
## $ mathwork2 : int 1 3 1 1 1 2 2 3 2 2 ...
## $ mathquestion2 : int 1 1 1 2 2 3 3 1 1 1 ...
## $ mathjr3 : int 7 8 7 8 9 11 10 8 7 5 ...
dta$sector <- as.factor(ifelse(dta$sector==0, "Public", "Private"))
head(dta)
## studid sector urban gender classtype schoolID classID mathassesment1
## 1 1 Public 2 2 1 t009 t0489 2
## 2 4 Public 3 1 1 t304 t0129 3
## 3 6 Public 2 2 2 t173 t1004 3
## 4 7 Public 3 2 1 t218 t0414 2
## 5 8 Public 3 2 1 t136 t0930 3
## 6 11 Public 3 1 1 t205 t1071 3
## mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1 2 2 1 7 5 2 3
## 2 3 2 2 10 5 2 2
## 3 3 1 3 10 1 2 3
## 4 2 2 1 7 6 2 3
## 5 3 1 1 8 6 3 3
## 6 2 1 3 9 2 3 3
## mathwork2 mathquestion2 mathjr3
## 1 1 1 7
## 2 3 1 8
## 3 1 1 7
## 4 1 2 8
## 5 1 2 9
## 6 2 3 11
dta$gender <- as.factor(ifelse(dta$gender==1, "M", "F"))
head(dta)
## studid sector urban gender classtype schoolID classID mathassesment1
## 1 1 Public 2 F 1 t009 t0489 2
## 2 4 Public 3 M 1 t304 t0129 3
## 3 6 Public 2 F 2 t173 t1004 3
## 4 7 Public 3 F 1 t218 t0414 2
## 5 8 Public 3 F 1 t136 t0930 3
## 6 11 Public 3 M 1 t205 t1071 3
## mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1 2 2 1 7 5 2 3
## 2 3 2 2 10 5 2 2
## 3 3 1 3 10 1 2 3
## 4 2 2 1 7 6 2 3
## 5 3 1 1 8 6 3 3
## 6 2 1 3 9 2 3 3
## mathwork2 mathquestion2 mathjr3
## 1 1 1 7
## 2 3 1 8
## 3 1 1 7
## 4 1 2 8
## 5 1 2 9
## 6 2 3 11
dta$classtype <- gsub("1", "N", dta$classtype)
dta$classtype <- gsub("2", "G", dta$classtype)
dta$urban <- gsub("1", "village", dta$urban)
dta$urban <- gsub("2", "town", dta$urban)
dta$urban <- gsub("3", "city", dta$urban)
dta$yearstea <- gsub("3", "5", dta$yearstea)
dta$yearstea <- gsub("4", "8", dta$yearstea)
dta$yearstea <- gsub("5", "15", dta$yearstea)
dta$yearstea <- gsub("6", "20", dta$yearstea)
dta$yearstea <-as.numeric(dta$yearstea)
dta$classtype <- gsub("3", "A", dta$classtype)
dta$classtype <- str_replace(dta$classtype,"97","F")
dta$classtype <- str_replace(dta$classtype,"99","F")
dta$classtype <-as.character(dta$classtype)
head(dta)
## studid sector urban gender classtype schoolID classID mathassesment1
## 1 1 Public town F N t009 t0489 2
## 2 4 Public city M N t304 t0129 3
## 3 6 Public town F G t173 t1004 3
## 4 7 Public city F N t218 t0414 2
## 5 8 Public city F N t136 t0930 3
## 6 11 Public city M N t205 t1071 3
## mathstudy1 mathwork1 mathquestion1 premath yearstea mathassesment2 mathstudy2
## 1 2 2 1 7 15 2 3
## 2 3 2 2 10 15 2 2
## 3 3 1 3 10 1 2 3
## 4 2 2 1 7 20 2 3
## 5 3 1 1 8 20 3 3
## 6 2 1 3 9 2 3 3
## mathwork2 mathquestion2 mathjr3
## 1 1 1 7
## 2 3 1 8
## 3 1 1 7
## 4 1 2 8
## 5 1 2 9
## 6 2 3 11
str(dta)
## 'data.frame': 7059 obs. of 18 variables:
## $ studid : chr " 1" " 4" " 6" " 7" ...
## $ sector : Factor w/ 2 levels "Private","Public": 2 2 2 2 2 2 2 2 2 2 ...
## $ urban : chr "town" "city" "town" "city" ...
## $ gender : Factor w/ 2 levels "F","M": 1 2 1 1 1 2 1 1 2 1 ...
## $ classtype : chr " N" " N" " G" " N" ...
## $ schoolID : chr "t009" "t304" "t173" "t218" ...
## $ classID : chr "t0489" "t0129" "t1004" "t0414" ...
## $ mathassesment1: int 2 3 3 2 3 3 2 3 3 2 ...
## $ mathstudy1 : int 2 3 3 2 3 2 2 3 3 2 ...
## $ mathwork1 : int 2 2 1 2 1 1 2 2 1 2 ...
## $ mathquestion1 : int 1 2 3 1 1 3 1 1 2 1 ...
## $ premath : int 7 10 10 7 8 9 7 9 9 7 ...
## $ yearstea : num 15 15 1 20 20 2 8 15 20 15 ...
## $ mathassesment2: int 2 2 2 2 3 3 3 2 2 1 ...
## $ mathstudy2 : int 3 2 3 3 3 3 2 2 2 1 ...
## $ mathwork2 : int 1 3 1 1 1 2 2 3 2 2 ...
## $ mathquestion2 : int 1 1 1 2 2 3 3 1 1 1 ...
## $ mathjr3 : int 7 8 7 8 9 11 10 8 7 5 ...
str(dta)
## 'data.frame': 7059 obs. of 18 variables:
## $ studid : chr " 1" " 4" " 6" " 7" ...
## $ sector : Factor w/ 2 levels "Private","Public": 2 2 2 2 2 2 2 2 2 2 ...
## $ urban : chr "town" "city" "town" "city" ...
## $ gender : Factor w/ 2 levels "F","M": 1 2 1 1 1 2 1 1 2 1 ...
## $ classtype : chr " N" " N" " G" " N" ...
## $ schoolID : chr "t009" "t304" "t173" "t218" ...
## $ classID : chr "t0489" "t0129" "t1004" "t0414" ...
## $ mathassesment1: int 2 3 3 2 3 3 2 3 3 2 ...
## $ mathstudy1 : int 2 3 3 2 3 2 2 3 3 2 ...
## $ mathwork1 : int 2 2 1 2 1 1 2 2 1 2 ...
## $ mathquestion1 : int 1 2 3 1 1 3 1 1 2 1 ...
## $ premath : int 7 10 10 7 8 9 7 9 9 7 ...
## $ yearstea : num 15 15 1 20 20 2 8 15 20 15 ...
## $ mathassesment2: int 2 2 2 2 3 3 3 2 2 1 ...
## $ mathstudy2 : int 3 2 3 3 3 3 2 2 2 1 ...
## $ mathwork2 : int 1 3 1 1 1 2 2 3 2 2 ...
## $ mathquestion2 : int 1 1 1 2 2 3 3 1 1 1 ...
## $ mathjr3 : int 7 8 7 8 9 11 10 8 7 5 ...
dta %>%
furniture::table1(premath, mathjr3, yearstea,
splitby= ~ gender,
test=FALSE )
##
## ────────────────────────────────
## gender
## F M
## n = 3379 n = 3680
## premath
## 8.7 (1.6) 8.7 (1.7)
## mathjr3
## 8.5 (1.9) 8.4 (2.0)
## yearstea
## 12.4 (6.0) 12.0 (6.2)
## ────────────────────────────────
dta %>%
furniture::table1(premath, mathjr3, yearstea,
splitby= ~ urban,
test=FALSE )
##
## ───────────────────────────────────────────
## urban
## city town village
## n = 4052 n = 2608 n = 399
## premath
## 8.8 (1.7) 8.7 (1.7) 8.3 (1.8)
## mathjr3
## 8.6 (2.0) 8.3 (1.9) 8.0 (1.9)
## yearstea
## 12.8 (6.0) 11.6 (6.1) 10.3 (6.0)
## ───────────────────────────────────────────
dta %>%
furniture::table1(premath, mathjr3, yearstea,
splitby= ~ sector,
test=FALSE )
##
## ────────────────────────────────
## sector
## Private Public
## n = 837 n = 6222
## premath
## 9.2 (1.5) 8.7 (1.7)
## mathjr3
## 8.9 (1.8) 8.4 (2.0)
## yearstea
## 12.1 (6.0) 12.2 (6.1)
## ────────────────────────────────
dta %>%
furniture::table1(premath, mathjr3,
splitby= ~ gender,
test=FALSE )
##
## ─────────────────────────────
## gender
## F M
## n = 3379 n = 3680
## premath
## 8.7 (1.6) 8.7 (1.7)
## mathjr3
## 8.5 (1.9) 8.4 (2.0)
## ─────────────────────────────
dta %>%
furniture::table1(premath, mathjr3,
splitby= ~ urban,
test=FALSE )
##
## ───────────────────────────────────────
## urban
## city town village
## n = 4052 n = 2608 n = 399
## premath
## 8.8 (1.7) 8.7 (1.7) 8.3 (1.8)
## mathjr3
## 8.6 (2.0) 8.3 (1.9) 8.0 (1.9)
## ───────────────────────────────────────
dta %>%
furniture::table1(premath, mathjr3,
splitby= ~ sector,
test=FALSE )
##
## ─────────────────────────────
## sector
## Private Public
## n = 837 n = 6222
## premath
## 9.2 (1.5) 8.7 (1.7)
## mathjr3
## 8.9 (1.8) 8.4 (2.0)
## ─────────────────────────────
dta %>%
furniture::table1(premath, mathjr3,
splitby= ~ yearstea,
test=FALSE )
##
## ───────────────────────────────────────────────────────────
## yearstea
## 1 2 8 15 20
## n = 320 n = 978 n = 1212 n = 3353 n = 1196
## premath
## 8.7 (1.7) 8.7 (1.7) 8.7 (1.6) 8.8 (1.7) 8.7 (1.7)
## mathjr3
## 8.1 (1.9) 8.3 (1.9) 8.5 (1.9) 8.5 (2.0) 8.4 (2.0)
## ───────────────────────────────────────────────────────────
ggplot(data=dta,
aes(x=reorder(schoolID, mathjr3, median),
y=mathjr3,
group=schoolID)) +
geom_point(size=rel(.5),
alpha=.5) +
geom_hline(yintercept=quantile(dta$mathjr3,
probs=c(.25,.75)),
linetype="dotted",
col=2) +
labs(y="9th score",
x="School ID") +
theme_minimal() +
theme(axis.text.x=element_text(size=rel(.5),
angle=90,
hjust=1),
legend.position="NONE")
ggplot(data=dta,
aes(x=reorder(schoolID, mathjr3, mean),
y=mathjr3,
group=schoolID)) +
stat_summary(fun.data="mean_cl_boot",
size=rel(.1))+
geom_hline(yintercept=quantile(dta$mathjr3,
probs=c(.25,.5,.75)),
linetype="dotted",
col=2) +
coord_flip()+
labs(y="9th score",
x="School ID") +
theme_minimal() +
theme(axis.text.y=element_text(size=rel(.5),
angle=0,
vjust=1),
legend.position="NONE")
ggplot(data=dta,
aes(x=reorder(schoolID, mathjr3, mean),
y=mathjr3,
group=schoolID)) +
stat_summary(fun.data="mean_cl_boot",
size=rel(.1)) +
coord_flip()+
labs(y="9th score",
x="School ID") +
theme_minimal() +
theme(axis.text.y=element_text(size=rel(.5),
angle=0,
vjust=1),
legend.position="NONE")
sdta <- dta %>%
group_by(schoolID) %>%
dplyr::summarize(jr3_ave=mean(mathjr3),
jr3_sd2=var(mathjr3),
n = n())
## `summarise()` ungrouping output (override with `.groups` argument)
ggplot(data=sdta,
aes(jr3_ave)) +
stat_bin(bins=grDevices::nclass.FD(sdta$jr3_ave),
alpha=0.3, fill=8,
col=1) +
geom_vline(xintercept=mean(dta$mathjr3),
linetype="dashed",
col=2) +
geom_vline(xintercept=mean(sdta$jr3_ave),
linetype="dotted",
col=3) +
labs(y="Count",
x="Average score by school") +
theme_minimal()
head(sdta)
## # A tibble: 6 x 4
## schoolID jr3_ave jr3_sd2 n
## <chr> <dbl> <dbl> <int>
## 1 t001 8.79 4.17 33
## 2 t002 8.57 4.81 37
## 3 t004 8.78 3.92 32
## 4 t005 8.46 2.85 61
## 5 t006 8.17 3.35 41
## 6 t009 8.45 4.71 62
sdta <- sdta %>%
mutate(dev2=(jr3_ave-mean(dta$mathjr3))^2)
t(apply(sdta[,c(5,3)], 2, quantile, probs=c(.025, .5, .975), na.rm=T))
## 2.5% 50% 97.5%
## dev2 0.0004679167 0.03589969 0.545860
## jr3_sd2 2.3978407557 3.84366327 5.221569
ggplot(data=sdta,
aes(dev2)) +
stat_bin(bins=grDevices::nclass.FD(sdta$dev2),
alpha=0.3, fill='white', col=4) +
stat_bin(aes(jr3_sd2),
bins=grDevices::nclass.FD(sdta$jr3_sd2),
alpha=0.3, fill=8, col=1) +
labs(y="Count",
x="Deviation estimates",
title="within- and between-school") +
theme_minimal()
m0 <- lme4::lmer(mathjr3 ~ (1 | schoolID), data=dta)
sjPlot::tab_model(m0, show.p=FALSE, show.r2=FALSE,digits.re = 3)
| mathjr3 | ||
|---|---|---|
| Predictors | Estimates | CI |
| (Intercept) | 8.42 | 8.38 – 8.47 |
| Random Effects | ||
| σ2 | 3.807 | |
| τ00 schoolID | 0.001 | |
| ICC | 0.000 | |
| N schoolID | 167 | |
| Observations | 7059 | |
summary(m0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ (1 | schoolID)
## Data: dta
##
## REML criterion at convergence: 29476.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2720 -0.7296 0.2915 0.8066 3.3754
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 0.001197 0.03459
## Residual 3.807165 1.95120
## Number of obs: 7059, groups: schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.4232 0.0234 360
the estimated mean of 9th scores for all schools is 8.42
The variation between student within the same school is estimated to be 3.81
The estimated between school variance in 9th math score by school is 0.001.
The correlation between 9th scores of student attending the same school is 0.00 .
dta <- dta %>%
mutate(premath_a = mean(premath)) %>%
group_by(schoolID) %>%
mutate(premath_sa=mean(premath) - premath_a, #各校平均-整體平均
premath_c=premath - mean(premath)) #各校平均
head(dta)[1:5]
## # A tibble: 6 x 5
## studid sector urban gender classtype
## <chr> <fct> <chr> <fct> <chr>
## 1 " 1" Public town F " N"
## 2 " 4" Public city M " N"
## 3 " 6" Public town F " G"
## 4 " 7" Public city F " N"
## 5 " 8" Public city F " N"
## 6 " 11" Public city M " N"
head(dta)[10:13]
## # A tibble: 6 x 4
## mathwork1 mathquestion1 premath yearstea
## <int> <int> <int> <dbl>
## 1 2 1 7 15
## 2 2 2 10 15
## 3 1 3 10 1
## 4 2 1 7 20
## 5 1 1 8 20
## 6 1 3 9 2
head(dta)[18:21]
## # A tibble: 6 x 4
## mathjr3 premath_a premath_sa premath_c
## <int> <dbl> <dbl> <dbl>
## 1 7 8.72 0.0527 -1.77
## 2 8 8.72 0.0214 1.26
## 3 7 8.72 0.450 0.829
## 4 8 8.72 -0.0944 -1.63
## 5 9 8.72 0.167 -0.889
## 6 11 8.72 -0.235 0.514
ggplot(dta,
aes(premath_c, mathjr3,
group=schoolID)) +
geom_point(alpha=.2)+
stat_smooth(method="lm", formula=y~x,
se=FALSE, size=rel(.5), col=8)+
stat_smooth(aes(x=premath_sa, y=mathjr3,
group=1),
method="lm", formula=y~x,
se=FALSE, size=rel(.5), col=5)+
labs(x="˙7th score (school-centered)",
y="9th score")+
theme_minimal()
m1 <- lme4::lmer(mathjr3 ~ premath_c + (1 | schoolID), data=dta)
sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.42 | 0.02 |
| premath_c | 0.57 | 0.01 | ||
| Random Effects | ||||
| σ2 | 3.81 | 2.86 | ||
| τ00 | 0.00 schoolID | 0.02 schoolID | ||
| ICC | 0.00 | 0.01 | ||
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_c + (1 | schoolID)
## Data: dta
##
## REML criterion at convergence: 27500.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8223 -0.6672 0.0080 0.6785 3.7219
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 0.02101 0.145
## Residual 2.85781 1.691
## Number of obs: 7059, groups: schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.41982 0.02327 361.84
## premath_c 0.57473 0.01199 47.93
##
## Correlation of Fixed Effects:
## (Intr)
## premath_c 0.000
The estimated mean of 9th scores for all students is 8.42 with an SD of √2.86+0.02=1.69.
For one point increase in 7th score, 9th score increase by 0.57 point, on average.
Adjusting for individual 7th scores, correlation between 9th scores among children attending the same school is 0.01.
About 24.93% (= 3.81−2.86/3.81) of variance in 9th scores can be attributed to differences in 7th scores among children attending different schools
m1g <- lme4::lmer(mathjr3 ~ gender+premath_c + (1 | schoolID), data=dta)
summary(m1g)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ gender + premath_c + (1 | schoolID)
## Data: dta
##
## REML criterion at convergence: 27495.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7883 -0.6718 0.0075 0.6817 3.7606
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 0.02094 0.1447
## Residual 2.85444 1.6895
## Number of obs: 7059, groups: schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.48438 0.03136 270.549
## genderM -0.12378 0.04035 -3.068
## premath_c 0.57473 0.01198 47.954
##
## Correlation of Fixed Effects:
## (Intr) gendrM
## genderM -0.671
## premath_c 0.000 0.000
m2 <- lme4::lmer(mathjr3 ~ premath_sa + (1 | schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
summary(m2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_sa + (1 | schoolID)
## Data: dta
##
## REML criterion at convergence: 29426
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4837 -0.7398 0.1428 0.7603 3.5544
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 0.00 0.000
## Residual 3.78 1.944
## Number of obs: 7059, groups: schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.42343 0.02314 364.001
## premath_sa 0.69738 0.09530 7.318
##
## Correlation of Fixed Effects:
## (Intr)
## premath_sa 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m0, m2, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE,digits.re = 3)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.42 | 0.02 |
| premath_sa | 0.70 | 0.10 | ||
| Random Effects | ||||
| σ2 | 3.807 | 3.780 | ||
| τ00 | 0.001 schoolID | 0.000 schoolID | ||
| ICC | 0.000 | |||
The estimated mean of 9th scores for all schools is 8.42. On average, there is a increase of 0.70 point on 9th score from an increase of one point in 7th average score.
m3 <- lme4::lmer(mathjr3 ~ premath_c + premath_sa + (1 | schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
summary(m3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ premath_c + premath_sa + (1 | schoolID)
## Data: dta
##
## REML criterion at convergence: 27439.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7422 -0.6717 -0.0027 0.6875 3.8353
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolID (Intercept) 0.00 0.000
## Residual 2.85 1.688
## Number of obs: 7059, groups: schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.42343 0.02009 419.183
## premath_c 0.57473 0.01198 47.988
## premath_sa 0.69738 0.08275 8.427
##
## Correlation of Fixed Effects:
## (Intr) prmth_c
## premath_c 0.000
## premath_sa 0.000 0.000
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m0, m3, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.42 | 0.02 |
| premath_c | 0.57 | 0.01 | ||
| premath_sa | 0.70 | 0.08 | ||
| Random Effects | ||||
| σ2 | 3.81 | 2.85 | ||
| τ00 | 0.00 schoolID | 0.00 schoolID | ||
| ICC | 0.00 | |||
It is significant.
#加入班級
m3L0 <- lme4::lmer(mathjr3 ~ 1 + (1 | schoolID/classID), data=dta)
summary(m3L0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ 1 + (1 | schoolID/classID)
## Data: dta
##
## REML criterion at convergence: 29476.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2861 -0.7291 0.2851 0.8049 3.3655
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID:schoolID (Intercept) 5.211e-03 0.0721899
## schoolID (Intercept) 4.159e-08 0.0002039
## Residual 3.803e+00 1.9501666
## Number of obs: 7059, groups: classID:schoolID, 612; schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.42310 0.02341 359.8
sjPlot::tab_model(m0, m3L0, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE , digits.re = 3)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.42 | 0.02 |
| Random Effects | ||||
| σ2 | 3.807 | 3.803 | ||
| τ00 | 0.001 schoolID | 0.005 classID:schoolID | ||
| 0.000 schoolID | ||||
| ICC | 0.000 | 0.001 | ||
# class level 的effect明顯是由個人來的
9th scores of any two children from the same class within the same school has a correlation of 0.013 (= 0.005+0/3.808)
9th scores of any two children from the same school has a correlation of 0
ggplot(dta,
aes(yearstea,
mathjr3)) +
geom_point(alpha=.3)+
stat_smooth(method="lm",
formula=y~x, se=FALSE,
size=rel(.3), col="purple")+
labs(x="Teaching experience (in years)",
y="9th score")+
theme_minimal()
老師教學年限與學生成績無關
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
m3L1 <- lmer(mathjr3 ~ yearstea + (1 | schoolID) + (1| classID:schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m3L0, m3L1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE, show.re.var =FALSE, show.icc = FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.33 | 0.05 |
| yearstea | 0.01 | 0.00 | ||
summary(m3L1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ yearstea + (1 | schoolID) + (1 | classID:schoolID)
## Data: dta
##
## REML criterion at convergence: 29481.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3065 -0.7393 0.2604 0.7939 3.3557
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID:schoolID (Intercept) 0.004509 0.06715
## schoolID (Intercept) 0.000000 0.00000
## Residual 3.802093 1.94990
## Number of obs: 7059, groups: classID:schoolID, 612; schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.327338 0.052057 159.97
## yearstea 0.007846 0.003809 2.06
##
## Correlation of Fixed Effects:
## (Intr)
## yearstea -0.893
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
在一次驗證了老師教學年限與成績無關
m3L3 <- lmer(mathjr3 ~ premath_c + premath_sa + sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
## boundary (singular) fit: see ?isSingular
sjPlot::tab_model(m3L0, m3L3, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.67 | 0.06 |
| premath_c | 0.57 | 0.01 | ||
| premath_sa | 0.69 | 0.08 | ||
| sector [Public] | -0.28 | 0.06 | ||
| Random Effects | ||||
| σ2 | 3.80 | 2.84 | ||
| τ00 | 0.01 classID:schoolID | 0.00 classID:schoolID | ||
| 0.00 schoolID | 0.00 schoolID | |||
| ICC | 0.00 | |||
ggplot(dta, aes(premath_c, mathjr3, color = sector)) +
geom_smooth(method = "lm") +
geom_jitter(alpha = 0.2) +
labs(x = "Centered 7th score", y = "9th Math score") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
The effect of premath_c on 9th score for student in public school is similar than the student in private school.
ggplot(dta, aes(premath_c, mathjr3, color = classtype)) +
geom_smooth(method = "lm") +
geom_jitter(alpha = 0.2) +
labs(x = "Centered 7th score", y = "9th Math score") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
ggplot(dta, aes(premath_c, mathjr3, color = urban)) +
geom_smooth(method = "lm") +
geom_jitter(alpha = 0.2) +
labs(x = "Centered 7th score", y = "9th Math score") +
theme(legend.position = c(.1, .8))
## `geom_smooth()` using formula 'y ~ x'
City is higher than town and village.
m3L4 <- lmer(mathjr3 ~ urban + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban + (1 | schoolID) + (1 | classID:schoolID)
## Data: dta
##
## REML criterion at convergence: 29434.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3481 -0.7934 0.2243 0.7439 3.6070
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID:schoolID (Intercept) 0.0005004 0.02237
## schoolID (Intercept) 0.0017061 0.04131
## Residual 3.7808432 1.94444
## Number of obs: 7059, groups: classID:schoolID, 612; schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.55148 0.03075 278.106
## urbantown -0.26277 0.04883 -5.382
## urbanvillage -0.55441 0.10205 -5.433
##
## Correlation of Fixed Effects:
## (Intr) urbntw
## urbantown -0.622
## urbanvillag -0.297 0.187
sjPlot::tab_model(m3L0, m3L4, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 8.55 | 0.03 |
| urban [town] | -0.26 | 0.05 | ||
| urban [village] | -0.55 | 0.10 | ||
| Random Effects | ||||
| σ2 | 3.80 | 3.78 | ||
| τ00 | 0.01 classID:schoolID | 0.00 classID:schoolID | ||
| 0.00 schoolID | 0.00 schoolID | |||
| ICC | 0.00 | 0.00 | ||
plot(m3L3, ylim = c(-4, 4),
xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)
m3L5 <- lmer(mathjr3 ~ urban+ sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban + sector + (1 | schoolID) + (1 | classID:schoolID)
## Data: dta
##
## REML criterion at convergence: 29379.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5939 -0.7614 -0.0118 0.7828 3.6215
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID:schoolID (Intercept) 0.000000 0.00000
## schoolID (Intercept) 0.001514 0.03891
## Residual 3.750990 1.93675
## Number of obs: 7059, groups: classID:schoolID, 612; schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 9.02672 0.06929 130.280
## urbantown -0.25391 0.04864 -5.220
## urbanvillage -0.48216 0.10208 -4.723
## sectorPublic -0.54746 0.07163 -7.643
##
## Correlation of Fixed Effects:
## (Intr) urbntw urbnvl
## urbantown -0.253
## urbanvillag -0.048 0.189
## sectorPublc -0.897 -0.024 -0.093
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
m3L6 <- lmer(mathjr3 ~ urban*sector + (1 | schoolID) + (1| classID:schoolID), data=dta)
summary(m3L6)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mathjr3 ~ urban * sector + (1 | schoolID) + (1 | classID:schoolID)
## Data: dta
##
## REML criterion at convergence: 29381.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5887 -0.7623 -0.0068 0.7821 3.6209
##
## Random effects:
## Groups Name Variance Std.Dev.
## classID:schoolID (Intercept) 0.00000 0.00000
## schoolID (Intercept) 0.00143 0.03781
## Residual 3.75158 1.93690
## Number of obs: 7059, groups: classID:schoolID, 612; schoolID, 167
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 9.01683 0.08381 107.590
## urbantown -0.22645 0.13943 -1.624
## urbanvillage -0.48366 0.10234 -4.726
## sectorPublic -0.53605 0.08990 -5.963
## urbantown:sectorPublic -0.03127 0.14878 -0.210
##
## Correlation of Fixed Effects:
## (Intr) urbntw urbnvl sctrPb
## urbantown -0.600
## urbanvillag 0.000 0.000
## sectorPublc -0.931 0.560 -0.116
## urbntwn:scP 0.563 -0.937 0.070 -0.604
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
anova(m3L0,m3L4,m3L5, m3L6)
## Data: dta
## Models:
## m3L0: mathjr3 ~ 1 + (1 | schoolID/classID)
## m3L4: mathjr3 ~ urban + (1 | schoolID) + (1 | classID:schoolID)
## m3L5: mathjr3 ~ urban + sector + (1 | schoolID) + (1 | classID:schoolID)
## m3L6: mathjr3 ~ urban * sector + (1 | schoolID) + (1 | classID:schoolID)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m3L0 4 29479 29506 -14735 29471
## m3L4 6 29434 29475 -14711 29422 49.0380 2 2.247e-11 ***
## m3L5 7 29378 29426 -14682 29364 58.2160 1 2.349e-14 ***
## m3L6 8 29380 29434 -14682 29364 0.0451 1 0.8318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sjPlot::tab_model(m3L0, m3L6, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
| mathjr3 | mathjr3 | |||
|---|---|---|---|---|
| Predictors | Estimates | std. Error | Estimates | std. Error |
| (Intercept) | 8.42 | 0.02 | 9.02 | 0.08 |
| urban [town] | -0.23 | 0.14 | ||
| urban [village] | -0.48 | 0.10 | ||
| sector [Public] | -0.54 | 0.09 | ||
|
urban [town] * sector [Public] |
-0.03 | 0.15 | ||
| Random Effects | ||||
| σ2 | 3.80 | 3.75 | ||
| τ00 | 0.01 classID:schoolID | 0.00 classID:schoolID | ||
| 0.00 schoolID | 0.00 schoolID | |||
| ICC | 0.00 | |||
library(lattice)
qqmath(m3L5, grid=TRUE)
ggplot(dta, aes(premath, mathjr3, color = sector)) +
geom_smooth(method = "lm") +
geom_jitter(alpha = 0.2) +
labs(x = "Centered 7th score", y = "9th Math score") +
theme(legend.position = c(.1, .8))