#peijun
Refer to the questions here.
dta <- read.table("E:/201709/multilevel_analysis/1023/language_arith.csv", header=T, sep=",")
dta <- data.table(dta)
#correlation(individual)
cor(dta[, c("Language", "Arithmetic")])
Language Arithmetic
Language 1.000000 0.694228
Arithmetic 0.694228 1.000000
##correlation(mean)
dta_m <- aggregate(cbind(Language, Arithmetic) ~ School, data = dta, mean)
cor(dta_m[,-1])
Language Arithmetic
Language 1.0000000 0.8853051
Arithmetic 0.8853051 1.0000000
#correlation(scaled)
dta <- data.table(dta)
dta[, c("scale_Lan", "scale_Arith") := lapply(.SD, function(x) as.vector(scale(x))), by = factor(School)]
cor(dta[,c("scale_Lan", "scale_Arith")])
scale_Lan scale_Arith
scale_Lan 1.00000000 0.01536906
scale_Arith 0.01536906 1.00000000
# REM (lang)
m0 <- lmer(Language ~ (1 | School), data = dta)
summary(m0)
Linear mixed model fit by REML ['lmerMod']
Formula: Language ~ (1 | School)
Data: dta
REML criterion at convergence: 16253.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.11618 -0.65703 0.07597 0.74128 2.50639
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 19.63 4.431
Residual 64.56 8.035
Number of obs: 2287, groups: School, 131
Fixed effects:
Estimate Std. Error t value
(Intercept) 40.3624 0.4282 94.26
# REM (arith)
m1 <- lmer(Arithmetic ~ (1 | School), data = dta)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: Arithmetic ~ (1 | School)
Data: dta
REML criterion at convergence: 14695.8
Scaled residuals:
Min 1Q Median 3Q Max
-2.80522 -0.70245 0.08321 0.75577 2.68341
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 12.62 3.552
Residual 32.28 5.682
Number of obs: 2287, groups: School, 131
Fixed effects:
Estimate Std. Error t value
(Intercept) 18.9473 0.3365 56.31
#plot
ggplot(data = dta, aes(x = Arithmetic, y = Language)) +
geom_point(color = "deepskyblue") +
stat_smooth(method = "lm", color = "deepskyblue", se = F) +
labs(x = "Arithmetic Score", y = "Language score") +
geom_point(data = dta_m, aes(x = Arithmetic, y = Language), color = "blue") +
stat_smooth(data = dta_m, aes(x = Arithmetic, y = Language), method = "lm", color = "blue", se = F)
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/iq_language.txt", header = T)
m1 <- lmer(Language ~ IQ + (1 | School), data = dta)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: Language ~ IQ + (1 | School)
Data: dta
REML criterion at convergence: 15255.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.0939 -0.6375 0.0579 0.7061 3.1448
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 9.602 3.099
Residual 42.245 6.500
Number of obs: 2287, groups: School, 131
Fixed effects:
Estimate Std. Error t value
(Intercept) 11.16993 0.87952 12.7
IQ 2.48759 0.07008 35.5
Correlation of Fixed Effects:
(Intr)
IQ -0.937
tidy(m1)
term estimate std.error statistic group
1 (Intercept) 11.169926 0.87952108 12.70001 fixed
2 IQ 2.487591 0.07008105 35.49591 fixed
3 sd_(Intercept).School 3.098660 NA NA School
4 sd_Observation.Residual 6.499578 NA NA Residual
library(faraway)
dta <- jsp
m0 <- lmer(english ~ raven + (1 | school/class) , data = dta)
summary(m0)
Linear mixed model fit by REML ['lmerMod']
Formula: english ~ raven + (1 | school/class)
Data: dta
REML criterion at convergence: 29107.3
Scaled residuals:
Min 1Q Median 3Q Max
-2.90145 -0.76284 0.05924 0.78127 2.82146
Random effects:
Groups Name Variance Std.Dev.
class:school (Intercept) 40.58 6.370
school (Intercept) 15.59 3.949
Residual 451.47 21.248
Number of obs: 3236, groups: class:school, 94; school, 49
Fixed effects:
Estimate Std. Error t value
(Intercept) 9.24949 2.01350 4.594
raven 1.69687 0.06988 24.282
Correlation of Fixed Effects:
(Intr)
raven -0.873
dta <- classroom
# model 0 ?same as m2?
m0 <- glm(mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid), data = dta)
summary(m0)
Call:
glm(formula = mathgain ~ mathkind + sex + minority + ses + (1 |
schoolid/classid), data = dta)
Deviance Residuals:
Min 1Q Median 3Q Max
-178.70 -19.04 -1.76 19.33 123.33
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 273.3533 10.7895 25.335 < 2e-16 ***
mathkind -0.4515 0.0222 -20.336 < 2e-16 ***
sex -0.7463 1.7321 -0.431 0.666634
minority -6.8650 1.9849 -3.459 0.000562 ***
ses 4.9666 1.2411 4.002 6.68e-05 ***
1 | schoolid/classidTRUE NA NA NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 889.419)
Null deviance: 1424560 on 1189 degrees of freedom
Residual deviance: 1053962 on 1185 degrees of freedom
AIC: 11465
Number of Fisher Scoring iterations: 2
tidy(m0)
term estimate std.error statistic p.value
1 (Intercept) 273.3532495 10.78949160 25.335137 1.614011e-113
2 mathkind -0.4514971 0.02220194 -20.335930 4.244953e-79
3 sex -0.7463453 1.73214180 -0.430880 6.666340e-01
4 minority -6.8649740 1.98485448 -3.458679 5.621038e-04
5 ses 4.9666439 1.24112485 4.001728 6.678278e-05
#model 1
m1 <- lmer(mathgain ~ (1 | schoolid/classid), data = dta)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: mathgain ~ (1 | schoolid/classid)
Data: dta
REML criterion at convergence: 11768.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.6441 -0.5984 -0.0336 0.5334 5.6335
Random effects:
Groups Name Variance Std.Dev.
classid:schoolid (Intercept) 99.23 9.961
schoolid (Intercept) 77.49 8.803
Residual 1028.23 32.066
Number of obs: 1190, groups: classid:schoolid, 312; schoolid, 107
Fixed effects:
Estimate Std. Error t value
(Intercept) 57.427 1.443 39.79
#model 2
dta <- dta %>%
mutate(sex = factor(sex),
minority = factor(minority),
schoolid = factor(schoolid),
classid = factor(classid))
m2 <- lmer(mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid), data = dta)
summary(m2)
Linear mixed model fit by REML ['lmerMod']
Formula:
mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid)
Data: dta
REML criterion at convergence: 11385.8
Scaled residuals:
Min 1Q Median 3Q Max
-5.8257 -0.6110 -0.0337 0.5538 4.2678
Random effects:
Groups Name Variance Std.Dev.
classid:schoolid (Intercept) 83.28 9.126
schoolid (Intercept) 75.20 8.672
Residual 734.57 27.103
Number of obs: 1190, groups: classid:schoolid, 312; schoolid, 107
Fixed effects:
Estimate Std. Error t value
(Intercept) 282.79034 10.85323 26.056
mathkind -0.46980 0.02227 -21.100
sex1 -1.25119 1.65773 -0.755
minority1 -8.26213 2.34011 -3.531
ses 5.34638 1.24109 4.308
Correlation of Fixed Effects:
(Intr) mthknd sex1 mnrty1
mathkind -0.978
sex1 -0.044 -0.031
minority1 -0.307 0.163 -0.018
ses 0.140 -0.168 0.019 0.163
tidy(m2)
term estimate std.error statistic
1 (Intercept) 282.790339 10.853234 26.0558599
2 mathkind -0.469802 0.022266 -21.0995240
3 sex1 -1.251192 1.657730 -0.7547619
4 minority1 -8.262131 2.340113 -3.5306546
5 ses 5.346376 1.241094 4.3077937
6 sd_(Intercept).classid:schoolid 9.126039 NA NA
7 sd_(Intercept).schoolid 8.671992 NA NA
8 sd_Observation.Residual 27.102862 NA NA
group
1 fixed
2 fixed
3 fixed
4 fixed
5 fixed
6 classid:schoolid
7 schoolid
8 Residual
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/course_eval.txt", header = T)
dta <- dta %>%
mutate(Course = factor(Course),
Gender = factor(Gender),
Minority = factor(Minority),
Tenure = factor(Tenure))
#REM (all)
m0 <- lmer(Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course), data = dta)
summary(m0)
Linear mixed model fit by REML ['lmerMod']
Formula: Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course)
Data: dta
REML criterion at convergence: 749.3
Scaled residuals:
Min 1Q Median 3Q Max
-3.6110 -0.6666 0.0876 0.7275 2.0388
Random effects:
Groups Name Variance Std.Dev.
Course (Intercept) 0.07618 0.2760
Residual 0.26282 0.5127
Number of obs: 463, groups: Course, 31
Fixed effects:
Estimate Std. Error t value
(Intercept) 4.352253 0.159658 27.260
Beauty 0.131483 0.033863 3.883
Age -0.003687 0.002972 -1.240
Gender1 -0.232125 0.052992 -4.380
Minority1 -0.127408 0.075157 -1.695
Tenure1 -0.091118 0.054955 -1.658
Correlation of Fixed Effects:
(Intr) Beauty Age Gendr1 Mnrty1
Beauty -0.229
Age -0.869 0.265
Gender1 -0.309 -0.034 0.175
Minority1 -0.174 0.035 0.090 -0.072
Tenure1 0.059 0.047 -0.316 0.136 0.044
tidy(m0)
term estimate std.error statistic group
1 (Intercept) 4.352253312 0.159657863 27.259875 fixed
2 Beauty 0.131482534 0.033862964 3.882783 fixed
3 Age -0.003686554 0.002971837 -1.240497 fixed
4 Gender1 -0.232124994 0.052992167 -4.380364 fixed
5 Minority1 -0.127407526 0.075157095 -1.695216 fixed
6 Tenure1 -0.091118291 0.054954941 -1.658055 fixed
7 sd_(Intercept).Course 0.276006715 NA NA Course
8 sd_Observation.Residual 0.512663196 NA NA Residual
#REM
m1 <- lmer(Score ~ (1 | Course), data = dta)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: Score ~ (1 | Course)
Data: dta
REML criterion at convergence: 764
Scaled residuals:
Min 1Q Median 3Q Max
-3.5723 -0.6522 0.1330 0.7197 1.8393
Random effects:
Groups Name Variance Std.Dev.
Course (Intercept) 0.0615 0.2480
Residual 0.2872 0.5359
Number of obs: 463, groups: Course, 31
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.99001 0.06323 63.1
# plot
ggplot(dta, aes(x = Beauty, y = Score)) +
geom_point(shape = 1) +
stat_smooth(method = "lm", color = "black", se = F) +
scale_x_continuous(breaks = seq(-1.5, 2.0, by = 0.5)) +
scale_y_continuous(breaks = seq(2.0, 5.0, by = 0.5)) +
xlab("Beauty score") + ylab("Average teaching evaluation") +
theme_bw()
ggplot(dta, aes(x = Beauty, y = Score, group = Course)) +
geom_point(shape = 1, color = "blue") +
stat_smooth(method = "lm", color = "black", se = F) +
facet_wrap( ~ Course) +
labs(x ="Beauty judgement score", y = "Average course evaluation score") +
theme_bw()
dta <- read.table("E:/201709/multilevel_analysis/1023/attitudes.csv", header = T, sep=",")
m0 <- lmer(like ~ State + PrivPub + sex + (1 | school/class), data = dta)
m1 <- update(m0, . ~ . -State, - sex )
anova(m0, m1)
refitting model(s) with ML (instead of REML)
Data: dta
Models:
m1: like ~ PrivPub + sex + (1 | school/class)
m0: like ~ State + PrivPub + sex + (1 | school/class)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
m1 6 5551.1 5582.5 -2769.6 5539.1
m0 7 5553.1 5589.7 -2769.6 5539.1 0.0026 1 0.9592
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: like ~ PrivPub + sex + (1 | school/class)
Data: dta
REML criterion at convergence: 5546.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.6927 -0.6891 0.1543 0.6792 2.5584
Random effects:
Groups Name Variance Std.Dev.
class:school (Intercept) 0.3206 0.5662
school (Intercept) 0.0000 0.0000
Residual 3.0521 1.7470
Number of obs: 1383, groups: class:school, 66; school, 41
Fixed effects:
Estimate Std. Error t value
(Intercept) 4.7218 0.1624 29.072
PrivPubpublic 0.4117 0.1857 2.217
sexm 0.1823 0.0982 1.857
Correlation of Fixed Effects:
(Intr) PrvPbp
PrivPubpblc -0.795
sexm -0.309 0.012
dta1 <- read.table("http://140.116.183.121/~sheu/lmm/Data/demo1.txt", header = T)
dta2 <- read.table("http://140.116.183.121/~sheu/lmm/Data/demo2.txt", header = T)
# shall scores the same to compare ICC?
c(mean(dta1$Score), sd(dta1$Score))
[1] 62.11111 34.22150
c(mean(dta2$Score), sd(dta2$Score))
[1] 57.44444 32.29207
#
m1 <-lmer(Score ~ (1 | School) , data = dta1)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: Score ~ (1 | School)
Data: dta1
REML criterion at convergence: 51.5
Scaled residuals:
Min 1Q Median 3Q Max
-1.3280 -0.4745 0.0000 0.4608 1.3553
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 1679 40.976
Residual 5 2.236
Number of obs: 9, groups: School, 3
Fixed effects:
Estimate Std. Error t value
(Intercept) 53.01 23.67 2.24
m2 <-lmer(Score ~ (1 | School) , data = dta2)
summary(m2)
Linear mixed model fit by REML ['lmerMod']
Formula: Score ~ (1 | School)
Data: dta2
REML criterion at convergence: 80.4
Scaled residuals:
Min 1Q Median 3Q Max
-1.36734 -0.34286 -0.02776 1.07153 1.13999
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 104.2 10.21
Residual 967.8 31.11
Number of obs: 9, groups: School, 3
Fixed effects:
Estimate Std. Error t value
(Intercept) 56.36 12.01 4.692
dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/leadership_math.txt", header = T)
dta <- dta %>%
mutate(School = factor(School),
Student = factor(Student))
m0 <- lmer(Math ~ Lead + Gender + SES + (1 | School), data = dta)
summary(m0)
Linear mixed model fit by REML ['lmerMod']
Formula: Math ~ Lead + Gender + SES + (1 | School)
Data: dta
REML criterion at convergence: 979.4
Scaled residuals:
Min 1Q Median 3Q Max
-2.0745 -0.5824 -0.0854 0.5489 3.3174
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 67.08 8.19
Residual 1386.77 37.24
Number of obs: 100, groups: School, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 488.406 64.642 7.556
Lead 239.205 94.923 2.520
GenderM 1.603 7.862 0.204
SESO 14.751 8.893 1.659
Correlation of Fixed Effects:
(Intr) Lead GendrM
Lead -0.993
GenderM 0.019 -0.064
SESO 0.292 -0.364 -0.121
m1 <- lmer(Math ~ Lead + (1 |School), data = dta)
summary(m1)
Linear mixed model fit by REML ['lmerMod']
Formula: Math ~ Lead + (1 | School)
Data: dta
REML criterion at convergence: 994.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.1760 -0.5846 -0.0317 0.5396 3.3772
Random effects:
Groups Name Variance Std.Dev.
School (Intercept) 62.05 7.877
Residual 1402.95 37.456
Number of obs: 100, groups: School, 10
Fixed effects:
Estimate Std. Error t value
(Intercept) 455.61 61.20 7.444
Lead 300.74 87.07 3.454
Correlation of Fixed Effects:
(Intr)
Lead -0.997
m2 <- lm(Math ~ Lead, data = dta)
summary(m2)
Call:
lm(formula = Math ~ Lead, data = dta)
Residuals:
Min 1Q Median 3Q Max
-84.159 -21.431 -1.159 20.132 123.841
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 455.61 51.87 8.783 5.27e-14 ***
Lead 300.74 73.80 4.075 9.35e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 38.13 on 98 degrees of freedom
Multiple R-squared: 0.1449, Adjusted R-squared: 0.1362
F-statistic: 16.61 on 1 and 98 DF, p-value: 9.35e-05
anova(m0, m1) # can take away Gender, SES
refitting model(s) with ML (instead of REML)
Data: dta
Models:
m1: Math ~ Lead + (1 | School)
m0: Math ~ Lead + Gender + SES + (1 | School)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
m1 4 1017.9 1028.3 -504.93 1009.9
m0 6 1018.9 1034.5 -503.45 1006.9 2.9589 2 0.2278
anova(m1, m2) # no need to group by school
refitting model(s) with ML (instead of REML)
Data: dta
Models:
m2: Math ~ Lead
m1: Math ~ Lead + (1 | School)
Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
m2 3 1016.0 1023.8 -504.97 1010.0
m1 4 1017.9 1028.3 -504.93 1009.9 0.0957 1 0.757
# plot
ggplot(dta, aes(x = Lead, y = Math)) +
geom_point(color = "blue", alpha = 0.5) +
stat_smooth(method = "lm", color = "black", se = F) +
labs(x ="Lead score", y = "Math score") +
theme_bw()
dta <- read.csv("E:/201709/multilevel_analysis/1023/pts.csv")
# compute mean Read_0 by school
dta <- dta %>%
mutate(School = factor(School), Teacher = factor(Teacher),
Pupil = factor(Pupil)) %>%
group_by( School ) %>%
mutate(msRead_0 = mean(Read_0))
# compute mean Read_0 by teacher and
# centered teacher mean of Read_0 (from respective school means)
dta <- dta %>%
group_by( Teacher ) %>%
mutate(mtRead_0 = mean(Read_0), ctRead_0 = mean(Read_0) - msRead_0 )
# no predictor
summary(m0 <- lmer(Read_1 ~ (1 | School) + (1 | Teacher), data = dta))
Linear mixed model fit by REML ['lmerMod']
Formula: Read_1 ~ (1 | School) + (1 | Teacher)
Data: dta
REML criterion at convergence: 2486.8
Scaled residuals:
Min 1Q Median 3Q Max
-3.05438 -0.65686 0.06497 0.62523 2.47233
Random effects:
Groups Name Variance Std.Dev.
Teacher (Intercept) 0.1370 0.3701
School (Intercept) 0.1277 0.3574
Residual 1.3250 1.1511
Number of obs: 777, groups: Teacher, 46; School, 20
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.3755 0.1065 31.69
# add a school-level predictor
summary(m0_s <- update(m0, . ~ . + msRead_0))
Linear mixed model fit by REML ['lmerMod']
Formula: Read_1 ~ (1 | School) + (1 | Teacher) + msRead_0
Data: dta
REML criterion at convergence: 2467.4
Scaled residuals:
Min 1Q Median 3Q Max
-3.06110 -0.65597 0.06822 0.62772 2.45391
Random effects:
Groups Name Variance Std.Dev.
Teacher (Intercept) 0.1234 0.3513
School (Intercept) 0.0000 0.0000
Residual 1.3224 1.1500
Number of obs: 777, groups: Teacher, 46; School, 20
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.39822 0.06808 49.92
msRead_0 1.00159 0.17807 5.62
Correlation of Fixed Effects:
(Intr)
msRead_0 0.086
# add a teacher-level predictor
summary(m0_t <- update(m0, . ~ . + mtRead_0))
Linear mixed model fit by REML ['lmerMod']
Formula: Read_1 ~ (1 | School) + (1 | Teacher) + mtRead_0
Data: dta
REML criterion at convergence: 2434.9
Scaled residuals:
Min 1Q Median 3Q Max
-3.07539 -0.65979 0.06707 0.63259 2.49973
Random effects:
Groups Name Variance Std.Dev.
Teacher (Intercept) 8.785e-17 9.373e-09
School (Intercept) 4.471e-02 2.114e-01
Residual 1.309e+00 1.144e+00
Number of obs: 777, groups: Teacher, 46; School, 20
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.40649 0.06307 54.01
mtRead_0 1.07319 0.11495 9.34
Correlation of Fixed Effects:
(Intr)
mtRead_0 0.024
# add a teacher-level predictor away from respective school means
summary(m0_ct <- update(m0, . ~ . + ctRead_0))
Linear mixed model fit by REML ['lmerMod']
Formula: Read_1 ~ (1 | School) + (1 | Teacher) + ctRead_0
Data: dta
REML criterion at convergence: 2454.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.1804 -0.6461 0.0791 0.6420 2.5272
Random effects:
Groups Name Variance Std.Dev.
Teacher (Intercept) 0.0000 0.000
School (Intercept) 0.1764 0.420
Residual 1.3108 1.145
Number of obs: 777, groups: Teacher, 46; School, 20
Fixed effects:
Estimate Std. Error t value
(Intercept) 3.3896 0.1029 32.93
ctRead_0 1.1742 0.1581 7.43
Correlation of Fixed Effects:
(Intr)
ctRead_0 0.000