1023_HW

#peijun

Refer to the questions here.

Q1 Language and Arithmatic

dta <- read.table("E:/201709/multilevel_analysis/1023/language_arith.csv", header=T, sep=",") dta <- data.table(dta) #correlation(individual) cor(dta[, c("Language", "Arithmetic")])

Language Arithmetic Language 1.000000 0.694228 Arithmetic 0.694228 1.000000

##correlation(mean) dta_m <- aggregate(cbind(Language, Arithmetic) ~ School, data = dta, mean) cor(dta_m[,-1])

Language Arithmetic Language 1.0000000 0.8853051 Arithmetic 0.8853051 1.0000000

#correlation(scaled) dta <- data.table(dta) dta[, c("scale_Lan", "scale_Arith") := lapply(.SD, function(x) as.vector(scale(x))), by = factor(School)] cor(dta[,c("scale_Lan", "scale_Arith")])

scale_Lan scale_Arith scale_Lan 1.00000000 0.01536906 scale_Arith 0.01536906 1.00000000

# REM (lang) m0 <- lmer(Language ~ (1 | School), data = dta) summary(m0)

Linear mixed model fit by REML ['lmerMod'] Formula: Language ~ (1 | School) Data: dta REML criterion at convergence: 16253.1 Scaled residuals: Min 1Q Median 3Q Max -3.11618 -0.65703 0.07597 0.74128 2.50639 Random effects: Groups Name Variance Std.Dev. School (Intercept) 19.63 4.431 Residual 64.56 8.035 Number of obs: 2287, groups: School, 131 Fixed effects: Estimate Std. Error t value (Intercept) 40.3624 0.4282 94.26

# REM (arith) m1 <- lmer(Arithmetic ~ (1 | School), data = dta) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: Arithmetic ~ (1 | School) Data: dta REML criterion at convergence: 14695.8 Scaled residuals: Min 1Q Median 3Q Max -2.80522 -0.70245 0.08321 0.75577 2.68341 Random effects: Groups Name Variance Std.Dev. School (Intercept) 12.62 3.552 Residual 32.28 5.682 Number of obs: 2287, groups: School, 131 Fixed effects: Estimate Std. Error t value (Intercept) 18.9473 0.3365 56.31

#plot ggplot(data = dta, aes(x = Arithmetic, y = Language)) + geom_point(color = "deepskyblue") + stat_smooth(method = "lm", color = "deepskyblue", se = F) + labs(x = "Arithmetic Score", y = "Language score") + geom_point(data = dta_m, aes(x = Arithmetic, y = Language), color = "blue") + stat_smooth(data = dta_m, aes(x = Arithmetic, y = Language), method = "lm", color = "blue", se = F)

Q2 IQ and language

dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/iq_language.txt", header = T) m1 <- lmer(Language ~ IQ + (1 | School), data = dta) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: Language ~ IQ + (1 | School) Data: dta REML criterion at convergence: 15255.8 Scaled residuals: Min 1Q Median 3Q Max -4.0939 -0.6375 0.0579 0.7061 3.1448 Random effects: Groups Name Variance Std.Dev. School (Intercept) 9.602 3.099 Residual 42.245 6.500 Number of obs: 2287, groups: School, 131 Fixed effects: Estimate Std. Error t value (Intercept) 11.16993 0.87952 12.7 IQ 2.48759 0.07008 35.5 Correlation of Fixed Effects: (Intr) IQ -0.937

tidy(m1)

term estimate std.error statistic group 1 (Intercept) 11.169926 0.87952108 12.70001 fixed 2 IQ 2.487591 0.07008105 35.49591 fixed 3 sd_(Intercept).School 3.098660 NA NA School 4 sd_Observation.Residual 6.499578 NA NA Residual

Q3 Ravens and English

library(faraway) dta <- jsp m0 <- lmer(english ~ raven + (1 | school/class) , data = dta) summary(m0)

Linear mixed model fit by REML ['lmerMod'] Formula: english ~ raven + (1 | school/class) Data: dta REML criterion at convergence: 29107.3 Scaled residuals: Min 1Q Median 3Q Max -2.90145 -0.76284 0.05924 0.78127 2.82146 Random effects: Groups Name Variance Std.Dev. class:school (Intercept) 40.58 6.370 school (Intercept) 15.59 3.949 Residual 451.47 21.248 Number of obs: 3236, groups: class:school, 94; school, 49 Fixed effects: Estimate Std. Error t value (Intercept) 9.24949 2.01350 4.594 raven 1.69687 0.06988 24.282 Correlation of Fixed Effects: (Intr) raven -0.873

Q4 Math score gain

dta <- classroom # model 0 ?same as m2? m0 <- glm(mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid), data = dta) summary(m0)

Call: glm(formula = mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid), data = dta) Deviance Residuals: Min 1Q Median 3Q Max -178.70 -19.04 -1.76 19.33 123.33 Coefficients: (1 not defined because of singularities) Estimate Std. Error t value Pr(>|t|) (Intercept) 273.3533 10.7895 25.335 < 2e-16 *** mathkind -0.4515 0.0222 -20.336 < 2e-16 *** sex -0.7463 1.7321 -0.431 0.666634 minority -6.8650 1.9849 -3.459 0.000562 *** ses 4.9666 1.2411 4.002 6.68e-05 *** 1 | schoolid/classidTRUE NA NA NA NA --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 (Dispersion parameter for gaussian family taken to be 889.419) Null deviance: 1424560 on 1189 degrees of freedom Residual deviance: 1053962 on 1185 degrees of freedom AIC: 11465 Number of Fisher Scoring iterations: 2

tidy(m0)

term estimate std.error statistic p.value 1 (Intercept) 273.3532495 10.78949160 25.335137 1.614011e-113 2 mathkind -0.4514971 0.02220194 -20.335930 4.244953e-79 3 sex -0.7463453 1.73214180 -0.430880 6.666340e-01 4 minority -6.8649740 1.98485448 -3.458679 5.621038e-04 5 ses 4.9666439 1.24112485 4.001728 6.678278e-05

#model 1 m1 <- lmer(mathgain ~ (1 | schoolid/classid), data = dta) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: mathgain ~ (1 | schoolid/classid) Data: dta REML criterion at convergence: 11768.8 Scaled residuals: Min 1Q Median 3Q Max -4.6441 -0.5984 -0.0336 0.5334 5.6335 Random effects: Groups Name Variance Std.Dev. classid:schoolid (Intercept) 99.23 9.961 schoolid (Intercept) 77.49 8.803 Residual 1028.23 32.066 Number of obs: 1190, groups: classid:schoolid, 312; schoolid, 107 Fixed effects: Estimate Std. Error t value (Intercept) 57.427 1.443 39.79

#model 2 dta <- dta %>% mutate(sex = factor(sex), minority = factor(minority), schoolid = factor(schoolid), classid = factor(classid)) m2 <- lmer(mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid), data = dta) summary(m2)

Linear mixed model fit by REML ['lmerMod'] Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid/classid) Data: dta REML criterion at convergence: 11385.8 Scaled residuals: Min 1Q Median 3Q Max -5.8257 -0.6110 -0.0337 0.5538 4.2678 Random effects: Groups Name Variance Std.Dev. classid:schoolid (Intercept) 83.28 9.126 schoolid (Intercept) 75.20 8.672 Residual 734.57 27.103 Number of obs: 1190, groups: classid:schoolid, 312; schoolid, 107 Fixed effects: Estimate Std. Error t value (Intercept) 282.79034 10.85323 26.056 mathkind -0.46980 0.02227 -21.100 sex1 -1.25119 1.65773 -0.755 minority1 -8.26213 2.34011 -3.531 ses 5.34638 1.24109 4.308 Correlation of Fixed Effects: (Intr) mthknd sex1 mnrty1 mathkind -0.978 sex1 -0.044 -0.031 minority1 -0.307 0.163 -0.018 ses 0.140 -0.168 0.019 0.163

tidy(m2)

term estimate std.error statistic 1 (Intercept) 282.790339 10.853234 26.0558599 2 mathkind -0.469802 0.022266 -21.0995240 3 sex1 -1.251192 1.657730 -0.7547619 4 minority1 -8.262131 2.340113 -3.5306546 5 ses 5.346376 1.241094 4.3077937 6 sd_(Intercept).classid:schoolid 9.126039 NA NA 7 sd_(Intercept).schoolid 8.671992 NA NA 8 sd_Observation.Residual 27.102862 NA NA group 1 fixed 2 fixed 3 fixed 4 fixed 5 fixed 6 classid:schoolid 7 schoolid 8 Residual

Q5 Course evaluation and instructor’s beauty

dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/course_eval.txt", header = T) dta <- dta %>% mutate(Course = factor(Course), Gender = factor(Gender), Minority = factor(Minority), Tenure = factor(Tenure)) #REM (all) m0 <- lmer(Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course), data = dta) summary(m0)

Linear mixed model fit by REML ['lmerMod'] Formula: Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course) Data: dta REML criterion at convergence: 749.3 Scaled residuals: Min 1Q Median 3Q Max -3.6110 -0.6666 0.0876 0.7275 2.0388 Random effects: Groups Name Variance Std.Dev. Course (Intercept) 0.07618 0.2760 Residual 0.26282 0.5127 Number of obs: 463, groups: Course, 31 Fixed effects: Estimate Std. Error t value (Intercept) 4.352253 0.159658 27.260 Beauty 0.131483 0.033863 3.883 Age -0.003687 0.002972 -1.240 Gender1 -0.232125 0.052992 -4.380 Minority1 -0.127408 0.075157 -1.695 Tenure1 -0.091118 0.054955 -1.658 Correlation of Fixed Effects: (Intr) Beauty Age Gendr1 Mnrty1 Beauty -0.229 Age -0.869 0.265 Gender1 -0.309 -0.034 0.175 Minority1 -0.174 0.035 0.090 -0.072 Tenure1 0.059 0.047 -0.316 0.136 0.044

tidy(m0)

term estimate std.error statistic group 1 (Intercept) 4.352253312 0.159657863 27.259875 fixed 2 Beauty 0.131482534 0.033862964 3.882783 fixed 3 Age -0.003686554 0.002971837 -1.240497 fixed 4 Gender1 -0.232124994 0.052992167 -4.380364 fixed 5 Minority1 -0.127407526 0.075157095 -1.695216 fixed 6 Tenure1 -0.091118291 0.054954941 -1.658055 fixed 7 sd_(Intercept).Course 0.276006715 NA NA Course 8 sd_Observation.Residual 0.512663196 NA NA Residual

#REM m1 <- lmer(Score ~ (1 | Course), data = dta) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: Score ~ (1 | Course) Data: dta REML criterion at convergence: 764 Scaled residuals: Min 1Q Median 3Q Max -3.5723 -0.6522 0.1330 0.7197 1.8393 Random effects: Groups Name Variance Std.Dev. Course (Intercept) 0.0615 0.2480 Residual 0.2872 0.5359 Number of obs: 463, groups: Course, 31 Fixed effects: Estimate Std. Error t value (Intercept) 3.99001 0.06323 63.1

# plot ggplot(dta, aes(x = Beauty, y = Score)) + geom_point(shape = 1) + stat_smooth(method = "lm", color = "black", se = F) + scale_x_continuous(breaks = seq(-1.5, 2.0, by = 0.5)) + scale_y_continuous(breaks = seq(2.0, 5.0, by = 0.5)) + xlab("Beauty score") + ylab("Average teaching evaluation") + theme_bw()

ggplot(dta, aes(x = Beauty, y = Score, group = Course)) + geom_point(shape = 1, color = "blue") + stat_smooth(method = "lm", color = "black", se = F) + facet_wrap( ~ Course) + labs(x ="Beauty judgement score", y = "Average course evaluation score") + theme_bw()

Q6 Attitude to science

dta <- read.table("E:/201709/multilevel_analysis/1023/attitudes.csv", header = T, sep=",") m0 <- lmer(like ~ State + PrivPub + sex + (1 | school/class), data = dta) m1 <- update(m0, . ~ . -State, - sex ) anova(m0, m1)

refitting model(s) with ML (instead of REML)

Data: dta Models: m1: like ~ PrivPub + sex + (1 | school/class) m0: like ~ State + PrivPub + sex + (1 | school/class) Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) m1 6 5551.1 5582.5 -2769.6 5539.1 m0 7 5553.1 5589.7 -2769.6 5539.1 0.0026 1 0.9592

summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: like ~ PrivPub + sex + (1 | school/class) Data: dta REML criterion at convergence: 5546.5 Scaled residuals: Min 1Q Median 3Q Max -2.6927 -0.6891 0.1543 0.6792 2.5584 Random effects: Groups Name Variance Std.Dev. class:school (Intercept) 0.3206 0.5662 school (Intercept) 0.0000 0.0000 Residual 3.0521 1.7470 Number of obs: 1383, groups: class:school, 66; school, 41 Fixed effects: Estimate Std. Error t value (Intercept) 4.7218 0.1624 29.072 PrivPubpublic 0.4117 0.1857 2.217 sexm 0.1823 0.0982 1.857 Correlation of Fixed Effects: (Intr) PrvPbp PrivPubpblc -0.795 sexm -0.309 0.012

Q7 Estimate grand mean by two data set

dta1 <- read.table("http://140.116.183.121/~sheu/lmm/Data/demo1.txt", header = T) dta2 <- read.table("http://140.116.183.121/~sheu/lmm/Data/demo2.txt", header = T) # shall scores the same to compare ICC? c(mean(dta1$Score), sd(dta1$Score))

[1] 62.11111 34.22150

c(mean(dta2$Score), sd(dta2$Score))

[1] 57.44444 32.29207

# m1 <-lmer(Score ~ (1 | School) , data = dta1) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: Score ~ (1 | School) Data: dta1 REML criterion at convergence: 51.5 Scaled residuals: Min 1Q Median 3Q Max -1.3280 -0.4745 0.0000 0.4608 1.3553 Random effects: Groups Name Variance Std.Dev. School (Intercept) 1679 40.976 Residual 5 2.236 Number of obs: 9, groups: School, 3 Fixed effects: Estimate Std. Error t value (Intercept) 53.01 23.67 2.24

m2 <-lmer(Score ~ (1 | School) , data = dta2) summary(m2)

Linear mixed model fit by REML ['lmerMod'] Formula: Score ~ (1 | School) Data: dta2 REML criterion at convergence: 80.4 Scaled residuals: Min 1Q Median 3Q Max -1.36734 -0.34286 -0.02776 1.07153 1.13999 Random effects: Groups Name Variance Std.Dev. School (Intercept) 104.2 10.21 Residual 967.8 31.11 Number of obs: 9, groups: School, 3 Fixed effects: Estimate Std. Error t value (Intercept) 56.36 12.01 4.692

Q8 Leadership and math score

dta <- read.table("http://140.116.183.121/~sheu/lmm/Data/leadership_math.txt", header = T) dta <- dta %>% mutate(School = factor(School), Student = factor(Student)) m0 <- lmer(Math ~ Lead + Gender + SES + (1 | School), data = dta) summary(m0)

Linear mixed model fit by REML ['lmerMod'] Formula: Math ~ Lead + Gender + SES + (1 | School) Data: dta REML criterion at convergence: 979.4 Scaled residuals: Min 1Q Median 3Q Max -2.0745 -0.5824 -0.0854 0.5489 3.3174 Random effects: Groups Name Variance Std.Dev. School (Intercept) 67.08 8.19 Residual 1386.77 37.24 Number of obs: 100, groups: School, 10 Fixed effects: Estimate Std. Error t value (Intercept) 488.406 64.642 7.556 Lead 239.205 94.923 2.520 GenderM 1.603 7.862 0.204 SESO 14.751 8.893 1.659 Correlation of Fixed Effects: (Intr) Lead GendrM Lead -0.993 GenderM 0.019 -0.064 SESO 0.292 -0.364 -0.121

m1 <- lmer(Math ~ Lead + (1 |School), data = dta) summary(m1)

Linear mixed model fit by REML ['lmerMod'] Formula: Math ~ Lead + (1 | School) Data: dta REML criterion at convergence: 994.5 Scaled residuals: Min 1Q Median 3Q Max -2.1760 -0.5846 -0.0317 0.5396 3.3772 Random effects: Groups Name Variance Std.Dev. School (Intercept) 62.05 7.877 Residual 1402.95 37.456 Number of obs: 100, groups: School, 10 Fixed effects: Estimate Std. Error t value (Intercept) 455.61 61.20 7.444 Lead 300.74 87.07 3.454 Correlation of Fixed Effects: (Intr) Lead -0.997

m2 <- lm(Math ~ Lead, data = dta) summary(m2)

Call: lm(formula = Math ~ Lead, data = dta) Residuals: Min 1Q Median 3Q Max -84.159 -21.431 -1.159 20.132 123.841 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 455.61 51.87 8.783 5.27e-14 *** Lead 300.74 73.80 4.075 9.35e-05 *** --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 Residual standard error: 38.13 on 98 degrees of freedom Multiple R-squared: 0.1449, Adjusted R-squared: 0.1362 F-statistic: 16.61 on 1 and 98 DF, p-value: 9.35e-05

anova(m0, m1) # can take away Gender, SES

refitting model(s) with ML (instead of REML)

Data: dta Models: m1: Math ~ Lead + (1 | School) m0: Math ~ Lead + Gender + SES + (1 | School) Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) m1 4 1017.9 1028.3 -504.93 1009.9 m0 6 1018.9 1034.5 -503.45 1006.9 2.9589 2 0.2278

anova(m1, m2) # no need to group by school

refitting model(s) with ML (instead of REML)

Data: dta Models: m2: Math ~ Lead m1: Math ~ Lead + (1 | School) Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) m2 3 1016.0 1023.8 -504.97 1010.0 m1 4 1017.9 1028.3 -504.93 1009.9 0.0957 1 0.757

# plot ggplot(dta, aes(x = Lead, y = Math)) + geom_point(color = "blue", alpha = 0.5) + stat_smooth(method = "lm", color = "black", se = F) + labs(x ="Lead score", y = "Math score") + theme_bw()

Q9 School-teacher-pupil

dta <- read.csv("E:/201709/multilevel_analysis/1023/pts.csv") # compute mean Read_0 by school dta <- dta %>% mutate(School = factor(School), Teacher = factor(Teacher), Pupil = factor(Pupil)) %>% group_by( School ) %>% mutate(msRead_0 = mean(Read_0)) # compute mean Read_0 by teacher and # centered teacher mean of Read_0 (from respective school means) dta <- dta %>% group_by( Teacher ) %>% mutate(mtRead_0 = mean(Read_0), ctRead_0 = mean(Read_0) - msRead_0 ) # no predictor summary(m0 <- lmer(Read_1 ~ (1 | School) + (1 | Teacher), data = dta))

Linear mixed model fit by REML ['lmerMod'] Formula: Read_1 ~ (1 | School) + (1 | Teacher) Data: dta REML criterion at convergence: 2486.8 Scaled residuals: Min 1Q Median 3Q Max -3.05438 -0.65686 0.06497 0.62523 2.47233 Random effects: Groups Name Variance Std.Dev. Teacher (Intercept) 0.1370 0.3701 School (Intercept) 0.1277 0.3574 Residual 1.3250 1.1511 Number of obs: 777, groups: Teacher, 46; School, 20 Fixed effects: Estimate Std. Error t value (Intercept) 3.3755 0.1065 31.69

# add a school-level predictor summary(m0_s <- update(m0, . ~ . + msRead_0))

Linear mixed model fit by REML ['lmerMod'] Formula: Read_1 ~ (1 | School) + (1 | Teacher) + msRead_0 Data: dta REML criterion at convergence: 2467.4 Scaled residuals: Min 1Q Median 3Q Max -3.06110 -0.65597 0.06822 0.62772 2.45391 Random effects: Groups Name Variance Std.Dev. Teacher (Intercept) 0.1234 0.3513 School (Intercept) 0.0000 0.0000 Residual 1.3224 1.1500 Number of obs: 777, groups: Teacher, 46; School, 20 Fixed effects: Estimate Std. Error t value (Intercept) 3.39822 0.06808 49.92 msRead_0 1.00159 0.17807 5.62 Correlation of Fixed Effects: (Intr) msRead_0 0.086

# add a teacher-level predictor summary(m0_t <- update(m0, . ~ . + mtRead_0))

Linear mixed model fit by REML ['lmerMod'] Formula: Read_1 ~ (1 | School) + (1 | Teacher) + mtRead_0 Data: dta REML criterion at convergence: 2434.9 Scaled residuals: Min 1Q Median 3Q Max -3.07539 -0.65979 0.06707 0.63259 2.49973 Random effects: Groups Name Variance Std.Dev. Teacher (Intercept) 8.785e-17 9.373e-09 School (Intercept) 4.471e-02 2.114e-01 Residual 1.309e+00 1.144e+00 Number of obs: 777, groups: Teacher, 46; School, 20 Fixed effects: Estimate Std. Error t value (Intercept) 3.40649 0.06307 54.01 mtRead_0 1.07319 0.11495 9.34 Correlation of Fixed Effects: (Intr) mtRead_0 0.024

# add a teacher-level predictor away from respective school means summary(m0_ct <- update(m0, . ~ . + ctRead_0))

Linear mixed model fit by REML ['lmerMod'] Formula: Read_1 ~ (1 | School) + (1 | Teacher) + ctRead_0 Data: dta REML criterion at convergence: 2454.1 Scaled residuals: Min 1Q Median 3Q Max -3.1804 -0.6461 0.0791 0.6420 2.5272 Random effects: Groups Name Variance Std.Dev. Teacher (Intercept) 0.0000 0.000 School (Intercept) 0.1764 0.420 Residual 1.3108 1.145 Number of obs: 777, groups: Teacher, 46; School, 20 Fixed effects: Estimate Std. Error t value (Intercept) 3.3896 0.1029 32.93 ctRead_0 1.1742 0.1581 7.43 Correlation of Fixed Effects: (Intr) ctRead_0 0.000

m0_s reduces the variance of school to 0, huge difference; teacher is reduced in some degree

m0_t reduced both variance of teacher and school, but higher degree in teacher

m0_ct reduced the variance of teacher to 0; school is reduced in some degree (look more alike to m0_s)