Q2
setwd("C:\\Users\\USER\\Desktop\\Multilevel Statistical\\w6 exercise")
dat = read.csv("language_arith.csv", header = TRUE)
str(dat)
## 'data.frame': 2287 obs. of 4 variables:
## $ School : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Pupil : int 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
## $ Language : int 46 45 33 46 20 30 30 57 36 36 ...
## $ Arithmetic: int 24 19 24 26 9 13 13 30 23 22 ...
as.factor('dat$School')
## [1] dat$School
## Levels: dat$School
#correlation beteen lan and ari
cor(dat$Language, dat$Arithmetic)
## [1] 0.694228
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(magrittr)
## Loading required package: magrittr
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
#Different intercepts on school level
summary(m0 <- lmer(Language~(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
## Data: dat
##
## REML criterion at convergence: 16253.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.11618 -0.65703 0.07597 0.74128 2.50639
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 19.63 4.431
## Residual 64.56 8.035
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 40.3624 0.4282 94.26
summary(m1 <- lmer(Arithmetic~(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Arithmetic ~ (1 | School)
## Data: dat
##
## REML criterion at convergence: 14695.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.80522 -0.70245 0.08321 0.75577 2.68341
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 12.62 3.552
## Residual 32.28 5.682
## Number of obs: 2287, groups: School, 131
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 18.9473 0.3365 56.31
#ICC
print(vcm0<-VarCorr(m0),comp='Variance')
## Groups Name Variance
## School (Intercept) 19.633
## Residual 64.564
print(vcm1<-VarCorr(m1),comp='Variance')
## Groups Name Variance
## School (Intercept) 12.617
## Residual 32.282
vcm0<-as.data.frame(vcm0)
vcm1<-as.data.frame(vcm1)
vcm0[1,4] /sum( vcm0 [,4] )
## [1] 0.2331792
vcm1[1,4] /sum( vcm1 [,4] )
## [1] 0.281007
#the school mean of lan and ari
mla_s<- dat %>% group_by(School) %>% summarize(ml = mean(Language),
ma = mean(Arithmetic))
#Plot
ggplot(dat, aes(x = Arithmetic, y = Language))+
geom_point(color = "skyblue", alpha = .5)+ #data not centering
stat_smooth(method = "lm", color = "skyblue", size = .8)+
geom_point(data = mla_s, aes(x = ma, y = ml),color = "blue", alpha = .5)+ #centering data
stat_smooth(data = mla_s, aes(x = ma, y = ml),method = "lm", color = "blue", size = .8)+
labs(x = "Arithmetic Score", y = "Language Score")+
theme_bw()

Q4
#教師外貌vs 課程評分分數
dat = read.table("course_eval.txt", header = TRUE)
head(dat)
## Score Beauty Gender Age Minority Tenure Course
## 1 4.3 0.2015666 1 36 1 0 3
## 2 4.5 -0.8260813 0 59 0 1 0
## 3 3.7 -0.6603327 0 51 0 1 4
## 4 4.3 -0.7663125 1 40 0 1 2
## 5 4.4 1.4214450 1 31 0 0 0
## 6 4.2 0.5002196 0 62 0 1 0
str(dat)
## 'data.frame': 463 obs. of 7 variables:
## $ Score : num 4.3 4.5 3.7 4.3 4.4 4.2 4 3.4 4.5 3.9 ...
## $ Beauty : num 0.202 -0.826 -0.66 -0.766 1.421 ...
## $ Gender : int 1 0 0 1 1 0 1 1 1 0 ...
## $ Age : int 36 59 51 40 31 62 33 51 33 47 ...
## $ Minority: int 1 0 0 0 0 0 0 0 0 0 ...
## $ Tenure : int 0 1 1 1 0 1 0 1 0 0 ...
## $ Course : int 3 0 4 2 0 0 4 0 0 4 ...
#Plot all in one
ggplot(dat, aes(x = Beauty, y = Score))+
geom_point(alpha = .5)+
scale_x_continuous(name = "Beauty Score")+
scale_y_continuous(name = "Average course evaluation")+
stat_smooth(method = "lm", se=T)+ #se=加入信賴區間
theme_bw()

#beauty score of 30 courses
ggplot(dat, aes(x = Beauty, y = Score))+
geom_point(alpha = .5, size = .8)+
labs(x = "Beauty Score", y = "Average teaching evaluation")+
stat_smooth(method = "lm")+
scale_x_continuous(limits = c(-2,2))+
scale_y_continuous(limits = c(2,5))+
facet_wrap(~Course)+
theme_bw()
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in qt((1 - level)/2, df): NaNs produced

# lm plot
ggplot(dat, aes(x = Beauty, y = Score ,group = Course))+
stat_smooth(method = "lm", se=F , color='skyblue')+ #every course
stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+ #total data
labs(x='beauty score',y='course score')+
theme_bw()

#every course has different correlation between beauty and score
summary(m0 <- lmer(Score~Beauty+Age+Gender+Minority+Tenure+(1|Course), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course)
## Data: dat
##
## REML criterion at convergence: 749.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6110 -0.6666 0.0876 0.7275 2.0388
##
## Random effects:
## Groups Name Variance Std.Dev.
## Course (Intercept) 0.07618 0.2760
## Residual 0.26282 0.5127
## Number of obs: 463, groups: Course, 31
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.352253 0.159658 27.260
## Beauty 0.131483 0.033863 3.883
## Age -0.003687 0.002972 -1.240
## Gender -0.232125 0.052992 -4.380
## Minority -0.127408 0.075157 -1.695
## Tenure -0.091118 0.054955 -1.658
##
## Correlation of Fixed Effects:
## (Intr) Beauty Age Gender Minrty
## Beauty -0.229
## Age -0.869 0.265
## Gender -0.309 -0.034 0.175
## Minority -0.174 0.035 0.090 -0.072
## Tenure 0.059 0.047 -0.316 0.136 0.044
Q6
##assess the effects of principal leadership on mathematic perfomance in the sixth grade.
dat = read.table("leadership_math.txt", header = TRUE)
str(dat)
## 'data.frame': 100 obs. of 6 variables:
## $ Student: int 1 2 3 4 5 6 7 8 9 10 ...
## $ School : int 100 100 100 100 100 100 100 100 100 100 ...
## $ Lead : num 0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 ...
## $ Math : int 623 635 700 663 656 695 802 648 594 679 ...
## $ Gender : Factor w/ 2 levels "F","M": 2 1 1 2 1 1 2 2 2 2 ...
## $ SES : Factor w/ 2 levels "L","O": 2 2 2 2 2 2 2 2 1 2 ...
#plot lm
ggplot(dat, aes(x = Lead, y = Math,col=School))+
geom_point(alpha = .5)+
scale_x_continuous(name = "principal leadership")+
scale_y_continuous(name = "mathematic score ")+
stat_smooth(method = "lm", se=F)+ #se=加入信賴區間
theme_bw()

#mathematic perfomance of 10 school principal leadership
ggplot(dat, aes(x = Lead, y = Math))+
geom_point(alpha = .5, size = .8)+
scale_x_continuous(limits = c(0.6,0.8))+
scale_y_continuous(limits = c(550,850))+
labs(x='principal leadership',y='mathematic score')+
facet_wrap(~School)

#math score sd is different in every school
summary(m0 <- lmer(Math~ Lead +(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Lead + (1 | School)
## Data: dat
##
## REML criterion at convergence: 994.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1760 -0.5846 -0.0317 0.5396 3.3772
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 62.05 7.877
## Residual 1402.95 37.456
## Number of obs: 100, groups: School, 10
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 455.61 61.20 7.444
## Lead 300.74 87.07 3.454
##
## Correlation of Fixed Effects:
## (Intr)
## Lead -0.997
Q8
##how the estimate of grand mean depend on the intra-class correlation in a simple random-effects model
demo1 <- read.delim2("demo1.csv" ,header = T ,sep = ",")
demo2 <- read.delim2("demo2.csv" ,header = T ,sep = ",")
str(demo1)
## 'data.frame': 9 obs. of 3 variables:
## $ Score : int 11 13 51 53 55 91 93 95 97
## $ School: Factor w/ 3 levels "S1","S2","S3": 1 1 2 2 2 3 3 3 3
## $ Pupil : Factor w/ 9 levels "P1","P2","P3",..: 1 2 3 4 5 6 7 8 9
str(demo2)
## 'data.frame': 9 obs. of 3 variables:
## $ Score : int 11 51 13 55 91 51 53 95 97
## $ School: Factor w/ 3 levels "S1","S2","S3": 1 1 2 2 2 3 3 3 3
## $ Pupil : Factor w/ 9 levels "P1","P2","P3",..: 1 2 3 4 5 6 7 8 9
summary(m1 <- lmer(Score~(1|School), data = demo1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
## Data: demo1
##
## REML criterion at convergence: 51.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3280 -0.4745 0.0000 0.4608 1.3553
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 1679 40.976
## Residual 5 2.236
## Number of obs: 9, groups: School, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 53.01 23.67 2.24
summary(m2 <- lmer(Score~(1|School), data = demo2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
## Data: demo2
##
## REML criterion at convergence: 80.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.36734 -0.34286 -0.02776 1.07153 1.13999
##
## Random effects:
## Groups Name Variance Std.Dev.
## School (Intercept) 104.2 10.21
## Residual 967.8 31.11
## Number of obs: 9, groups: School, 3
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 56.36 12.01 4.692
#ICC
print(vcm1<-VarCorr(m1),comp='Variance')
## Groups Name Variance
## School (Intercept) 1679.0557
## Residual 5.0001
print(vcm2<-VarCorr(m2),comp='Variance')
## Groups Name Variance
## School (Intercept) 104.17
## Residual 967.76
vcm1<-as.data.frame(vcm1)
vcm2<-as.data.frame(vcm2)
vcm1[1,4] /sum( vcm1 [,4] )
## [1] 0.9970309
vcm2[1,4] /sum( vcm2 [,4] )
## [1] 0.0971812