Q2

setwd("C:\\Users\\USER\\Desktop\\Multilevel Statistical\\w6 exercise")
dat = read.csv("language_arith.csv", header = TRUE)
str(dat)
## 'data.frame':    2287 obs. of  4 variables:
##  $ School    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Pupil     : int  17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 ...
##  $ Language  : int  46 45 33 46 20 30 30 57 36 36 ...
##  $ Arithmetic: int  24 19 24 26 9 13 13 30 23 22 ...
as.factor('dat$School') 
## [1] dat$School
## Levels: dat$School
#correlation beteen lan and ari
cor(dat$Language, dat$Arithmetic)
## [1] 0.694228
require(lme4)
## Loading required package: lme4
## Loading required package: Matrix
require(magrittr)
## Loading required package: magrittr
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
#Different intercepts on school level
summary(m0 <- lmer(Language~(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
##    Data: dat
## 
## REML criterion at convergence: 16253.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11618 -0.65703  0.07597  0.74128  2.50639 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 19.63    4.431   
##  Residual             64.56    8.035   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  40.3624     0.4282   94.26
summary(m1 <- lmer(Arithmetic~(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Arithmetic ~ (1 | School)
##    Data: dat
## 
## REML criterion at convergence: 14695.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.80522 -0.70245  0.08321  0.75577  2.68341 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 12.62    3.552   
##  Residual             32.28    5.682   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  18.9473     0.3365   56.31
#ICC
print(vcm0<-VarCorr(m0),comp='Variance')
##  Groups   Name        Variance
##  School   (Intercept) 19.633  
##  Residual             64.564
print(vcm1<-VarCorr(m1),comp='Variance')
##  Groups   Name        Variance
##  School   (Intercept) 12.617  
##  Residual             32.282
vcm0<-as.data.frame(vcm0)
vcm1<-as.data.frame(vcm1)
vcm0[1,4] /sum( vcm0 [,4] )
## [1] 0.2331792
vcm1[1,4] /sum( vcm1 [,4] )
## [1] 0.281007
#the school mean of lan and ari 
mla_s<- dat %>% group_by(School) %>% summarize(ml = mean(Language),
                                               ma = mean(Arithmetic))

#Plot
ggplot(dat, aes(x = Arithmetic, y = Language))+
  geom_point(color = "skyblue", alpha = .5)+                #data not centering
  stat_smooth(method = "lm", color = "skyblue", size = .8)+
  geom_point(data = mla_s, aes(x = ma, y = ml),color = "blue", alpha = .5)+   #centering data 
  stat_smooth(data = mla_s, aes(x = ma, y = ml),method = "lm", color = "blue", size = .8)+
  labs(x = "Arithmetic Score", y = "Language Score")+
  theme_bw()

Q4

#教師外貌vs 課程評分分數

dat = read.table("course_eval.txt", header = TRUE)
head(dat)
##   Score     Beauty Gender Age Minority Tenure Course
## 1   4.3  0.2015666      1  36        1      0      3
## 2   4.5 -0.8260813      0  59        0      1      0
## 3   3.7 -0.6603327      0  51        0      1      4
## 4   4.3 -0.7663125      1  40        0      1      2
## 5   4.4  1.4214450      1  31        0      0      0
## 6   4.2  0.5002196      0  62        0      1      0
str(dat)
## 'data.frame':    463 obs. of  7 variables:
##  $ Score   : num  4.3 4.5 3.7 4.3 4.4 4.2 4 3.4 4.5 3.9 ...
##  $ Beauty  : num  0.202 -0.826 -0.66 -0.766 1.421 ...
##  $ Gender  : int  1 0 0 1 1 0 1 1 1 0 ...
##  $ Age     : int  36 59 51 40 31 62 33 51 33 47 ...
##  $ Minority: int  1 0 0 0 0 0 0 0 0 0 ...
##  $ Tenure  : int  0 1 1 1 0 1 0 1 0 0 ...
##  $ Course  : int  3 0 4 2 0 0 4 0 0 4 ...
#Plot all in one
ggplot(dat, aes(x = Beauty, y = Score))+
  geom_point(alpha = .5)+
  scale_x_continuous(name = "Beauty Score")+
  scale_y_continuous(name = "Average course evaluation")+
  stat_smooth(method = "lm", se=T)+   #se=加入信賴區間
  theme_bw()

#beauty score of 30 courses
ggplot(dat, aes(x = Beauty, y = Score))+
  geom_point(alpha = .5, size = .8)+
  labs(x = "Beauty Score", y = "Average teaching evaluation")+
  stat_smooth(method = "lm")+
  scale_x_continuous(limits = c(-2,2))+
  scale_y_continuous(limits = c(2,5))+
  facet_wrap(~Course)+
  theme_bw()
## Warning in qt((1 - level)/2, df): NaNs produced

## Warning in qt((1 - level)/2, df): NaNs produced

## Warning in qt((1 - level)/2, df): NaNs produced

## Warning in qt((1 - level)/2, df): NaNs produced

## Warning in qt((1 - level)/2, df): NaNs produced

## Warning in qt((1 - level)/2, df): NaNs produced

# lm plot
ggplot(dat, aes(x = Beauty, y = Score ,group = Course))+
  stat_smooth(method = "lm", se=F , color='skyblue')+                 #every course
  stat_smooth(aes(group = 1), method = "lm", se=F , color='black')+   #total data
  labs(x='beauty score',y='course score')+
  theme_bw()

#every course has different correlation between beauty and score 
summary(m0 <- lmer(Score~Beauty+Age+Gender+Minority+Tenure+(1|Course), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ Beauty + Age + Gender + Minority + Tenure + (1 | Course)
##    Data: dat
## 
## REML criterion at convergence: 749.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6110 -0.6666  0.0876  0.7275  2.0388 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Course   (Intercept) 0.07618  0.2760  
##  Residual             0.26282  0.5127  
## Number of obs: 463, groups:  Course, 31
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)  4.352253   0.159658  27.260
## Beauty       0.131483   0.033863   3.883
## Age         -0.003687   0.002972  -1.240
## Gender      -0.232125   0.052992  -4.380
## Minority    -0.127408   0.075157  -1.695
## Tenure      -0.091118   0.054955  -1.658
## 
## Correlation of Fixed Effects:
##          (Intr) Beauty Age    Gender Minrty
## Beauty   -0.229                            
## Age      -0.869  0.265                     
## Gender   -0.309 -0.034  0.175              
## Minority -0.174  0.035  0.090 -0.072       
## Tenure    0.059  0.047 -0.316  0.136  0.044

Q6

##assess the effects of principal leadership on mathematic perfomance in the sixth grade.
dat = read.table("leadership_math.txt", header = TRUE)
str(dat)
## 'data.frame':    100 obs. of  6 variables:
##  $ Student: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ School : int  100 100 100 100 100 100 100 100 100 100 ...
##  $ Lead   : num  0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 0.74 ...
##  $ Math   : int  623 635 700 663 656 695 802 648 594 679 ...
##  $ Gender : Factor w/ 2 levels "F","M": 2 1 1 2 1 1 2 2 2 2 ...
##  $ SES    : Factor w/ 2 levels "L","O": 2 2 2 2 2 2 2 2 1 2 ...
#plot lm
ggplot(dat, aes(x = Lead, y = Math,col=School))+
  geom_point(alpha = .5)+
  scale_x_continuous(name = "principal leadership")+
  scale_y_continuous(name = "mathematic score ")+
  stat_smooth(method = "lm", se=F)+   #se=加入信賴區間
  theme_bw()

#mathematic perfomance of 10 school principal leadership
 ggplot(dat, aes(x = Lead, y = Math))+
    geom_point(alpha = .5, size = .8)+
    scale_x_continuous(limits = c(0.6,0.8))+
    scale_y_continuous(limits = c(550,850))+ 
    labs(x='principal leadership',y='mathematic score')+
    facet_wrap(~School)

#math score sd is different in every school
summary(m0 <- lmer(Math~ Lead +(1|School), data = dat))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Lead + (1 | School)
##    Data: dat
## 
## REML criterion at convergence: 994.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1760 -0.5846 -0.0317  0.5396  3.3772 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)   62.05   7.877  
##  Residual             1402.95  37.456  
## Number of obs: 100, groups:  School, 10
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   455.61      61.20   7.444
## Lead          300.74      87.07   3.454
## 
## Correlation of Fixed Effects:
##      (Intr)
## Lead -0.997

Q8

##how the estimate of grand mean depend on the intra-class correlation in a simple random-effects model

demo1 <- read.delim2("demo1.csv" ,header = T ,sep = ",")
demo2 <- read.delim2("demo2.csv" ,header = T ,sep = ",")
str(demo1)
## 'data.frame':    9 obs. of  3 variables:
##  $ Score : int  11 13 51 53 55 91 93 95 97
##  $ School: Factor w/ 3 levels "S1","S2","S3": 1 1 2 2 2 3 3 3 3
##  $ Pupil : Factor w/ 9 levels "P1","P2","P3",..: 1 2 3 4 5 6 7 8 9
str(demo2)
## 'data.frame':    9 obs. of  3 variables:
##  $ Score : int  11 51 13 55 91 51 53 95 97
##  $ School: Factor w/ 3 levels "S1","S2","S3": 1 1 2 2 2 3 3 3 3
##  $ Pupil : Factor w/ 9 levels "P1","P2","P3",..: 1 2 3 4 5 6 7 8 9
summary(m1 <- lmer(Score~(1|School), data = demo1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: demo1
## 
## REML criterion at convergence: 51.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3280 -0.4745  0.0000  0.4608  1.3553 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 1679     40.976  
##  Residual                5      2.236  
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    53.01      23.67    2.24
summary(m2 <- lmer(Score~(1|School), data = demo2))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: demo2
## 
## REML criterion at convergence: 80.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.36734 -0.34286 -0.02776  1.07153  1.13999 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 104.2    10.21   
##  Residual             967.8    31.11   
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    56.36      12.01   4.692
#ICC
print(vcm1<-VarCorr(m1),comp='Variance')
##  Groups   Name        Variance 
##  School   (Intercept) 1679.0557
##  Residual                5.0001
print(vcm2<-VarCorr(m2),comp='Variance')
##  Groups   Name        Variance
##  School   (Intercept) 104.17  
##  Residual             967.76
vcm1<-as.data.frame(vcm1)
vcm2<-as.data.frame(vcm2)
vcm1[1,4] /sum( vcm1 [,4] )
## [1] 0.9970309
vcm2[1,4] /sum( vcm2 [,4] )
## [1] 0.0971812