0.1 Data management

# One-time setup if the package is missing: install.packages("mlmRev")
library(mlmRev)
# Bring the Gcsemv data set from mlmRev into the workspace.
data("Gcsemv", package = "mlmRev")
# Show the first six rows.
head(Gcsemv, n = 6L)
  school student gender written course
1  20920      16      M      23     NA
2  20920      25      F      NA   71.2
3  20920      27      F      39   76.8
4  20920      31      F      36   87.9
5  20920      42      M      16   44.4
6  20920      62      F      36     NA

0.2 Summary statistics

# Student-level correlation between the written and course scores, using every
# pair of observations in which both scores are present.
cor(Gcsemv$written, Gcsemv$course, use = "pairwise.complete.obs")
[1] 0.47417
# Compute each school's mean course and written score, dropping missing
# scores within a school. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
course_schavg <- with(Gcsemv, tapply(course, school, mean, na.rm = TRUE))
written_schavg <- with(Gcsemv, tapply(written, school, mean, na.rm = TRUE))
# Correlation between the two sets of school means.
cor(course_schavg, written_schavg)
[1] 0.39568

0.3 Visualization

library(tidyverse)
# Attach to every student row the course/written correlation computed within
# that student's school (pairwise-complete observations only).
dta <- Gcsemv %>%
  group_by(school) %>%
  mutate(r_sch = cor(course, written, use = "pairwise.complete.obs")) %>%
  # Drop the grouping so later verbs on dta are not silently applied per school.
  ungroup()
# Keep one r_sch value per school: the first row seen for each school.
dtar <- dta[!duplicated(dta$school), "r_sch"]
## Layered ggplot: student-level scores with the school-level means on top.

# Put the school means in their own data frame so the school-level layers map
# columns of their own data instead of free-standing workspace vectors whose
# length merely happens to match another table.
sch_avg <- data.frame(
  school = names(course_schavg),
  course = as.vector(course_schavg),
  written = as.vector(written_schavg)
)

ggplot(data = Gcsemv, aes(x = course, y = written)) +
  # Student level: one point per student plus the fitted straight line.
  geom_point(color = "skyblue") +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "skyblue") +
  # School level: one point per school mean plus the fitted straight line.
  # Both layers inherit the x = course, y = written mapping from ggplot().
  geom_point(data = sch_avg, color = "steelblue") +
  geom_smooth(
    data = sch_avg,
    method = "lm", formula = y ~ x, se = FALSE, color = "steelblue"
  ) +
  labs(x = "course score", y = "written score") +
  theme_bw()

0.4 The end