# install.package("mlmRev")
library(mlmRev)
# load the data from the package
data(Gcsemv, package="mlmRev")
# invoke help document
?Gcsemv
# view first 6 lines
head(Gcsemv)
school student gender written course
1 20920 16 M 23 NA
2 20920 25 F NA 71.2
3 20920 27 F 39 76.8
4 20920 31 F 36 87.9
5 20920 42 M 16 44.4
6 20920 62 F 36 NA
with(Gcsemv, cor(written, course, use="pairwise"))
[1] 0.47417
# compute the means by school
course_schavg <- with(Gcsemv, tapply(course, school, mean, na.rm=T))
written_schavg <- with(Gcsemv, tapply(written, school, mean, na.rm=T))
cor(course_schavg, written_schavg)
[1] 0.39568
library(tidyverse)
dta <- Gcsemv %>%
group_by(school) %>%
mutate(r_sch = cor(course, written, use="pairwise"))
dtar <- dta[!duplicated(dta$school),"r_sch"]
ggplot(data=dtar, aes(r_sch)) +
geom_histogram(fill="skyblue") +
geom_vline(xintercept=c(0.39568, 0.47414),
col=c("peru","red"),
lty=c(1,3)) +
labs(x="Estimated correlation coefficients",
y="Counts") +
theme_bw()
The peru line is averaged correlations over schools and the red line is individuals correlations ignoring school effect