Edit this R Markdown template in RStudio to reproduce the following histogram of the correlation coefficients between the written and course variables, computed by school, from the data set Gcsemv{mlmRev}. The two vertical lines indicate the average of the correlations over schools and the correlation computed over individuals, ignoring the school label. Which is which?
# install.packages("mlmRev")
library(mlmRev)

# load the data from the package
data(Gcsemv, package = "mlmRev")

# invoke help document
?Gcsemv

# view first 6 lines
head(Gcsemv)
#>   school student gender written course
#> 1  20920      16      M      23     NA
#> 2  20920      25      F      NA   71.2
#> 3  20920      27      F      39   76.8
#> 4  20920      31      F      36   87.9
#> 5  20920      42      M      16   44.4
#> 6  20920      62      F      36     NA

# correlation between written and course computed over individuals,
# ignoring the school label; pairwise deletion is needed because both
# columns contain NA (see the rows printed above)
with(Gcsemv, cor(written, course, use = "pairwise.complete.obs"))
#> [1] 0.47417

# compute the means by school
# na.rm = TRUE so that a student with a missing score does not turn the
# whole school's mean into NA
course_schavg <- with(Gcsemv, tapply(course, school, mean, na.rm = TRUE))
written_schavg <- with(Gcsemv, tapply(written, school, mean, na.rm = TRUE))

# correlation between the school-level means
# NOTE(review): the exercise text calls one of the reference lines the
# "averaged correlations over schools"; that would be
# mean(dtar$r_sch, na.rm = TRUE), which is not what is computed here --
# confirm which quantity is intended.
r_school <- cor(course_schavg, written_schavg)
r_school
#> [1] 0.39568

library(tidyverse)

# correlation over individuals, ignoring school; recomputed here so the
# reference line below uses the computed value instead of a retyped constant
r_individual <- with(
  Gcsemv,
  cor(written, course, use = "pairwise.complete.obs")
)

# attach each school's within-school correlation to every student row
dta <- Gcsemv %>%
  group_by(school) %>%
  mutate(r_sch = cor(course, written, use = "pairwise.complete.obs")) %>%
  ungroup()

# keep one correlation per school
dtar <- dta %>%
  distinct(school, .keep_all = TRUE) %>%
  select(r_sch)

# histogram of the per-school correlations with the two reference lines;
# bins = 30 is the geom_histogram() default, stated explicitly
ggplot(data = dtar, aes(x = r_sch)) +
  geom_histogram(fill = "skyblue", bins = 30) +
  geom_vline(
    xintercept = c(r_school, r_individual),
    colour = c("peru", "red"),
    linetype = c(1, 3)
  ) +
  labs(
    x = "Estimated correlation coefficients",
    y = "Counts"
  ) +
  theme_bw()

# The red dotted line is the correlation between written and course
# computed over individuals = 0.47417.
# The orange (peru) solid line is the correlation between written and
# course computed over schools = 0.39568.