Load the tidyverse data management and graphics packages.

library(tidyverse)
## ─ Attaching packages ───────────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ─ Conflicts ─────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read the input data.

# Print the working directory so the relative path used below can be checked.
getwd()
## [1] "/Users/chenyuwen/Documents"
# Read the CSV, taking column names from its first row. The argument is
# spelled out as header = TRUE rather than h=T: `h` only works through partial
# argument matching, and `T` is an ordinary variable that can be reassigned.
dta <- read.csv("./LMM/langMath.csv", header = TRUE)

Compute the average language and arithmetic scores by school.

# One row per School, holding the mean of Lang and the mean of Arith;
# missing values are dropped before averaging.
dta_a <- summarize(
  group_by(dta, School),
  ave_lang = mean(Lang, na.rm = TRUE),
  ave_arith = mean(Arith, na.rm = TRUE)
)

Superimpose two plots: the raw observations and the school averages, each with its own fitted regression line.

# Overlay two scatter plots of Lang against Arith, each with a straight
# least-squares line (method = "lm") and no confidence band (se = FALSE):
#   - skyblue:   every row of dta
#   - steelblue: the per-school averages in dta_a
# FALSE is written out instead of F, which is a reassignable variable, and
# `color` is used consistently instead of mixing it with the `col` alias.
ggplot(data = dta, aes(x = Arith, y = Lang)) +
  geom_point(color = "skyblue") +
  stat_smooth(
    method = "lm", formula = y ~ x, se = FALSE, color = "skyblue"
  ) +
  geom_point(
    data = dta_a, aes(x = ave_arith, y = ave_lang),
    color = "steelblue"
  ) +
  stat_smooth(
    data = dta_a, aes(x = ave_arith, y = ave_lang),
    method = "lm", formula = y ~ x, se = FALSE, color = "steelblue"
  ) +
  labs(
    x = "Arithmetic score",
    y = "Language score"
  ) +
  theme_bw()