Introduction

Column 1: school ID; Column 2: student ID; Column 3: gender; Column 4: written score; Column 5: course score

# Input data: one row per student, with school, written and course columns.
# NOTE(review): Gcsemv is presumably the GCSE exam data set shipped with the
# mlmRev package -- confirm that it is loaded before this chunk runs.
data <- Gcsemv

# School-level averages of the written and course scores.
# na.rm = TRUE so that a student missing one score does not turn the whole
# school mean into NA.
data_a <- data %>%
  group_by(school) %>%
  summarize(
    ave_written = mean(written, na.rm = TRUE),
    ave_course = mean(course, na.rm = TRUE)
  )

# Superimpose two plots on the same axes:
#   * light blue: individual students with their pooled least-squares line
#   * dark blue:  school means with the least-squares line through the means
# Rows with a missing written or course score are dropped by ggplot2 with a
# warning; the student-level layers are otherwise unfiltered.
ggplot(data = data, aes(x = written, y = course)) +
  geom_point(color = "skyblue") +
  stat_smooth(
    method = "lm", formula = y ~ x, se = FALSE, color = "skyblue"
  ) +
  geom_point(
    data = data_a,
    aes(x = ave_written, y = ave_course),
    color = "steelblue"
  ) +
  stat_smooth(
    data = data_a,
    aes(x = ave_written, y = ave_course),
    method = "lm", formula = y ~ x, se = FALSE, color = "steelblue"
  ) +
  labs(
    x = "Written score",
    y = "Course score"
  ) +
  theme_bw()
## Warning: Removed 382 rows containing non-finite values (stat_smooth).
## Warning: Removed 382 rows containing missing values (geom_point).