Column 1: school ID; Column 2: student ID; Column 3: gender; Column 4: written score; Column 5: course score
# Input data: bind the Gcsemv data set to the working name `data`.
# NOTE(review): Gcsemv is not defined in this section; presumably it is
# provided by a package attached earlier -- confirm.
data <- Gcsemv

# School-level averages: one row per school holding the mean written score
# and the mean course score, with missing scores dropped before averaging.
data_a <- data %>%
  group_by(school) %>%
  summarise(
    ave_written = mean(written, na.rm = TRUE),
    ave_course = mean(course, na.rm = TRUE)
  )
# Superimpose two scatter plots of course score against written score,
# each with its own least-squares line (no confidence band):
#   - sky blue:   individual rows of `data`
#   - steel blue: per-school averages from `data_a`
# Rows with missing scores are dropped by ggplot2 with a warning; that
# behaviour is deliberately left in place.
ggplot(data = data, aes(x = written, y = course)) +
  # Layer 1: raw observations and their linear fit.
  geom_point(color = "skyblue") +
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "skyblue") +
  # Layer 2: school means and their linear fit, drawn on top of layer 1.
  geom_point(
    data = data_a,
    aes(x = ave_written, y = ave_course),
    color = "steelblue"
  ) +
  stat_smooth(
    data = data_a,
    aes(x = ave_written, y = ave_course),
    method = "lm", formula = y ~ x, se = FALSE, color = "steelblue"
  ) +
  labs(
    x = "Written score",
    y = "Course score"
  ) +
  theme_bw()
## Warning: Removed 382 rows containing non-finite values (stat_smooth).
## Warning: Removed 382 rows containing missing values (geom_point).