This document contains the R code required to perform the analysis and produce the graphics in the accompanying article.
library(dplyr); library(magrittr);
library(ggplot2); library(ggsci)
library(reshape2); library(knitr)
library(stringr)
theme_set(theme_minimal())
# Reduce a final-marks data frame to the columns used by the analysis.
#
# Args:
#   X: a data frame (e.g. the result of read.csv) that contains at least the
#      eight columns listed in `keep` below.
#
# Returns:
#   X restricted to those eight columns, in the order listed.
#
# Raises:
#   An error naming the missing column(s) if X lacks any of them.
#
# The function no longer calls library() itself: attaching packages inside a
# function mutates the search path on every call, and base subsetting needs
# no packages at all.
selectify <- function(X) {
  keep <- c("Gender", "Grade.Level", "Academic.Year",
            "Course.Long.Name", "Course.Department.Name", "Grade.Mark",
            "Site.Name", "Term.Name")
  absent <- setdiff(keep, names(X))
  if (length(absent) > 0) {
    stop("selectify: missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }
  # drop = FALSE guarantees a data frame even if only one column were kept.
  X[, keep, drop = FALSE]
}
# Import ----
# Note - this is not glamorous, but is repeatable.
# 2016: one CSV per grade and term, read from the working directory.
# 9th and 10th grade have one file per semester (S1, S2); 11th and 12th grade
# have a single year-long file (Y).
Y16G9S1 <- read.csv("2016 9th Final Marks S1.csv")
Y16G9S2 <- read.csv("2016 9th Final Marks S2.csv")
Y16G10S1 <- read.csv("2016 10th Final Marks S1.csv")
Y16G10S2 <- read.csv("2016 10th Final Marks S2.csv")
Y16G11Y <- read.csv("2016 11th Final Marks Y.csv")
Y16G12Y <- read.csv("2016 12th Final Marks Y.csv")
# Note on format: the 12th-grade file has a slightly different layout (it
# carries the extra columns Quarter.Number and Grading.Period.Name).
# Munge: every file is passed through selectify(), as for the later years, so
# all pieces share the same columns before being stacked. The raw objects
# above are left untouched.
Y16 <- rbind(selectify(Y16G9S1), selectify(Y16G9S2),
             selectify(Y16G10S1), selectify(Y16G10S2),
             selectify(Y16G11Y), selectify(Y16G12Y))
# Year label taken from the file names.
Y16$Year <- 2016
Y16$Distance <- "In Person"
# For expansion (adding years): each later year follows the same recipe --
# read every file, trim it with selectify(), stack, then label.
# 2017 (should be similar to 2016); 12th grade is still a year-long file.
Y17G9S1 <- selectify(read.csv("2017 9th Final Marks S1.csv"))
Y17G9S2 <- selectify(read.csv("2017 9th Final Marks S2.csv"))
Y17G10S1 <- selectify(read.csv("2017 10th Final Marks S1.csv"))
Y17G10S2 <- selectify(read.csv("2017 10th Final Marks S2.csv"))
Y17G11S1 <- selectify(read.csv("2017 11th Final Marks S1.csv"))
Y17G11S2 <- selectify(read.csv("2017 11th Final Marks S2.csv"))
Y17G12Y <- selectify(read.csv("2017 12th Final Marks Y.csv"))
Y17 <- rbind(
  Y17G9S1, Y17G9S2,
  Y17G10S1, Y17G10S2,
  Y17G11S1, Y17G11S2,
  Y17G12Y
)
Y17$Year <- 2017
Y17$Distance <- "In Person"
# 2018: two semester files (S1, S2) for each of grades 9-12.
Y18G9S1 <- selectify(read.csv("2018 9th Final Marks S1.csv"))
Y18G9S2 <- selectify(read.csv("2018 9th Final Marks S2.csv"))
Y18G10S1 <- selectify(read.csv("2018 10th Final Marks S1.csv"))
Y18G10S2 <- selectify(read.csv("2018 10th Final Marks S2.csv"))
Y18G11S1 <- selectify(read.csv("2018 11th Final Marks S1.csv"))
Y18G11S2 <- selectify(read.csv("2018 11th Final Marks S2.csv"))
Y18G12S1 <- selectify(read.csv("2018 12th Final Marks S1.csv"))
Y18G12S2 <- selectify(read.csv("2018 12th Final Marks S2.csv"))
Y18 <- rbind(
  Y18G9S1, Y18G9S2,
  Y18G10S1, Y18G10S2,
  Y18G11S1, Y18G11S2,
  Y18G12S1, Y18G12S2
)
Y18$Year <- 2018
Y18$Distance <- "In Person"
# 2019: two semester files (S1, S2) for each of grades 9-12.
Y19G9S1 <- selectify(read.csv("2019 9th Final Marks S1.csv"))
Y19G9S2 <- selectify(read.csv("2019 9th Final Marks S2.csv"))
Y19G10S1 <- selectify(read.csv("2019 10th Final Marks S1.csv"))
Y19G10S2 <- selectify(read.csv("2019 10th Final Marks S2.csv"))
Y19G11S1 <- selectify(read.csv("2019 11th Final Marks S1.csv"))
Y19G11S2 <- selectify(read.csv("2019 11th Final Marks S2.csv"))
Y19G12S1 <- selectify(read.csv("2019 12th Final Marks S1.csv"))
Y19G12S2 <- selectify(read.csv("2019 12th Final Marks S2.csv"))
Y19 <- rbind(
  Y19G9S1, Y19G9S2,
  Y19G10S1, Y19G10S2,
  Y19G11S1, Y19G11S2,
  Y19G12S1, Y19G12S2
)
Y19$Year <- 2019
Y19$Distance <- "In Person"
# 2020: two semester files (S1, S2) for each of grades 9-12.
Y20G9S1 <- selectify(read.csv("2020 9th Final Marks S1.csv"))
Y20G9S2 <- selectify(read.csv("2020 9th Final Marks S2.csv"))
Y20G10S1 <- selectify(read.csv("2020 10th Final Marks S1.csv"))
Y20G10S2 <- selectify(read.csv("2020 10th Final Marks S2.csv"))
Y20G11S1 <- selectify(read.csv("2020 11th Final Marks S1.csv"))
Y20G11S2 <- selectify(read.csv("2020 11th Final Marks S2.csv"))
Y20G12S1 <- selectify(read.csv("2020 12th Final Marks S1.csv"))
Y20G12S2 <- selectify(read.csv("2020 12th Final Marks S2.csv"))
Y20 <- rbind(
  Y20G9S1, Y20G9S2,
  Y20G10S1, Y20G10S2,
  Y20G11S1, Y20G11S2,
  Y20G12S1, Y20G12S2
)
Y20$Year <- 2020
# Every row starts as "In Person"; rows whose Term.Name is "S2" are then
# relabelled "Hold Harmless". which() skips rows with a missing Term.Name.
Y20$Distance <- "In Person"
Y20$Distance[which(Y20$Term.Name == "S2")] <- "Hold Harmless"
# 2021: only first-semester (S1) files are read, one per grade.
Y21G9S1 <- selectify(read.csv("2021 9th Final Marks S1.csv"))
Y21G10S1 <- selectify(read.csv("2021 10th Final Marks S1.csv"))
Y21G11S1 <- selectify(read.csv("2021 11th Final Marks S1.csv"))
Y21G12S1 <- selectify(read.csv("2021 12th Final Marks S1.csv"))
Y21 <- rbind(
  Y21G9S1,
  Y21G10S1,
  Y21G11S1,
  Y21G12S1
)
Y21$Year <- 2021
Y21$Distance <- "Distance"
# Stack all six years into the single table used by the rest of the analysis.
Grades <- rbind(
  Y16, Y17, Y18,
  Y19, Y20, Y21
)
# Lookup table from letter mark to grade points.
# Letter marks: A-D each with "+", plain and "-" variants, followed by F and P.
Marks <- c(
  paste0(rep(c("A", "B", "C", "D"), each = 3), c("+", "", "-")),
  "F", "P"
)
# Grade points in the same order as Marks; "P" carries no points (NA).
Points <- c(
  4.33, 4, 3.67,
  3.33, 3, 2.67,
  2.33, 2, 1.67,
  1.33, 1, .67,
  0, NA
)
# Column names are taken from the variable names: Marks, Points.
GradeMat <- data.frame(Marks, Points)
# Render the mark-to-points lookup as a table in the document.
kable(GradeMat)
| Marks | Points |
|---|---|
| A+ | 4.33 |
| A | 4.00 |
| A- | 3.67 |
| B+ | 3.33 |
| B | 3.00 |
| B- | 2.67 |
| C+ | 2.33 |
| C | 2.00 |
| C- | 1.67 |
| D+ | 1.33 |
| D | 1.00 |
| D- | 0.67 |
| F | 0.00 |
| P | NA |
Assign Numeric Grades
# Attach the numeric grade points to every row by matching Grade.Mark against
# the Marks column of GradeMat; marks absent from GradeMat get NA points.
Grades <- left_join(Grades, GradeMat, by = c("Grade.Mark" = "Marks"))
# PF flags rows whose mark is "P"; every other row (including a missing
# Grade.Mark) stays FALSE.
Grades$PF <- FALSE
Grades$PF[Grades$Grade.Mark %in% "P"] <- TRUE
# Boxplots of grade points by grade level, split by gender ("P" marks excluded).
Grades %>%
  filter(PF == FALSE) %>%
  # Grade.Level is mapped as a discrete variable, matching the as.factor()
  # used by the later Distance boxplot. NOTE(review): if Grade.Level is read
  # as a number, mapping it directly would group the boxes by Gender only
  # instead of drawing one pair of boxes per grade level -- confirm the type.
  ggplot(aes(x = as.factor(Grade.Level), y = Points, fill = Gender)) +
  geom_boxplot() +
  scale_fill_lancet() +
  # Explicit label, otherwise the axis would read "as.factor(Grade.Level)".
  xlab("Grade Level")
# Departments treated as the core ("required") subjects; reused by later plots.
required <- c("Science", "Math", "English", "Social Science")
# Grade points per core department, split by gender, one panel per grade
# level ("P" marks excluded).
Grades %>%
  filter(!PF, Course.Department.Name %in% required) %>%
  ggplot(aes(x = Course.Department.Name, y = Points, fill = Gender)) +
  geom_boxplot() +
  facet_wrap(~Grade.Level) +
  scale_fill_lancet() +
  labs(x = "", y = "Grade Points") +
  theme(axis.text.x = element_text(angle = 45))
# Grade points by grade level, coloured by the Distance label, for academic
# years after 2015.
ggplot(
  filter(Grades, Academic.Year > 2015),
  aes(x = as.factor(Grade.Level), y = Points, fill = Distance)
) +
  geom_boxplot() +
  scale_fill_aaas()
# Fix the display order of the Distance categories for all subsequent plots.
Grades$Distance <- factor(
  Grades$Distance,
  levels = c("In Person", "Hold Harmless", "Distance")
)
# Math grade points by Distance category; rows of panels are Gender, columns
# are Grade.Level. The x axis carries no information, so it is blanked.
Grades %>%
  filter(Academic.Year > 2015, Course.Department.Name == "Math") %>%
  ggplot(aes(y = Points, fill = Distance)) +
  geom_boxplot() +
  scale_fill_aaas() +
  facet_grid(Gender ~ Grade.Level) +
  labs(y = "Grade Points") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Math grade points by gender; rows of panels are Grade.Level, columns are the
# Distance category. The x axis carries no information, so it is blanked.
Grades %>%
  filter(Academic.Year > 2015, Course.Department.Name == "Math") %>%
  ggplot(aes(y = Points, fill = Gender)) +
  geom_boxplot() +
  scale_fill_aaas() +
  facet_grid(Grade.Level ~ Distance) +
  labs(y = "Grade Points") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Working copy restricted to academic years 2018-2021, with a year+term key
# (e.g. "2019S1") in Term.
GG <- Grades %>%
  filter(Academic.Year %in% 2018:2021) %>%
  mutate(Term = paste0(Academic.Year, Term.Name))
# For adding the semester callouts: Term2 copies Term, except that "2020S2" is
# shown as "Hold Harmless" and "2021S1" as "Distance". The factor levels fix
# the left-to-right order on the x axis; any other Term value becomes NA.
GG$Term2 <- GG$Term %>%
  replace(GG$Term == "2020S2", "Hold Harmless") %>%
  replace(GG$Term == "2021S1", "Distance") %>%
  factor(levels = c("2018S1", "2018S2", "2019S1", "2019S2",
                    "2020S1", "Hold Harmless", "Distance"))
# Mean grade points per core department and term, drawn as points joined by a
# line per department. The y axis is limited to 2.4-3.3.
GG %>%
  filter(Course.Department.Name %in% required) %>%
  group_by(Course.Department.Name, Term2) %>%
  summarize(Grade = mean(Points, na.rm = TRUE), count = n()) %>%
  ggplot(aes(x = Term2, y = Grade, color = Course.Department.Name)) +
  geom_point(size = 5) +
  # x, y and color are inherited; group makes one line per department.
  geom_line(aes(group = Course.Department.Name)) +
  scale_color_lancet() +
  ylim(c(2.4, 3.3)) +
  labs(color = "Department", x = "Term", y = "Grade Average") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
# Failing grades - D and F: counts of marks worth fewer than 1.5 grade points.
# Gender becomes a factor with levels M then F; this persists for later plots.
Grades$Gender <- factor(Grades$Gender, levels = c("M", "F"))
# Year-long ("Y") terms are excluded; one panel per year+term.
Grades %>%
  filter(Points < 1.5, Term.Name != "Y") %>%
  mutate(YearTerm = paste0(Academic.Year, Term.Name)) %>%
  group_by(YearTerm, Gender, Grade.Level) %>%
  summarize(AdverseGrades = n()) %>%
  ggplot(aes(x = Grade.Level, y = AdverseGrades, fill = Gender)) +
  geom_col(position = "dodge") +
  scale_fill_aaas() +
  facet_wrap(~YearTerm) +
  labs(x = "Grade Level")
# Share of A and B marks in the core departments, by grade level and gender,
# one panel per Distance category. "P" marks and year-long ("Y") terms are
# excluded.
Grades %>%
  filter(PF == FALSE) %>%
  filter(Course.Department.Name %in% required) %>%
  filter(!Term.Name == "Y") %>%
  group_by(Grade.Level, Distance, Gender) %>%
  # The cut-off is 2.5, which lies between B- (2.67) and C+ (2.33), so every
  # A and B mark counts. The previous cut-off of 3 left out B (3.00) and
  # B- (2.67). It mirrors the 1.5 cut-off used for the D and F share.
  # NOTE(review): n() counts every remaining row, including rows whose mark
  # has no grade points (NA) -- confirm that denominator is intended.
  summarize(AandB = sum(Points > 2.5, na.rm = TRUE) / n()) %>%
  ggplot(aes(x = Grade.Level, y = AandB, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~Distance) +
  scale_fill_aaas() +
  xlab("Grade Level") +
  ggtitle("As and Bs assigned by Grade Level and Gender") +
  scale_y_continuous(labels = scales::percent, name = "A and B Grades") +
  labs(subtitle = "Core Courses")
# Share of D and F marks in the core departments, by grade level and gender,
# one panel per Distance category. "P" marks and year-long ("Y") terms are
# excluded.
Grades %>%
  filter(PF == FALSE) %>%
  filter(Course.Department.Name %in% required) %>%
  filter(!Term.Name == "Y") %>%
  group_by(Grade.Level, Distance, Gender) %>%
  # Marks below 1.5 grade points are D+, D, D- and F. The summary column is
  # named DandF (it was previously the misleading AandB).
  # NOTE(review): n() counts every remaining row, including rows whose mark
  # has no grade points (NA) -- confirm that denominator is intended.
  summarize(DandF = sum(Points < 1.5, na.rm = TRUE) / n()) %>%
  ggplot(aes(x = Grade.Level, y = DandF, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~Distance) +
  scale_fill_aaas() +
  xlab("Grade Level") +
  ggtitle("Ds and Fs assigned by Grade Level and Gender") +
  scale_y_continuous(labels = scales::percent, name = "D and F Grades") +
  labs(subtitle = "Core Courses")
# Empirical cumulative distribution of grade points by gender for four
# selected terms, one panel per term. Year-long ("Y") terms are excluded.
Grades %>%
  filter(Term.Name != "Y", Academic.Year > 2015) %>%
  mutate(YearTerm = paste0(Academic.Year, Term.Name)) %>%
  filter(YearTerm %in% c("2016S1", "2018S1", "2020S2", "2021S1")) %>%
  ggplot(aes(x = Points, color = Gender)) +
  stat_ecdf(geom = "point") +
  scale_color_lancet() +
  facet_wrap(~YearTerm)