Purpose

This document contains the R code required to perform the analysis and produce the graphics in the accompanying article.

library(dplyr); library(magrittr);
library(ggplot2); library(ggsci)
library(reshape2); library(knitr)
library(stringr)

# Make ggplot2's minimal theme the default for every plot created below.
theme_set(theme_minimal())

# Reduce a marks export to the eight columns used throughout the analysis.
#
# X: a data frame holding at least the columns listed in `keep` below.
# Returns: a data frame with exactly those columns, in the order listed.
# Stops with an explicit message naming any column that is absent.
#
# Implemented with base subsetting so that calling it does not attach
# packages as a side effect.
selectify <- function(X) {
  keep <- c("Gender", "Grade.Level", "Academic.Year",
            "Course.Long.Name", "Course.Department.Name", "Grade.Mark",
            "Site.Name", "Term.Name")

  absent <- setdiff(keep, names(X))
  if (length(absent) > 0) {
    stop("selectify(): input is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # drop = FALSE keeps a data frame even if `keep` is ever cut to one column.
  X[, keep, drop = FALSE]
}
# Import ----

# Note - this is not glamorous, but it is repeatable: one read per export file.

# 2016
Y16G9S1 <- read.csv("2016 9th Final Marks S1.csv")
Y16G9S2 <- read.csv("2016 9th Final Marks S2.csv")
Y16G10S1 <- read.csv("2016 10th Final Marks S1.csv")
Y16G10S2 <- read.csv("2016 10th Final Marks S2.csv")
Y16G11Y <- read.csv("2016 11th Final Marks Y.csv")
Y16G12Y <- read.csv("2016 12th Final Marks Y.csv")

# Note on format: the 12th-grade export is laid out slightly differently, to
# wit it carries Quarter.Number and Grading.Period.Name, which are not wanted.

# Munge ----
# Every frame goes through selectify(), exactly as for the later years, so all
# 2016 pieces share one column set before binding. This also drops the two
# extra 12th-grade columns without a special case. The per-file objects above
# are left as read.
Y16 <- do.call(
  rbind,
  lapply(
    list(Y16G9S1, Y16G9S2, Y16G10S1, Y16G10S2, Y16G11Y, Y16G12Y),
    selectify
  )
)

Y16$Year <- 2016
Y16$Distance <- "In Person"
# For expansion (adding years)
# 2017 (should be similar to 2016)

# Each export is read and immediately trimmed to the shared columns.
Y17G9S1 <- selectify(read.csv("2017 9th Final Marks S1.csv"))
Y17G9S2 <- selectify(read.csv("2017 9th Final Marks S2.csv"))
Y17G10S1 <- selectify(read.csv("2017 10th Final Marks S1.csv"))
Y17G10S2 <- selectify(read.csv("2017 10th Final Marks S2.csv"))
Y17G11S1 <- selectify(read.csv("2017 11th Final Marks S1.csv"))
Y17G11S2 <- selectify(read.csv("2017 11th Final Marks S2.csv"))
Y17G12Y <- selectify(read.csv("2017 12th Final Marks Y.csv"))

# Stack the grade-level pieces, then tag every row with year and delivery mode.
Y17 <- rbind(
  Y17G9S1, Y17G9S2,
  Y17G10S1, Y17G10S2,
  Y17G11S1, Y17G11S2,
  Y17G12Y
)

Y17[["Year"]] <- 2017
Y17[["Distance"]] <- "In Person"
# 2018

# Each export is read and immediately trimmed to the shared columns.
Y18G9S1 <- selectify(read.csv("2018 9th Final Marks S1.csv"))
Y18G9S2 <- selectify(read.csv("2018 9th Final Marks S2.csv"))
Y18G10S1 <- selectify(read.csv("2018 10th Final Marks S1.csv"))
Y18G10S2 <- selectify(read.csv("2018 10th Final Marks S2.csv"))
Y18G11S1 <- selectify(read.csv("2018 11th Final Marks S1.csv"))
Y18G11S2 <- selectify(read.csv("2018 11th Final Marks S2.csv"))
Y18G12S1 <- selectify(read.csv("2018 12th Final Marks S1.csv"))
Y18G12S2 <- selectify(read.csv("2018 12th Final Marks S2.csv"))

# Stack the grade-level pieces, then tag every row with year and delivery mode.
Y18 <- rbind(
  Y18G9S1, Y18G9S2,
  Y18G10S1, Y18G10S2,
  Y18G11S1, Y18G11S2,
  Y18G12S1, Y18G12S2
)

Y18[["Year"]] <- 2018
Y18[["Distance"]] <- "In Person"
# 2019

# Each export is read and immediately trimmed to the shared columns.
Y19G9S1 <- selectify(read.csv("2019 9th Final Marks S1.csv"))
Y19G9S2 <- selectify(read.csv("2019 9th Final Marks S2.csv"))
Y19G10S1 <- selectify(read.csv("2019 10th Final Marks S1.csv"))
Y19G10S2 <- selectify(read.csv("2019 10th Final Marks S2.csv"))
Y19G11S1 <- selectify(read.csv("2019 11th Final Marks S1.csv"))
Y19G11S2 <- selectify(read.csv("2019 11th Final Marks S2.csv"))
Y19G12S1 <- selectify(read.csv("2019 12th Final Marks S1.csv"))
Y19G12S2 <- selectify(read.csv("2019 12th Final Marks S2.csv"))

# Stack the grade-level pieces, then tag every row with year and delivery mode.
Y19 <- rbind(
  Y19G9S1, Y19G9S2,
  Y19G10S1, Y19G10S2,
  Y19G11S1, Y19G11S2,
  Y19G12S1, Y19G12S2
)

Y19[["Year"]] <- 2019
Y19[["Distance"]] <- "In Person"
# 2020

# Each export is read and immediately trimmed to the shared columns.
Y20G9S1 <- selectify(read.csv("2020 9th Final Marks S1.csv"))
Y20G9S2 <- selectify(read.csv("2020 9th Final Marks S2.csv"))
Y20G10S1 <- selectify(read.csv("2020 10th Final Marks S1.csv"))
Y20G10S2 <- selectify(read.csv("2020 10th Final Marks S2.csv"))
Y20G11S1 <- selectify(read.csv("2020 11th Final Marks S1.csv"))
Y20G11S2 <- selectify(read.csv("2020 11th Final Marks S2.csv"))
Y20G12S1 <- selectify(read.csv("2020 12th Final Marks S1.csv"))
Y20G12S2 <- selectify(read.csv("2020 12th Final Marks S2.csv"))

Y20 <- rbind(
  Y20G9S1, Y20G9S2,
  Y20G10S1, Y20G10S2,
  Y20G11S1, Y20G11S2,
  Y20G12S1, Y20G12S2
)

Y20[["Year"]] <- 2020

# Rows default to "In Person"; rows whose term is S2 are relabelled
# "Hold Harmless". %in% treats a missing Term.Name as "not S2".
second_semester <- Y20$Term.Name %in% "S2"
Y20[["Distance"]] <- "In Person"
Y20[["Distance"]][second_semester] <- "Hold Harmless"
# 2021 - only first-semester (S1) exports are read for this year.
Y21G9S1 <- selectify(read.csv("2021 9th Final Marks S1.csv"))
Y21G10S1 <- selectify(read.csv("2021 10th Final Marks S1.csv"))
Y21G11S1 <- selectify(read.csv("2021 11th Final Marks S1.csv"))
Y21G12S1 <- selectify(read.csv("2021 12th Final Marks S1.csv"))

# Stack the grade-level pieces, then tag every row with year and delivery mode.
Y21 <- rbind(
  Y21G9S1,
  Y21G10S1,
  Y21G11S1,
  Y21G12S1
)

Y21[["Year"]] <- 2021
Y21[["Distance"]] <- "Distance"
# Stack the per-year frames into the single table used by all analyses.
Grades <- rbind(Y16, Y17, Y18, Y19, Y20, Y21)

# Letter marks and their grade-point values, matched by position.
# "P" deliberately maps to NA: it carries no grade-point value.
Marks <- c(
  "A+", "A", "A-",
  "B+", "B", "B-",
  "C+", "C", "C-",
  "D+", "D", "D-",
  "F", "P"
)
Points <- c(
  4.33, 4, 3.67,
  3.33, 3, 2.67,
  2.33, 2, 1.67,
  1.33, 1, .67,
  0, NA
)

# Column names are taken from the vector names (Marks, Points).
GradeMat <- data.frame(Marks, Points)

# Render the lookup table in the document.
kable(GradeMat)
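| Marks | Points |
|:------|-------:|
| A+    |   4.33 |
| A     |   4.00 |
| A-    |   3.67 |
| B+    |   3.33 |
| B     |   3.00 |
| B-    |   2.67 |
| C+    |   2.33 |
| C     |   2.00 |
| C-    |   1.67 |
| D+    |   1.33 |
| D     |   1.00 |
| D-    |   0.67 |
| F     |   0.00 |
| P     |     NA |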

Assign Numeric Grades

# Attach the grade-point value for each letter mark; marks that do not appear
# in GradeMat end up with Points = NA.
Grades <- left_join(Grades, GradeMat, by = c("Grade.Mark" = "Marks"))

# PF is TRUE exactly for rows whose mark is "P" and FALSE everywhere else
# (including rows with a missing mark).
Grades$PF <- Grades$Grade.Mark %in% "P"

Grade by Gender

# Grade points by grade level, one box per gender, with "P" marks excluded.
# Grade.Level is wrapped in factor() so each level gets its own pair of boxes;
# with a numeric x, geom_boxplot() would group by Gender only. This matches
# the as.factor(Grade.Level) used by the Distance boxplot further down.
Grades %>%
  filter(!PF) %>%
  ggplot(aes(x = factor(Grade.Level), y = Points, fill = Gender)) +
  geom_boxplot() +
  scale_fill_lancet() +
  xlab("Grade Level")

“Required” courses

# Departments treated as "required" (core) courses in the analyses below.
required <- c("Science", "Math", "English", "Social Science")

# Grade points by department and gender, one panel per grade level,
# restricted to non-"P" marks in the required departments.
Grades %>%
  filter(PF == FALSE, Course.Department.Name %in% required) %>%
  ggplot(aes(x = Course.Department.Name, y = Points, fill = Gender)) +
  geom_boxplot() +
  facet_wrap(~Grade.Level) +
  scale_fill_lancet() +
  labs(x = "", y = "Grade Points") +
  theme(axis.text.x = element_text(angle = 45))

# Fix the display order of the delivery modes BEFORE the first plot that uses
# Distance, so this plot and every later one share the same ordering
# (In Person, Hold Harmless, Distance) rather than the alphabetical default.
Grades$Distance <- factor(
  Grades$Distance,
  levels = c("In Person", "Hold Harmless", "Distance")
)

# Grade points by grade level, one box per delivery mode.
Grades %>%
  filter(Academic.Year > 2015) %>%
  ggplot(aes(x = as.factor(Grade.Level), y = Points, fill = Distance)) +
  geom_boxplot() +
  scale_fill_aaas()

# Math grade points by delivery mode: rows are Gender, columns are Grade.Level.
# There is no x aesthetic, so the x axis title, labels and ticks are hidden.
Grades %>%
  filter(Academic.Year > 2015, Course.Department.Name == "Math") %>%
  ggplot(aes(y = Points, fill = Distance)) +
  geom_boxplot() +
  facet_grid(Gender ~ Grade.Level) +
  scale_fill_aaas() +
  ylab("Grade Points") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Math grade points by gender: rows are Grade.Level, columns are Distance.
# There is no x aesthetic, so the x axis title, labels and ticks are hidden.
Grades %>%
  filter(Academic.Year > 2015, Course.Department.Name == "Math") %>%
  ggplot(aes(y = Points, fill = Gender)) +
  geom_boxplot() +
  facet_grid(Grade.Level ~ Distance) +
  scale_fill_aaas() +
  ylab("Grade Points") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Average grade per term in the required departments, 2018-2021.
GG <- Grades %>%
  filter(Academic.Year %in% 2018:2021) %>%
  mutate(Term = paste0(Academic.Year, Term.Name))

# For adding the semester callouts: Term2 copies Term, except that the two
# terms of interest are replaced by descriptive labels.
GG$Term2 <- GG$Term
GG$Term2[GG$Term == "2020S2"] <- "Hold Harmless"
GG$Term2[GG$Term == "2021S1"] <- "Distance"

# Explicit levels fix the order on the x axis; any Term2 value not listed
# here becomes NA.
GG$Term2 <- factor(
  GG$Term2,
  levels = c("2018S1", "2018S2", "2019S1", "2019S2",
             "2020S1", "Hold Harmless", "Distance")
)

# One point per department and term, joined by a line per department.
# The legend title is set once (an earlier "Course" title was immediately
# overridden by "Department"), and geom_line() inherits x/y/color from the
# plot-level aes(), adding only the grouping it needs.
GG %>%
  filter(Course.Department.Name %in% required) %>%
  group_by(Course.Department.Name, Term2) %>%
  summarize(Grade = mean(Points, na.rm = TRUE)) %>%
  ggplot(aes(x = Term2, y = Grade, color = Course.Department.Name)) +
  geom_point(size = 5) +
  scale_color_lancet() +
  ylim(c(2.4, 3.3)) +
  labs(color = "Department") +
  xlab("Term") +
  ylab("Grade Average") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_line(aes(group = Course.Department.Name))

# Failing grades - D and F
# Gender values other than "M" and "F" become NA under these explicit levels.
Grades$Gender <- factor(Grades$Gender, levels = c("M", "F"))

# Count of adverse marks per term, gender and grade level.
# Points < 1.5 keeps D+ (1.33) and everything below it; rows with NA Points
# are dropped by filter(). Year-long ("Y") terms are excluded.
# The same Points filter was previously applied twice; once is enough.
Grades %>%
  filter(Points < 1.5, Term.Name != "Y") %>%
  mutate(YearTerm = paste0(Academic.Year, Term.Name)) %>%
  group_by(YearTerm, Gender, Grade.Level) %>%
  summarize(AdverseGrades = n()) %>%
  ggplot(aes(x = Grade.Level, y = AdverseGrades, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~YearTerm) +
  scale_fill_aaas() +
  xlab("Grade Level")

# Share of A and B marks in the core courses, by grade level and gender,
# one panel per delivery mode. "P" marks and year-long ("Y") terms are
# excluded.
#
# The cut-off is Points > 2.5, i.e. B- (2.67) and above. The previous
# cut-off, Points > 3, left out B (3.00) and B- (2.67), contradicting the
# "As and Bs" title. It mirrors the Points < 1.5 cut-off used for Ds and Fs.
# The denominator n() counts every remaining row in the group, including rows
# whose Points is NA.
Grades %>%
  filter(PF == FALSE) %>%
  filter(Course.Department.Name %in% required) %>%
  filter(Term.Name != "Y") %>%
  group_by(Grade.Level, Distance, Gender) %>%
  summarize(AandB = sum(Points > 2.5, na.rm = TRUE) / n()) %>%
  ggplot(aes(x = Grade.Level, y = AandB, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~Distance) +
  scale_fill_aaas() +
  xlab("Grade Level") +
  scale_y_continuous(labels = scales::percent, name = "A and B Grades") +
  ggtitle("As and Bs assigned by Grade Level and Gender") +
  labs(subtitle = "Core Courses")

# Share of D and F marks in the core courses, by grade level and gender,
# one panel per delivery mode. "P" marks and year-long ("Y") terms are
# excluded.
#
# Points < 1.5 counts D+ (1.33) and below. The denominator n() counts every
# remaining row in the group, including rows whose Points is NA.
# The summary column is named DandF to match what it actually holds.
Grades %>%
  filter(PF == FALSE) %>%
  filter(Course.Department.Name %in% required) %>%
  filter(Term.Name != "Y") %>%
  group_by(Grade.Level, Distance, Gender) %>%
  summarize(DandF = sum(Points < 1.5, na.rm = TRUE) / n()) %>%
  ggplot(aes(x = Grade.Level, y = DandF, fill = Gender)) +
  geom_col(position = "dodge") +
  facet_wrap(~Distance) +
  scale_fill_aaas() +
  xlab("Grade Level") +
  scale_y_continuous(labels = scales::percent, name = "D and F Grades") +
  ggtitle("Ds and Fs assigned by Grade Level and Gender") +
  labs(subtitle = "Core Courses")

  # Empirical cumulative distribution of grade points by gender, drawn as
  # points, for four selected terms (one panel each). Year-long ("Y") terms
  # are excluded.
  Grades %>%
    filter(Term.Name != "Y", Academic.Year > 2015) %>%
    mutate(YearTerm = paste0(Academic.Year, Term.Name)) %>%
    filter(YearTerm %in% c("2016S1", "2018S1", "2020S2", "2021S1")) %>%
    ggplot(aes(x = Points, color = Gender)) +
    stat_ecdf(geom = "point") +
    scale_color_lancet() +
    facet_wrap(~YearTerm)