library(tidyverse)
library(ggthemes)
library(plyr)
library(kableExtra)
library(magrittr)
library(here)
library(ggrepel)
library(tippy)

theme_alan <- function(base_size = 12 , base_family = "")
{
  half_line <- base_size/2
  colors <- ggthemes_data$few
  gray <- colors$medium["gray"]
  black <- colors$dark["black"]
  
  theme(
    line = element_line(colour = "black", size = 0.5, linetype = 1, lineend = "butt"),
    rect = element_rect(fill = "white", 
                        colour = "black", size = 0.5, linetype = 1),
    text = element_text(family = base_family, face = "plain", colour = "black", 
                        size = base_size, lineheight = 0.9, hjust = 0.5, vjust = 0.5,
                        angle = 0, margin = margin(), debug = FALSE),
    
    axis.line = element_blank(),
    axis.line.x = NULL,
    axis.line.y = NULL, 
    axis.text = element_text(size = rel(0.8), colour = "grey30"),
    axis.text.x = element_text(margin = margin(t = 0.8 * half_line/2), vjust = 1),
    axis.text.x.top = element_text(margin = margin(b = 0.8 * half_line/2), vjust = 0),
    axis.text.y = element_text(margin = margin(r = 0.8 * half_line/2), hjust = 1),
    axis.text.y.right = element_text(margin = margin(l = 0.8 * half_line/2), hjust = 0), 
    axis.ticks = element_line(colour = "grey20"), 
    axis.ticks.length = unit(half_line/2, "pt"),
    axis.title.x = element_text(margin = margin(t = half_line), vjust = 1),
    axis.title.x.top = element_text(margin = margin(b = half_line), vjust = 0),
    axis.title.y = element_text(angle = 90, margin = margin(r = half_line), vjust = 1),
    axis.title.y.right = element_text(angle = -90, margin = margin(l = half_line), vjust = 0),
    
    legend.background = element_rect(colour = NA),
    legend.spacing = unit(0.4, "cm"), 
    legend.spacing.x = NULL, 
    legend.spacing.y = NULL,
    legend.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"),
    legend.key = element_rect(fill = "white", colour = NA), 
    legend.key.size = unit(1.2, "lines"), 
    legend.key.height = NULL,
    legend.key.width = NULL,
    legend.text = element_text(size = rel(0.8)), 
    legend.text.align = NULL,
    legend.title = element_text(hjust = 0),
    legend.title.align = NULL,
    legend.position = "right", 
    legend.direction = NULL,
    legend.justification = "center", 
    legend.box = NULL,
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.background = element_blank(),
    legend.box.spacing = unit(0.4, "cm"),
    
    panel.background = element_rect(fill = "white", colour = NA),
    panel.border = element_rect(fill = NA, colour = "grey20"),
    panel.grid.major = element_line(colour = "grey92"),
    panel.grid.minor = element_line(colour = "grey92", size = 0.25),
    panel.spacing = unit(half_line, "pt"),
    panel.spacing.x = NULL,
    panel.spacing.y = NULL,
    panel.ontop = FALSE,
    
    strip.background = element_rect(fill = "NA", colour = "NA"),
    strip.text = element_text(colour = "grey10", size = rel(0.8)),
    strip.text.x = element_text(margin = margin(t = half_line, b = half_line)),
    strip.text.y = element_text(angle = 0, margin = margin(l = half_line, r = half_line)),
    strip.placement = "inside",
    strip.placement.x = NULL, 
    strip.placement.y = NULL,
    strip.switch.pad.grid = unit(0.1, "cm"), 
    strip.switch.pad.wrap = unit(0.1, "cm"), 
    
    plot.background = element_rect(colour = "white"),
    plot.title = element_text(size = rel(1.2), hjust = 0, vjust = 1, margin = margin(b = half_line * 1.2)),
    plot.subtitle = element_text(size = rel(0.9), hjust = 0, vjust = 1, margin = margin(b = half_line * 0.9)),
    plot.caption = element_text(size = rel(0.9), hjust = 1, vjust = 1, margin = margin(t = half_line * 0.9)), 
    plot.margin = margin(half_line, half_line, half_line, half_line),
    
    complete = TRUE)
}

# Wrapper Function for Long Graph Titles
wrapper <- function(x, ...) 
{
  paste(strwrap(x, ...), collapse = "\n")
}


pd <- position_dodge(width = 0.8)       #My standard dodging for graphs


#RDFZ Reds
RDFZPink <- "#cf8f8d"
RDFZRed1 <- "#ae002b"
RDFZRed2 <- "#991815"
RDFZRed3 <- "#78011e"
RDFZRed4 <- "#4b0315"

#Grade Colors
GradeColors <- c("#8900df", "#0092df", "#00df76", "#94df00", "#d4df25", "#df5300", "#9b0f00")

#function for allowing inline code chunks to be shown verbatim
rinline <- function(code){
  html <- '<code  class="r">``` `r CODE` ```</code>'
  sub("CODE", code, html)
}
## Read in the Data File
MockGrades <- read.csv(here("GPMock.csv"))

## Make a Unique Column for Grouping
MockGrades %<>%
  subset(Question != "Total") %>%
  unite(Combo, Name, Teacher, sep = "-", remove = FALSE) 

## Obtain Maximum Values (total score of test)
P3MaxVal <- 30

## Summarize the Data for Each Participant
Paper3Totals <- 
  MockGrades %>%
    group_by(Combo) %>%
    dplyr::summarise(sum = sum(Grade)) %>%
    mutate(Grade = round(sum/P3MaxVal*100, 1)) %>%
    separate(Combo, into = c("Name", "Teacher"), sep = "-", remove = TRUE) %>%
    arrange(Teacher, -Grade) 


write.csv(Paper3Totals, here("GPMockTotals.csv"))

## Make a Unique Column for Grouping
MockGrades2 <- read.csv(here("GPMock.csv"))

MockGrades2 %<>%
  subset(Question != "Total") %>%
  unite(Combo, Name, Teacher2, sep = "-", remove = FALSE) 

Paper3Totals2 <- 
  MockGrades2 %>%
    group_by(Combo) %>%
    dplyr::summarise(sum = sum(Grade)) %>%
    mutate(Grade = round(sum/P3MaxVal*100, 1)) %>%
    separate(Combo, into = c("Name", "Teacher2"), sep = "-", remove = TRUE) %>%
    arrange(-Grade) 
#FILL THIS IN WHEN SETTING UP THE SHEET - IT WILL HELP WITH NAMING THE TABLES AND IMAGES THAT ARE OUTPUT, SAVING YOU TIME

Class <- "AS Global Perspectives"            #Name of the Class Goes Here
Eval <- "Cambridge Mock Exam"       #Name of what is being evaluated goes here

Overall Performance

#Single Histogram

OverallTitle <- paste(Class, Eval, "Histogram of Grades", sep = " - ")

ggplot(data=Paper3Totals, aes(x=Grade)) +
  geom_histogram(aes(y=..density..), alpha = 1, position = "identity", fill = RDFZRed2) +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(Paper3Totals$Grade), sd=sd(Paper3Totals$Grade)), 
                color=RDFZRed4, size   = 1.4) +
  scale_x_continuous(limits = c(0,100)) + 
  theme_alan() +
  theme(legend.position = "none") +
  ggtitle(wrapper(OverallTitle, width = 45))

ggsave(here(paste(OverallTitle, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)

Overall performance can be seen in the graph above. We had a fairly normal distribution of grades (Shapiro-Wilk Normality test: W= 0.93, p= 0) with a mean score of 64.8% (Median = 63.3%) and a standard deviation of 16.3%.

The high score on the test was 30 out of 0 marks (100%), which was achieved by 2 student(s).

The low score on the test was 2 out of 0 marks (6.7%), which was achieved by 1 student(s).

Letter Grades (Raw)

Typical raw letter grade boundaries for the school can be seen in the table below:

#Note that this already includes rounding up any grade above 0.5 to the next letter grade (e.g. 79.5% = A)
Letters <- c("A*", "A", "B", "C", "D", "E", "U")
MinVal <- c(89.5, 79.5, 69.5, 59.5, 49.5, 39.5, 0)
MaxVal <- c (100, 89.49, 79.49, 69.49, 59.49, 49.49, 39.49)

LetterBoundaries <- 
  cbind.data.frame(Letters, MinVal, MaxVal) %>%
    setNames(c("Letter", "Bottom", "Top"))

LetterGradesTable <-
  LetterBoundaries  %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) %>%
    footnote(general = "Note this includes rounding of grades like 79.5% up a grade boundary" )

LetterGradesTable
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary
A* 89.5 100.00
A 79.5 89.49
B 69.5 79.49
C 59.5 69.49
D 49.5 59.49
E 39.5 49.49
U 0.0 39.49
Note:
Note this includes rounding of grades like 79.5% up a grade boundary

If we assign grade boundaries based on these we can see the following distribution of grades

GB1Title <- paste(Class, Eval, "Letter Grades (Raw)", sep = " - ")
  
  
Paper3Totals %<>%
  mutate(Letter = cut(x= Paper3Totals$Grade, 
                      breaks = c(LetterBoundaries$Bottom,100), 
                      labels= map_df(LetterBoundaries,rev)$Letter)) %>%
  mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))
    

ggplot(data=Paper3Totals, aes(x=Letter, fill = Letter)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  #Janky because of missing grade boundaries (no students in bins)
  labs(x="Letter Grade", y="Count") +
  theme_alan() +
  ggtitle(wrapper(GB1Title, width = 45))

ggsave(here(paste(GB1Title, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)


LetterCounts <- count(Paper3Totals$Letter)

LetterBoundaries2 <-
  LetterBoundaries  %>%
    mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))  %>%
    mutate(Count = plyr::mapvalues(Letter, from = LetterCounts$x, to = LetterCounts$freq))  %>%
    mutate(Count = as.numeric(as.character(Count)))  %>%
    mutate(Count = replace_na(Count, 0))

When we look at this via grade boundaries its a terrible distribution - well over 1/3 of all students received a C grade, which is simply too low for the school’s comfort. Meanwhile, only 20 students earned an A or A-star, with 8 students earning an E or a U on the exam. The total counts of students in each grade category can be seen in the table below

LetterBoundaries2  %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89.5 100.00 9
A 79.5 89.49 11
B 69.5 79.49 10
C 59.5 69.49 44
D 49.5 59.49 19
E 39.5 49.49 3
U 0.0 39.49 5

Between Class Performance

How do the three classes for AS Global Perspectives compare to one another on Cambridge Mock Exam?

Below, you can see a histogram of performance between the two classes.

BetweenTitle <- paste(Class, Eval, "Histogram of Grades by Class", sep = " - ")

#Histograms by Class
AlanGrades<- subset(Paper3Totals, Teacher == "Alan")
MoGrades <- subset(Paper3Totals, Teacher == "Mo")
SuzanaGrades <- subset(Paper3Totals, Teacher == "Suzana")

ggplot(data=Paper3Totals, aes(x=Grade, fill = Teacher)) +
  geom_histogram(aes(y=..density..), alpha = 0.75, position = "identity") +
  labs(x="Grade (Percentage)", y="Density") +
  scale_fill_manual(values= c(RDFZRed4, RDFZRed2, RDFZRed1)) +
  scale_x_continuous(limits = c(0,100)) +
  theme_alan() + 
  facet_wrap(~Teacher, ncol = 1) +
  ggtitle(wrapper(BetweenTitle, width = 45))

ggsave(here(paste(BetweenTitle, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 10, units = c("in", "cm", "mm"),
       dpi = 600)

MeanScores <- 
  tapply(Paper3Totals$Grade, Paper3Totals$Teacher, mean) %>%
  cbind(tapply(Paper3Totals$Grade, Paper3Totals$Teacher, sd)) %>%
  data.frame() %>%
  setNames(c("Mean", "sd")) %>%
  mutate(Mean = round(Mean,1)) %>%
  mutate(sd = round(sd, 1)) %>%
    knitr::kable(caption = "Mean and Standard Deviation of Grades by Class", row.names = T) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 

MeanScores
Mean and Standard Deviation of Grades by Class
Mean sd
Alan 74.2 18.1
Mo 61.0 6.7
Suzana 64.0 24.0

There are two things that jump out here:

  • The grades given by Alan/to Alan’s students are on average quite a bit higher
  • There is very little variance in Mo’s grades - they are very tightly clumped up around their mean score of 18.

The superior performance of Alan’s class has two possible explanations.

The first is that Brindley’s students have brought up the grades of Alan’s class because they are superior. There is some hint of this if we look at the overall grades table, where they are 4 of the top 5 students. So lets see what it looks like if we exclude them.

BetweenTitle <- paste(Class, Eval, "Histogram of Grades by Class", sep = " - ")


MeanScores2 <- 
  tapply(Paper3Totals2$Grade, Paper3Totals2$Teacher2, mean) %>%
  cbind(tapply(Paper3Totals2$Grade, Paper3Totals2$Teacher2, sd)) %>%
  data.frame() %>%
  setNames(c("Mean", "sd")) %>%
  mutate(Mean = round(Mean,1)) %>%
  mutate(sd = round(sd, 1)) %>%
    knitr::kable(caption = "Mean and Standard Deviation of Grades by Class", row.names = T) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 

MeanScores2
Mean and Standard Deviation of Grades by Class
Mean sd
Alan 67.9 16.2
AlanB 82.3 17.8
Mo 61.0 6.7
Suzana 64.0 24.0

That does, indeed, take care of a big part of the difference for Alan’s class - but it can’t explain everything, because Mo also has approximately 10 of Brindley’s students in one of his classes, and they do not appear to be doing systematically better (I can’t check this because I don’t know who is who exactly).

The second explanation is that Alan’s students took the test a day later, so they were tipped off as to its material by the other students. This is almost certainly some of the explanation, and it may be that Brindley’s former students in Alan’s class were tipped off the most.

The second problem is the clumping of Grades in Mo’s class. The cluster is too low, first of all, which is easy enough to push up, but there probably also needs to be some separation between the grades. Nonetheless, we can try to apply a fairly simple grade correction and see what happens/if this must be addressed in some other way (like rank-ordering grades within the main grade cluster).

Grade Correction

There are broadly two ways that we can correct grades on these tests. First, we can correct for any mistakes we made as teachers - either by giving unfair questions, questions that are too hard, or by not preparing our students adequately for certain types of questions. The second is to curve the entire grade upward to be in line with achievement goals for the course and to align our results with what students could expect from their actual CAIE exams.

Applying an Overall Curve

Rather than correcting for poor performance on individual questions, Cambridge curves performance by applying grade boundaries. This is not the best approach for us for two reasons

  • We used a previous question, which some of our students have almost certainly seen before when preparing
  • We almost certainly grade things more easily than Cambridge
count(Paper3Totals$Grade) %>%
    knitr::kable(caption = "AS GP Grade Counts", "html", row.names = F) %>%
    kable_styling(full_width = F) 
AS GP Grade Counts
x freq
6.7 1
16.7 2
30.0 1
36.7 1
43.3 2
46.7 1
50.0 4
53.3 6
56.7 9
60.0 14
63.3 17
66.7 13
70.0 5
73.3 4
76.7 1
80.0 6
86.7 5
90.0 2
93.3 2
96.7 3
100.0 2

Above you can see the counts of actual grades, which we can use to try to set boundaries as we have done before. For the EoT we have grades from 101 students.

First we set a U boundary. Let’s aim for 3 students again, which meaning all students under 30% get a U

Next are Es. Lets give 5 of those. So students between 30 and 50 get an E

Next our A-stars. Here this works out well with our existing scheme - 9 students got grades of 90 or above, so they get an A*

Lets give 16 students an A (similar to last semester)

We want less Ds than As, so lets give 10 of them - all the kids who scored from 50 to 53.3

This leaves us with 58 students between B and C. Because of where the clumps of grades are we give 35 of these a B grade and 23 a C grade

So we end up with these for grade divisions (number of scores is in brackets):

Letters <- c("A*", "A", "B", "C", "D", "E", "U")
MinVal <- c(89, 73, 63, 56, 50, 30, 0)
MaxVal <- c(100, 87, 70, 61, 54, 49, 29)

CambridgeBoundaries <- 
  cbind.data.frame(Letters, MinVal, MaxVal) %>%
    setNames(c("Letter", "Bottom", "Top")) 

CambridgeGradesTable <-
  CambridgeBoundaries  %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary")) %>%
    knitr::kable(caption = "Cambridge Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 

CambridgeGradesTable
Cambridge Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary
A* 89 100
A 73 87
B 63 70
C 56 61
D 50 54
E 30 49
U 0 29

When we apply these new curved grade boundaries our grades look like this

#Curving and Assigning Letter Boundaries
Paper3TotalsCurved <-
  Paper3Totals %>%
    mutate(`Letter (Revised- Cambridge)` = cut(x= Grade, 
                      breaks = c(CambridgeBoundaries$Bottom,100), 
                      labels= map_df(CambridgeBoundaries,rev)$Letter)) %>%
    mutate(`Letter (Revised- Cambridge)` = factor(`Letter (Revised- Cambridge)`, levels =c("U", "E", "D", "C", "B", "A", "A*"))) 


#Plotting Distribution of Letter Grades
CurveTitle2 <- paste(Class, Eval, "Distribution of Grades after Curving", sep = " - ")
  

ggplot(data=Paper3TotalsCurved, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  
  labs(x="Letter Grade  (Curved)", y="Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle2, width = 45))

ggsave(here(paste(CurveTitle2, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)

CambridgeCounts <- count(Paper3TotalsCurved$`Letter (Revised- Cambridge)`)

CambridgeBoundaries2 <-
  CambridgeBoundaries  %>%
    mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))  %>%
    mutate(Count = plyr::mapvalues(Letter, from = CambridgeCounts$x, to = CambridgeCounts$freq))  %>%
    mutate(Count = as.numeric(as.character(Count)))  %>%
    mutate(Count = replace_na(Count, 0))

CambridgeBoundaries2 %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89 100 9
A 73 87 16
B 63 70 35
C 56 61 23
D 50 54 6
E 30 49 8
U 0 29 4

We can also break this down to take a look by class:

#Plotting Distribution of Letter Grades
CurveTitle3 <- paste(Class, Eval, "Distribution of Grades after Final Curve", sep = " - ")

ggplot(data=Paper3TotalsCurved, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  
  labs(x="Letter Grade  (Curved)", y="Count") +
  theme_alan() +
  facet_wrap(~ Teacher, ncol = 1) +
  ggtitle(wrapper(CurveTitle3, width = 45))

ggsave(here(paste(CurveTitle3, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 10, units = c("in", "cm", "mm"),
       dpi = 600)

This looks… okay? I think probably still not great to have so much clumping in one class (i.e. currently Mo has only a single A and no A*s), but otherwise the distribution looks sensible.

Curving Round 2

It was agreed that Mo’s grades were a bit too clumpy to apply a smooth curve to them, so they were returned to him with instructions to split the biggest clumps (20, 19, and 18 into a wider range from 17-23). Let’s take a look at how this influenced the data

Paper3TotalsR <- read.csv(here("GPMockTotalsR.csv"))
                        

BetweenTitle <- paste(Class, Eval, "Histogram of Grades by Class (R)", sep = " - ")

#Histograms by Class
AlanGrades<- subset(Paper3TotalsR, Teacher == "Alan")
MoGrades <- subset(Paper3TotalsR, Teacher == "Mo")
SuzanaGrades <- subset(Paper3TotalsR, Teacher == "Suzana")

ggplot(data=Paper3TotalsR, aes(x=sum, fill = Teacher)) +
  geom_histogram(aes(y=..density..), alpha = 0.75, position = "identity") +
  labs(x="Grade (sum)", y="Density") +
  scale_fill_manual(values= c(RDFZRed4, RDFZRed2, RDFZRed1)) +
  scale_x_continuous(limits = c(0,30)) +
  theme_alan() + 
  facet_wrap(~Teacher, ncol = 1) +
  ggtitle(wrapper(BetweenTitle, width = 45))

ggsave(here(paste(BetweenTitle, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 10, units = c("in", "cm", "mm"),
       dpi = 600)

MeanScores2 <- 
  tapply(Paper3TotalsR$sum, Paper3TotalsR$Teacher, mean) %>%
  cbind(tapply(Paper3TotalsR$sum, Paper3TotalsR$Teacher, sd)) %>%
  data.frame() %>%
  setNames(c("Mean", "sd")) %>%
  mutate(Mean = round(Mean,1)) %>%
  mutate(sd = round(sd, 1)) %>%
    knitr::kable(caption = "Mean and Standard Deviation of Grades by Class (R)", row.names = T) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 

MeanScores2
Mean and Standard Deviation of Grades by Class (R)
Mean sd
Alan 22.3 5.4
Mo 18.8 3.2
Suzana 19.2 7.2

So we can see right away that this flattened Mo’s distribution a bit and also brought it in line with Suzanna’s mean (although it still has the smallest deviation). Overall the distribution of grades then looks like this:

ggplot(data=Paper3TotalsR, aes(x=sum)) +
  geom_histogram(aes(y=..density..), alpha = 0.75, position = "identity") +
  labs(x="Grade (sum)", y="Density") +
  scale_x_continuous(limits = c(0,30)) +
  theme_alan() + 
  ggtitle(wrapper(BetweenTitle, width = 45))

Our uncurved distribution of grades would now look like this:

GB1Title <- paste(Class, Eval, "Letter Grades (Raw-Revised)", sep = " - ")
  
  
Paper3TotalsR %<>%
  mutate(Grade = round(sum/30*100),1) %>%
  mutate(Letter = cut(x= Grade, 
                      breaks = c(LetterBoundaries$Bottom,100), 
                      labels= map_df(LetterBoundaries,rev)$Letter)) %>%
  mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))
    

ggplot(data=Paper3TotalsR, aes(x=Letter, fill = Letter)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  #Janky because of missing grade boundaries (no students in bins)
  labs(x="Letter Grade", y="Count") +
  theme_alan() +
  ggtitle(wrapper(GB1Title, width = 45))

ggsave(here(paste(GB1Title, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)


LetterCounts <- count(Paper3TotalsR$Letter)

LetterBoundaries2 <-
  LetterBoundaries  %>%
    mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))  %>%
    mutate(Count = plyr::mapvalues(Letter, from = LetterCounts$x, to = LetterCounts$freq))  %>%
    mutate(Count = as.numeric(as.character(Count)))  %>%
    mutate(Count = replace_na(Count, 0))

LetterBoundaries2 %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89.5 100.00 10
A 79.5 89.49 12
B 69.5 79.49 19
C 59.5 69.49 28
D 49.5 59.49 22
E 39.5 49.49 5
U 0.0 39.49 5

Unsurprisingly we’d still end up with mostly Cs, so lets apply our same curve as before.

#Curving and Assigning Letter Boundaries
Paper3TotalsCurvedR <-
  Paper3TotalsR %>%
    mutate(`Letter (Revised- Cambridge)` = cut(x= Grade, 
                      breaks = c(CambridgeBoundaries$Bottom,100), 
                      labels= map_df(CambridgeBoundaries,rev)$Letter)) %>%
    mutate(`Letter (Revised- Cambridge)` = factor(`Letter (Revised- Cambridge)`, levels =c("U", "E", "D", "C", "B", "A", "A*"))) 


#Plotting Distribution of Letter Grades
CurveTitle2 <- paste(Class, Eval, "Distribution of Grades after Curving (R)", sep = " - ")
  

ggplot(data=Paper3TotalsCurvedR, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  
  labs(x="Letter Grade  (Curved)", y="Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle2, width = 45))

ggsave(here(paste(CurveTitle2, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)


CambridgeCounts <- count(Paper3TotalsCurvedR$`Letter (Revised- Cambridge)`)

CambridgeBoundaries2 <-
  CambridgeBoundaries  %>%
    mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))  %>%
    mutate(Count = plyr::mapvalues(Letter, from = CambridgeCounts$x, to = CambridgeCounts$freq))  %>%
    mutate(Count = as.numeric(as.character(Count)))  %>%
    mutate(Count = replace_na(Count, 0))

CambridgeBoundaries2 %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89 100 10
A 73 87 19
B 63 70 21
C 56 61 26
D 50 54 10
E 30 49 11
U 0 29 4

This actually managed to move a few people down a category, so we need to change our boundaries again slightly and reoutput

Letters <- c("A*", "A", "B", "C", "D", "E", "U")
MinVal <- c(89, 73, 60, 53, 47, 27, 0)
MaxVal <- c(100, 87, 70, 59, 52, 43, 26)

CambridgeBoundariesR <- 
  cbind.data.frame(Letters, MinVal, MaxVal) %>%
    setNames(c("Letter", "Bottom", "Top")) 

#Curving and Assigning Letter Boundaries
Paper3TotalsCurvedR2 <-
  Paper3TotalsR %>%
    mutate(`Letter (Revised- Cambridge)` = cut(x= Grade, 
                      breaks = c(CambridgeBoundariesR$Bottom,100), 
                      labels= map_df(CambridgeBoundariesR,rev)$Letter)) %>%
    mutate(`Letter (Revised- Cambridge)` = factor(`Letter (Revised- Cambridge)`, levels =c("U", "E", "D", "C", "B", "A", "A*"))) 


#Plotting Distribution of Letter Grades
CurveTitle2 <- paste(Class, Eval, "Distribution of Grades after Curving (R2)", sep = " - ")
  

ggplot(data=Paper3TotalsCurvedR2, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  
  labs(x="Letter Grade  (Curved)", y="Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle2, width = 45))

ggsave(here(paste(CurveTitle2, ".png", sep = "")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = c("in", "cm", "mm"),
       dpi = 600)


ggplot(data=Paper3TotalsCurvedR2, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = (c(rev(GradeColors[1:7]))  )) +  
  labs(x="Letter Grade  (Curved)", y="Count") +
  theme_alan() +
  facet_wrap(~Teacher, ncol = 1) +
  ggtitle(wrapper(CurveTitle2, width = 45))

CambridgeCounts <- count(Paper3TotalsCurvedR2$`Letter (Revised- Cambridge)`)

CambridgeBoundaries3 <-
  CambridgeBoundaries  %>%
    mutate(Letter = factor(Letter, levels =c("U", "E", "D", "C", "B", "A", "A*")))  %>%
    mutate(Count = plyr::mapvalues(Letter, from = CambridgeCounts$x, to = CambridgeCounts$freq))  %>%
    mutate(Count = as.numeric(as.character(Count)))  %>%
    mutate(Count = replace_na(Count, 0))

CambridgeBoundaries3 %>%
    setNames(c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")) %>%
    knitr::kable(caption = "RDFZ Letter Grade Boundaries", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89 100 10
A 73 87 19
B 63 70 31
C 56 61 16
D 50 54 15
E 30 49 7
U 0 29 3

Looks good to you? Looks good to me?

And I also don’t want to touch it anymore.

So now I just need to assign percentage grades to each individual grade that will be input into engage
These transform our curved letter grades back to the standard grade scheme of RDFZ - e.g. A* = 90, A = 80
And they reintroduce some variation - e.g. here 63, 66, and 70 are all Bs, but this will transform them into the range between 70-80 for RDFZ B grades

## ALAN PLEASE WRITE A BETTER AND PARAMETERIZED WAY TO DO THIS IN THE FUTURE
## SHOULD BE PRETTY EASY WITH A FOR LOOP AT THE VERY LEAST

################################
## A STARs
RevisedAStars <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "A*")
Levels <- levels(factor(RevisedAStars$Grade))
Ranks <- c(1:length(Levels))

RevisedAStars %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 89 + (Increment * Rank))

################################
## As
RevisedAs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "A")
Levels <- levels(factor(RevisedAs$Grade))
Ranks <- c(1:length(Levels))

RevisedAs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 79 + (Increment * Rank))

   
################################
## Bs
RevisedBs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "B")
Levels <- levels(factor(RevisedBs$Grade))
Ranks <- c(1:length(Levels))

RevisedBs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 69 + (Increment * Rank))

   
################################
## Cs
RevisedCs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "C")
Levels <- levels(factor(RevisedCs$Grade))
Ranks <- c(1:length(Levels))

RevisedCs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 59 + (Increment * Rank))

   
################################
## Ds
RevisedDs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "D")
Levels <- levels(factor(RevisedDs$Grade))
Ranks <- c(1:length(Levels))

RevisedDs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 49 + (Increment * Rank))

   
################################
## Es
RevisedEs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "E")
Levels <- levels(factor(RevisedEs$Grade))
Ranks <- c(1:length(Levels))

RevisedEs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 39 + (Increment * Rank))

   
################################
## Us
RevisedUs <- subset(Paper3TotalsCurvedR2, `Letter (Revised- Cambridge)` == "U")
Levels <- levels(factor(RevisedUs$Grade))
Ranks <- c(1:length(Levels))

RevisedUs %<>%
  mutate(Increment = 10 / length(Levels)) %>%
  mutate(Rank = plyr::mapvalues(Grade, from = Levels, to = Ranks)) %>%
  mutate(`Grade (R)` = 29 + (Increment * Rank))


## ALL GRADES RECOMBINED

AllGrades <-
  RevisedAStars %>%
    rbind.data.frame(RevisedAs, RevisedBs, RevisedCs, RevisedDs, RevisedEs, RevisedUs) %>%
    mutate(PercR = round(pmax(Grade, `Grade (R)`)),2) %>%
    subset(select = c(Name, Teacher, sum, Grade, Letter, PercR, `Letter (Revised- Cambridge)`))%>%
      setNames(c("Student", "Teacher", "Raw Score", "Percentage", "Letter", "Percentage (R)", "Letter (R)")) %>%
      arrange(-`Percentage (R)`)


AllGrades  %>%
    knitr::kable(caption = "Mock Exam Curved Grades", row.names = F) %>%
    row_spec(0, bold = T, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 
Mock Exam Curved Grades
Student Teacher Raw Score Percentage Letter Percentage (R) Letter (R)
Caterina Chi Alan 30 100 A* 100 A*
Sophia Xie Alan 30 100 A* 100 A*
Frank Wu Alan 29 97 A* 97 A*
John Zhang Alan 29 97 A* 97 A*
Lavender Bian Alan 29 97 A* 97 A*
Chloie Chen Alan 28 93 A* 94 A*
Lemon Suzana 28 93 A* 94 A*
Steven Mo 27 90 A* 92 A*
Amy Suzana 27 90 A* 92 A*
Chelsea Suzana 27 90 A* 92 A*
Arthur Li Alan 26 87 A 89 A
Verna Xu Alan 26 87 A 89 A
Erica Suzana 26 87 A 89 A
Nina Suzana 26 87 A 89 A
Shuyu Suzana 26 87 A 89 A
Darcy Sun Alan 24 80 A 86 A
Jeremy Yin Alan 24 80 A 86 A
Liz Zheng Alan 24 80 A 86 A
Krystal Mo 24 80 A 86 A
Helena Suzana 24 80 A 86 A
Jolin Suzana 24 80 A 86 A
Kyle Suzana 24 80 A 86 A
Evageline Mo 23 77 B 82 A
Gwenyth Mo 23 77 B 82 A
Jane Mo 23 77 B 82 A
Claire Mo 23 77 B 82 A
Grace Mo 23 77 B 82 A
Sam Mo 23 77 B 82 A
Yun Suzana 23 77 B 82 A
Airlia Zhang Alan 22 73 B 79 B
Esther Mo 22 73 B 79 B
Leo Mo 22 73 B 79 B
Paulin Mo 22 73 B 79 B
Rika Mo 22 73 B 79 B
Xavier Mo 22 73 B 79 B
Bill Han Suzana 22 73 B 79 B
Eileen Suzana 22 73 B 79 B
Bryan Mo 21 70 B 76 B
Kevin Chang Mo 21 70 B 76 B
Florence Suzana 21 70 B 76 B
Zoe Suzana 21 70 B 76 B
Frank Feng Alan 20 67 C 74 B
Owen Liu Alan 20 67 C 74 B
Calvin Mo 20 67 C 74 B
Jaclyn Mo 20 67 C 74 B
Jessica Mo 20 67 C 74 B
Justin Mo 20 67 C 74 B
Makayla Mo 20 67 C 74 B
Miya Mo 20 67 C 74 B
Richard Wang Mo 20 67 C 74 B
Carlo Jiang Alan 19 63 C 72 B
Ilia Lu Alan 19 63 C 72 B
Michael Su Alan 19 63 C 72 B
Camille Mo 19 63 C 72 B
Ella Mo 19 63 C 72 B
Luisa Mo 19 63 C 72 B
Max Mo 19 63 C 72 B
William Mo 19 63 C 72 B
Daphne Suzana 19 63 C 72 B
Leonie Suzana 19 63 C 72 B
Sebastian Hei Alan 18 60 C 69 C
Bob Mo 18 60 C 69 C
Carrie Mo 18 60 C 69 C
Cathy Mo 18 60 C 69 C
Jack Lin Mo 18 60 C 69 C
May Mo 18 60 C 69 C
Robert Mo 18 60 C 69 C
Elena Suzana 18 60 C 69 C
Joe Suzana 18 60 C 69 C
Priscilla Wang Alan 17 57 D 64 C
Daniel Mo 17 57 D 64 C
Lucy Liu Mo 17 57 D 64 C
Michael Mo 17 57 D 64 C
Oliver Mo 17 57 D 64 C
Troye Mo 17 57 D 64 C
Molly Suzana 17 57 D 64 C
Mandy Wang Alan 16 53 D 59 D
Alice Mo 16 53 D 59 D
Anna Mo 16 53 D 59 D
Ellie Mo 16 53 D 59 D
Henry Mo 16 53 D 59 D
Jason Mo 16 53 D 59 D
Sky Mo 16 53 D 59 D
Tony Wu Mo 16 53 D 59 D
Jacky Suzana 16 53 D 59 D
Lisa Lu Suzana 16 53 D 59 D
Shelia Cheng Alan 15 50 D 54 D
Tony Cheng Alan 15 50 D 54 D
Ryan Li Mo 15 50 D 54 D
Thomas Mo 15 50 D 54 D
Wendy Mo 15 50 D 54 D
Franklin Mo 14 47 E 49 E
Joanna Mo 14 47 E 49 E
Lisa Pan Suzana 14 47 E 49 E
George Hao Alan 13 43 E 46 E
Jack Mo 13 43 E 46 E
Aurora Mo 11 37 U 44 E
Ann Suzana 9 30 U 42 E
Benjamin Suzana 5 17 U 39 U
Rex Suzana 5 17 U 39 U
Timon Suzana 2 7 U 34 U

Now we just need to output lists by teacher to send around

Tables

Tables for Engage

##Alan
AllGrades %>%
  subset(Teacher == "Alan") %>%
  separate(Student, into = c("First", "Last"), sep = " ") %>%
  arrange(Last) %>%
    write.csv(here("GPMock-Alan.csv"))


##Mo
AllGrades %>%
  subset(Teacher == "Mo") %>%
  arrange(Student) %>%
    write.csv(here("GPMock-Mo.csv"))

##Suzana
AllGrades %>%
  subset(Teacher == "Suzana") %>%
  arrange(Student) %>%
    write.csv(here("GPMock-Suzana.csv"))


##All
AllGrades %>%
  arrange(Student) %>%
    write.csv(here("GPMock-All.csv"))