Learners start with the same level of math knowledge (none), but over time a gap opens up between the strongest and the weakest students.

At some schools the gap grows wide. At others the gap remains narrow. Let’s dig into the trends.


###
#
# Load libraries and data
#
library(dplyr)
library(ggplot2)
# Read the sample CSV export into a data frame; every later step starts
# from this `data` object.
data <- read.csv(
  file = "~/Downloads/Spring 2016 Sample for Data Ninja Competition/sample_istrict_spring_2015-2016_151126559311.csv"
)

###
#
# Clean data
#
# Add short, analysis-friendly aliases next to the raw export columns.
# `growth` is the ratio of observed to projected spring-to-spring growth,
# and `student` is the first and last name joined by a space.
data <- mutate(
  data,
  score = TestRITScore,
  growth = SpringToSpringObservedGrowth / SpringToSpringProjectedGrowth,
  subject = Discipline,
  student = paste(StudentFirstName, StudentLastName),
  grade = Grade,
  school = SchoolName,
  district = DistrictName
)

# Keep only the rows for which a growth ratio could be computed.
data <- filter(data, !is.na(growth))

###
#
# Summarize data
#
# Per-grade math summary across every school in the data (grades up to 6):
# mean growth ratio plus the 75th and 25th percentile scores.
all <- data %>%
  filter(subject == "Mathematics", grade <= 6) %>%
  group_by(grade) %>%
  summarize(
    growth = mean(growth),
    score_75th = quantile(score, probs = 0.75),
    score_25th = quantile(score, probs = 0.25)
  )

# Schools included in the per-school comparison. The commented-out entries
# are kept from the original list to record which schools were left out.
schools_shown <- c(
  # "Andrew High School",
  "Angela School",
  # "Anna Middle School",
  "Anthony Elementary School",
  "Brian Elementary School",
  # "Bryan Middle School",
  "Bruce School",
  "Courtney School",
  "Curtis Elementary School",
  "Dakota Elementary School",
  "Gregory School",
  "Jack School",
  "Jason Elementary School",
  "Kathryn School",
  "Mark Elementary School"
  # "Sheila School",
  # "Steven Middle School"
)

# Per-school, per-grade math summary (grades up to 6) for the schools above:
# mean growth ratio plus the 75th and 25th percentile scores.
#
# This used to be computed twice by two identical copies of the same
# pipeline; a single definition is kept. All filter columns are grouping
# keys, so filtering before aggregating yields the same rows while skipping
# the groups that would be thrown away afterwards.
by_school <- data %>%
  filter(subject == "Mathematics",
         grade <= 6,
         school %in% schools_shown) %>%
  group_by(school, subject, grade) %>%
  summarize(growth = mean(growth),
            score_75th = quantile(score, probs = 0.75),
            score_25th = quantile(score, probs = 0.25))


###
#
# What grade level each score corresponds to
# according to [1]
#
# [1]: https://www.nwea.org/content/uploads/2015/06/2015-MAP-Normative-Data-AUG15.pdf
#
# Convert math scores into fractional grade-level equivalents.
#
# Each anchor score below corresponds to a whole grade level (0 through 9).
# A score between two anchors is linearly interpolated between their grade
# levels. Scores below the first anchor are extrapolated along the first
# segment (so they come out negative), and scores at or above the last
# anchor are capped at 9.
#
# math_score: numeric vector of scores (NA values stay NA).
# Returns a numeric vector of the same length as `math_score`.
math_grade_level_equivilant <- function(math_score) {
  # anchors[i] is the score that maps to grade level i - 1.
  anchors <- c(159.1, 180.8, 192.1, 203.4, 213.5,
               221.4, 225.3, 228.6, 230.9, 233.4)
  top_grade <- length(anchors) - 1

  # Number of anchors at or below each score: 0 when below the first
  # anchor, length(anchors) when at or above the last one.
  bucket <- findInterval(math_score, anchors)

  # Clamp to a real segment so that low scores reuse the first segment's
  # slope; the high end is overwritten with the cap further down.
  seg <- pmin(pmax(bucket, 1), top_grade)
  lower <- anchors[seg]
  upper <- anchors[seg + 1]
  grade_level <- (seg - 1) + (math_score - lower) / (upper - lower)

  # which() skips NA entries, so missing scores are left as NA.
  grade_level[which(bucket > top_grade)] <- top_grade
  grade_level
}
math_grade_level_equivilant(c(185, 200)) # [1] 1.371681 2.699115

###
#
# Show off all
#
# One bar per grade for all schools combined. Bar height is the difference
# in grade-level equivalents between the 75th and 25th percentile scores;
# the rounded value is printed in white just inside the top of each bar.
all %>%
  mutate(
    gap = math_grade_level_equivilant(score_75th) -
      math_grade_level_equivilant(score_25th)
  ) %>%
  ggplot() +
  geom_rect(
    aes(xmin = grade - 0.4, xmax = grade + 0.4, ymin = 0, ymax = gap)
  ) +
  geom_text(
    aes(x = grade, y = gap, label = round(gap, 1)),
    vjust = 1.5,
    color = "white"
  ) +
  coord_cartesian(ylim = c(0, 5)) +
  geom_hline(yintercept = 0, color = "grey") +
  # Minimal theme: no lines or background rects, and no y axis text or title.
  theme(
    line = element_blank(),
    rect = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    legend.text = element_text(size = rel(0.8)),
    legend.title = element_text(hjust = 0),
    strip.text = element_text(size = rel(0.8)),
    plot.margin = unit(c(0, 0, 0, 0), "lines"),
    complete = TRUE
  ) +
  scale_x_continuous(
    breaks = 1:6,
    labels = c("1st", "2nd", "3rd", "4th", "5th", "6th")
  ) +
  scale_y_continuous(breaks = -4:4) +
  ggtitle("\n    Grade level gap between 25th and 75th percentile math scores\n")


In first grade, 75th-percentile students are one grade level ahead of their 25th-percentile peers. By 6th grade, they are four grade levels ahead.

How does this trend change by school?


# Same gap chart as for the combined data, drawn as one panel per school
# (three panels per row). Bar height is the difference in grade-level
# equivalents between the 75th and 25th percentile scores.
by_school %>%
  mutate(
    gap = math_grade_level_equivilant(score_75th) -
      math_grade_level_equivilant(score_25th)
  ) %>%
  ggplot() +
  geom_rect(
    aes(xmin = grade - 0.4, xmax = grade + 0.4, ymin = 0, ymax = gap)
  ) +
  geom_text(
    aes(x = grade, y = gap, label = round(gap, 1)),
    vjust = 1.5,
    color = "white"
  ) +
  facet_wrap(~ school, ncol = 3) +
  coord_cartesian(ylim = c(0, 5)) +
  geom_hline(yintercept = 0, color = "grey") +
  # Minimal theme: no lines or background rects, and no y axis text or title.
  theme(
    line = element_blank(),
    rect = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    legend.text = element_text(size = rel(0.8)),
    legend.title = element_text(hjust = 0),
    strip.text = element_text(size = rel(0.8)),
    plot.margin = unit(c(0, 0, 0, 0), "lines"),
    complete = TRUE
  ) +
  scale_x_continuous(
    breaks = 1:6,
    labels = c("1st", "2nd", "3rd", "4th", "5th", "6th")
  ) +
  scale_y_continuous(breaks = -4:4) +
  ggtitle("\n    Grade level gap between 25th and 75th percentile math scores\n")


A couple of interesting trends stand out.

Some schools manage to keep the gap tight.



At many schools, the gap widens sharply in 5th or 6th grade.