Data from http://reportcard.ospi.k12.wa.us/DataDownload.aspx for the year 2016-2017
We will focus on the percentage of students in each school in Level 4, which is Advanced (exceeding state standard): http://www.k12.wa.us/assessment/StateTesting/FAQ.aspx
# Load the school-level assessment workbook; the relative path is resolved
# against the current working directory.
school_data <- readxl::read_excel(
  path = "2_03_AIM-EOC-MSP-SBA Assessments School (with suppression - new format).xlsx"
)
Inspect the data a little
library(tidyverse)
## -- Attaching packages -------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ----------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Column-by-column overview of the data: names, types and leading values.
school_data %>%
  glimpse()
## Observations: 250,556
## Variables: 35
## $ ESD <chr> "Northwest Educational ...
## $ County <chr> "Snohomish County", "Sn...
## $ District <chr> "Marysville School Dist...
## $ DistrictCode <chr> "31025", "31025", "3102...
## $ School <chr> "10th Street School", "...
## $ SchoolCode <chr> "1656", "1656", "1656",...
## $ schoolYear <chr> "2016-2017", "2016-2017...
## $ testAdministration <chr> "SBA", "SBA", "SBA", "S...
## $ Subject <chr> "ELA", "ELA", "ELA", "E...
## $ GradeLevel <chr> "6th", "6th", "6th", "6...
## $ StudentGroup <chr> "All", "Male", "Female"...
## $ suppressed <chr> "y", "y", "y", "y", "y"...
## $ suppressReason <chr> "Cross Organization: Sc...
## $ countTotalTested_and_NotTested <dbl> 62, 30, 32, 1, 8, 43, 1...
## $ countMetStandardIncludingPP <chr> NA, NA, NA, NA, NA, "29...
## $ PercentMetStandardIncludingPP <chr> NA, NA, NA, NA, NA, "67...
## $ countMetStandardWithoutPP <dbl> NA, NA, NA, NA, NA, 29,...
## $ PercentMetStandardWithoutPP <dbl> NA, NA, NA, NA, NA, 67....
## $ percentMeetingStandardExcludingNoScore <dbl> NA, NA, NA, NA, NA, 70....
## $ countLevel4 <dbl> NA, NA, NA, NA, NA, 9, ...
## $ PercentLevel4 <dbl> NA, NA, NA, NA, NA, 20....
## $ countLevel3 <dbl> NA, NA, NA, NA, NA, 20,...
## $ PercentLevel3 <dbl> NA, NA, NA, NA, NA, 46....
## $ countLevelBasic <dbl> NA, NA, NA, NA, NA, 0, ...
## $ PercentLevelBasic <dbl> NA, NA, NA, NA, NA, 0, ...
## $ countLevel2 <dbl> NA, NA, NA, NA, NA, 9, ...
## $ PercentLevel2 <dbl> NA, NA, NA, NA, NA, 20....
## $ countLevel1 <dbl> NA, NA, NA, NA, NA, 3, ...
## $ PercentLevel1 <dbl> NA, NA, NA, NA, NA, 6.9...
## $ countNoScore <dbl> NA, NA, NA, NA, NA, 2, ...
## $ percentNoScore <dbl> NA, NA, NA, NA, NA, 4.6...
## $ countNotMet <dbl> NA, NA, NA, NA, NA, 14,...
## $ PercentNotMet <dbl> NA, NA, NA, NA, NA, 32....
## $ countExcusedAbsence <lgl> NA, NA, NA, NA, NA, NA,...
## $ countExempted <dbl> NA, NA, NA, NA, NA, NA,...
# Which subjects are tested? Count the rows for each Subject value.
# Biology, ELA, Math/MATH, Science
table(school_data[["Subject"]])
##
## Biology ELA Math MATH Science
## 8120 105460 15548 90617 30811
# Is there more than one school year, i.e. longitudinal data?
# no
table(school_data[["schoolYear"]])
##
## 2016-2017
## 250556
# Which test administrations appear in the data?
# AIM EOC MSP SBA
table(school_data[["testAdministration"]])
##
## AIM EOC MSP SBA
## 35512 8120 26396 180528
Take a look at 6th grade: how many school-level rows are there for each subject?
# Tabulate Subject over the 6th-grade rows of the "All" student group.
school_data %>%
  filter(
    StudentGroup == "All" &
      GradeLevel == "6th" &
      schoolYear == "2016-2017"
  ) %>%
  pull(Subject) %>%
  table()
## .
## ELA Math MATH
## 1102 273 829
Let’s look at how the scores vary for ELA and Maths in 6th and 7th grade in the five biggest districts:
# SBA Level 4 percentages for 6th and 7th grade ("All" student group) in five
# selected districts. Rows with a missing PercentLevel4 are dropped, and only
# the columns needed for plotting are kept.
sixth_seventh_grade_ELA_maths <-
  school_data %>%
  filter(
    testAdministration == "SBA",
    schoolYear == "2016-2017",
    StudentGroup == "All",
    GradeLevel %in% c("6th", "7th"),
    District %in% c(
      "Seattle Public Schools",
      "Spokane School District",
      "Tacoma School District",
      "Lake Washington School District",
      "Edmonds School District"
    ),
    !is.na(PercentLevel4)
  ) %>%
  select(District, School, Subject, GradeLevel, PercentLevel4)

# Print the resulting tibble.
sixth_seventh_grade_ELA_maths
## # A tibble: 325 x 5
## District School Subject GradeLevel PercentLevel4
## <chr> <chr> <chr> <chr> <dbl>
## 1 Spokane School District Adams Element~ ELA 6th 34.0
## 2 Spokane School District Adams Element~ MATH 6th 29.1
## 3 Seattle Public Schools Aki Kurose Mi~ ELA 6th 10.1
## 4 Seattle Public Schools Aki Kurose Mi~ ELA 7th 10.6
## 5 Seattle Public Schools Aki Kurose Mi~ MATH 6th 23.0
## 6 Seattle Public Schools Aki Kurose Mi~ MATH 7th 13.4
## 7 Edmonds School District Alderwood Mid~ ELA 7th 15.9
## 8 Edmonds School District Alderwood Mid~ MATH 7th 22.7
## 9 Tacoma School District Angelo Giaudr~ ELA 6th 7.10
## 10 Tacoma School District Angelo Giaudr~ ELA 7th 11.2
## # ... with 315 more rows
Here are some interactive plots for comparing the schools and districts; you can mouse over the points to see the school names.
This one is for ELA:
library(ggbeeswarm)
# Beeswarm plot of the ELA rows: districts on the x axis (wrapped to 15
# characters and ordered via reorder() on PercentLevel4), Level 4 percentage on
# the y axis, and one facet row per grade level.
sixth_seventh_grade_ELA_plot <-
  sixth_seventh_grade_ELA_maths %>%
  filter(Subject == "ELA") %>%
  ggplot(aes(
    x = reorder(str_wrap(District, 15), PercentLevel4),
    y = PercentLevel4,
    label = School
  )) +
  geom_beeswarm(cex = 2.5) +
  facet_wrap(~ GradeLevel, ncol = 1) +
  theme_minimal() +
  labs(
    x = "",
    y = "% in Level 4 for 6th & 7th grade ELA",
    title = "Five WA State School Districts' Percentage in Level 4 ELA, SBA test",
    subtitle = "Data from http://reportcard.ospi.k12.wa.us/DataDownload.aspx"
  )

# Render the ggplot as an interactive plotly widget.
plotly::ggplotly(sixth_seventh_grade_ELA_plot)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`
This one is for maths:
# Beeswarm plot of the MATH rows: districts on the x axis (wrapped to 15
# characters and ordered via reorder() on PercentLevel4), Level 4 percentage on
# the y axis, and one facet row per grade level.
sixth_seventh_grade_MATH_plot <-
  sixth_seventh_grade_ELA_maths %>%
  filter(Subject == "MATH") %>%
  ggplot(aes(
    x = reorder(str_wrap(District, 15), PercentLevel4),
    y = PercentLevel4,
    label = School
  )) +
  geom_beeswarm(cex = 2.5) +
  facet_wrap(~ GradeLevel, ncol = 1) +
  theme_minimal() +
  labs(
    x = "",
    y = "% in Level 4 for 6th & 7th grade Math",
    title = "Five WA State School Districts' Percentage in Level 4 Math, SBA test",
    subtitle = "Data from http://reportcard.ospi.k12.wa.us/DataDownload.aspx"
  )

# Render the ggplot as an interactive plotly widget.
plotly::ggplotly(sixth_seventh_grade_MATH_plot)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`