Introduction

Exploring the 2021 DOE Middle School Directory data from the New York City Open Data Portal using new packages.

library(tidyverse)
library(knitr)
library(RSocrata)
library(DT)
library(ggblanket)
library(ggcharts)

Methods

Data were downloaded directly from the NYC Open Data Portal using the RSocrata package.

# Pull the 2021 DOE Middle School Directory straight from the Socrata endpoint.
# NOTE(review): later chunks read `mydata`, which is not assigned anywhere in
# the visible source; confirm it is derived from `data` elsewhere.
data <- read.socrata(
  url = "https://data.cityofnewyork.us/resource/f6s7-vytj.csv"
)
## Variable definitions
# coursepassrate = Percent of students who passed core courses
# mathprof = Percent of students proficient in math
# elaprof = Percent of students proficient in ELA

Analysis

Processing data by creating different dataframes, and visualizing data with tables and charts.

district name borough
1 P.S. 034 Franklin D. Roosevelt MANHATTAN
1 P.S. 140 Nathan Straus MANHATTAN
1 P.S. 184m Shuang Wen MANHATTAN
1 P.S. 188 The Island School MANHATTAN
1 University Neighborhood Middle School MANHATTAN
1 School for Global Leaders MANHATTAN
1 East Side Community School MANHATTAN

Ex. Figure 1: District #, school name, borough

# First ten Manhattan schools after sorting by math proficiency (descending),
# showing name, course pass rate, math proficiency and ELA proficiency.
mydata |>
  filter(borough == "MANHATTAN") |>
  select(name, coursepassrate, mathprof, elaprof) |>
  arrange(desc(mathprof)) |>
  slice_head(n = 10) |>
  kable()

Ex. Figure 2: School name, % of students who passed core courses, % of students proficient in math, % of students proficient in ELA

Borough-Level Statistics, Analysis, and Visualization

# Override the borough for two schools, matched by exact school name;
# every other row keeps its existing borough value.
mydata <- mydata |>
  mutate(
    borough = case_when(
      name == "P.S. 046 Arthur Tappan" ~ "MANHATTAN",
      name == "M.S. 935" ~ "BROOKLYN",
      TRUE ~ borough
    )
  )

# Per-borough median and mean of mathprof, ignoring missing values.
borough_stats <- mydata |>
  group_by(borough) |>
  summarise(
    med_mathprof = median(mathprof, na.rm = TRUE),
    avg_mathprof = mean(mathprof, na.rm = TRUE)
  )

# Interactive table of the borough summary, highest median first.
borough_stats |>
  arrange(desc(med_mathprof)) |>
  datatable()

District-Level Statistics, Analysis, and Visualization

# Schools in districts 13 and 2, ordered by math proficiency (highest first),
# shown as an interactive table.
mydata |>
  filter(district %in% c(13, 2)) |>
  select(district, name, mathprof) |>
  arrange(desc(mathprof)) |>
  datatable()
# Per-district median and mean of mathprof, ignoring missing values.
district_stats <- mydata |>
  group_by(district) |>
  summarise(
    med_mathprof = median(mathprof, na.rm = TRUE),
    avg_mathprof = mean(mathprof, na.rm = TRUE)
  )

# Interactive table of the district summary, highest median first.
district_stats |>
  arrange(desc(med_mathprof)) |>
  datatable()

# Horizontal bar chart of the number of schools per district.
# `reorder()` turns district into a factor ordered by school count so the
# bars are sorted by count instead of by district number.
mydata |>
  count(district, name = "nschools") |>
  mutate(district = reorder(district, nschools)) |>
  ggplot(aes(x = district, y = nschools, fill = district)) +
  geom_col(show.legend = FALSE) +
  coord_flip()

# Standardize math proficiency across all schools (z-score) and keep only the
# rows where a z-score could be computed.
# `scale()` returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes; `as.numeric()` strips these so `zscore` is a
# plain numeric column and the `filter()` below receives an ordinary logical
# vector rather than a one-column logical matrix.
mathprof_zscore <- mydata |>
  select(district, borough, name, mathprof) |>
  mutate(zscore = as.numeric(scale(mathprof))) |>
  filter(!is.na(zscore))

# Histogram of z-scores, one facet per borough, limited to the four boroughs
# listed in the filter.
mathprof_zscore |>
  filter(borough %in% c("QUEENS", "MANHATTAN", "BROOKLYN", "BRONX")) |>
  gg_histogram(x = zscore, facet = borough, bins = 12)