This dashboard charts data from the Utah Department of Education fo rthe years 2018 - 2023. The data focuses on the “Free & Reduced Lunch” program in Utah schools, considering free lunch to be a proxy for poverty in a community.
# I paste some code in here, maybe to identify all of the libraries I need to use and then to read in the data and to report some details about the data.
library(data.table)
library(lubridate)
library(DescTools)
library(ggplot2)
library(scales)
library(ggthemes)
library(plyr)
library(dplyr)
library(ggrepel)
library(plotly)
library(htmlwidgets)
library(cowplot)
library(flexdashboard)
library(stringr)
library(tidyr)
library(leaflet)
library(sqldf)
library(reshape2)
library(paletteer)
library(stringi)
setwd("/Users/darylacumen/Dropbox/Loyola/Loyola_SP25/DS736/R/")
# ------------------------ Data Prep -----------------------------
lunch_df <- fread("R_datafiles//school lunch data (2019-2023).csv")
schools_df <- fread("R_datafiles//schools_master.csv")
schools_df <- separate(schools_df,
map,
c('lat', 'long'),
sep = ',',
remove = TRUE)
schools_df$district_id <- ifelse(nchar(schools_df$district_id) == 1, paste0("0", schools_df$district_id), schools_df$district_id)
schools_df$site_number <- paste0(schools_df$district_id, "-", schools_df$school_id)
schools_master_df <- sqldf("select a.private, a.charter, a.grades, a.lat, a.long,
b.year, b.district, b.school, b.enrollment, b.free, b.reduced
from schools_df a inner join lunch_df b on a.site_number = b.site_number")
schools_master_df$lat <- as.numeric(schools_master_df$lat)
schools_master_df$long <- as.numeric(schools_master_df$long)
schools_master_df$year <- as.factor(schools_master_df$year)
schools_master_df$enrollment <- as.numeric(schools_master_df$enrollment)
schools_master_df$free <- str_replace(schools_master_df$free, "%", "")
schools_master_df$reduced <- str_replace(schools_master_df$reduced, "%", "")
schools_master_df$free <- as.numeric(schools_master_df$free)
schools_master_df$reduced <- as.numeric(schools_master_df$reduced)
schools_master_df <- schools_master_df %>%
mutate(free_kids = enrollment * free / 100) %>%
mutate(reduced_kids = enrollment * reduced / 100) %>%
data.frame()
schools_2023 <- schools_master_df %>%
filter(year == '2023')
# ---------------------Map----------------------------
my_colors <- paletteer_c("ggthemes::Red-Green-Gold Diverging", 30, direction = -1)
pal <- colorNumeric(c(my_colors) , 1:50)
# ---------------------------stacked bar chart--------------------------------
schools_stacked <- schools_2023 %>%
drop_na(enrollment) %>%
filter(enrollment >= 1000) %>%
select(district, enrollment, free_kids, reduced_kids) %>%
mutate(full_kids = enrollment - (free_kids + reduced_kids)) %>%
group_by(district) %>%
summarise(free = sum(free_kids),
reduced = sum(reduced_kids),
full = sum(full_kids),
.groups = "keep") %>%
data.frame()
melted_schools_stacked <- reshape2::melt(schools_stacked,id=c("district"),
variable.name = "lunch",
value.name = "students")
# -----------------------nested donuts-------------------------------------
school_grades <- fread("R_datafiles//grade_lookup.csv")
schools_master_grades_df <- sqldf("select a.grades, a.Pre, a.Elementary, a.Middle, a.High,
b.year, b.district, b.school, b.enrollment, b.free_kids, b.reduced_kids
from school_grades a inner join schools_master_df b on a.grades = b.grades")
melted_schools_grades <- reshape2::melt(schools_master_grades_df,id=c("Pre", "Elementary", "Middle", "High"),
variable.name = "lunch",
value.name = "students")
school_by_grades <- schools_master_grades_df %>%
filter(year == 2023) %>%
select(Pre, Elementary, Middle, High, enrollment, free_kids, reduced_kids) %>%
drop_na(enrollment) %>%
mutate(enrollment = enrollment - free_kids - reduced_kids) %>%
rename(Full = enrollment, Free = free_kids, Reduced = reduced_kids) %>%
data.frame()
melted_schools_grades <- reshape2::melt(school_by_grades,id=c("Pre", "Elementary", "Middle", "High"),
variable.name = "lunch",
value.name = "students")
school_by_grades_pre <- melted_schools_grades %>%
filter(Pre == "Pre") %>%
group_by(lunch) %>%
summarise(students = sum(students),
.groups = "keep") %>%
data.frame()
school_by_grades_elem <- melted_schools_grades %>%
filter(Elementary == "Elementary") %>%
group_by(lunch) %>%
summarise(students = sum(students),
.groups = "keep") %>%
data.frame()
school_by_grades_mid <- melted_schools_grades %>%
filter(Middle == "Middle") %>%
group_by(lunch) %>%
summarise(students = sum(students),
.groups = "keep") %>%
data.frame()
school_by_grades_high <- melted_schools_grades %>%
filter(High == "High") %>%
group_by(lunch) %>%
summarise(students = sum(students),
.groups = "keep") %>%
data.frame()
# -------------------------line chart-------
school_by_year <- schools_master_grades_df %>%
select(year, free_kids, reduced_kids) %>%
drop_na(free_kids) %>%
rename(Free = free_kids, Reduced = reduced_kids) %>%
data.frame()
melted_schools_year <- reshape2::melt(school_by_year,id="year",
variable.name = "lunch",
value.name = "students")
agg_schools_year <- melted_schools_year %>%
filter(year != "2018") %>%
group_by(year, lunch) %>%
summarise(students = sum(students),
.groups = "keep") %>%
data.frame()
# ------------------------with math-----------------------
my_colors <- paletteer_c("grDevices::Magenta", 30, direction = -1)
pal <- colorNumeric(c(my_colors) , 1:3500)
schools_math_df <- fread("R_datafiles//schools_math.csv")
schools_math_master_df <- sqldf("select c.math, a.private, a.charter, a.grades, a.lat, a.long,
b.year, b.district, b.school, b.enrollment, b.free, b.reduced
from schools_df a inner join lunch_df b on a.site_number = b.site_number
inner join schools_math_df c on a.School = c.school")
schools_math_kpi <- schools_math_master_df %>%
filter(year == "2023") %>%
select(math, school, enrollment, free, grades) %>%
mutate(top_grade = as.numeric(substr(grades, nchar(grades)-1, nchar(grades)))) %>%
mutate(type = ifelse(top_grade <= 6, "Elementary",
ifelse(top_grade <= 9, "Middle",
ifelse(top_grade > 9, "High", "Pre")))) %>%
drop_na(grades, math, free, type) %>%
mutate(free = str_remove(free, "%")) %>%
mutate(free = as.numeric(free)) %>%
mutate(math = as.numeric(math)) %>%
mutate(enrollment = as.numeric(enrollment)) %>%
mutate(school = stri_trans_general(school, id = "Title")) %>%
data.frame()
This first chart shows poverty levels by school across the Wasatch Front. The chart allows you to zoom in and out and scroll to different communities across Utah. We see a few patterns:
Note: My color palette (which is BEAUTIFUL in R-Studio) did not translate into HTML and I don’t have the patience to debug it. >:-(
school_map <- leaflet() %>%
addProviderTiles(providers$OpenStreetMap) %>%
setView(lat = 40.47593478839567, lng= -111.83767829999998, zoom = 9) %>%
addCircles(
lng = schools_2023$long,
lat = schools_2023$lat,
opacity = 10,
color = pal(ifelse(schools_2023$free >= 50, 50, schools_2023$free)),
popup = paste0(schools_2023$school,": ", schools_2023$free, "%"),
radius = schools_2023$enrollment^(.9)
)
school_map
This chart visually shows poverty rates by school district. We can see here that the Alpine School District has nearly as many children receiving free and reduced lunch as the Granite School District, even though the Alpine School District has nearly twice as many kids.
Note: I did not place numbers on the bars here. My professor will disagree but without interactivity I think they just cluttered the chart.
ggplot(melted_schools_stacked, aes(x=reorder(district, students, sum), y=students, fill = lunch)) +
geom_bar(stat="identity", position = position_stack(reverse = TRUE)) +
coord_flip() +
theme(plot.title = element_text(hjust = 0.5)) +
labs(title = "Lunch status by District", x="", y="Number of Students", fill="Year") +
theme_light()
In this chart I compare free and reduced lunch ratios across the state by grade level. The chart is pretty useless because it shows that subsidized lunch rates change little across grade levels. There is a reduction at the High School level, but that could be because some high schoolers think they’re too cool to eat a healthy lunch (…my kids for example). ;-)
fig3 <- plot_ly(hole=.7) %>%
layout(title="Lunch status by Grade Level") %>%
add_trace(data = school_by_grades_high,
labels = ~lunch,
values = ~students,
type = "pie",
textposition = "inside",
hovertemplate = paste0("High School - ", school_by_grades_pre$lunch, ": ", comma(school_by_grades_pre$students), "<br><extra></extra>")) %>%
add_trace(data = school_by_grades_mid,
labels = ~lunch,
values = ~students,
type = "pie",
textposition = "inside",
hovertemplate = paste0("Middle School - ", school_by_grades_mid$lunch, ": ", comma(school_by_grades_mid$students), "<br><extra></extra>"),
domain = list(
x = c(0.16, 0.84),
y = c(0.16, 0.84))) %>%
add_trace(data = school_by_grades_elem,
labels = ~lunch,
values = ~students,
type = "pie",
textposition = "inside",
hovertemplate = paste0("Elementary School - ", school_by_grades_elem$lunch, ": ", comma(school_by_grades_elem$students), "<br><extra></extra>"),
domain = list(
x = c(0.27, 0.73),
y = c(0.27, 0.73))) %>%
add_trace(data = school_by_grades_pre,
labels = ~lunch,
values = ~students,
type = "pie",
textposition = "inside",
hovertemplate = paste0("Preschool - ", school_by_grades_pre$lunch, ": ", comma(school_by_grades_high$students), "<br><extra></extra>"),
domain = list(
x = c(0.35, 0.65),
y = c(0.35, 0.65)))
fig3
This chart shows the growth in subsidized lunches before, during and after COVID-19. I’m charting absolute numners, so this is not very useful.
ggplot(agg_schools_year, aes(x=year, y=students, group=lunch)) +
geom_line(aes(color=lunch), size=3) +
labs(title = "Lunches by Year", x = "Year", y = "Students") +
theme_light() +
theme(plot.title = element_text(hjust = 0.5)) +
geom_point(shape = 21, size = 5, color = "black", fill = "white") +
scale_y_continuous(labels = comma) +
scale_color_brewer(palette = "Paired", name = "Year", guide = guide_legend(reverse = TRUE))
This final chart, which is much more insightful when scaled properly, plots the free lunch population by math achievemt rates per school. Colors differentiate grade level and school size is represented by the radius of each circle.
With this chart you can see a clear inverse-correlation between subsidized lunch percentage (proxy for poverty) and math achievement. You can also see that math achievement declines with grade level (math gets harder in middle and high school).
I have called out several outlier schools in this chart. These are schools with relatively high poverty rates (as measured by subsidized lunch percentage) but with above average math achievement levels. Heritage Elementary School in Ogden is a clear outlier here, having a free lunch rate above 90% but a math achievement rate near 65%. I called the school to inquire what they were doing differently and the cousnelors attribute the performance to extraordinary teachers.
ggplot(schools_math_kpi, aes(x=math, y=free, size = enrollment, color = type)) +
geom_point(alpha=0.3) +
scale_size(range = c(.3, 18), name="range") +
labs(title = "Math scores by Lunch Status", x = "Math Achievement Scores", y = "Percentage of Students receiving Free Lunch", color = "School Type") +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(plot.title = element_text(hjust = 0.5), text=element_text(size=18), legend.key.size = unit(2, 'cm')) +
geom_label_repel(aes(label = ifelse((math > 50 & free > 60), school, "")),
box.padding = 1,
point.padding = 1,
size = 5,
color = "Grey50",
segment.color = "darkblue")
Of these five charts, the most effective is #5. The palette failure in chart #1 renders that chart useless, but (scalign issues aside) chart #5 highlights several schools whose ability to achieve extraordinary math proficiency with students in poorer communities (i.e.: “Stand and Deliver”) can serve as a guide for statewide education policy. I shared this chart in its original, properly scaled form with several state legislators, the Governor and the state Auditor. They were all impressed.