top30_pop <- master %>%
arrange(desc(Population)) %>%
slice(1:30) %>%
mutate(Pop_Rank = row_number())
top30_fund <- master %>%
arrange(desc(Total_funding)) %>%
slice(1:30) %>%
mutate(Fund_Rank = row_number())
top30_comparison <- top30_pop %>%
select(State, Population, Pop_Rank) %>%
full_join(
top30_fund %>% select(State, Total_funding, Fund_Rank),
by = "State"
) %>%
mutate(
In_Both = ifelse(!is.na(Pop_Rank) & !is.na(Fund_Rank),
"In Both Top 20", "In One List Only")
)
# ── Separate data with correct ordering for each panel ──
pop_data <- top30_comparison %>%
filter(!is.na(Pop_Rank)) %>%
arrange(desc(Population)) %>%
mutate(State_Label = factor(State, levels = rev(State)))
fund_data <- top30_comparison %>%
filter(!is.na(Fund_Rank)) %>%
arrange(desc(Total_funding)) %>%
mutate(State_Label = factor(State, levels = rev(State)))
# ── Left panel: Population ──
p1 <- ggplot(pop_data,
aes(x = Population / 1e6, y = State_Label,
fill = Population / 1e6)) +
geom_col(width = 0.7,
color = ifelse(pop_data$In_Both == "In One List Only",
"#2A9D8F", NA),
linewidth = ifelse(pop_data$In_Both == "In One List Only",
1.2, 0)) +
scale_fill_gradient(low = "#C9D6E5", high = "#1B2A4A",
guide = "none") +
scale_x_continuous(labels = function(x) paste0(x, "M")) +
labs(x = "Population (Millions)", y = NULL,
title = "Top 30 by Population") +
theme_minimal() +
theme(
plot.title = element_text(face = "plain", size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(color = "grey")
)
# ── Right panel: Funding ──
p2 <- ggplot(fund_data,
aes(x = Total_funding, y = State_Label,
fill = Total_funding)) +
geom_col(width = 0.7,
color = ifelse(fund_data$In_Both == "In One List Only",
"#1B2A4A", NA),
linewidth = ifelse(fund_data$In_Both == "In One List Only",
1.2, 0)) +
scale_fill_gradient(low = "#D3E8E5", high = "#204204",
guide = "none") +
scale_x_continuous(labels = dollar_format(suffix = "B")) +
labs(x = "Funding ($ Billions)", y = NULL,
title = "Top 30 by Funding") +
theme_minimal() +
theme(
plot.title = element_text(face = "plain", size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(color = "grey")
)
grid.arrange(
p1, p2, ncol = 2,
top = grid::textGrob(
"The Most Populated States Get the Biggest\nFunding — With Exceptions",
gp = grid::gpar(
fontsize = 30,
fontface = "bold",
fontfamily = "Helvetica",
col = "black")))
library(ggplot2)
library(dplyr)
library(scales)
# ── Identify the exceptions ──
top30_pop <- master %>%
arrange(desc(Population)) %>%
slice(1:30) %>%
pull(State)
top30_fund <- master %>%
arrange(desc(Total_funding)) %>%
slice(1:30) %>%
pull(State)
pop_only <- setdiff(top30_pop, top30_fund) # Big population, not top 30 funded
fund_only <- setdiff(top30_fund, top30_pop) # Top 30 funded, not big population
# ── Totals for percentage calculations ──
total_us_pop <- sum(master$Population)
total_us_funding <- sum(master$Total_funding)
# ── Build the outlier dataset ──
outlier_df <- master %>%
filter(State %in% c(pop_only, fund_only)) %>%
mutate(
Pct_Pop = (Population / total_us_pop) * 100,
Pct_Funding = (Total_funding / total_us_funding) * 100,
Gap_pp = Pct_Funding - Pct_Pop,
Per_Capita = (Total_funding * 1e9) / Population,
Ratio = Pct_Funding / Pct_Pop,
Group = case_when(
State %in% pop_only ~ "Top 30 Population, Not Funding",
State %in% fund_only ~ "Top 30 Funding, Not Population"
)
) %>%
arrange(Gap_pp) %>%
mutate(State = factor(State, levels = State))
# ── Dumbbell Plot ──
ggplot(outlier_df, aes(y = State)) +
# Connecting line
geom_segment(aes(x = Pct_Pop, xend = Pct_Funding,
y = State, yend = State),
color = "grey40", linewidth = 1.5) +
# Population dot
geom_point(aes(x = Pct_Pop), size = 3, color = "#1B2A4A") +
# Funding dot
geom_point(aes(x = Pct_Funding), size = 3, color = "#0D9488") +
# Gap label above the line
geom_text(aes(x = (Pct_Pop + Pct_Funding) / 2,
label = paste0(ifelse(Gap_pp > 0, "+", ""),
round(Ratio, 2), "x")),
vjust = -1.3, size = 3.2, fontface = "bold",
color = "grey30") +
scale_fill_manual(values = c(
"Top 30 Population, Not Funding" = "#1B2A4A",
"Top 30 Funding, Not Population" = "#0D9488"
), guide = "none") +
scale_x_continuous(labels = function(x) paste0(x, "%"),
expand = expansion(mult = c(0.15, 0.15))) +
labs(
title = "The disparity in funding allows Alasks with only .2% of the \nUS population to receive a disproportionate share of \nfederal funding (8.8times). Other states also follow this pattern, \nreceiveing either more or less funding than their population share",
subtitle = paste0(
"<span style='color:#1B2A4A; font-size:18pt;'>\u25CF</span>",
" Population Share ",
"<span style='color:#0D9488; font-size:18pt;'>\u25CF</span>",
" Funding Share "),
x = "State's Share of U.S. Population and Federal Funding (%)",
y = NULL,
caption = paste0(
"Ratio interpretation: 1.0x = funding matches population share | ",
">1.0x = overfunded | <1.0x = underfunded")
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 20,
color = "#1B2A4A"),
plot.subtitle = element_markdown(size = 12, color = "gray30",
margin = margin(b = 25)),
plot.caption = element_text(size = 11, color = "gray30"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_text(face = "bold", size = 11,
color = "#1B2A4A"),
panel.grid.major.x = element_blank(),
axis.line.x = element_line(color = "grey10", linewidth = 0.5),
axis.line.y = element_line(color = "grey30", linewidth = 0.5),
axis.text.x = element_text(color = "grey20", size = 11),
axis.title.x = element_text(margin = margin(t = 15), color = "grey20"),
)
ggsave("outlier_gap_top30.png", width = 10, height = 7, dpi = 300)
The ratio compares each state’s share of federal funding to its share of the U.S. population. A ratio of 1.0x means a state receives funding proportional to its population size. Above 1.0x indicates the state receives more than its proportional share; below 1.0x means it receives less.”
library(ggplot2)
library(dplyr)
library(scales)
library(ggtext)
# ── Identify the exceptions ──
top30_pop <- master %>%
arrange(desc(Population)) %>%
slice(1:30) %>%
pull(State)
top30_fund <- master %>%
arrange(desc(Total_funding)) %>%
slice(1:30) %>%
pull(State)
pop_only <- setdiff(top30_pop, top30_fund)
fund_only <- setdiff(top30_fund, top30_pop)
# ── National mean per capita ──
national_per_capita <- sum(master$Total_funding * 1e9) / sum(master$Population)
# ── Build the outlier dataset ──
outlier_df <- master %>%
filter(State %in% c(pop_only, fund_only)) %>%
mutate(
Per_Capita = (Total_funding * 1e9) / Population,
Diff_from_Mean = Per_Capita - national_per_capita,
Group = case_when(
State %in% pop_only ~ "Top 30 Population, Not Funding",
State %in% fund_only ~ "Top 30 Funding, Not Population"
)
) %>%
arrange(Per_Capita) %>%
mutate(State = factor(State, levels = State))
# ── Lollipop / Dot Plot with National Mean Line ──
ggplot(outlier_df, aes(x = Per_Capita, y = State)) +
# Shading by group
geom_rect(
aes(fill = Group),
xmin = -Inf, xmax = Inf,
ymin = as.numeric(outlier_df$State) - 0.4,
ymax = as.numeric(outlier_df$State) + 0.4,
alpha = 0.08
) +
# National mean reference line
geom_vline(
xintercept = national_per_capita,
linetype = "solid",
color = "#D97706",
linewidth = .5
) +
# Label for the mean line
annotate("text",
x = national_per_capita,
y = Inf,
label = paste0("National Mean: $",
comma(round(national_per_capita))),
hjust = -0.05, vjust = 1.5,
size = 4, fontface = "bold",
color = "#D97706") +
# Per capita dot
geom_point(aes(color = Group), size = 2.5) +
# Per capita label
geom_text(aes(label = paste0("$", comma(round(Per_Capita)))),
hjust = ifelse(outlier_df$Per_Capita > national_per_capita, -0.3, 1.3),
size = 4, fontface = "bold", color = "grey30") +
scale_color_manual(values = c(
"Top 30 Population, Not Funding" = "#1B2A4A",
"Top 30 Funding, Not Population" = "#1B2A4A"
), guide = "none") +
scale_fill_manual(values = c(
"Top 30 Population, Not Funding" = "#1B2A4A",
"Top 30 Funding, Not Population" = "#1B2A4A"
), guide = "none") +
scale_x_continuous(labels = dollar_format(),
expand = expansion(mult = c(0.15, 0.15))) +
labs(
title = "Alaska tops the list of per capita funding-\n even though it’s not in the top 30 for population",
x = "Federal Funding Per Capita ($)",
y = NULL,
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 26, color = "#1B2A4A"),
plot.subtitle = element_markdown(size = 12, color = "gray30", margin = margin(b = 25)),
plot.caption = element_text(size = 11, color = "gray40"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
axis.text.y = element_text(face = "bold", size = 11, color = "#1B2A4A"),
plot.margin = margin(15, 30, 10, 10),
panel.grid.major.x = element_blank(),
)
ggsave("outlier_percapita_top30.png", width = 10, height = 7, dpi = 300)
map_df <- master %>%
mutate(
region = tolower(State),
Per_Capita = (Total_funding * 1e9) / Population,
PC_Norm = (Per_Capita - min(Per_Capita)) /
(max(Per_Capita) - min(Per_Capita))
)
national_per_capita <- sum(master$Total_funding * 1e9) / sum(master$Population)
states_map <- map_data("state")
map_merged <- states_map %>%
left_join(map_df, by = "region")
state_centers <- map_merged %>%
group_by(region, State_Abbr) %>%
summarise(
long = mean(range(long)),
lat = mean(range(lat)),
.groups = "drop"
)
ggplot(map_merged, aes(x = long, y = lat, group = group,
fill = Party, alpha = PC_Norm)) +
geom_polygon(color = "white", linewidth = 0.3) +
geom_text(data = state_centers,
aes(x = long, y = lat, label = State_Abbr, group = NULL),
size = 3, color = "black", fontface = "bold",
inherit.aes = FALSE) +
coord_map("albers", lat0 = 29.5, lat1 = 45.5) +
scale_fill_manual(values = c("Blue" = "#2166AC",
"Red" = "#B2182B"),
name = "2020 Vote") +
scale_alpha_continuous(range = c(0.3, 1)) +
labs(
title = "How is Per Capita Funding Spread Accross \nThe United States?",
subtitle = paste0(
"Color = party alignment | Darker = higher per capita funding | ",
"National mean: $", comma(round(national_per_capita)), " per person"),
caption = "Source: Federal IIJA Funding & 2020 Election Data"
) +
theme_void(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 55,
color = "#1B2A4A", hjust = .5),
plot.subtitle = element_text(size = 18, color = "gray40",
hjust = 0.5, margin = margin(b = 15)),
plot.caption = element_text(size = 15, color = "gray50",
margin = margin(t = 20)),
legend.position = "bottom",
legend.box = "horizontal",
legend.title = element_text(size = 13, face = "bold"),
legend.text = element_text(size = 13),
plot.margin = margin(10, 10, 10, 10)
) +
guides(alpha = "none")
ggsave("map_percapita_funding.png", width = 14, height = 9, dpi = 300)
What the Map Reveals:
library(ggtext)
library(tidytext)
party_df <- master %>%
mutate(
Per_Capita = (Total_funding * 1e9) / Population,
Party_Label = ifelse(Party == "Blue", "Blue States", "Red States")
) %>%
arrange(Party, Per_Capita) %>%
mutate(State = reorder_within(State, Per_Capita, Party_Label))
national_mean <- sum(master$Total_funding * 1e9) / sum(master$Population)
ggplot(party_df, aes(x = State, y = Per_Capita, fill = Party)) +
geom_col(width = 0.7) +
# National mean reference line
geom_hline(yintercept = national_mean, color = "navy",
linetype = "dashed", linewidth = 0.8) +
# Dollar label at end of each bar
geom_text(aes(label = paste0("$", comma(round(Per_Capita)))),
hjust = -0.1, size = 3.5, fontface = "bold", color = "grey30") +
coord_flip(clip = "off") +
# Stacked vertically, same x scale
facet_wrap(~ Party_Label, ncol = 1, scales = "free_y") +
scale_fill_manual(values = c("Blue" = "#5A9BD5",
"Red" = "#E06666"),
guide = "none") +
scale_x_reordered() +
scale_y_continuous(labels = dollar_format(),
limits = c(0, max(party_df$Per_Capita) * 1.15),
expand = expansion(mult = c(0, 0.05))) +
labs(
title = "Was Funding Allocation Biased Towards Biden States\nas per 2020 Election Results?",
subtitle = paste0(
"Per capita federal funding by state | ",
"Dashed line = national mean ($", comma(round(national_mean)), ")"),
x = NULL,
y = "Federal Funding Per Capita ($)",
caption = "Source: Federal IIJA Funding & 2020 Election Data"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 28,
color = "#1B2A4A"),
plot.subtitle = element_text(size = 16, color = "gray40",
margin = margin(b = 15),
plot.caption = element_text(size = 9, color = "gray50"),
axis.text.y = element_text(size = 8, face = "bold",
color = "#1B2A4A"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
strip.text = element_text(face = "bold", size = 14,
color = "#1B2A4A"),
panel.spacing = unit(1.5, "lines"),
plot.margin = margin(15, 50, 10, 10)))
Key Takeaway:
This project applied exploratory data analysis and visualization techniques to investigate whether federal IIJA funding distribution reveals political bias by examining total funding, per capita allocation, and population-funding ratios across all 50 states. Using dumbbell plots, diverging bar charts, choropleth maps, and scatter plots, the analysis layers multiple perspectives to guide the viewer through a data-driven narrative — from broad national patterns to state-level exceptions. The visualizations demonstrated that per capita funding favors smaller, predominantly Red states, suggesting that formula-based allocation and infrastructure need outweigh political affiliation in determining federal funding distribution.