Is IIJA Funding Equitable? A Data-Driven Investigation

Analyzing the Infrastructure Investment and Jobs Act Across 50 States

Candace Grant

2026-02-13

Agenda & Research Questions

Goal 1: Is IIJA funding distributed equitably across the states?

Goal 2: Was IIJA funding politically biased toward states that voted for Biden in 2020?

# Research Question
Q1 How is funding distributed geographically across the states?
Q2 Are the top 20 most populated states also the top 20 most funded?
Q3 Did the bottom 20 populated states also receive the lowest funding?
Q4 Does mean per-capita funding differ between red and blue states?
Q5 Is there a statistically significant bias in per-capita funding by party?

Data Preparation | Loading, Cleaning & Merging Three Datasets

Loading and Inspecting the Raw Datasets

# ---- Load Required Libraries ----

library(readxl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(scales)
library(maps)
library(stringr)
library(knitr)
library(gridExtra)
library(maps)
library(mapproj)   # <-- add this line right after library(maps)
# ---- Load Raw Datasets ----
funding_raw  <- read_excel("Dataset_Funding.xlsx")
pop_raw      <- read_excel("Dataset_Population.xlsx")
votes_raw    <- read_excel("Votes_2020.xlsx")
cat("Funding dataset:", dim(funding_raw), "\n")
## Funding dataset: 57 2
cat("Population dataset:", dim(pop_raw), "\n")
## Population dataset: 66 43
cat("Votes dataset:", dim(votes_raw), "\n")
## Votes dataset: 58 7

Standardizing State Names for Seamless Merging

# ---- Clean Funding Dataset ----
# Rename Jurisdiction to 'State' and standardize to title case
funding <- funding_raw %>%
  rename(State = Jurisdiction) %>%
  mutate(State = str_to_title(State)) %>%
  # Fix the misspelling in the original data
  mutate(State = ifelse(State == "Deleware", "Delaware", State)) %>%
  # Filter to 50 states + DC (remove territories and tribal)
  filter(!State %in% c("American Samoa", "Guam",
                        "Northern Mariana Islands",
                        "Puerto Rico", "Us Virgin Islands",
                        "Tribal Communities"))

cat("Funding: Cleaned to", nrow(funding), "jurisdictions\n")
## Funding: Cleaned to 51 jurisdictions
head(funding, 5)
## # A tibble: 5 × 2
##   State      Total_funding
##   <chr>              <dbl>
## 1 Alabama              3  
## 2 Alaska               3.7
## 3 Arizona              3.5
## 4 Arkansas             2.8
## 5 California          18.4

Cleaning Population Data — Extracting State-Level Records

# ---- Clean Population Dataset ----
# Filter to state-level summaries only (SUMLEV == 40)
# Rename NAME to 'State' and select relevant columns
population <- pop_raw %>%
  filter(SUMLEV == 40) %>%
  rename(State = NAME) %>%
  select(State, Population = POPESTIMATE2020) %>%
  # Remove Puerto Rico to align with funding (50 states + DC)
  filter(State != "Puerto Rico")

cat("Population: Cleaned to", nrow(population), "jurisdictions\n")
## Population: Cleaned to 51 jurisdictions
head(population, 5)
## # A tibble: 5 × 2
##   State      Population
##   <chr>           <dbl>
## 1 Alabama       5032962
## 2 Alaska         732906
## 3 Arizona       7186647
## 4 Arkansas      3014399
## 5 California   39527808

Cleaning Votes Data — Mapping Abbreviations to Full State Names

# ---- Clean Votes Dataset ----
# The votes dataset has a multi-row header; extract and restructure
votes_clean <- votes_raw %>%
  slice(3:53) %>%  # Rows 3-53 contain the 50 states + DC
  setNames(c("State_Abbr", "Biden_EV", "Trump_EV",
             "Biden_Popular", "Trump_Popular",
             "Others_Popular", "Total_Vote")) %>%
  mutate(
    # Remove asterisks from ME* and NE*
    State_Abbr = str_remove(State_Abbr, "\\*"),
    Biden_EV = as.numeric(Biden_EV),
    Trump_EV = as.numeric(Trump_EV),
    Biden_Popular = as.numeric(Biden_Popular),
    Trump_Popular = as.numeric(Trump_Popular),
    Total_Vote = as.numeric(Total_Vote)
  )

Mapping State Abbreviations to Full Names

# ---- Create State Abbreviation to Full Name Mapping ----
state_lookup <- data.frame(
  State_Abbr = c("AL","AK","AZ","AR","CA","CO","CT","DE","DC","FL",
                 "GA","HI","ID","IL","IN","IA","KS","KY","LA","ME",
                 "MD","MA","MI","MN","MS","MO","MT","NE","NV","NH",
                 "NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI",
                 "SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY"),
  State = c("Alabama","Alaska","Arizona","Arkansas","California",
            "Colorado","Connecticut","Delaware",
            "District Of Columbia","Florida",
            "Georgia","Hawaii","Idaho","Illinois","Indiana",
            "Iowa","Kansas","Kentucky","Louisiana","Maine",
            "Maryland","Massachusetts","Michigan","Minnesota",
            "Mississippi","Missouri","Montana","Nebraska","Nevada",
            "New Hampshire","New Jersey","New Mexico","New York",
            "North Carolina","North Dakota","Ohio","Oklahoma",
            "Oregon","Pennsylvania","Rhode Island",
            "South Carolina","South Dakota","Tennessee","Texas",
            "Utah","Vermont","Virginia","Washington",
            "West Virginia","Wisconsin","Wyoming"),
  stringsAsFactors = FALSE
)

# Join to get full state names and determine winner
votes <- votes_clean %>%
  left_join(state_lookup, by = "State_Abbr") %>%
  mutate(
    Winner = case_when(
      !is.na(Biden_EV) & (is.na(Trump_EV) | Biden_EV > 0) ~ "Biden",
      !is.na(Trump_EV) & (is.na(Biden_EV) | Trump_EV > 0) ~ "Trump",
      TRUE ~ NA_character_
    ),
    Party = ifelse(Winner == "Biden", "Blue", "Red")
  ) %>%
  select(State, State_Abbr, Biden_EV, Trump_EV, Winner, Party,
         Biden_Popular, Trump_Popular, Total_Vote)

head(votes, 5)
## # A tibble: 5 × 9
##   State    State_Abbr Biden_EV Trump_EV Winner Party Biden_Popular Trump_Popular
##   <chr>    <chr>         <dbl>    <dbl> <chr>  <chr>         <dbl>         <dbl>
## 1 <NA>     <NA>             NA       NA <NA>   <NA>             NA            NA
## 2 Alabama  AL               NA        9 Trump  Red          849624       1441170
## 3 Alaska   AK               NA        3 Trump  Red          153778        189951
## 4 Arizona  AZ               11       NA Biden  Blue        1672143       1661686
## 5 Arkansas AR               NA        6 Trump  Red          423932        760647
## # ℹ 1 more variable: Total_Vote <dbl>

Merging Into a Master Analytical Dataset

# ---- Merge All Three Datasets ----
# Fix DC name mismatch between funding ("District Of Columbia")
# and population ("District of Columbia")
funding <- funding %>%
  mutate(State = ifelse(State == "District Of Columbia",
                        "District of Columbia", State))

votes <- votes %>%
  mutate(State = ifelse(State == "District Of Columbia",
                        "District of Columbia", State))

# Merge funding + population + votes
master <- funding %>%
  inner_join(population, by = "State") %>%
  inner_join(votes, by = "State") %>%
  mutate(
    # Convert funding from billions to dollars
    Funding_Dollars = Total_funding * 1e9,
    # Calculate per-capita funding
    Funding_Per_Capita = Funding_Dollars / Population
  )

cat("Master dataset:", nrow(master), "rows x", ncol(master), "cols\n")
## Master dataset: 50 rows x 13 cols

Master Dataset Preview — The Foundation of Our Analysis

master %>%
  select(State, Total_funding, Population, Party,
         Funding_Per_Capita) %>%
  arrange(desc(Total_funding)) %>%
  head(10) %>%
  kable(caption = "Top 10 States by Total IIJA Funding (Billions $)",
        digits = 2, format.args = list(big.mark = ","))
Top 10 States by Total IIJA Funding (Billions $)
State Total_funding Population Party Funding_Per_Capita
California 18.4 39,527,808 Blue 465.50
Texas 14.2 29,237,895 Red 485.67
New York 10.1 20,122,262 Blue 501.93
Illinois 8.4 12,795,348 Blue 656.49
Florida 8.2 21,591,325 Red 379.78
Pennsylvania 8.1 12,995,576 Blue 623.29
Ohio 6.6 11,798,534 Red 559.39
Michigan 5.2 10,072,294 Blue 516.27
New Jersey 5.1 9,270,476 Blue 550.13
Georgia 5.0 10,732,595 Blue 465.87

Q1: The Geographic Landscape | How Is IIJA Funding Distributed Across the States?

Q1: A Choropleth Reveals the Geographic Story of IIJA Dollars

# ---- Q1: Choropleth Map of Funding by State ----
# Prepare map data
us_map <- map_data("state")

# Create lowercase state names for joining with map data
map_funding <- master %>%
  mutate(region = tolower(State)) %>%
  select(region, Total_funding)

# Merge with map coordinates
map_merged <- us_map %>%
  left_join(map_funding, by = "region")

ggplot(map_merged, aes(x = long, y = lat, group = group,
                       fill = Total_funding)) +
  geom_polygon(color = "white", linewidth = 0.3) +
  scale_fill_gradient(low = "#FFF3E0", high = "#BF360C",
                      name = "Funding\n($ Billions)",
                      labels = dollar_format(suffix = "B"),
                      na.value = "grey85") +
  coord_map("albers", lat0 = 39, lat1 = 45) +
  labs(
    title = "IIJA Funding Distribution Across the Continental U.S.",
    subtitle = "Darker intensity = higher funding allocation",
    caption = "Source: White House IIJA Fact Sheets"
  ) +
  theme_void() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, color = "gray40"),
    legend.position = "right"
  )

Q1 Insight: Funding Concentrates in a Few Large States

# Summary statistics
cat("Summary of Total Funding ($ Billions):\n")
## Summary of Total Funding ($ Billions):
summary(master$Total_funding)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.7518  1.8000  2.8500  3.7859  4.2250 18.4000
cat("\nStandard Deviation:", round(sd(master$Total_funding), 2), "B\n")
## 
## Standard Deviation: 3.35 B
cat("Coefficient of Variation:",
    round(sd(master$Total_funding) / mean(master$Total_funding) * 100, 1),
    "%\n")
## Coefficient of Variation: 88.5 %
# Top 5 vs Bottom 5
cat("\nTop 5 states receive $",
    sum(sort(master$Total_funding, decreasing = TRUE)[1:5]), "B\n")
## 
## Top 5 states receive $ 59.3 B
cat("Bottom 5 states receive $",
    sum(sort(master$Total_funding)[1:5]), "B\n")
## Bottom 5 states receive $ 4.4959 B

Q2: Population vs. Funding | Are the Top 20 Most Populated States Also the Top 20 Most Funded?

Q2: Mirrored Bar Chart — Population Rank vs. Funding Rank

# ---- Q2: Top 20 by Population vs Top 20 by Funding ----
top20_pop <- master %>%
  arrange(desc(Population)) %>%
  slice(1:20) %>%
  mutate(Pop_Rank = row_number())

top20_fund <- master %>%
  arrange(desc(Total_funding)) %>%
  slice(1:20) %>%
  mutate(Fund_Rank = row_number())

# Combine into one comparison dataset
top20_comparison <- top20_pop %>%
  select(State, Population, Pop_Rank) %>%
  full_join(
    top20_fund %>% select(State, Total_funding, Fund_Rank),
    by = "State"
  ) %>%
  mutate(
    In_Both = ifelse(!is.na(Pop_Rank) & !is.na(Fund_Rank),
                     "In Both Top 20", "In One List Only"),
    State_Label = State
  )

# Create ordered factor for display
top20_comparison <- top20_comparison %>%
  arrange(desc(Population)) %>%
  mutate(State_Label = factor(State_Label,
                              levels = rev(State_Label)))

# Side-by-side: Population (left) | Funding (right)
p1 <- ggplot(top20_comparison %>% filter(!is.na(Pop_Rank)),
             aes(x = Population / 1e6, y = State_Label,
                 fill = In_Both)) +
  geom_col(width = 0.7) +
  scale_fill_manual(values = c("In Both Top 20" = "#1565C0",
                               "In One List Only" = "#FF8F00")) +
  scale_x_continuous(labels = function(x) paste0(x, "M")) +
  labs(x = "Population (Millions)", y = NULL,
       title = "Top 20 by Population") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title = element_text(face = "bold", size = 11))

p2 <- ggplot(top20_comparison %>% filter(!is.na(Fund_Rank)) %>%
               mutate(State_Label = factor(State,
                        levels = levels(top20_comparison$State_Label))),
             aes(x = Total_funding, y = State_Label,
                 fill = In_Both)) +
  geom_col(width = 0.7) +
  scale_fill_manual(values = c("In Both Top 20" = "#1565C0",
                               "In One List Only" = "#FF8F00")) +
  scale_x_continuous(labels = dollar_format(suffix = "B")) +
  labs(x = "Funding ($ Billions)", y = NULL,
       title = "Top 20 by Funding") +
  theme_minimal() +
  theme(legend.position = "bottom",
        plot.title = element_text(face = "bold", size = 11))

grid.arrange(p1, p2, ncol = 2,
             top = grid::textGrob(
               "Q2: Do Population Giants Also Dominate Funding?",
               gp = grid::gpar(fontsize = 15, fontface = "bold")))

Q2: Overlap Analysis — Quantifying the Match

overlap_states <- intersect(top20_pop$State, top20_fund$State)
only_pop   <- setdiff(top20_pop$State, top20_fund$State)
only_fund  <- setdiff(top20_fund$State, top20_pop$State)

cat("States in BOTH Top 20 lists:", length(overlap_states), "out of 20\n")
## States in BOTH Top 20 lists: 17 out of 20
cat("Overlap:", paste(overlap_states, collapse = ", "), "\n\n")
## Overlap: California, Texas, Florida, New York, Pennsylvania, Illinois, Ohio, Georgia, North Carolina, Michigan, New Jersey, Virginia, Washington, Arizona, Massachusetts, Tennessee, Missouri
cat("In Top 20 Population ONLY:", paste(only_pop, collapse = ", "), "\n")
## In Top 20 Population ONLY: Indiana, Maryland, Wisconsin
cat("In Top 20 Funding ONLY:", paste(only_fund, collapse = ", "), "\n")
## In Top 20 Funding ONLY: Louisiana, Kentucky, Alaska
cat("\nOverlap Rate:", round(length(overlap_states)/20*100, 1), "%\n")
## 
## Overlap Rate: 85 %

Q3: The Bottom of the Distribution | Do the Least Populated States Receive the Least Funding?

Q3: Bottom 20 Population vs. Bottom 20 Funding — Who Falls Behind?

# ---- Q3: Bottom 20 by Population vs Bottom 20 by Funding ----
bottom20_pop <- master %>%
  arrange(Population) %>%
  slice(1:20) %>%
  mutate(Pop_Rank = row_number())

bottom20_fund <- master %>%
  arrange(Total_funding) %>%
  slice(1:20) %>%
  mutate(Fund_Rank = row_number())

# Combined comparison
bottom_combined <- bottom20_pop %>%
  select(State, Population, Pop_Rank) %>%
  full_join(
    bottom20_fund %>% select(State, Total_funding, Fund_Rank),
    by = "State"
  ) %>%
  mutate(
    Category = case_when(
      !is.na(Pop_Rank) & !is.na(Fund_Rank) ~ "Bottom 20 in Both",
      !is.na(Pop_Rank) & is.na(Fund_Rank)  ~ "Bottom 20 Pop Only",
      is.na(Pop_Rank) & !is.na(Fund_Rank)  ~ "Bottom 20 Funding Only"
    )
  )

# Dot plot for clearer comparison
bottom_long <- bottom_combined %>%
  arrange(Population) %>%
  mutate(State = factor(State, levels = rev(State)))

ggplot(bottom_long, aes(y = State, color = Category)) +
  geom_point(aes(x = "Population\nRank", size = Population / 1e6),
             alpha = 0.8) +
  geom_point(aes(x = "Funding\nRank", size = Total_funding),
             alpha = 0.8) +
  geom_segment(
    data = bottom_long %>% filter(Category == "Bottom 20 in Both"),
    aes(x = "Population\nRank", xend = "Funding\nRank",
        y = State, yend = State),
    linetype = "dashed", color = "grey50", linewidth = 0.3
  ) +
  scale_color_manual(values = c(
    "Bottom 20 in Both" = "#2E7D32",
    "Bottom 20 Pop Only" = "#E65100",
    "Bottom 20 Funding Only" = "#6A1B9A"
  )) +
  scale_size_continuous(range = c(2, 8), guide = "none") +
  labs(
    title = "Q3: Do the Smallest States Also Receive the Least Funding?",
    subtitle = "Comparing Bottom 20 states by population vs. Bottom 20 by funding",
    x = NULL, y = NULL
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40"),
    legend.position = "bottom"
  )

Q3: Dumbbell Plot — Direct Rank Comparison

# Create rank-based comparison for ALL states in either bottom 20
bottom_ranked <- bottom_combined %>%
  left_join(
    master %>%
      mutate(Pop_Overall_Rank = rank(Population),
             Fund_Overall_Rank = rank(Total_funding)) %>%
      select(State, Pop_Overall_Rank, Fund_Overall_Rank),
    by = "State"
  ) %>%
  arrange(Pop_Overall_Rank) %>%
  mutate(State = factor(State, levels = rev(State)))

ggplot(bottom_ranked, aes(y = State)) +
  geom_segment(aes(x = Pop_Overall_Rank, xend = Fund_Overall_Rank,
                   yend = State),
               color = "grey60", linewidth = 1) +
  geom_point(aes(x = Pop_Overall_Rank), color = "#1565C0",
             size = 3.5) +
  geom_point(aes(x = Fund_Overall_Rank), color = "#C62828",
             size = 3.5) +
  geom_vline(xintercept = 20.5, linetype = "dashed",
             color = "grey40", linewidth = 0.5) +
  annotate("text", x = 10, y = 0.5, label = "Bottom 20 Zone",
           color = "grey40", size = 3.5, fontface = "italic") +
  scale_x_continuous(breaks = seq(0, 51, 5)) +
  labs(
    title = "Q3: Population Rank vs. Funding Rank for Small States",
    subtitle = "Blue dot = Population rank | Red dot = Funding rank | Line = gap between ranks",
    x = "Overall Rank (1 = Smallest/Lowest)",
    y = NULL,
    caption = "Dashed line separates bottom 20 from the rest"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40", size = 10)
  )

Q3: Mismatch Summary

overlap_bottom <- intersect(bottom20_pop$State, bottom20_fund$State)
only_pop_b     <- setdiff(bottom20_pop$State, bottom20_fund$State)
only_fund_b    <- setdiff(bottom20_fund$State, bottom20_pop$State)

cat("States in BOTH Bottom 20 lists:", length(overlap_bottom), "\n")
## States in BOTH Bottom 20 lists: 16
cat("Overlap:", paste(overlap_bottom, collapse = ", "), "\n\n")
## Overlap: Vermont, District of Columbia, North Dakota, South Dakota, Delaware, Rhode Island, Maine, New Hampshire, Hawaii, West Virginia, Idaho, Nebraska, Kansas, Mississippi, Nevada, Iowa
cat("Bottom 20 Pop but NOT Bottom 20 Funding:\n  ",
    paste(only_pop_b, collapse = ", "), "\n")
## Bottom 20 Pop but NOT Bottom 20 Funding:
##    Alaska, Montana, New Mexico, Arkansas
cat("Bottom 20 Funding but NOT Bottom 20 Pop:\n  ",
    paste(only_fund_b, collapse = ", "), "\n")
## Bottom 20 Funding but NOT Bottom 20 Pop:
##    Utah, Oregon, South Carolina, Connecticut

Q4: The Political Lens | Does Mean Per-Capita Funding Differ Between Red and Blue States?

Q4: Box Plot — Per-Capita Funding by Political Affiliation

# ---- Q4: Per-Capita Funding by Party ----
ggplot(master, aes(x = Party, y = Funding_Per_Capita,
                   fill = Party)) +
  geom_boxplot(alpha = 0.7, width = 0.5, outlier.shape = 21,
               outlier.size = 3) +
  geom_jitter(width = 0.15, alpha = 0.5, size = 2, color = "grey30") +
  scale_fill_manual(values = c("Blue" = "#1565C0",
                               "Red" = "#C62828")) +
  scale_y_continuous(labels = dollar_format()) +
  labs(
    title = "Q4: Per-Capita IIJA Funding — Blue States vs. Red States",
    subtitle = "Each point represents one state; box shows median & IQR",
    x = "2020 Election Result",
    y = "Funding Per Capita ($)",
    caption = "Per capita = Total State Funding / 2020 Census Population"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 15),
    plot.subtitle = element_text(color = "gray40"),
    legend.position = "none"
  )

Q4: Statistical Comparison — Are the Means Different?

# Descriptive statistics by party
party_stats <- master %>%
  group_by(Party) %>%
  summarise(
    N = n(),
    Mean_Per_Capita = mean(Funding_Per_Capita),
    Median_Per_Capita = median(Funding_Per_Capita),
    SD_Per_Capita = sd(Funding_Per_Capita),
    Min = min(Funding_Per_Capita),
    Max = max(Funding_Per_Capita),
    .groups = "drop"
  )

kable(party_stats,
      caption = "Per-Capita Funding Summary by Party",
      digits = 2,
      format.args = list(big.mark = ","))
Per-Capita Funding Summary by Party
Party N Mean_Per_Capita Median_Per_Capita SD_Per_Capita Min Max
Blue 27 675.57 545.42 295.71 437.01 1,639.45
Red 23 1,052.69 648.88 1,075.30 379.78 5,048.40
# Welch's t-test
blue <- master %>% filter(Party == "Blue") %>%
  pull(Funding_Per_Capita)
red  <- master %>% filter(Party == "Red") %>%
  pull(Funding_Per_Capita)

t_result <- t.test(blue, red)
cat("\nWelch's Two-Sample t-test:\n")
## 
## Welch's Two-Sample t-test:
cat("  t =", round(t_result$statistic, 4), "\n")
##   t = -1.6303
cat("  p-value =", round(t_result$p.value, 4), "\n")
##   p-value = 0.1157
cat("  95% CI: [", round(t_result$conf.int[1], 2), ",",
    round(t_result$conf.int[2], 2), "]\n")
##   95% CI: [ -853.7 , 99.46 ]
cat("\nInterpretation at alpha = 0.05:",
    ifelse(t_result$p.value < 0.05,
           "SIGNIFICANT difference in per-capita funding.",
           "NO significant difference in per-capita funding."), "\n")
## 
## Interpretation at alpha = 0.05: NO significant difference in per-capita funding.

Q5: The Bias Question | Is Per-Capita Funding Biased Toward Blue States?

Q5: Side-by-Side Per-Capita Funding — Every State Labeled

# ---- Q5: Per-Capita Funding Comparison ----
blue_states <- master %>%
  filter(Party == "Blue") %>%
  arrange(desc(Funding_Per_Capita)) %>%
  mutate(Rank = row_number())

red_states <- master %>%
  filter(Party == "Red") %>%
  arrange(desc(Funding_Per_Capita)) %>%
  mutate(Rank = row_number())

p_blue <- ggplot(blue_states,
                 aes(x = reorder(State_Abbr, Funding_Per_Capita),
                     y = Funding_Per_Capita)) +
  geom_col(fill = "#1565C0", alpha = 0.85, width = 0.7) +
  geom_hline(yintercept = mean(blue_states$Funding_Per_Capita),
             linetype = "dashed", color = "#0D47A1", linewidth = 0.7) +
  annotate("text",
           x = 1, y = mean(blue_states$Funding_Per_Capita) + 200,
           label = paste0("Mean: $",
                          round(mean(blue_states$Funding_Per_Capita))),
           color = "#0D47A1", size = 3, hjust = 0) +
 scale_y_continuous(labels = dollar_format(), limits = c(0, 5000)) +
  coord_flip() +
  labs(title = "Blue States (Biden 2020)",
       x = NULL, y = "Funding Per Capita ($)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 12,
                                  color = "#1565C0"))

p_red <- ggplot(red_states,
                aes(x = reorder(State_Abbr, Funding_Per_Capita),
                    y = Funding_Per_Capita)) +
  geom_col(fill = "#C62828", alpha = 0.85, width = 0.7) +
  geom_hline(yintercept = mean(red_states$Funding_Per_Capita),
             linetype = "dashed", color = "#B71C1C", linewidth = 0.7) +
  annotate("text",
           x = 1, y = mean(red_states$Funding_Per_Capita) + 200,
           label = paste0("Mean: $",
                          round(mean(red_states$Funding_Per_Capita))),
           color = "#B71C1C", size = 3, hjust = 0) +
  scale_y_continuous(labels = dollar_format(), limits = c(0, 5000)) +
  coord_flip() +
  labs(title = "Red States (Trump 2020)",
       x = NULL, y = "Funding Per Capita ($)") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 12,
                                  color = "#C62828"))

grid.arrange(p_blue, p_red, ncol = 2,
             top = grid::textGrob(
               "Q5: Per-Capita Funding — Blue States vs. Red States",
               gp = grid::gpar(fontsize = 15, fontface = "bold")))

Q5: Density Overlay — Distribution Shapes Tell the Story

ggplot(master, aes(x = Funding_Per_Capita, fill = Party,
                   color = Party)) +
  geom_density(alpha = 0.35, linewidth = 1) +
  geom_vline(data = master %>% group_by(Party) %>%
               summarise(Mean = mean(Funding_Per_Capita)),
             aes(xintercept = Mean, color = Party),
             linetype = "dashed", linewidth = 1) +
  scale_fill_manual(values = c("Blue" = "#1565C0",
                               "Red" = "#C62828")) +
  scale_color_manual(values = c("Blue" = "#1565C0",
                                "Red" = "#C62828")) +
  scale_x_continuous(labels = dollar_format()) +
  labs(
    title = "Q5: Per-Capita Funding Density — Overlapping Distributions",
    subtitle = "Dashed lines show group means; substantial overlap suggests limited bias",
    x = "Funding Per Capita ($)", y = "Density"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40"),
    legend.position = "top"
  )

Q5: Wilcoxon Rank-Sum Test — A Nonparametric Check

# Non-parametric alternative (robust to outliers/skewness)
w_result <- wilcox.test(blue, red)

cat("Wilcoxon Rank-Sum (Mann-Whitney U) Test:\n")
## Wilcoxon Rank-Sum (Mann-Whitney U) Test:
cat("  W =", w_result$statistic, "\n")
##   W = 249
cat("  p-value =", round(w_result$p.value, 4), "\n")
##   p-value = 0.2374
cat("\nInterpretation at α = 0.05:",
    ifelse(w_result$p.value < 0.05,
           "SIGNIFICANT difference — evidence of distributional shift.",
           "NO significant difference — insufficient evidence of bias."),
    "\n")
## 
## Interpretation at α = 0.05: NO significant difference — insufficient evidence of bias.
# Effect size (Cohen's d)
cohens_d <- (mean(blue) - mean(red)) /
  sqrt((sd(blue)^2 + sd(red)^2) / 2)
cat("\nCohen's d (effect size):", round(cohens_d, 3), "\n")
## 
## Cohen's d (effect size): -0.478
cat("Interpretation:",
    case_when(
      abs(cohens_d) < 0.2 ~ "Negligible effect",
      abs(cohens_d) < 0.5 ~ "Small effect",
      abs(cohens_d) < 0.8 ~ "Medium effect",
      TRUE                ~ "Large effect"
    ), "\n")
## Interpretation: Small effect

Conclusions | What the Data Tells Us

Key Findings — The Equity and Bias Story

Goal 1 — Is funding equitable?

Goal 2 — Is there political bias?

Limitations & Future Directions

Limitations:

Future Research: