top30_pop <- master %>%
  arrange(desc(Population)) %>%
  slice(1:30) %>%
  mutate(Pop_Rank = row_number())

top30_fund <- master %>%
  arrange(desc(Total_funding)) %>%
  slice(1:30) %>%
  mutate(Fund_Rank = row_number())

top30_comparison <- top30_pop %>%
  select(State, Population, Pop_Rank) %>%
  full_join(
    top30_fund %>% select(State, Total_funding, Fund_Rank),
    by = "State"
  ) %>%
  mutate(
    In_Both = ifelse(!is.na(Pop_Rank) & !is.na(Fund_Rank),
                     "In Both Top 20", "In One List Only")
  )

# ── Separate data with correct ordering for each panel ──
pop_data <- top30_comparison %>%
  filter(!is.na(Pop_Rank)) %>%
  arrange(desc(Population)) %>%
  mutate(State_Label = factor(State, levels = rev(State)))

fund_data <- top30_comparison %>%
  filter(!is.na(Fund_Rank)) %>%
  arrange(desc(Total_funding)) %>%
  mutate(State_Label = factor(State, levels = rev(State)))

# ── Left panel: Population ──
p1 <- ggplot(pop_data,
             aes(x = Population / 1e6, y = State_Label,
                 fill = Population / 1e6)) +
  geom_col(width = 0.7,
           color = ifelse(pop_data$In_Both == "In One List Only",
                          "#2A9D8F", NA),
           linewidth = ifelse(pop_data$In_Both == "In One List Only",
                              1.2, 0)) +
  scale_fill_gradient(low = "#C9D6E5", high = "#1B2A4A",
                      guide = "none") +
  scale_x_continuous(labels = function(x) paste0(x, "M")) +
  labs(x = "Population (Millions)", y = NULL,
       title = "Top 30 by Population") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "plain", size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "grey")
  )

# ── Right panel: Funding ──
p2 <- ggplot(fund_data,
             aes(x = Total_funding, y = State_Label,
                 fill = Total_funding)) +
  geom_col(width = 0.7,
           color = ifelse(fund_data$In_Both == "In One List Only",
                          "#1B2A4A", NA),
           linewidth = ifelse(fund_data$In_Both == "In One List Only",
                              1.2, 0)) +
  scale_fill_gradient(low = "#D3E8E5", high = "#204204",
                      guide = "none") +
  scale_x_continuous(labels = dollar_format(suffix = "B")) +
  labs(x = "Funding ($ Billions)", y = NULL,
       title = "Top 30 by Funding") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "plain", size = 12),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "grey")
  )

grid.arrange(
  p1, p2, ncol = 2,
  top = grid::textGrob(
    "The Most Populated States Get the Biggest\nFunding — With Exceptions",
    gp = grid::gpar(
      fontsize = 30,
      fontface = "bold",
      fontfamily = "Helvetica",
       col = "black")))



library(ggplot2)
library(dplyr)
library(scales)


# ── Identify the exceptions ──
top30_pop <- master %>%
  arrange(desc(Population)) %>%
  slice(1:30) %>%
  pull(State)

top30_fund <- master %>%
  arrange(desc(Total_funding)) %>%
  slice(1:30) %>%
  pull(State)

pop_only  <- setdiff(top30_pop, top30_fund)   # Big population, not top 30 funded
fund_only <- setdiff(top30_fund, top30_pop)    # Top 30 funded, not big population


# ── Totals for percentage calculations ──
total_us_pop     <- sum(master$Population)
total_us_funding <- sum(master$Total_funding)

# ── Build the outlier dataset ──
outlier_df <- master %>%
  filter(State %in% c(pop_only, fund_only)) %>%
  mutate(
    Pct_Pop       = (Population / total_us_pop) * 100,
    Pct_Funding   = (Total_funding / total_us_funding) * 100,
    Gap_pp        = Pct_Funding - Pct_Pop,
    Per_Capita    = (Total_funding * 1e9) / Population,
    Ratio         = Pct_Funding / Pct_Pop,
    Group = case_when(
      State %in% pop_only  ~ "Top 30 Population, Not Funding",
      State %in% fund_only ~ "Top 30 Funding, Not Population"
    )
  ) %>%
  arrange(Gap_pp) %>%
  mutate(State = factor(State, levels = State))


# ── Dumbbell Plot ──
ggplot(outlier_df, aes(y = State)) +

 

  # Connecting line
  geom_segment(aes(x = Pct_Pop, xend = Pct_Funding,
                   y = State, yend = State),
               color = "grey40", linewidth = 1.5) +

  # Population dot
  geom_point(aes(x = Pct_Pop), size = 3, color = "#1B2A4A") +

  # Funding dot
  geom_point(aes(x = Pct_Funding), size = 3, color = "#0D9488") +

  # Gap label above the line
  geom_text(aes(x = (Pct_Pop + Pct_Funding) / 2,
                label = paste0(ifelse(Gap_pp > 0, "+", ""),
                          
                               round(Ratio, 2), "x")),
            vjust = -1.3, size = 3.2, fontface = "bold",
            color = "grey30") +

 

  scale_fill_manual(values = c(
    "Top 30 Population, Not Funding" = "#1B2A4A",
    "Top 30 Funding, Not Population" = "#0D9488"
  ), guide = "none") +

  scale_x_continuous(labels = function(x) paste0(x, "%"),
                     expand = expansion(mult = c(0.15, 0.15))) +

 labs(
    title = "The disparity in funding allows Alasks with only .2% of the \nUS population to receive a disproportionate share of \nfederal funding (8.8times). Other states also follow this pattern, \nreceiveing either more or less funding than their population share",
    subtitle = paste0(
      "<span style='color:#1B2A4A; font-size:18pt;'>\u25CF</span>",
      " Population Share          ",
      "<span style='color:#0D9488; font-size:18pt;'>\u25CF</span>",
      " Funding Share          "),
    x = "State's Share of U.S. Population and Federal Funding (%)",
    y = NULL,
    caption = paste0(
      "Ratio interpretation: 1.0x = funding matches population share  |  ",
      ">1.0x = overfunded  |  <1.0x = underfunded")
  ) +
  
  theme_minimal(base_size = 13) +
  theme(
    plot.title       = element_text(face = "bold", size = 20,
                                    color = "#1B2A4A"),
    plot.subtitle    = element_markdown(size = 12, color = "gray30",
                                        margin = margin(b = 25)),
    plot.caption     = element_text(size = 11, color = "gray30"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    axis.text.y = element_text(face = "bold", size = 11,
                                color = "#1B2A4A"),
    
    panel.grid.major.x = element_blank(),
    axis.line.x = element_line(color = "grey10", linewidth = 0.5),
    axis.line.y = element_line(color = "grey30", linewidth = 0.5),
    axis.text.x = element_text(color = "grey20", size = 11),
    axis.title.x = element_text(margin = margin(t = 15), color = "grey20"),
  )

ggsave("outlier_gap_top30.png", width = 10, height = 7, dpi = 300)

The ratio compares each state’s share of federal funding to its share of the U.S. population. A ratio of 1.0x means a state receives funding proportional to its population size. Above 1.0x indicates the state receives more than its proportional share; below 1.0x means it receives less.”




library(ggplot2)
library(dplyr)
library(scales)
library(ggtext)


# ── Identify the exceptions ──
top30_pop <- master %>%
  arrange(desc(Population)) %>%
  slice(1:30) %>%
  pull(State)

top30_fund <- master %>%
  arrange(desc(Total_funding)) %>%
  slice(1:30) %>%
  pull(State)

pop_only  <- setdiff(top30_pop, top30_fund)
fund_only <- setdiff(top30_fund, top30_pop)

# ── National mean per capita ──
national_per_capita <- sum(master$Total_funding * 1e9) / sum(master$Population)

# ── Build the outlier dataset ──
outlier_df <- master %>%
  filter(State %in% c(pop_only, fund_only)) %>%
  mutate(
    Per_Capita = (Total_funding * 1e9) / Population,
    Diff_from_Mean = Per_Capita - national_per_capita,
    Group = case_when(
      State %in% pop_only  ~ "Top 30 Population, Not Funding",
      State %in% fund_only ~ "Top 30 Funding, Not Population"
    )
  ) %>%
  arrange(Per_Capita) %>%
  mutate(State = factor(State, levels = State))

# ── Lollipop / Dot Plot with National Mean Line ──
ggplot(outlier_df, aes(x = Per_Capita, y = State)) +

  # Shading by group
  geom_rect(
    aes(fill = Group),
    xmin = -Inf, xmax = Inf,
    ymin = as.numeric(outlier_df$State) - 0.4,
    ymax = as.numeric(outlier_df$State) + 0.4,
    alpha = 0.08
  ) +

  # National mean reference line
  geom_vline(
    xintercept = national_per_capita,
    linetype   = "solid",
    color      = "#D97706",
    linewidth  = .5
  ) +

  # Label for the mean line
  annotate("text",
           x     = national_per_capita,
           y     = Inf,
           label = paste0("National Mean: $",
                          comma(round(national_per_capita))),
           hjust = -0.05, vjust = 1.5,
           size  = 4, fontface = "bold",
           color = "#D97706") +



  # Per capita dot
  geom_point(aes(color = Group), size = 2.5) +

  # Per capita label
  geom_text(aes(label = paste0("$", comma(round(Per_Capita)))),
            hjust = ifelse(outlier_df$Per_Capita > national_per_capita, -0.3, 1.3),
            size  = 4, fontface = "bold", color = "grey30") +

  scale_color_manual(values = c(
    "Top 30 Population, Not Funding" = "#1B2A4A",
    "Top 30 Funding, Not Population" = "#1B2A4A"
  ), guide = "none") +

  scale_fill_manual(values = c(
    "Top 30 Population, Not Funding" = "#1B2A4A",
    "Top 30 Funding, Not Population" = "#1B2A4A"
  ), guide = "none") +

  scale_x_continuous(labels = dollar_format(), 
                     expand = expansion(mult = c(0.15, 0.15))) +

  labs(
    title = "Alaska tops the list of per capita funding-\n even though it’s not in the top 30 for population",
    x = "Federal Funding Per Capita ($)",
    y = NULL,
    
  ) +

  theme_minimal(base_size = 13) +
  theme(
    plot.title         = element_text(face = "bold", size = 26, color = "#1B2A4A"),
    plot.subtitle      = element_markdown(size = 12, color = "gray30", margin = margin(b = 25)),
    plot.caption       = element_text(size = 11, color = "gray40"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    axis.text.y        = element_text(face = "bold", size = 11, color = "#1B2A4A"),
    plot.margin        = margin(15, 30, 10, 10),
    panel.grid.major.x = element_blank(),
  )

ggsave("outlier_percapita_top30.png", width = 10, height = 7, dpi = 300)




map_df <- master %>%
  mutate(
    region = tolower(State),
    Per_Capita = (Total_funding * 1e9) / Population,
    PC_Norm = (Per_Capita - min(Per_Capita)) / 
               (max(Per_Capita) - min(Per_Capita))
  )

national_per_capita <- sum(master$Total_funding * 1e9) / sum(master$Population)

states_map <- map_data("state")
map_merged <- states_map %>%
  left_join(map_df, by = "region")

state_centers <- map_merged %>%
  group_by(region, State_Abbr) %>%
  summarise(
    long = mean(range(long)),
    lat = mean(range(lat)),
    .groups = "drop"
  )

ggplot(map_merged, aes(x = long, y = lat, group = group,
                        fill = Party, alpha = PC_Norm)) +
  geom_polygon(color = "white", linewidth = 0.3) +
  geom_text(data = state_centers,
            aes(x = long, y = lat, label = State_Abbr, group = NULL),
            size = 3, color = "black", fontface = "bold",
            inherit.aes = FALSE) +
  coord_map("albers", lat0 = 29.5, lat1 = 45.5) +
  scale_fill_manual(values = c("Blue" = "#2166AC",
                                "Red" = "#B2182B"),
                    name = "2020 Vote") +
  scale_alpha_continuous(range = c(0.3, 1)) +
  labs(
    title = "How is Per Capita Funding Spread Accross \nThe United States?",
    subtitle = paste0(
      "Color = party alignment  |  Darker = higher per capita funding  |  ",
      "National mean: $", comma(round(national_per_capita)), " per person"),
     
    caption = "Source: Federal IIJA Funding & 2020 Election Data"
  ) +
  theme_void(base_size = 12) +
  theme(
    plot.title    = element_text(face = "bold", size = 55,
                                color = "#1B2A4A", hjust = .5),
    plot.subtitle = element_text(size = 18, color = "gray40",
                                 hjust = 0.5, margin = margin(b = 15)),
    plot.caption  = element_text(size = 15, color = "gray50",
                                 margin = margin(t = 20)),
    legend.position = "bottom",
    legend.box = "horizontal",
    legend.title = element_text(size = 13, face = "bold"),
    legend.text  = element_text(size = 13),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  guides(alpha = "none")

ggsave("map_percapita_funding.png", width = 14, height = 9, dpi = 300)

What the Map Reveals:



library(ggtext)
library(tidytext)


party_df <- master %>%
  mutate(
    Per_Capita = (Total_funding * 1e9) / Population,
    Party_Label = ifelse(Party == "Blue", "Blue States", "Red States")
  ) %>%
  arrange(Party, Per_Capita) %>%
  mutate(State = reorder_within(State, Per_Capita, Party_Label))

 national_mean <- sum(master$Total_funding * 1e9) / sum(master$Population)

ggplot(party_df, aes(x = State, y = Per_Capita, fill = Party)) +
  geom_col(width = 0.7) +

  # National mean reference line
  geom_hline(yintercept = national_mean, color = "navy",
             linetype = "dashed", linewidth = 0.8) +

  # Dollar label at end of each bar
  geom_text(aes(label = paste0("$", comma(round(Per_Capita)))),
            hjust = -0.1, size = 3.5, fontface = "bold", color = "grey30") +

  coord_flip(clip = "off") +

  # Stacked vertically, same x scale
  facet_wrap(~ Party_Label, ncol = 1, scales = "free_y") +

  scale_fill_manual(values = c("Blue" = "#5A9BD5",
                                "Red" = "#E06666"),
                    guide = "none") +
  scale_x_reordered() +
  scale_y_continuous(labels = dollar_format(),
                     limits = c(0, max(party_df$Per_Capita) * 1.15),
                     expand = expansion(mult = c(0, 0.05))) +

  labs(
    title = "Was Funding Allocation Biased Towards Biden States\nas per 2020 Election Results?",
    subtitle = paste0(
      "Per capita federal funding by state  |  ",
      "Dashed line = national mean ($", comma(round(national_mean)), ")"),
    x = NULL,
    y = "Federal Funding Per Capita ($)",
    caption = "Source: Federal IIJA Funding & 2020 Election Data"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title       = element_text(face = "bold", size = 28,
                                    color = "#1B2A4A"),
    plot.subtitle    = element_text(size = 16, color = "gray40",
                                    margin = margin(b = 15),
    plot.caption     = element_text(size = 9, color = "gray50"),
    axis.text.y      = element_text(size = 8, face = "bold",
                                    color = "#1B2A4A"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor   = element_blank(),
    strip.text       = element_text(face = "bold", size = 14,
                                    color = "#1B2A4A"),
    panel.spacing    = unit(1.5, "lines"),
    plot.margin      = margin(15, 50, 10, 10)))

Key Takeaway:



This project applied exploratory data analysis and visualization techniques to investigate whether federal IIJA funding distribution reveals political bias by examining total funding, per capita allocation, and population-funding ratios across all 50 states. Using dumbbell plots, diverging bar charts, choropleth maps, and scatter plots, the analysis layers multiple perspectives to guide the viewer through a data-driven narrative — from broad national patterns to state-level exceptions. The visualizations demonstrated that per capita funding favors smaller, predominantly Red states, suggesting that formula-based allocation and infrastructure need outweigh political affiliation in determining federal funding distribution.