Packages Used

Click to see code for packages used
library(tidyverse)
library(readxl)
library(robotstxt)
library(rvest)
library(janitor)

Load Data

Click to see code for loading data
iija_data <- read_excel("IIJA FUNDING AS OF MARCH 2023.xlsx") |> 
  clean_names()
paths_allowed("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population")
## [1] TRUE
pop_data <- read_html("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population") |> 
  html_element("table") |> 
  html_table() |> 
  clean_names()
paths_allowed("https://en.wikipedia.org/wiki/2020_United_States_presidential_election")
## [1] TRUE
election_data <- read_html("https://en.wikipedia.org/wiki/2020_United_States_presidential_election") |> 
  html_elements("#mw-content-text > div.mw-content-ltr.ext-chart-js.mw-parser-output > div:nth-child(264) > table") |> 
  html_table()

election_data <- election_data[[1]] |>  
  as_tibble(.name_repair = "minimal") |> 
  select(state = 1, biden = 2, biden_perc = 3, trump = 5)

Data Cleaning

Click to see code for cleaning data
pop_data <- pop_data |> 
  mutate(state_or_territory = toupper(state_or_territory),
         state_or_territory = case_when(str_detect(state_or_territory, "VIRGIN IS") ~ "US VIRGIN ISLANDS",
                                        str_detect(state_or_territory, "MARIANA") ~ "NORTHERN MARIANA ISLANDS",
                                        str_detect(state_or_territory, "GUAM") ~ "GUAM",
                                        str_detect(state_or_territory, "SAMOA") ~ "AMERICAN SAMOA",
                                        .default = state_or_territory))

election_data <- election_data |> 
  mutate(state = toupper(state),
         state = str_replace_all(state, "[^A-Za-z0-9 ]", ""),
         state = ifelse(str_detect(state, "(?i)maine") | str_detect(state, "(?i)nebraska"),
                        str_remove_all(state, " "), state),
         state = ifelse(str_detect(state, "NEVADA"), "NEVADA", state),
         state = ifelse(str_detect(state, "NEW JERSEY"), "NEW JERSEY", state),
         state = ifelse(str_detect(state, "TEXAS"), "TEXAS", state))

data <- iija_data |> 
  mutate(state_teritory_or_tribal_nation = ifelse(state_teritory_or_tribal_nation == "DELEWARE", "DELAWARE",
                                                  state_teritory_or_tribal_nation)) |> 
  left_join(pop_data |> select(1,3),
            by = join_by(state_teritory_or_tribal_nation == state_or_territory)) |> 
  left_join(election_data,
            by = join_by(state_teritory_or_tribal_nation == state))


# Tribal Communities population data comes from the U.S. Census:
# https://www.census.gov/library/stories/2023/10/2020-census-dhc-a-aian-population.html
data <- data |> 
  mutate(census_population_8_9_a_2 = str_remove_all(census_population_8_9_a_2, ","),
         census_population_8_9_a_2 = as.numeric(census_population_8_9_a_2),
         census_population_8_9_a_2 = ifelse(state_teritory_or_tribal_nation == "TRIBAL COMMUNITIES",
                                            6605593, census_population_8_9_a_2),
         biden = str_remove_all(biden, ","),
         biden = as.numeric(biden),
         biden_perc = str_remove_all(biden_perc, "[%]"),
         biden_perc = as.numeric(biden_perc),
         trump = str_remove_all(trump, ","),
         trump = as.numeric(trump))

Visualization 1

viz1_data <- data |> 
  mutate(fund_perc = total_billions/sum(total_billions),
         pop_perc = census_population_8_9_a_2/ sum(census_population_8_9_a_2),
         perc_diff = fund_perc/pop_perc)

viz1_data |> 
  ggplot(aes(x = perc_diff,
             y = reorder(state_teritory_or_tribal_nation, abs(perc_diff)))) +
  geom_col(fill = ifelse(abs(viz1_data$perc_diff) > 2, "orange", "grey")) +
  theme_minimal() +
  scale_x_continuous(n.breaks = 10,
                     labels = scales::percent) +
  geom_vline(xintercept = 2, lty = 2, color = "orange4", linewidth = 1) +
  labs(title = "IIJA Funding for U.S States and Territories Relative to their Total Population Percentage",
       subtitle = "Almost a quarter of states and territories received IIJA funding greater than 2x than their population percentage.",
       y = "",
       x = "")

Visualization 2

viz2_data <- data |> 
  drop_na(biden, biden_perc, trump) |> 
  mutate(fund_perc = total_billions/sum(total_billions),
         biden_perc = biden_perc/100)

viz2_data |> 
  ggplot(aes(x = biden_perc, y = fund_perc)) +
  geom_step(color = ifelse(viz2_data$biden_perc > .5, "blue", "red")) +
  geom_vline(xintercept = .5, lty = 2, color = "grey") +
  scale_x_continuous(limits = c(.25, 1),
                     n.breaks = 4,
                     labels = scales::percent) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  labs(title = "IIJA Funding Compared with Biden Voter Turnout by State",
       subtitle = "Higher percentages of voting in favor of Biden did not result in noticeably greater proportions of funding from IIJA.",
       x = "% Votes for Biden",
       y = "% Funding from IIJA")