library(tidyverse)
library(readxl)
library(robotstxt)
library(rvest)
library(janitor)
iija_data <- read_excel("IIJA FUNDING AS OF MARCH 2023.xlsx") |>
clean_names()
paths_allowed("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population")
## [1] TRUE
pop_data <- read_html("https://en.wikipedia.org/wiki/List_of_U.S._states_and_territories_by_population") |>
html_element("table") |>
html_table() |>
clean_names()
paths_allowed("https://en.wikipedia.org/wiki/2020_United_States_presidential_election")
## [1] TRUE
election_data <- read_html("https://en.wikipedia.org/wiki/2020_United_States_presidential_election") |>
html_elements("#mw-content-text > div.mw-content-ltr.ext-chart-js.mw-parser-output > div:nth-child(264) > table") |>
html_table()
election_data <- election_data[[1]] |>
as_tibble(.name_repair = "minimal") |>
select(state = 1, biden = 2, biden_perc = 3, trump = 5)
pop_data <- pop_data |>
mutate(state_or_territory = toupper(state_or_territory),
state_or_territory = case_when(str_detect(state_or_territory, "VIRGIN IS") ~ "US VIRGIN ISLANDS",
str_detect(state_or_territory, "MARIANA") ~ "NORTHERN MARIANA ISLANDS",
str_detect(state_or_territory, "GUAM") ~ "GUAM",
str_detect(state_or_territory, "SAMOA") ~ "AMERICAN SAMOA",
.default = state_or_territory))
election_data <- election_data |>
mutate(state = toupper(state),
state = str_replace_all(state, "[^A-Za-z0-9 ]", ""),
state = ifelse(str_detect(state, "(?i)maine") | str_detect(state, "(?i)nebraska"),
str_remove_all(state, " "), state),
state = ifelse(str_detect(state, "NEVADA"), "NEVADA", state),
state = ifelse(str_detect(state, "NEW JERSEY"), "NEW JERSEY", state),
state = ifelse(str_detect(state, "TEXAS"), "TEXAS", state))
data <- iija_data |>
mutate(state_teritory_or_tribal_nation = ifelse(state_teritory_or_tribal_nation == "DELEWARE", "DELAWARE",
state_teritory_or_tribal_nation)) |>
left_join(pop_data |> select(1,3),
by = join_by(state_teritory_or_tribal_nation == state_or_territory)) |>
left_join(election_data,
by = join_by(state_teritory_or_tribal_nation == state))
# Tribal Communities population data comes from the U.S. Census:
# https://www.census.gov/library/stories/2023/10/2020-census-dhc-a-aian-population.html
data <- data |>
mutate(census_population_8_9_a_2 = str_remove_all(census_population_8_9_a_2, ","),
census_population_8_9_a_2 = as.numeric(census_population_8_9_a_2),
census_population_8_9_a_2 = ifelse(state_teritory_or_tribal_nation == "TRIBAL COMMUNITIES",
6605593, census_population_8_9_a_2),
biden = str_remove_all(biden, ","),
biden = as.numeric(biden),
biden_perc = str_remove_all(biden_perc, "[%]"),
biden_perc = as.numeric(biden_perc),
trump = str_remove_all(trump, ","),
trump = as.numeric(trump))
viz1_data <- data |>
mutate(fund_perc = total_billions/sum(total_billions),
pop_perc = census_population_8_9_a_2/ sum(census_population_8_9_a_2),
perc_diff = fund_perc/pop_perc)
viz1_data |>
ggplot(aes(x = perc_diff,
y = reorder(state_teritory_or_tribal_nation, abs(perc_diff)))) +
geom_col(fill = ifelse(abs(viz1_data$perc_diff) > 2, "orange", "grey")) +
theme_minimal() +
scale_x_continuous(n.breaks = 10,
labels = scales::percent) +
geom_vline(xintercept = 2, lty = 2, color = "orange4", linewidth = 1) +
labs(title = "IIJA Funding for U.S States and Territories Relative to their Total Population Percentage",
subtitle = "Almost a quarter of states and territories received IIJA funding greater than 2x than their population percentage.",
y = "",
x = "")
viz2_data <- data |>
drop_na(biden, biden_perc, trump) |>
mutate(fund_perc = total_billions/sum(total_billions),
biden_perc = biden_perc/100)
viz2_data |>
ggplot(aes(x = biden_perc, y = fund_perc)) +
geom_step(color = ifelse(viz2_data$biden_perc > .5, "blue", "red")) +
geom_vline(xintercept = .5, lty = 2, color = "grey") +
scale_x_continuous(limits = c(.25, 1),
n.breaks = 4,
labels = scales::percent) +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
labs(title = "IIJA Funding Compared with Biden Voter Turnout by State",
subtitle = "Higher percentages of voting in favor of Biden did not result in noticeably greater proportions of funding from IIJA.",
x = "% Votes for Biden",
y = "% Funding from IIJA")