We will be looking at the Infrastructure Investment and Jobs Act Funding which was signed in 2021 under the Biden Administration, we will be looking at the funding provided by the Act by State and Territory.
In doing so we explore two questions:
Our population estimates for each U.S. State comes from the U.S. government’s Census.gov website, the data can be found here: https://www2.census.gov/programs-surveys/popest/tables/
Our population estimates for each U.S. Territory comes from the Census.gov website which has seperate data for the Commonwealth of the Northern Mariana Islands, Guam, Puerto Rico, American Samoa, and the U.S. Virgin Islands.
The data can be found here: https://www.census.gov/programs-surveys/decennial-census/decade/2020/2020-census-results.html
Our election results come from the “2020 US Presidential Election Results by State” Kaggle dataset which can be found here: https://www.kaggle.com/datasets/callummacpherson14/2020-us-presidential-election-results-by-state
url <- "https://github.com/WendyR20/Data-608-Story-1/raw/refs/heads/main/IIJA%20FUNDING%20AS%20OF%20MARCH%202023.xlsx"
tmp_file <- tempfile(fileext = ".xlsx")
download.file(url, tmp_file, mode = "wb")
iija_data <- read_excel(tmp_file)
glimpse(iija_data)
url2 <- "https://github.com/WendyR20/Data-608-Story-1/raw/refs/heads/main/NST-EST2025-POP.xlsx"
pop_tmp_file <- tempfile(fileext = ".xlsx")
download.file(url2, pop_tmp_file, mode = "wb")
pop_data <- read_excel(pop_tmp_file)
## New names:
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
glimpse(pop_data)
#removing all rows before states
start_row <- 9
# Removing all roww before row 9
pop_subset <- pop_data[-(1:(start_row - 1)), ]
head(pop_subset)
#keeping only 2020 population estimates
#finding the index of the 2020 column
df_pop <- pop_subset[, 1:2]
head(df_pop)
#renaming state column and population estimate column
colnames(df_pop)[c(1, 2)] <- c("State, Teritory or Tribal Nation", "2020_Population_Estimate")
glimpse(df_pop)
#removing the '.' from each state
pop_data2 <- df_pop %>%
mutate(
`State, Teritory or Tribal Nation` =
sub("^\\.\\s*", "", `State, Teritory or Tribal Nation`)
)
#removing all rows after Puerto Rico
pop_data2 <- pop_data2[1:53, ]
tail(pop_data2)
#joining state data to funding data
pop_data2 <- pop_data2 %>%
mutate(`State, Teritory or Tribal Nation` = tolower(`State, Teritory or Tribal Nation`))
pop_data2 <- na.omit(pop_data2)
iija_data <- iija_data %>%
mutate(`State, Teritory or Tribal Nation` = tolower(`State, Teritory or Tribal Nation`))
state_df <- left_join(iija_data, pop_data2, by = "State, Teritory or Tribal Nation")
anti_join(iija_data, pop_data2, by = "State, Teritory or Tribal Nation")
iija_data <- iija_data %>%
mutate(
`State, Teritory or Tribal Nation` = case_when(
`State, Teritory or Tribal Nation` == "deleware" ~ "delaware",
TRUE ~ `State, Teritory or Tribal Nation`
)
)
state_df <- left_join(iija_data, pop_data2, by = "State, Teritory or Tribal Nation")
state_df <- state_df %>% mutate(`2020_Population_Estimate` = as.numeric(`2020_Population_Estimate`))
#finding the positions of NA values
na_df <- which(is.na(state_df), arr.ind = TRUE)
print(na_df)
#adding american samoa data
state_df[3,3]<- 49710
#adding guam data
state_df[13,3]<- 153836
#adding northern mariana islands data
state_df[38,3]<- 47329
#adding us virgin islands data
state_df[50,3]<- 87146
fund_data <- state_df %>%
mutate(`Total (Billions)` = `Total (Billions)` * 1e9)
fund_data <- fund_data %>%
mutate(
per_capita = round(`Total (Billions)` / `2020_Population_Estimate`, 0)
)
head(fund_data)
fund_data <- fund_data %>%
rename(
states_territories = `State, Teritory or Tribal Nation`,
iija_funding = `Total (Billions)`,
population = `2020_Population_Estimate`
)
fund_data2 <- fund_data %>%
mutate(iija_funding = iija_funding / 1e9,
population = population/1e6)
ggplot(fund_data2, aes(x = population, y = iija_funding)) +
geom_point(alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = "Infrastructure Investment and Jobs Act Funding vs Population",
x = "Population (in Millions)",
y = "Funding (in Biilions)"
) +
theme_minimal()
When we examine the relationship between the total IIJA Act funding and a state’s population we see a positive relationship; as the population of a state increases, states generally receive more funding which as what we would expect in terms of an equitable distribution of funding.
ggplot(fund_data2 %>% drop_na(per_capita), aes(x = reorder(states_territories, per_capita), y = per_capita)) +
geom_col() +
coord_flip() +
labs(
title = "Infrastructure Investment and Jobs Act Funding \n Per Capita by State and Territory",
x = "State or Territory",
y = "Funding per Capita (USD)"
) +
theme_minimal()
However, when we examine the IIJA Act funding per capita we can find that there are a few states who receive far more funding than can be explained by only by their population.
#adding 2020 election data
url3 <- "https://raw.githubusercontent.com/WendyR20/Data-608-Story-1/refs/heads/main/voting.csv"
election_results <- read_csv(url3)
glimpse(election_results)
#removing columns we won't use
results <- election_results[,-c(2:6)]
results <- results %>%
mutate(Winner = case_when(
trump_win == 1 ~ "Trump",
biden_win == 1 ~ "Biden",
))
results <- results[,-c(2:3)]
head(results)
results <- results %>%
mutate(state = tolower(state))
election_fund <- left_join(fund_data2, results, by = c("states_territories" = "state"))
ggplot(election_fund %>% drop_na(Winner), aes(x = Winner, y = per_capita)) +
geom_boxplot(alpha = 0.7,
outlier.colour = "red") +
labs(
title = "IIJA Act Funding Per Capita \n by 2020 Presidential Election Outcome",
x = "2020 Presidential Election Winner",
y = "Funding per Capita"
) +
theme_minimal()
When we group states by 2020 presidential election winner we find that states won by Trump show a slightly higher median and overall range of funding per capita.
outliers_df <- election_fund %>%
filter(!is.na(per_capita)) %>%
filter(per_capita >= quantile(per_capita, 0.95)) %>%
select(
states_territories,
per_capita,
Winner
) %>%
arrange(desc(per_capita)) %>%
mutate(
per_capita = dollar(per_capita)
)
outliers_df$states_territories <- str_to_title(outliers_df$states_territories)
outliers_df %>%
kable("html", col.names = c("State", "IIJA Act Funding Per Capita", "2020 Presidential Election Winner"),
caption = "States With More Than 95% of Average Funding Per Capita ") %>%
kable_styling(full_width = TRUE, position = "center")
| State | IIJA Act Funding Per Capita | 2020 Presidential Election Winner |
|---|---|---|
| Alaska | $5,045 | Trump |
| Wyoming | $3,987 | Trump |
| Montana | $3,044 | Trump |
Examining this difference further, we find that the states that receive IIJA funding higher than 95% than all other states are all states where Trump won the 2020 presidential election vote.