This is an R Markdown presentation. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(ggplot2)
library(dplyr)
library(stringr)
library(tools)
library(stringdist)
## Warning: package 'stringdist' was built under R version 4.3.3
##
## Attaching package: 'stringdist'
##
## The following object is masked from 'package:tidyr':
##
## extract
IIJA_funding_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/IIJA%20FUNDING%20AS%20OF%20MARCH%202023.csv")
state_population_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/State_Population_Data.csv")
election_results_2020_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/2020_Presidential_Election_Results.csv")
#print(IIJA_funding_data)
# Check column names
colnames(IIJA_funding_data)
## [1] "State..Teritory.or.Tribal.Nation" "Total..Billions."
head(IIJA_funding_data)
## State..Teritory.or.Tribal.Nation Total..Billions.
## 1 ALABAMA 3.0000
## 2 ALASKA 3.7000
## 3 AMERICAN SAMOA 0.0686
## 4 ARIZONA 3.5000
## 5 ARKANSAS 2.8000
## 6 CALIFORNIA 18.4000
# Renaming the columns
IIJA_funding_data <- IIJA_funding_data %>%
rename(State_Name = State..Teritory.or.Tribal.Nation, Funding_Billions = Total..Billions.)
head(IIJA_funding_data)
## State_Name Funding_Billions
## 1 ALABAMA 3.0000
## 2 ALASKA 3.7000
## 3 AMERICAN SAMOA 0.0686
## 4 ARIZONA 3.5000
## 5 ARKANSAS 2.8000
## 6 CALIFORNIA 18.4000
#print(IIJA_funding_data)
# Convert column names to lowercase
colnames(IIJA_funding_data ) <- tolower(colnames(IIJA_funding_data ))
# View the modified data frame
#print(IIJA_funding_data )
head(IIJA_funding_data)
## state_name funding_billions
## 1 ALABAMA 3.0000
## 2 ALASKA 3.7000
## 3 AMERICAN SAMOA 0.0686
## 4 ARIZONA 3.5000
## 5 ARKANSAS 2.8000
## 6 CALIFORNIA 18.4000
# Sort by population (ascending order)
IIJA_funding_data <- IIJA_funding_data[order(IIJA_funding_data$state_name),]
# View the sorted dataframe
#print(IIJA_funding_data)
head(IIJA_funding_data)
## state_name funding_billions
## 1 ALABAMA 3.0000
## 2 ALASKA 3.7000
## 3 AMERICAN SAMOA 0.0686
## 4 ARIZONA 3.5000
## 5 ARKANSAS 2.8000
## 6 CALIFORNIA 18.4000
# Replace "Deleware" with "Delaware" conditionally
IIJA_funding_data$state_name[IIJA_funding_data$state_name == "Deleware"] <- "Delaware"
# View the updated DataFrame
#print(IIJA_funding_data)
head(IIJA_funding_data)
## state_name funding_billions
## 1 ALABAMA 3.0000
## 2 ALASKA 3.7000
## 3 AMERICAN SAMOA 0.0686
## 4 ARIZONA 3.5000
## 5 ARKANSAS 2.8000
## 6 CALIFORNIA 18.4000
# Add index column using mutate
IIJA_funding_data <- IIJA_funding_data %>% mutate(index = row_number())
# View the updated DataFrame
#print(IIJA_funding_data)
head(IIJA_funding_data)
## state_name funding_billions index
## 1 ALABAMA 3.0000 1
## 2 ALASKA 3.7000 2
## 3 AMERICAN SAMOA 0.0686 3
## 4 ARIZONA 3.5000 4
## 5 ARKANSAS 2.8000 5
## 6 CALIFORNIA 18.4000 6
# Convert first letter of each word to uppercase using str_to_title
IIJA_funding_data$state_name <- str_to_title(IIJA_funding_data$state_name)
# View the updated DataFrame
#print(IIJA_funding_data)
# Convert column names to lowercase
colnames(state_population_data ) <- tolower(colnames(state_population_data ))
# View the modified data frame
#print(state_population_data)
head(state_population_data)
## state name estimatesbase2020 popestimate2020 popestimate2021
## 1 1 Alabama 5025369 5033094 5049196
## 2 2 Alaska 733395 733017 734420
## 3 4 Arizona 7158110 7187135 7274078
## 4 5 Arkansas 3011553 3014546 3026870
## 5 6 California 39555674 39521958 39142565
## 6 8 Colorado 5775324 5787129 5814036
## popestimate2022 popestimate2023 popestimate2024 npopchg_2020 npopchg_2021
## 1 5076181 5117673 5157699 7725 16102
## 2 734442 736510 740133 -378 1403
## 3 7377566 7473027 7582384 29025 86943
## 4 3047704 3069463 3088354 2993 12324
## 5 39142414 39198693 39431263 -33716 -379393
## 6 5850935 5901339 5957493 11805 26907
## npopchg_2022 npopchg_2023 npopchg_2024 births2020 births2021 births2022
## 1 26985 41492 40026 13867 57184 58103
## 2 22 2068 3623 2406 9454 9359
## 3 103488 95461 109357 18110 75693 79173
## 4 20834 21759 18891 8509 34928 36115
## 5 -151 56279 232570 103133 412507 424071
## 6 36899 50404 56154 15593 62138 62659
## births2023 births2024 deaths2020 deaths2021 deaths2022 deaths2023 deaths2024
## 1 58528 57541 15146 69134 67246 60059 59273
## 2 9132 9014 1169 5333 6388 5597 5642
## 3 78647 78322 17991 80277 79806 70779 68662
## 4 35298 35161 8466 40117 40295 36414 35888
## 5 410377 400601 74572 345285 318719 303733 290135
## 6 62309 62497 11905 46475 49577 45275 45203
## naturalchg2020 naturalchg2021 naturalchg2022 naturalchg2023 naturalchg2024
## 1 -1279 -11950 -9143 -1531 -1732
## 2 1237 4121 2971 3535 3372
## 3 119 -4584 -633 7868 9660
## 4 43 -5189 -4180 -1116 -727
## 5 28561 67222 105352 106644 110466
## 6 3688 15663 13082 17034 17294
## internationalmig2020 internationalmig2021 internationalmig2022
## 1 133 1804 8155
## 2 65 873 2818
## 3 251 8017 33411
## 4 95 1347 6177
## 5 1372 44127 234953
## 6 240 3911 18507
## internationalmig2023 internationalmig2024 domesticmig2020 domesticmig2021
## 1 12995 15763 9757 25175
## 2 3391 4029 -1634 -3438
## 3 52767 64486 30189 83565
## 4 4966 6152 2601 15880
## 5 292721 361057 -67219 -477586
## 6 27177 33227 7365 5817
## domesticmig2022 domesticmig2023 domesticmig2024 netmig2020 netmig2021
## 1 28226 29946 26028 9890 26979
## 2 -5832 -4886 -3774 -1569 -2565
## 3 68790 35208 34902 30440 91582
## 4 18873 17821 13465 2696 17227
## 5 -336707 -344029 -239575 -65847 -433459
## 6 6227 6341 5422 7605 9728
## netmig2022 netmig2023 netmig2024 residual2020 residual2021 residual2022
## 1 36381 42941 41791 -886 1073 -253
## 2 -3014 -1495 255 -46 -153 65
## 3 102201 87975 99388 -1534 -55 1920
## 4 25050 22787 19617 254 286 -36
## 5 -101754 -51308 121482 3570 -13156 -3749
## 6 24734 33518 38649 512 1516 -917
## residual2023 residual2024 rbirth2021 rbirth2022 rbirth2023 rbirth2024
## 1 82 -33 11.34345 11.47671 11.48300 11.19979
## 2 28 -4 12.88505 12.74320 12.41645 12.20877
## 3 -382 309 10.46842 10.80739 10.59177 10.40450
## 4 88 1 11.56285 11.89055 11.54064 11.41996
## 5 943 622 10.48775 10.83403 10.47667 10.18953
## 6 -148 211 10.71237 10.74310 10.60374 10.54016
## rdeath2021 rdeath2022 rdeath2023 rdeath2024 rnaturalchg2021 rnaturalchg2022
## 1 13.713948 13.282666 11.783375 11.536906 -2.3704932 -1.80595745
## 2 7.268455 8.697890 7.610038 7.641657 5.6165955 4.04530855
## 3 11.102388 10.893795 9.532145 9.121239 -0.6339717 -0.08640669
## 4 13.280661 13.266774 11.905511 11.656079 -1.7178092 -1.37622819
## 5 8.778671 8.142533 7.754116 7.379757 1.7090805 2.69149973
## 6 8.012126 8.500150 7.704892 7.623516 2.7002461 2.24295457
## rnaturalchg2023 rnaturalchg2024 rinternationalmig2021 rinternationalmig2022
## 1 -0.3003771 -0.3371167 0.3578552 1.610804
## 2 4.8064111 4.5671161 1.1898296 3.836984
## 3 1.0596210 1.2832596 1.1087590 4.560717
## 4 -0.3648748 -0.2361226 0.4459219 2.033723
## 5 2.7225553 2.8097688 1.1219035 6.002505
## 6 2.8988432 2.9166447 0.6742426 3.173090
## rinternationalmig2023 rinternationalmig2024 rdomesticmig2021 rdomesticmig2022
## 1 2.549575 3.068113 4.993905 5.575299
## 2 4.610620 5.456972 -4.685721 -7.940841
## 3 7.106383 8.566488 11.557122 9.390073
## 4 1.623627 1.998111 5.257046 6.213769
## 5 7.472986 9.183701 -12.142348 -8.602084
## 6 4.624977 5.603756 1.002830 1.067641
## rdomesticmig2023 rdomesticmig2024 rnetmig2021 rnetmig2022 rnetmig2023
## 1 5.875305 5.0660940 5.351760 7.186103 8.424880
## 2 -6.643317 -5.1115943 -3.495891 -4.103857 -2.032697
## 3 4.741629 4.6364726 12.665881 13.950789 11.848012
## 4 5.826553 4.3733031 5.702968 8.247492 7.450181
## 5 -8.782848 -6.0937335 -11.020444 -2.599579 -1.309862
## 6 1.079110 0.9144239 1.677073 4.240731 5.704088
## rnetmig2024
## 1 8.134207
## 2 0.345378
## 3 13.202961
## 4 6.371414
## 5 3.089967
## 6 6.518180
# Sort by population (ascending order)
state_population_data <- state_population_data[order(state_population_data$name),]
# View the sorted dataframe
#print(state_population_data)
#head(state_population_data)
# Convert first letter of each word to uppercase using str_to_title
state_population_data$name <- str_to_title(state_population_data$name)
# View the updated DataFrame
#print(state_population_data)
#head(state_population_data)
# Convert column names to lowercase
colnames(election_results_2020_data ) <- tolower(colnames(election_results_2020_data ))
# View the modified data frame
#print(election_results_2020_data)
head(election_results_2020_data)
## state state_abr trump_pct biden_pct trump_vote biden_vote trump_win
## 1 Alaska AK 53.1 43.0 189543 153502 1
## 2 Hawaii HI 34.3 63.7 196864 366130 0
## 3 Washington WA 39.0 58.4 1584651 2369612 0
## 4 Oregon OR 40.7 56.9 958448 1340383 0
## 5 California CA 34.3 63.5 5982194 11082293 0
## 6 Idaho ID 63.9 33.1 554128 287031 1
## biden_win
## 1 0
## 2 1
## 3 1
## 4 1
## 5 1
## 6 0
# Convert first letter of each word to uppercase using str_to_title
election_results_2020_data$state <- str_to_title(election_results_2020_data$state)
# View the updated DataFrame
#print(election_results_2020_data)
head(election_results_2020_data)
## state state_abr trump_pct biden_pct trump_vote biden_vote trump_win
## 1 Alaska AK 53.1 43.0 189543 153502 1
## 2 Hawaii HI 34.3 63.7 196864 366130 0
## 3 Washington WA 39.0 58.4 1584651 2369612 0
## 4 Oregon OR 40.7 56.9 958448 1340383 0
## 5 California CA 34.3 63.5 5982194 11082293 0
## 6 Idaho ID 63.9 33.1 554128 287031 1
## biden_win
## 1 0
## 2 1
## 3 1
## 4 1
## 5 1
## 6 0
# Checking datatypes
#str(IIJA_funding_data)
#str(election_results_2020_data)
###str(state_population_data)
head(election_results_2020_data)
## state state_abr trump_pct biden_pct trump_vote biden_vote trump_win
## 1 Alaska AK 53.1 43.0 189543 153502 1
## 2 Hawaii HI 34.3 63.7 196864 366130 0
## 3 Washington WA 39.0 58.4 1584651 2369612 0
## 4 Oregon OR 40.7 56.9 958448 1340383 0
## 5 California CA 34.3 63.5 5982194 11082293 0
## 6 Idaho ID 63.9 33.1 554128 287031 1
## biden_win
## 1 0
## 2 1
## 3 1
## 4 1
## 5 1
## 6 0
# Convert 'state' column from integer to character
state_population_data$name <- as.character(state_population_data$name)
###str(state_population_data)
head(election_results_2020_data)
## state state_abr trump_pct biden_pct trump_vote biden_vote trump_win
## 1 Alaska AK 53.1 43.0 189543 153502 1
## 2 Hawaii HI 34.3 63.7 196864 366130 0
## 3 Washington WA 39.0 58.4 1584651 2369612 0
## 4 Oregon OR 40.7 56.9 958448 1340383 0
## 5 California CA 34.3 63.5 5982194 11082293 0
## 6 Idaho ID 63.9 33.1 554128 287031 1
## biden_win
## 1 0
## 2 1
## 3 1
## 4 1
## 5 1
## 6 0
# Merge on 'state_name' from df1 and 'state' from df2
result_df <- merge(IIJA_funding_data, state_population_data, by.x = "state_name", by.y = "name")
# View the merged dataframe
#print(result_df)
#head(result_df)
# Merge on 'state_name' from df1 and 'state' from df2
result_df <- merge(IIJA_funding_data, state_population_data, by.x = "state_name", by.y = "name")
# View the merged dataframe
#print(result_df)
#head(result_df)
# Merge on 'state_name' from df1 and 'state' from df2
merged_df <- merge(result_df, election_results_2020_data, by.x = "state_name", by.y = "state")
# View the merged dataframe
#print(merged_df)
#head(merged_df)
# Select specific columns
new_df <- merged_df %>% select(state_name, funding_billions, popestimate2020, trump_win, biden_win)
# View the new data frame
#print(new_df)
head(new_df)
## state_name funding_billions popestimate2020 trump_win biden_win
## 1 Alabama 3.0 5033094 1 0
## 2 Alaska 3.7 733017 1 0
## 3 Arizona 3.5 7187135 0 1
## 4 Arkansas 2.8 3014546 1 0
## 5 California 18.4 39521958 0 1
## 6 Colorado 3.2 5787129 0 1
# Add a new column
new_df <- new_df %>%
mutate(funding_per_state_percentage = round((funding_billions / sum(funding_billions)) * 100, 2))
# Print the merged data frame
#print(new_df)
head(new_df)
## state_name funding_billions popestimate2020 trump_win biden_win
## 1 Alabama 3.0 5033094 1 0
## 2 Alaska 3.7 733017 1 0
## 3 Arizona 3.5 7187135 0 1
## 4 Arkansas 2.8 3014546 1 0
## 5 California 18.4 39521958 0 1
## 6 Colorado 3.2 5787129 0 1
## funding_per_state_percentage
## 1 1.57
## 2 1.94
## 3 1.83
## 4 1.47
## 5 9.64
## 6 1.68
# Add a new column
new_df <- new_df %>%
mutate(population_per_state_percentage = round((popestimate2020 / sum(popestimate2020)) * 100, 2))
# Print the merged data frame
#print(new_df)
head(new_df)
## state_name funding_billions popestimate2020 trump_win biden_win
## 1 Alabama 3.0 5033094 1 0
## 2 Alaska 3.7 733017 1 0
## 3 Arizona 3.5 7187135 0 1
## 4 Arkansas 2.8 3014546 1 0
## 5 California 18.4 39521958 0 1
## 6 Colorado 3.2 5787129 0 1
## funding_per_state_percentage population_per_state_percentage
## 1 1.57 1.52
## 2 1.94 0.22
## 3 1.83 2.17
## 4 1.47 0.91
## 5 9.64 11.96
## 6 1.68 1.75
# Add a new columns for testing criteria
new_df <- new_df %>%
mutate(population_per_state_plus5percent = population_per_state_percentage + (population_per_state_percentage * 0.05))
new_df <- new_df %>%
mutate(population_per_state_minus5percent = population_per_state_percentage - (population_per_state_percentage * 0.05))
new_df <- new_df %>%
mutate(Equitable = ifelse(funding_per_state_percentage > population_per_state_minus5percent &
funding_per_state_percentage < population_per_state_plus5percent,
"Yes", "No"))
new_df <- new_df %>%
mutate(bias = ifelse(biden_win == 1 &
funding_per_state_percentage > population_per_state_plus5percent,
"Yes",
"No"))
# Print the merged data frame
head(new_df)
## state_name funding_billions popestimate2020 trump_win biden_win
## 1 Alabama 3.0 5033094 1 0
## 2 Alaska 3.7 733017 1 0
## 3 Arizona 3.5 7187135 0 1
## 4 Arkansas 2.8 3014546 1 0
## 5 California 18.4 39521958 0 1
## 6 Colorado 3.2 5787129 0 1
## funding_per_state_percentage population_per_state_percentage
## 1 1.57 1.52
## 2 1.94 0.22
## 3 1.83 2.17
## 4 1.47 0.91
## 5 9.64 11.96
## 6 1.68 1.75
## population_per_state_plus5percent population_per_state_minus5percent
## 1 1.5960 1.4440
## 2 0.2310 0.2090
## 3 2.2785 2.0615
## 4 0.9555 0.8645
## 5 12.5580 11.3620
## 6 1.8375 1.6625
## Equitable bias
## 1 Yes No
## 2 No No
## 3 No No
## 4 No No
## 5 No No
## 6 Yes No
The following new fields are created to analyze whether funding allocation is equitable and to detect potential political bias in how resources are distributed.
These fields define an acceptable funding range (+/- 5%) around each state’s population percentage.
If funding is within ±5% of the population share, it is considered fair (equitable).
If funding falls outside this range, the state may be overfunded or underfunded.
population_per_state_plus5percent → Upper threshold (+5% of population share).
population_per_state_minus5percent → Lower threshold (-5% of population share).
This field determines whether a state’s funding is proportional to its population based on the ±5% rule.
If funding falls within the ±5% range, the state is labeled “Yes” (Equitable).
If funding falls outside the range, it is labeled “No” (Inequitable).
This ensures we have an objective way to measure fairness in funding allocation.
This field identifies political bias in funding allocation, specifically if Democratic (Biden-won) states received more than the fair share in funding.
Why?
A state is considered biased (“Yes”) if:
The state voted for Biden (biden_win == 1)
The state received funding more than equitable range
Otherwise, the state is labeled as “No” (No bias detected).
This helps detect whether Democratic states were treated more favorably compared to Republican states.
This chart compares funding per state percentage to population per state percentage. If funding is fair, bars should align with the population.
ggplot(new_df, aes(x = reorder(state_name, population_per_state_percentage),
y = funding_per_state_percentage,
fill = Equitable)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_manual(values = c("Yes" = "green", "No" = "orange")) +
labs(title = "Funding Allocation vs. Population Percentage",
x = "State",
y = "Funding Percentage",
fill = "Equitable") +
theme_minimal()
Interpretation:
Green bars (“Yes”) → States where funding aligns with population.
Yellow bars (“No”) → States overfunded or underfunded, indicating inequity.
If many states are yellow, the allocation is not equitable.
If funding is fair, points should align in a linear trend.
ggplot(new_df, aes(x = population_per_state_percentage,
y = funding_per_state_percentage,
color = Equitable)) +
geom_point(size = 4, alpha = 0.7) +
geom_smooth(method = "lm", color = "black", linetype = "dashed") +
scale_color_manual(values = c("Yes" = "green", "No" = "orange")) +
labs(title = "Funding Allocation vs. Population Percentage",
x = "Population Percentage",
y = "Funding Percentage",
color = "Equitable") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Interpretation:
A strong trend line suggests fair allocation.
Scattered points with many “No” (yellow) indicate funding was not proportional.
According to the chart below, about 80% of the states have inequitable allocation.
Does the allocation favor the political interests of the Biden administration?
No, it doesn’t serve the political interests of the Biden administration
ggplot(new_df, aes(x = reorder(state_name, population_per_state_percentage),
y = funding_per_state_percentage,
fill = bias)) +
geom_bar(stat = "identity") +
coord_flip() +
scale_fill_manual(values = c("Yes" = "orange", "No" = "green")) +
labs(title = "Funding Allocation vs. Population Percentage",
x = "State",
y = "Funding Percentage",
fill = "bias") +
theme_minimal()
# Define the file path with filename and extension
file_path <- "C:/Users/Uzma/Downloads/new_df.csv"
# Write dataframe to CSV
write.csv(new_df, file = file_path, row.names = FALSE)
# Confirm that the file was saved
print("File saved successfully!")
## [1] "File saved successfully!"
# Select specific columns
group_df <- new_df %>% select(state_name, funding_billions, popestimate2020, trump_win, biden_win)
# View the new data frame
###print(group_df)
# Calculate total funding and population
total_funding <- sum(group_df$funding_billions, na.rm = TRUE)
total_population <- sum(group_df$popestimate2020, na.rm = TRUE)
# Create a new table with grouped states and percentages
grouped_table <- group_df %>%
group_by(trump_win, biden_win) %>%
summarize(
trump_funding_percentage = round(sum(ifelse(trump_win == 1, funding_billions, 0), na.rm = TRUE) / total_funding * 100, 2),
biden_funding_percentage = round(sum(ifelse(biden_win == 1, funding_billions, 0), na.rm = TRUE) / total_funding * 100, 2),
trump_population_percentage = round(sum(ifelse(trump_win == 1, popestimate2020, 0), na.rm = TRUE) / total_population * 100, 2),
biden_population_percentage = round(sum(ifelse(biden_win == 1, popestimate2020, 0), na.rm = TRUE) / total_population * 100, 2)
) %>%
ungroup()
## `summarise()` has grouped output by 'trump_win'. You can override using the
## `.groups` argument.
# Print the new grouped table with rounded percentages
print(grouped_table)
## # A tibble: 2 × 6
## trump_win biden_win trump_funding_percentage biden_funding_percentage
## <int> <int> <dbl> <dbl>
## 1 0 1 0 53.6
## 2 1 0 46.4 0
## # ℹ 2 more variables: trump_population_percentage <dbl>,
## # biden_population_percentage <dbl>
head(grouped_table)
## # A tibble: 2 × 6
## trump_win biden_win trump_funding_percentage biden_funding_percentage
## <int> <int> <dbl> <dbl>
## 1 0 1 0 53.6
## 2 1 0 46.4 0
## # ℹ 2 more variables: trump_population_percentage <dbl>,
## # biden_population_percentage <dbl>
# Define the file path with filename and extension
file_path <- "C:/Users/Uzma/Downloads/grouped_table.csv"
# Write dataframe to CSV
write.csv(grouped_table, file = file_path, row.names = FALSE)
# Confirm that the file was saved
print("File saved successfully!")
## [1] "File saved successfully!"
No, it does not favor the the political interests of the Biden administration.
# Convert data to long format for easy visualization
grouped_long <- grouped_table %>%
pivot_longer(cols = c(trump_funding_percentage, biden_funding_percentage,
trump_population_percentage, biden_population_percentage),
names_to = "Category",
values_to = "Percentage")
# Create labels for clarity
grouped_long$Group <- ifelse(grepl("trump", grouped_long$Category), "Trump-Won States", "Biden-Won States")
grouped_long$Metric <- ifelse(grepl("funding", grouped_long$Category), "Funding Allocation", "Population Percentage")
# Create the bar chart
ggplot(grouped_long, aes(x = Group, y = Percentage, fill = Metric)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Funding Allocation" = "green", "Population Percentage" = "orange")) +
labs(title = "Funding Allocation vs. Population Share by Political Affiliation",
x = "Political Affiliation (2020 Election)",
y = "Percentage of Total",
fill = "Category") +
theme_minimal()
Analysis of the Bar Chart
If funding allocation closely matches population share, then the distribution is likely fair.
If Biden states receive significantly more funding than their population share, it suggests possible bias in allocation.
If Trump states receive less funding despite a larger population share, it may indicate under funding relative to need.
This scatter plot shows whether Biden states received disproportionately more funding.
ggplot(grouped_table, aes(x = trump_population_percentage, y = trump_funding_percentage, color = "Trump States")) +
geom_point(size = 4) +
geom_smooth(method = "lm", linetype = "dashed", color = "red") +
geom_point(aes(x = biden_population_percentage, y = biden_funding_percentage, color = "Biden States"), size = 4) +
geom_smooth(aes(x = biden_population_percentage, y = biden_funding_percentage),
method = "lm", linetype = "dashed", color = "blue") +
scale_color_manual(values = c("Trump States" = "red", "Biden States" = "blue")) +
labs(title = "Funding Allocation vs. Population Share for Trump and Biden States",
x = "Population Share (%)",
y = "Funding Share (%)",
color = "Political Group") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
When the U.S. government passed the Infrastructure Investment and Jobs Act (IIJA), it promised to distribute funding fairly to support all states. However, was this promise kept? By analyzing funding allocations alongside state populations and election results, we discovered that about 80% of states received either more or less than their fair share based on population. To investigate potential bias, we checked whether Biden-won states received disproportionately higher funding. The results? While the distribution was inequitable, there was no clear pattern favoring Democratic states—both Republican and Democratic states experienced funding disparities. This suggests that funding decisions may be influenced by other factors beyond politics, such as infrastructure needs or economic priorities.
The federal infrastructure funding distribution was inequitable, but no clear political bias was found in favor of Biden-won states.