2024-12-21
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ── ## ✔ dplyr 1.1.4 ✔ readr 2.1.5 ## ✔ forcats 1.0.0 ✔ stringr 1.5.1 ## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1 ## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1 ## ✔ purrr 1.0.2 ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## ✖ dplyr::filter() masks stats::filter() ## ✖ dplyr::lag() masks stats::lag() ## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) library(ggplot2) library(dplyr) library(stringr) library(tools) library(stringdist)
## Warning: package 'stringdist' was built under R version 4.3.3
## ## Attaching package: 'stringdist' ## ## The following object is masked from 'package:tidyr': ## ## extract
IIJA_funding_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/IIJA%20FUNDING%20AS%20OF%20MARCH%202023.csv") state_population_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/State_Population_Data.csv") election_results_2020_data <- read.csv("https://raw.githubusercontent.com/uzmabb182/Data_608/refs/heads/main/Week%201/2020_Presidential_Election_Results.csv")
#print(IIJA_funding_data)
# Check column names colnames(IIJA_funding_data)
## [1] "State..Teritory.or.Tribal.Nation" "Total..Billions."
# Renaming the columns IIJA_funding_data <- IIJA_funding_data %>% rename(State_Name = State..Teritory.or.Tribal.Nation, Funding_Billions = Total..Billions.) #print(IIJA_funding_data)
# Convert column names to lowercase colnames(IIJA_funding_data ) <- tolower(colnames(IIJA_funding_data )) # View the modified data frame #print(IIJA_funding_data )
# Sort by population (ascending order) IIJA_funding_data <- IIJA_funding_data[order(IIJA_funding_data$state_name),] # View the sorted dataframe #print(IIJA_funding_data)
# Replace "Deleware" with "Delaware" conditionally IIJA_funding_data$state_name[IIJA_funding_data$state_name == "Deleware"] <- "Delaware" # View the updated DataFrame #print(IIJA_funding_data)
# Add index column using mutate IIJA_funding_data <- IIJA_funding_data %>% mutate(index = row_number()) # View the updated DataFrame #print(IIJA_funding_data)
# Convert first letter of each word to uppercase using str_to_title IIJA_funding_data$state_name <- str_to_title(IIJA_funding_data$state_name) # View the updated DataFrame #print(IIJA_funding_data)
# Convert column names to lowercase colnames(state_population_data ) <- tolower(colnames(state_population_data )) # View the modified data frame #print(state_population_data)
# Sort by population (ascending order) state_population_data <- state_population_data[order(state_population_data$name),] # View the sorted dataframe #print(state_population_data)
# Convert first letter of each word to uppercase using str_to_title state_population_data$name <- str_to_title(state_population_data$name) # View the updated DataFrame #print(state_population_data)
# Convert column names to lowercase colnames(election_results_2020_data ) <- tolower(colnames(election_results_2020_data )) # View the modified data frame #print(election_results_2020_data)
# Convert first letter of each word to uppercase using str_to_title election_results_2020_data$state <- str_to_title(election_results_2020_data$state) # View the updated DataFrame #print(election_results_2020_data)
# Checking datatypes #str(IIJA_funding_data) #str(election_results_2020_data) ###str(state_population_data)
# Convert 'state' column from integer to character state_population_data$name <- as.character(state_population_data$name) ###str(state_population_data)
# Merge on 'state_name' from df1 and 'state' from df2 result_df <- merge(IIJA_funding_data, state_population_data, by.x = "state_name", by.y = "name") # View the merged dataframe #print(result_df)
# Merge on 'state_name' from df1 and 'state' from df2 result_df <- merge(IIJA_funding_data, state_population_data, by.x = "state_name", by.y = "name") # View the merged dataframe #print(result_df)
# Merge on 'state_name' from df1 and 'state' from df2 merged_df <- merge(result_df, election_results_2020_data, by.x = "state_name", by.y = "state") # View the merged dataframe #print(merged_df)
# Select specific columns new_df <- merged_df %>% select(state_name, funding_billions, popestimate2020, trump_win, biden_win) # View the new data frame #print(new_df)
# Add a new column new_df <- new_df %>% mutate(funding_per_state_percentage = round((funding_billions / sum(funding_billions)) * 100, 2)) # Print the merged data frame #print(new_df)
# Add a new column new_df <- new_df %>% mutate(population_per_state_percentage = round((popestimate2020 / sum(popestimate2020)) * 100, 2)) # Print the merged data frame #print(new_df)
# Add a new columns for testing criteria new_df <- new_df %>% mutate(population_per_state_plus5percent = population_per_state_percentage + (population_per_state_percentage * 0.05)) new_df <- new_df %>% mutate(population_per_state_minus5percent = population_per_state_percentage - (population_per_state_percentage * 0.05)) new_df <- new_df %>% mutate(Equitable = ifelse(funding_per_state_percentage > population_per_state_minus5percent & funding_per_state_percentage < population_per_state_plus5percent, "Yes", "No")) new_df <- new_df %>% mutate(bias = ifelse(biden_win == 1 & funding_per_state_percentage > population_per_state_plus5percent, "Yes", "No")) # Print the merged data frame head(new_df)
## state_name funding_billions popestimate2020 trump_win biden_win ## 1 Alabama 3.0 5033094 1 0 ## 2 Alaska 3.7 733017 1 0 ## 3 Arizona 3.5 7187135 0 1 ## 4 Arkansas 2.8 3014546 1 0 ## 5 California 18.4 39521958 0 1 ## 6 Colorado 3.2 5787129 0 1 ## funding_per_state_percentage population_per_state_percentage ## 1 1.57 1.52 ## 2 1.94 0.22 ## 3 1.83 2.17 ## 4 1.47 0.91 ## 5 9.64 11.96 ## 6 1.68 1.75 ## population_per_state_plus5percent population_per_state_minus5percent ## 1 1.5960 1.4440 ## 2 0.2310 0.2090 ## 3 2.2785 2.0615 ## 4 0.9555 0.8645 ## 5 12.5580 11.3620 ## 6 1.8375 1.6625 ## Equitable bias ## 1 Yes No ## 2 No No ## 3 No No ## 4 No No ## 5 No No ## 6 Yes No
The following new fields are created to analyze whether funding allocation is equitable and to detect potential political bias in how resources are distributed.
These fields define an acceptable funding range (+/- 5%) around each state’s population percentage.
If funding is within ±5% of the population share, it is considered fair (equitable).
If funding falls outside this range, the state may be overfunded or underfunded.
population_per_state_plus5percent → Upper threshold (+5% of population share).
population_per_state_minus5percent → Lower threshold (-5% of population share).
This field determines whether a state’s funding is proportional to its population based on the ±5% rule.
If funding falls within the ±5% range, the state is labeled “Yes” (Equitable).
If funding falls outside the range, it is labeled “No” (Inequitable).
This ensures we have an objective way to measure fairness in funding allocation.
This field identifies political bias in funding allocation, specifically if Democratic (Biden-won) states received more than the fair share in funding.
Why?
A state is considered biased (“Yes”) if:
The state voted for Biden (biden_win == 1)
The state received funding more than equitable range
Otherwise, the state is labeled as “No” (No bias detected).
This helps detect whether Democratic states were treated more favorably compared to Republican states.
This chart compares funding per state percentage to population per state percentage. If funding is fair, bars should align with the population.
ggplot(new_df, aes(x = reorder(state_name, population_per_state_percentage), y = funding_per_state_percentage, fill = Equitable)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_manual(values = c("Yes" = "green", "No" = "orange")) + labs(title = "Funding Allocation vs. Population Percentage", x = "State", y = "Funding Percentage", fill = "Equitable") + theme_minimal()
Interpretation:
Green bars (“Yes”) → States where funding aligns with population.
Yellow bars (“No”) → States overfunded or underfunded, indicating inequity.
If many states are yellow, the allocation is not equitable.
If funding is fair, points should align in a linear trend.
ggplot(new_df, aes(x = population_per_state_percentage, y = funding_per_state_percentage, color = Equitable)) + geom_point(size = 4, alpha = 0.7) + geom_smooth(method = "lm", color = "black", linetype = "dashed") + scale_color_manual(values = c("Yes" = "green", "No" = "orange")) + labs(title = "Funding Allocation vs. Population Percentage", x = "Population Percentage", y = "Funding Percentage", color = "Equitable") + theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
Interpretation:
A strong trend line suggests fair allocation.
Scattered points with many “No” (yellow) indicate funding was not proportional.
According to the chart below, about 80% of the states have inequitable allocation.
Does the allocation favor the political interests of the Biden administration?
No, it doesn’t serve the political interests of the Biden administration
ggplot(new_df, aes(x = reorder(state_name, population_per_state_percentage), y = funding_per_state_percentage, fill = bias)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_manual(values = c("Yes" = "orange", "No" = "green")) + labs(title = "Funding Allocation vs. Population Percentage", x = "State", y = "Funding Percentage", fill = "bias") + theme_minimal()
# Define the file path with filename and extension file_path <- "C:/Users/Uzma/Downloads/new_df.csv" # Write dataframe to CSV write.csv(new_df, file = file_path, row.names = FALSE) # Confirm that the file was saved print("File saved successfully!")
## [1] "File saved successfully!"
# Select specific columns group_df <- new_df %>% select(state_name, funding_billions, popestimate2020, trump_win, biden_win) # View the new data frame ###print(group_df)
# Calculate total funding and population total_funding <- sum(group_df$funding_billions, na.rm = TRUE) total_population <- sum(group_df$popestimate2020, na.rm = TRUE) # Create a new table with grouped states and percentages grouped_table <- group_df %>% group_by(trump_win, biden_win) %>% summarize( trump_funding_percentage = round(sum(ifelse(trump_win == 1, funding_billions, 0), na.rm = TRUE) / total_funding * 100, 2), biden_funding_percentage = round(sum(ifelse(biden_win == 1, funding_billions, 0), na.rm = TRUE) / total_funding * 100, 2), trump_population_percentage = round(sum(ifelse(trump_win == 1, popestimate2020, 0), na.rm = TRUE) / total_population * 100, 2), biden_population_percentage = round(sum(ifelse(biden_win == 1, popestimate2020, 0), na.rm = TRUE) / total_population * 100, 2) ) %>% ungroup()
## `summarise()` has grouped output by 'trump_win'. You can override using the ## `.groups` argument.
# Print the new grouped table with rounded percentages print(grouped_table)
## # A tibble: 2 × 6 ## trump_win biden_win trump_funding_percentage biden_funding_percentage ## <int> <int> <dbl> <dbl> ## 1 0 1 0 53.6 ## 2 1 0 46.4 0 ## # ℹ 2 more variables: trump_population_percentage <dbl>, ## # biden_population_percentage <dbl>
# Define the file path with filename and extension file_path <- "C:/Users/Uzma/Downloads/grouped_table.csv" # Write dataframe to CSV write.csv(grouped_table, file = file_path, row.names = FALSE) # Confirm that the file was saved print("File saved successfully!")
## [1] "File saved successfully!"
No, it does not favor the the political interests of the Biden administration.
# Convert data to long format for easy visualization grouped_long <- grouped_table %>% pivot_longer(cols = c(trump_funding_percentage, biden_funding_percentage, trump_population_percentage, biden_population_percentage), names_to = "Category", values_to = "Percentage") # Create labels for clarity grouped_long$Group <- ifelse(grepl("trump", grouped_long$Category), "Trump-Won States", "Biden-Won States") grouped_long$Metric <- ifelse(grepl("funding", grouped_long$Category), "Funding Allocation", "Population Percentage") # Create the bar chart ggplot(grouped_long, aes(x = Group, y = Percentage, fill = Metric)) + geom_bar(stat = "identity", position = "dodge") + scale_fill_manual(values = c("Funding Allocation" = "green", "Population Percentage" = "orange")) + labs(title = "Funding Allocation vs. Population Share by Political Affiliation", x = "Political Affiliation (2020 Election)", y = "Percentage of Total", fill = "Category") + theme_minimal()
Analysis of the Bar Chart
If funding allocation closely matches population share, then the distribution is likely fair.
If Biden states receive significantly more funding than their population share, it suggests possible bias in allocation.
If Trump states receive less funding despite a larger population share, it may indicate under funding relative to need.
This scatter plot shows whether Biden states received disproportionately more funding.
ggplot(grouped_table, aes(x = trump_population_percentage, y = trump_funding_percentage, color = "Trump States")) + geom_point(size = 4) + geom_smooth(method = "lm", linetype = "dashed", color = "red") + geom_point(aes(x = biden_population_percentage, y = biden_funding_percentage, color = "Biden States"), size = 4) + geom_smooth(aes(x = biden_population_percentage, y = biden_funding_percentage), method = "lm", linetype = "dashed", color = "blue") + scale_color_manual(values = c("Trump States" = "red", "Biden States" = "blue")) + labs(title = "Funding Allocation vs. Population Share for Trump and Biden States", x = "Population Share (%)", y = "Funding Share (%)", color = "Political Group") + theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## `geom_smooth()` using formula = 'y ~ x'
## Warning in qt((1 - level)/2, df): NaNs produced
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning ## -Inf ## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning ## -Inf
When the U.S. government passed the Infrastructure Investment and Jobs Act (IIJA), it promised to distribute funding fairly to support all states. However, was this promise kept? By analyzing funding allocations alongside state populations and election results, we discovered that about 80% of states received either more or less than their fair share based on population. To investigate potential bias, we checked whether Biden-won states received disproportionately higher funding. The results? While the distribution was inequitable, there was no clear pattern favoring Democratic states—both Republican and Democratic states experienced funding disparities. This suggests that funding decisions may be influenced by other factors beyond politics, such as infrastructure needs or economic priorities.
The federal infrastructure funding distribution was inequitable, but no clear political bias was found in favor of Biden-won states.