Data Prep
# overview of data
# unique_values_year_and_quarter <- unique(df_mortality$year_and_quarter)
# unique_values_cause_of_death <- unique(df_mortality$cause_of_death)
# unique_values_time_period <- unique(df_mortality$time_period)
# unique_values_rate_type <- unique(df_mortality$rate_type)
# unique_values_cause_of_death
# unique_values_year_and_quarter
# unique_values_time_period
# unique_values_rate_type
df_gun <- df_mortality[df_mortality$cause_of_death == "Firearm-related injury", ]
df_gun <- df_gun[df_gun$rate_type == "Crude", ]
df_gun <- df_gun[df_gun$time_period == "12 months ending with quarter", ]
df_gun <- data.frame(df_gun, row.names = NULL)
state_abbreviations <- c(AL = "alabama", AK = "alaska", AZ = "arizona", AR = "arkansas", CA = "california",
CO = "colorado", CT = "connecticut", DE = "delaware", FL = "florida", GA = "georgia",
HI = "hawaii", ID = "idaho", IL = "illinois", IN = "indiana", IA = "iowa",
KS = "kansas", KY = "kentucky", LA = "louisiana", ME = "maine", MD = "maryland",
MA = "massachusetts", MI = "michigan", MN = "minnesota", MS = "mississippi", MO = "missouri",
MT = "montana", NE = "nebraska", NV = "nevada", NH = "new_hampshire", NJ = "new_jersey",
NM = "new_mexico", NY = "new_york", NC = "north_carolina", ND = "north_dakota",
OH = "ohio", OK = "oklahoma", OR = "oregon", PA = "pennsylvania", RI = "rhode_island",
SC = "south_carolina", SD = "south_dakota", TN = "tennessee", TX = "texas", UT = "utah",
VT = "vermont", VA = "virginia", WA = "washington", WV = "west_virginia", WI = "wisconsin",
WY = "wyoming", DC = "district_of_columbia")
# Loop through each state abbreviation
for (abbrev in names(state_abbreviations)) {
pattern <- paste0("rate_", state_abbreviations[abbrev])
colnames(df_gun) <- gsub(pattern, abbrev, colnames(df_gun))
}
# Data type conversion: columns 6 to 69 are converted to double.
df_gun <- df_gun %>%
mutate(across(.cols = 6:69, .fns = as.double))
df_gun <- df_gun %>%
mutate(year = substr(year_and_quarter, 1, 4))%>%# Extract year
group_by(year)
df_gun_2022 <- df_gun %>%
filter(year_and_quarter == "2022 Q4")
# Piving Long
df_gun_2022_long <- df_gun_2022 %>%
pivot_longer(
cols = c(AK:WY), # Specify the range of columns to pivot
names_to = "state", # New column name
values_to = "rate" # New column for values
)
final_df <- df_gun_2022_long %>%
select(year, state, rate)
# Add gun law rank to final_df
final_df <- final_df %>%
mutate(
gun_laws = case_when(
state %in% c("AK", "AL", "AR", "AZ", "GA", "IA", "ID", "IN", "KS", "KY", "LA", "ME", "MO", "MS", "MT", "ND", "NH", "OH", "OK", "SC", "SD", "TN", "TX", "UT", "WV", "WY") ~ "1",
state == "WI" ~ "2",
state %in% c("FL", "MI", "MN", "NC", "NE", "NM", "NV", "VT") ~ "3",
state %in% c("CO", "DE", "OR", "PA", "RI", "VA", "WA") ~ "4",
state %in% c("CA", "CT", "DC", "HI", "IL", "MA", "MD", "NJ", "NY") ~ "5",
TRUE ~ NA_character_
)
)
final_df$gun_laws <- as.numeric(final_df$gun_laws)
final_df$year <- as.numeric(final_df$year)
final_df
## # A tibble: 51 × 4
## # Groups: year [1]
## year state rate gun_laws
## <dbl> <chr> <dbl> <dbl>
## 1 2022 AK 22.4 1
## 2 2022 AL 25.2 1
## 3 2022 AR 21.9 1
## 4 2022 AZ 20.9 1
## 5 2022 CA 8.9 5
## 6 2022 CO 17.7 4
## 7 2022 CT 6.9 5
## 8 2022 DC 22.9 5
## 9 2022 DE 12.2 4
## 10 2022 FL 14.5 3
## # ℹ 41 more rows
# Check the structure and the first few rows of the final dataframe
#final_df_sorted <- final_df %>%
#arrange(desc(rate))
#str(final_df)
#head(final_df_sorted)
#tail(final_df_sorted)