## retrieving firearm data from open data api
firearm_mortality_data <- read.csv("https://data.cdc.gov/resource/489q-934x.csv")
head(firearm_mortality_data)
## year_and_quarter time_period
## 1 2021 Q1 12 months ending with quarter
## 2 2021 Q1 12 months ending with quarter
## 3 2021 Q1 12 months ending with quarter
## 4 2021 Q1 12 months ending with quarter
## 5 2021 Q1 12 months ending with quarter
## 6 2021 Q1 12 months ending with quarter
## cause_of_death rate_type unit
## 1 All causes Age-adjusted Deaths per 100,000
## 2 Alzheimer disease Age-adjusted Deaths per 100,000
## 3 COVID-19 Age-adjusted Deaths per 100,000
## 4 Cancer Age-adjusted Deaths per 100,000
## 5 Chronic liver disease and cirrhosis Age-adjusted Deaths per 100,000
## 6 Chronic lower respiratory diseases Age-adjusted Deaths per 100,000
## rate_overall rate_sex_female rate_sex_male rate_age_1_4 rate_age_5_14
## 1 866.3 716.3 1040.4 NA NA
## 2 32.1 36.8 24.8 NA NA
## 3 120.7 94.0 153.9 NA NA
## 4 142.0 122.8 167.7 NA NA
## 5 13.9 9.8 18.3 NA NA
## 6 33.8 30.9 37.5 NA NA
## rate_age_15_24 rate_age_25_34 rate_age_35_44 rate_age_45_54 rate_age_55_64
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## rate_65_74 rate_age_75_84 rate_age_85_plus rate_alaska rate_alabama
## 1 NA NA NA 779.2 1123.4
## 2 NA NA NA 28.2 51.2
## 3 NA NA NA 44.4 160.2
## 4 NA NA NA 143.0 160.5
## 5 NA NA NA 23.6 17.2
## 6 NA NA NA 28.4 50.4
## rate_arkansas rate_arizona rate_california rate_colorado rate_connecticut
## 1 1043.1 853.2 767.7 760.9 784.3
## 2 44.1 31.3 41.0 36.0 20.6
## 3 130.6 142.1 128.5 83.6 138.9
## 4 160.7 125.5 128.5 123.4 132.1
## 5 15.9 18.2 14.9 18.1 12.7
## 6 57.5 34.5 26.3 36.6 22.5
## rate_district_of_columbia rate_delaware rate_florida rate_georgia rate_hawaii
## 1 904.8 866.2 757.3 957.4 585.9
## 2 11.1 35.4 19.7 45.9 22.4
## 3 148.6 106.8 80.6 130.5 20.7
## 4 136.0 149.1 134.4 145.8 124.1
## 5 9.0 10.6 13.4 12.5 8.6
## 6 18.9 32.0 31.2 39.5 16.9
## rate_iowa rate_idaho rate_illinois rate_indiana rate_kansas rate_kentucky
## 1 848.3 789.6 861.5 981.8 901.2 1064.2
## 2 29.7 37.0 28.3 32.0 24.2 31.7
## 3 119.9 79.2 121.3 132.8 121.8 109.6
## 4 145.4 135.0 148.2 160.6 148.1 177.0
## 5 11.9 16.0 11.5 14.4 15.4 17.5
## 6 35.9 36.8 31.3 49.5 40.9 52.7
## rate_louisiana rate_massachusetts rate_maryland rate_maine rate_michigan
## 1 1080.3 768.0 847.9 785.3 911.2
## 2 45.3 17.8 16.0 27.6 36.3
## 3 146.7 126.0 111.0 36.1 101.7
## 4 158.1 132.8 139.5 157.5 155.5
## 5 11.1 11.5 9.6 14.2 14.5
## 6 38.9 25.2 25.3 36.8 38.5
## rate_minnesota rate_missouri rate_mississippi rate_montana
## 1 737.8 956.2 1197.0 836.6
## 2 32.9 33.9 58.3 22.6
## 3 85.1 113.0 171.5 91.8
## 4 136.4 157.4 172.0 136.0
## 5 14.0 12.9 16.3 21.6
## 6 28.3 43.4 56.7 38.8
## rate_north_carolina rate_north_dakota rate_nebraska rate_new_hampshire
## 1 893.7 835.5 822.7 744.9
## 2 36.8 37.6 32.0 25.8
## 3 99.3 132.8 101.3 60.9
## 4 146.0 136.7 143.3 143.8
## 5 13.1 17.9 14.0 12.5
## 6 35.4 32.1 40.2 30.5
## rate_new_jersey rate_new_mexico rate_nevada rate_new_york rate_ohio
## 1 853.1 952.7 895.2 817.6 983.2
## 2 22.2 25.9 27.5 14.0 37.5
## 3 172.6 144.1 133.4 174.2 122.2
## 4 129.8 130.0 143.1 125.7 157.6
## 5 9.7 36.8 16.6 8.3 13.1
## 6 22.8 38.0 40.6 22.9 40.4
## rate_oklahoma rate_oregon rate_pennsylvania rate_rhode_island
## 1 1063.5 751.4 895.1 828.9
## 2 36.6 36.4 23.5 32.0
## 3 157.8 37.1 123.0 138.4
## 4 167.7 146.9 150.8 141.3
## 5 18.9 16.5 10.4 13.7
## 6 56.3 32.4 30.2 24.1
## rate_south_carolina rate_south_dakota rate_tennessee rate_texas rate_utah
## 1 981.9 882.7 1056.8 922.0 771.2
## 2 40.0 37.4 42.8 44.9 41.1
## 3 126.6 145.8 122.5 162.3 68.7
## 4 150.5 147.0 161.9 137.0 116.2
## 5 16.3 29.0 16.8 16.4 10.7
## 6 41.9 33.6 47.6 34.3 30.2
## rate_virginia rate_vermont rate_washington rate_wisconsin rate_west_virginia
## 1 824.8 737.9 714.8 825.8 1096.9
## 2 28.3 34.3 42.0 31.7 35.2
## 3 92.0 21.5 46.5 86.1 94.2
## 4 144.3 151.9 139.8 146.9 176.3
## 5 11.8 12.0 15.0 12.8 17.4
## 6 29.9 33.0 27.5 32.4 56.0
## rate_wyoming
## 1 854.0
## 2 32.1
## 3 84.0
## 4 138.0
## 5 29.1
## 6 51.2
## one column didn't follow the naming conventions
firearm_mortality_data <- firearm_mortality_data %>%
rename(rate_age_65_74 = matches("^rate_65_74$"))
## repetitive states columns, so i store them all into one column for ease of use
firearm_mortality_data_long <- firearm_mortality_data %>%
pivot_longer(cols = c(rate_alaska:rate_wyoming),
names_to = "state",
values_to = "mortality_rate")
head(firearm_mortality_data_long)
## # A tibble: 6 × 20
## year_and_quarter time_period cause_of_death rate_type unit rate_overall
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## 2 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## 3 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## 4 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## 5 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## 6 2021 Q1 12 months ending… All causes Age-adju… Deat… 866.
## # ℹ 14 more variables: rate_sex_female <dbl>, rate_sex_male <dbl>,
## # rate_age_1_4 <dbl>, rate_age_5_14 <dbl>, rate_age_15_24 <dbl>,
## # rate_age_25_34 <dbl>, rate_age_35_44 <dbl>, rate_age_45_54 <dbl>,
## # rate_age_55_64 <dbl>, rate_age_65_74 <dbl>, rate_age_75_84 <dbl>,
## # rate_age_85_plus <dbl>, state <chr>, mortality_rate <dbl>
# Filter the dataframe to include only rows where cause_of_death is "Firearm-related injury"
filtered_data <- firearm_mortality_data_long %>%
filter(cause_of_death == "Firearm-related injury")
## removing ditrict of columbia because it is not a state
filtered_data <- filtered_data[filtered_data$state != "district_of_columbia", ]
## remove the rate_ from the states in the state column
filtered_data$state <- gsub("^rate_", "", filtered_data$state)
I created a data frame for gun law rates from everytownresearch.org, which is a scale from 1 to 100 with 1 representing very poor and a minuscule amount of gun laws and 100 representing more restrictive amount of gun laws
gun_law_rates <- data.frame(
state = c("california", "new_york", "illinois", "connecticut", "hawaii",
"massachusetts", "new_jersey", "maryland", "washington", "oregon", "colorado", "delaware", "rhode_island", "minnesota", "virginia",
"new_mexico", "pennsylvania", "vermont", "nevada", "michigan",
"wisconsin", "florida", "north_carolina", "nebraska", "maine",
"louisiana", "west_virginia", "south_carolina", "tennessee",
"indiana", "iowa", "texas", "ohio", "alabama", "utah",
"north_dakota", "kansas", "missouri", "new_hampshire", "kentucky", "alaska", "arizona", "oklahoma", "wyoming", "south_dakota", "georgia", "montana", "idaho", "mississippi", "arkansas"),
gun_law_rate = c(89.5, 83.5, 83, 82.5, 82.5,
81, 79, 75, 69, 68,
63, 61.5, 57.5, 53.5, 49,
40.5, 40, 39.5, 35, 35,
28, 27.5, 25, 25, 20.5,
20.5, 18.5, 18, 16.5, 16.5,
15.5, 13.5, 13, 12.5, 12,
11.5, 9.5, 9, 9, 9,
9, 8.5, 7.5, 6.5, 5.5, 5,
5, 5, 3, 3)
)
# View the gun law rates data frame
head(gun_law_rates)
## state gun_law_rate
## 1 california 89.5
## 2 new_york 83.5
## 3 illinois 83.0
## 4 connecticut 82.5
## 5 hawaii 82.5
## 6 massachusetts 81.0
merged_data <- merge(filtered_data, gun_law_rates, by = "state", all.x = TRUE)
# View the merged data
head(merged_data)
## state year_and_quarter time_period cause_of_death
## 1 alabama 2021 Q1 12 months ending with quarter Firearm-related injury
## 2 alabama 2022 Q2 3-month period Firearm-related injury
## 3 alabama 2023 Q1 3-month period Firearm-related injury
## 4 alabama 2022 Q3 3-month period Firearm-related injury
## 5 alabama 2021 Q1 3-month period Firearm-related injury
## 6 alabama 2023 Q1 3-month period Firearm-related injury
## rate_type unit rate_overall rate_sex_female rate_sex_male
## 1 Age-adjusted Deaths per 100,000 14.0 4.0 24.4
## 2 Crude Deaths per 100,000 15.0 4.3 25.9
## 3 Age-adjusted Deaths per 100,000 13.4 4.0 23.1
## 4 Crude Deaths per 100,000 15.0 4.3 25.8
## 5 Age-adjusted Deaths per 100,000 13.6 4.0 23.4
## 6 Crude Deaths per 100,000 13.7 4.0 23.5
## rate_age_1_4 rate_age_5_14 rate_age_15_24 rate_age_25_34 rate_age_35_44
## 1 NA NA NA NA NA
## 2 1.0 1.5 22.5 24.0 18.3
## 3 NA NA NA NA NA
## 4 0.8 1.5 21.7 24.1 19.1
## 5 NA NA NA NA NA
## 6 0.5 1.8 19.9 20.3 17.3
## rate_age_45_54 rate_age_55_64 rate_age_65_74 rate_age_75_84 rate_age_85_plus
## 1 NA NA NA NA NA
## 2 15.2 13.9 12.4 17.7 16.3
## 3 NA NA NA NA NA
## 4 15.1 13.6 12.4 16.6 20.0
## 5 NA NA NA NA NA
## 6 14.2 12.8 12.1 15.4 16.8
## mortality_rate gun_law_rate
## 1 24.6 12.5
## 2 26.5 12.5
## 3 27.9 12.5
## 4 27.6 12.5
## 5 26.4 12.5
## 6 27.4 12.5
# Defining the cut points for the Likert scale categories
cut_points <- c(0, 10, 20, 30, 70, 100)
# Defining the labels for the Likert scale categories
labels <- c("National Failures", "Weak Systems", "Missing Key Laws", "Making Progress", "National Leaders")
# Create a new column for the Likert scale categories
merged_data$likert_category <- cut(merged_data$gun_law_rate, breaks = cut_points, labels = labels, include.lowest = TRUE)
# Replace underscores with spaces in the state names
merged_data$state <- gsub("_", " ", merged_data$state)
head(merged_data)
## state year_and_quarter time_period cause_of_death
## 1 alabama 2021 Q1 12 months ending with quarter Firearm-related injury
## 2 alabama 2022 Q2 3-month period Firearm-related injury
## 3 alabama 2023 Q1 3-month period Firearm-related injury
## 4 alabama 2022 Q3 3-month period Firearm-related injury
## 5 alabama 2021 Q1 3-month period Firearm-related injury
## 6 alabama 2023 Q1 3-month period Firearm-related injury
## rate_type unit rate_overall rate_sex_female rate_sex_male
## 1 Age-adjusted Deaths per 100,000 14.0 4.0 24.4
## 2 Crude Deaths per 100,000 15.0 4.3 25.9
## 3 Age-adjusted Deaths per 100,000 13.4 4.0 23.1
## 4 Crude Deaths per 100,000 15.0 4.3 25.8
## 5 Age-adjusted Deaths per 100,000 13.6 4.0 23.4
## 6 Crude Deaths per 100,000 13.7 4.0 23.5
## rate_age_1_4 rate_age_5_14 rate_age_15_24 rate_age_25_34 rate_age_35_44
## 1 NA NA NA NA NA
## 2 1.0 1.5 22.5 24.0 18.3
## 3 NA NA NA NA NA
## 4 0.8 1.5 21.7 24.1 19.1
## 5 NA NA NA NA NA
## 6 0.5 1.8 19.9 20.3 17.3
## rate_age_45_54 rate_age_55_64 rate_age_65_74 rate_age_75_84 rate_age_85_plus
## 1 NA NA NA NA NA
## 2 15.2 13.9 12.4 17.7 16.3
## 3 NA NA NA NA NA
## 4 15.1 13.6 12.4 16.6 20.0
## 5 NA NA NA NA NA
## 6 14.2 12.8 12.1 15.4 16.8
## mortality_rate gun_law_rate likert_category
## 1 24.6 12.5 Weak Systems
## 2 26.5 12.5 Weak Systems
## 3 27.9 12.5 Weak Systems
## 4 27.6 12.5 Weak Systems
## 5 26.4 12.5 Weak Systems
## 6 27.4 12.5 Weak Systems
selected_data <- merged_data %>% select(state, likert_category)
# Keep only unique combinations of state and Likert category
unique_data <- distinct(selected_data)
# Create a data frame with each state repeated for each Likert category
expanded_data <- unique_data %>%
group_by(likert_category) %>%
mutate(row = row_number()) %>%
ungroup() %>%
pivot_wider(names_from = likert_category, values_from = state)
desired_order <- c("National Leaders", "Making Progress", "Missing Key Laws", "Weak Systems", "National Failures", NA)
# Create the expanded data frame with the specified column order
expanded_data <- expanded_data %>%
select(one_of(desired_order))
## Warning: Unknown columns: `NA`
head(expanded_data)
## # A tibble: 6 × 5
## `National Leaders` `Making Progress` `Missing Key Laws` `Weak Systems`
## <chr> <chr> <chr> <chr>
## 1 california colorado florida alabama
## 2 connecticut delaware louisiana indiana
## 3 hawaii michigan maine iowa
## 4 illinois minnesota nebraska north dakota
## 5 maryland nevada north carolina ohio
## 6 massachusetts new mexico wisconsin south carolina
## # ℹ 1 more variable: `National Failures` <chr>
category_summary <- merged_data %>%
group_by(likert_category) %>%
summarize(total_mortality_rate = sum(mortality_rate, na.rm = TRUE),
num_rows = n())
# Join the summary data back to the original dataframe
merged_data <- merged_data %>%
left_join(category_summary, by = "likert_category")
avg_mortality <- merged_data %>%
group_by(likert_category) %>%
summarize(avg_mortality_rate = mean(mortality_rate, na.rm = TRUE))
plot_ly(avg_mortality, x = ~likert_category, y = ~avg_mortality_rate, type = 'bar', color = ~likert_category) %>%
layout(title = "Average Mortality Rate by Likert Category",
xaxis = list(title = "Likert Category"),
yaxis = list(title = "Average Mortality Rate"))
## Warning: Ignoring 1 observations
merged_data <- merged_data %>%
group_by(state) %>%
mutate(state_avg = sum(mortality_rate, na.rm = TRUE) / n())
# Define a lookup table for state abbreviations
state_abbreviations <- data.frame(
State = c("alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut", "delaware", "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa", "kansas", "kentucky", "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi", "missouri", "montana", "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york", "north carolina", "north dakota", "ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", "south dakota", "tennessee", "texas", "utah", "vermont", "virginia", "washington", "west virginia", "wisconsin", "wyoming"),
state_abv = c("AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY")
)
# Merge the original dataframe with the lookup table to add state abbreviations
merged_data <- merge(merged_data, state_abbreviations, by.x = "state", by.y = "State", all.x = TRUE)
heat_map1 <- plot_geo(merged_data, locations = ~state_abv, text = ~state, z = ~mortality_rate) %>%
add_trace(
type = "choropleth",
colors = "YlOrRd",
locationmode = "USA-states"
) %>%
colorbar(title = "Mortality Rate") %>%
layout(
title = "2021 USA Mortality Rate",
geo = list(
scope = "usa",
projection = list(type = "albers usa"),
showlakes = TRUE,
lakecolor = toRGB("blue")
),
annotations = list(
list(
x = 0.5,
y = .95,
xref = "paper",
yref = "paper",
text = "Gun violence is a very common issue across the United States.",
showarrow = FALSE,
font = list(size = 14)
),
list(
x = 0.05,
y = 0.05,
xref = "paper",
yref = "paper",
text = "The number of deaths per 100,000 total population.",
showarrow = FALSE
)
)
)
## Warning: Ignoring 204 observations
# Display the heat map
heat_map1