## retrieving firearm data from open data api
firearm_mortality_data <- read.csv("https://data.cdc.gov/resource/489q-934x.csv")

head(firearm_mortality_data)
##   year_and_quarter                   time_period
## 1          2021 Q1 12 months ending with quarter
## 2          2021 Q1 12 months ending with quarter
## 3          2021 Q1 12 months ending with quarter
## 4          2021 Q1 12 months ending with quarter
## 5          2021 Q1 12 months ending with quarter
## 6          2021 Q1 12 months ending with quarter
##                        cause_of_death    rate_type               unit
## 1                          All causes Age-adjusted Deaths per 100,000
## 2                   Alzheimer disease Age-adjusted Deaths per 100,000
## 3                            COVID-19 Age-adjusted Deaths per 100,000
## 4                              Cancer Age-adjusted Deaths per 100,000
## 5 Chronic liver disease and cirrhosis Age-adjusted Deaths per 100,000
## 6  Chronic lower respiratory diseases Age-adjusted Deaths per 100,000
##   rate_overall rate_sex_female rate_sex_male rate_age_1_4 rate_age_5_14
## 1        866.3           716.3        1040.4           NA            NA
## 2         32.1            36.8          24.8           NA            NA
## 3        120.7            94.0         153.9           NA            NA
## 4        142.0           122.8         167.7           NA            NA
## 5         13.9             9.8          18.3           NA            NA
## 6         33.8            30.9          37.5           NA            NA
##   rate_age_15_24 rate_age_25_34 rate_age_35_44 rate_age_45_54 rate_age_55_64
## 1             NA             NA             NA             NA             NA
## 2             NA             NA             NA             NA             NA
## 3             NA             NA             NA             NA             NA
## 4             NA             NA             NA             NA             NA
## 5             NA             NA             NA             NA             NA
## 6             NA             NA             NA             NA             NA
##   rate_65_74 rate_age_75_84 rate_age_85_plus rate_alaska rate_alabama
## 1         NA             NA               NA       779.2       1123.4
## 2         NA             NA               NA        28.2         51.2
## 3         NA             NA               NA        44.4        160.2
## 4         NA             NA               NA       143.0        160.5
## 5         NA             NA               NA        23.6         17.2
## 6         NA             NA               NA        28.4         50.4
##   rate_arkansas rate_arizona rate_california rate_colorado rate_connecticut
## 1        1043.1        853.2           767.7         760.9            784.3
## 2          44.1         31.3            41.0          36.0             20.6
## 3         130.6        142.1           128.5          83.6            138.9
## 4         160.7        125.5           128.5         123.4            132.1
## 5          15.9         18.2            14.9          18.1             12.7
## 6          57.5         34.5            26.3          36.6             22.5
##   rate_district_of_columbia rate_delaware rate_florida rate_georgia rate_hawaii
## 1                     904.8         866.2        757.3        957.4       585.9
## 2                      11.1          35.4         19.7         45.9        22.4
## 3                     148.6         106.8         80.6        130.5        20.7
## 4                     136.0         149.1        134.4        145.8       124.1
## 5                       9.0          10.6         13.4         12.5         8.6
## 6                      18.9          32.0         31.2         39.5        16.9
##   rate_iowa rate_idaho rate_illinois rate_indiana rate_kansas rate_kentucky
## 1     848.3      789.6         861.5        981.8       901.2        1064.2
## 2      29.7       37.0          28.3         32.0        24.2          31.7
## 3     119.9       79.2         121.3        132.8       121.8         109.6
## 4     145.4      135.0         148.2        160.6       148.1         177.0
## 5      11.9       16.0          11.5         14.4        15.4          17.5
## 6      35.9       36.8          31.3         49.5        40.9          52.7
##   rate_louisiana rate_massachusetts rate_maryland rate_maine rate_michigan
## 1         1080.3              768.0         847.9      785.3         911.2
## 2           45.3               17.8          16.0       27.6          36.3
## 3          146.7              126.0         111.0       36.1         101.7
## 4          158.1              132.8         139.5      157.5         155.5
## 5           11.1               11.5           9.6       14.2          14.5
## 6           38.9               25.2          25.3       36.8          38.5
##   rate_minnesota rate_missouri rate_mississippi rate_montana
## 1          737.8         956.2           1197.0        836.6
## 2           32.9          33.9             58.3         22.6
## 3           85.1         113.0            171.5         91.8
## 4          136.4         157.4            172.0        136.0
## 5           14.0          12.9             16.3         21.6
## 6           28.3          43.4             56.7         38.8
##   rate_north_carolina rate_north_dakota rate_nebraska rate_new_hampshire
## 1               893.7             835.5         822.7              744.9
## 2                36.8              37.6          32.0               25.8
## 3                99.3             132.8         101.3               60.9
## 4               146.0             136.7         143.3              143.8
## 5                13.1              17.9          14.0               12.5
## 6                35.4              32.1          40.2               30.5
##   rate_new_jersey rate_new_mexico rate_nevada rate_new_york rate_ohio
## 1           853.1           952.7       895.2         817.6     983.2
## 2            22.2            25.9        27.5          14.0      37.5
## 3           172.6           144.1       133.4         174.2     122.2
## 4           129.8           130.0       143.1         125.7     157.6
## 5             9.7            36.8        16.6           8.3      13.1
## 6            22.8            38.0        40.6          22.9      40.4
##   rate_oklahoma rate_oregon rate_pennsylvania rate_rhode_island
## 1        1063.5       751.4             895.1             828.9
## 2          36.6        36.4              23.5              32.0
## 3         157.8        37.1             123.0             138.4
## 4         167.7       146.9             150.8             141.3
## 5          18.9        16.5              10.4              13.7
## 6          56.3        32.4              30.2              24.1
##   rate_south_carolina rate_south_dakota rate_tennessee rate_texas rate_utah
## 1               981.9             882.7         1056.8      922.0     771.2
## 2                40.0              37.4           42.8       44.9      41.1
## 3               126.6             145.8          122.5      162.3      68.7
## 4               150.5             147.0          161.9      137.0     116.2
## 5                16.3              29.0           16.8       16.4      10.7
## 6                41.9              33.6           47.6       34.3      30.2
##   rate_virginia rate_vermont rate_washington rate_wisconsin rate_west_virginia
## 1         824.8        737.9           714.8          825.8             1096.9
## 2          28.3         34.3            42.0           31.7               35.2
## 3          92.0         21.5            46.5           86.1               94.2
## 4         144.3        151.9           139.8          146.9              176.3
## 5          11.8         12.0            15.0           12.8               17.4
## 6          29.9         33.0            27.5           32.4               56.0
##   rate_wyoming
## 1        854.0
## 2         32.1
## 3         84.0
## 4        138.0
## 5         29.1
## 6         51.2
## one column didn't follow the naming conventions
firearm_mortality_data <- firearm_mortality_data %>%
  rename(rate_age_65_74 = matches("^rate_65_74$"))
## repetitive states columns, so i store them all into one column for ease of use
firearm_mortality_data_long <- firearm_mortality_data %>%
  pivot_longer(cols = c(rate_alaska:rate_wyoming),
               names_to = "state",
               values_to = "mortality_rate")
head(firearm_mortality_data_long)
## # A tibble: 6 × 20
##   year_and_quarter time_period       cause_of_death rate_type unit  rate_overall
##   <chr>            <chr>             <chr>          <chr>     <chr>        <dbl>
## 1 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## 2 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## 3 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## 4 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## 5 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## 6 2021 Q1          12 months ending… All causes     Age-adju… Deat…         866.
## # ℹ 14 more variables: rate_sex_female <dbl>, rate_sex_male <dbl>,
## #   rate_age_1_4 <dbl>, rate_age_5_14 <dbl>, rate_age_15_24 <dbl>,
## #   rate_age_25_34 <dbl>, rate_age_35_44 <dbl>, rate_age_45_54 <dbl>,
## #   rate_age_55_64 <dbl>, rate_age_65_74 <dbl>, rate_age_75_84 <dbl>,
## #   rate_age_85_plus <dbl>, state <chr>, mortality_rate <dbl>
# Filter the dataframe to include only rows where cause_of_death is "Firearm-related injury"
filtered_data <- firearm_mortality_data_long %>%
  filter(cause_of_death == "Firearm-related injury")
## removing ditrict of columbia because it is not a state
filtered_data <- filtered_data[filtered_data$state != "district_of_columbia", ]
## remove the rate_ from the states in the state column 
filtered_data$state <- gsub("^rate_", "", filtered_data$state)

I created a data frame for gun law rates from everytownresearch.org, which is a scale from 1 to 100 with 1 representing very poor and a minuscule amount of gun laws and 100 representing more restrictive amount of gun laws

gun_law_rates <- data.frame(
  state = c("california", "new_york", "illinois", "connecticut", "hawaii", 
            "massachusetts", "new_jersey", "maryland", "washington", "oregon", "colorado", "delaware", "rhode_island", "minnesota", "virginia", 
            "new_mexico", "pennsylvania", "vermont", "nevada", "michigan", 
            "wisconsin", "florida", "north_carolina", "nebraska", "maine", 
            "louisiana", "west_virginia", "south_carolina", "tennessee", 
            "indiana", "iowa", "texas", "ohio", "alabama", "utah", 
            "north_dakota", "kansas", "missouri", "new_hampshire", "kentucky", "alaska", "arizona", "oklahoma", "wyoming", "south_dakota", "georgia", "montana", "idaho", "mississippi", "arkansas"),
  gun_law_rate = c(89.5, 83.5, 83, 82.5, 82.5, 
                   81, 79, 75, 69, 68, 
                   63, 61.5, 57.5, 53.5, 49, 
                   40.5, 40, 39.5, 35, 35, 
                   28, 27.5, 25, 25, 20.5, 
                   20.5, 18.5, 18, 16.5, 16.5, 
                   15.5, 13.5, 13, 12.5, 12, 
                   11.5, 9.5, 9, 9, 9, 
                   9, 8.5, 7.5, 6.5, 5.5, 5,
                   5, 5, 3, 3)
)

# View the gun law rates data frame
head(gun_law_rates)
##           state gun_law_rate
## 1    california         89.5
## 2      new_york         83.5
## 3      illinois         83.0
## 4   connecticut         82.5
## 5        hawaii         82.5
## 6 massachusetts         81.0

Merging the two data frames

merged_data <- merge(filtered_data, gun_law_rates, by = "state", all.x = TRUE)

# View the merged data
head(merged_data)
##     state year_and_quarter                   time_period         cause_of_death
## 1 alabama          2021 Q1 12 months ending with quarter Firearm-related injury
## 2 alabama          2022 Q2                3-month period Firearm-related injury
## 3 alabama          2023 Q1                3-month period Firearm-related injury
## 4 alabama          2022 Q3                3-month period Firearm-related injury
## 5 alabama          2021 Q1                3-month period Firearm-related injury
## 6 alabama          2023 Q1                3-month period Firearm-related injury
##      rate_type               unit rate_overall rate_sex_female rate_sex_male
## 1 Age-adjusted Deaths per 100,000         14.0             4.0          24.4
## 2        Crude Deaths per 100,000         15.0             4.3          25.9
## 3 Age-adjusted Deaths per 100,000         13.4             4.0          23.1
## 4        Crude Deaths per 100,000         15.0             4.3          25.8
## 5 Age-adjusted Deaths per 100,000         13.6             4.0          23.4
## 6        Crude Deaths per 100,000         13.7             4.0          23.5
##   rate_age_1_4 rate_age_5_14 rate_age_15_24 rate_age_25_34 rate_age_35_44
## 1           NA            NA             NA             NA             NA
## 2          1.0           1.5           22.5           24.0           18.3
## 3           NA            NA             NA             NA             NA
## 4          0.8           1.5           21.7           24.1           19.1
## 5           NA            NA             NA             NA             NA
## 6          0.5           1.8           19.9           20.3           17.3
##   rate_age_45_54 rate_age_55_64 rate_age_65_74 rate_age_75_84 rate_age_85_plus
## 1             NA             NA             NA             NA               NA
## 2           15.2           13.9           12.4           17.7             16.3
## 3             NA             NA             NA             NA               NA
## 4           15.1           13.6           12.4           16.6             20.0
## 5             NA             NA             NA             NA               NA
## 6           14.2           12.8           12.1           15.4             16.8
##   mortality_rate gun_law_rate
## 1           24.6         12.5
## 2           26.5         12.5
## 3           27.9         12.5
## 4           27.6         12.5
## 5           26.4         12.5
## 6           27.4         12.5

More data tidying

# Defining the cut points for the Likert scale categories
cut_points <- c(0, 10, 20, 30, 70, 100)

# Defining the labels for the Likert scale categories
labels <- c("National Failures", "Weak Systems", "Missing Key Laws", "Making Progress", "National Leaders")

# Create a new column for the Likert scale categories
merged_data$likert_category <- cut(merged_data$gun_law_rate, breaks = cut_points, labels = labels, include.lowest = TRUE)

# Replace underscores with spaces in the state names
merged_data$state <- gsub("_", " ", merged_data$state)

head(merged_data)
##     state year_and_quarter                   time_period         cause_of_death
## 1 alabama          2021 Q1 12 months ending with quarter Firearm-related injury
## 2 alabama          2022 Q2                3-month period Firearm-related injury
## 3 alabama          2023 Q1                3-month period Firearm-related injury
## 4 alabama          2022 Q3                3-month period Firearm-related injury
## 5 alabama          2021 Q1                3-month period Firearm-related injury
## 6 alabama          2023 Q1                3-month period Firearm-related injury
##      rate_type               unit rate_overall rate_sex_female rate_sex_male
## 1 Age-adjusted Deaths per 100,000         14.0             4.0          24.4
## 2        Crude Deaths per 100,000         15.0             4.3          25.9
## 3 Age-adjusted Deaths per 100,000         13.4             4.0          23.1
## 4        Crude Deaths per 100,000         15.0             4.3          25.8
## 5 Age-adjusted Deaths per 100,000         13.6             4.0          23.4
## 6        Crude Deaths per 100,000         13.7             4.0          23.5
##   rate_age_1_4 rate_age_5_14 rate_age_15_24 rate_age_25_34 rate_age_35_44
## 1           NA            NA             NA             NA             NA
## 2          1.0           1.5           22.5           24.0           18.3
## 3           NA            NA             NA             NA             NA
## 4          0.8           1.5           21.7           24.1           19.1
## 5           NA            NA             NA             NA             NA
## 6          0.5           1.8           19.9           20.3           17.3
##   rate_age_45_54 rate_age_55_64 rate_age_65_74 rate_age_75_84 rate_age_85_plus
## 1             NA             NA             NA             NA               NA
## 2           15.2           13.9           12.4           17.7             16.3
## 3             NA             NA             NA             NA               NA
## 4           15.1           13.6           12.4           16.6             20.0
## 5             NA             NA             NA             NA               NA
## 6           14.2           12.8           12.1           15.4             16.8
##   mortality_rate gun_law_rate likert_category
## 1           24.6         12.5    Weak Systems
## 2           26.5         12.5    Weak Systems
## 3           27.9         12.5    Weak Systems
## 4           27.6         12.5    Weak Systems
## 5           26.4         12.5    Weak Systems
## 6           27.4         12.5    Weak Systems

Creating a 5 point Likert scale for categorizing gun control laws from most lax to strictest

selected_data <- merged_data %>% select(state, likert_category)

# Keep only unique combinations of state and Likert category
unique_data <- distinct(selected_data)

# Create a data frame with each state repeated for each Likert category
expanded_data <- unique_data %>%
  group_by(likert_category) %>%
  mutate(row = row_number()) %>%
  ungroup() %>%
  pivot_wider(names_from = likert_category, values_from = state)

desired_order <- c("National Leaders", "Making Progress", "Missing Key Laws", "Weak Systems", "National Failures", NA)

# Create the expanded data frame with the specified column order
expanded_data <- expanded_data %>%
  select(one_of(desired_order))
## Warning: Unknown columns: `NA`
head(expanded_data)
## # A tibble: 6 × 5
##   `National Leaders` `Making Progress` `Missing Key Laws` `Weak Systems`
##   <chr>              <chr>             <chr>              <chr>         
## 1 california         colorado          florida            alabama       
## 2 connecticut        delaware          louisiana          indiana       
## 3 hawaii             michigan          maine              iowa          
## 4 illinois           minnesota         nebraska           north dakota  
## 5 maryland           nevada            north carolina     ohio          
## 6 massachusetts      new mexico        wisconsin          south carolina
## # ℹ 1 more variable: `National Failures` <chr>

We can see that states such as California, New York, Hawaii, Illinois, Connecticut, Massachusetts, New Jersey, and Maryland have the stricter Gun laws

Do stricter gun laws mean a lower gun violence rate?

category_summary <- merged_data %>%
  group_by(likert_category) %>%
  summarize(total_mortality_rate = sum(mortality_rate, na.rm = TRUE),
            num_rows = n())

# Join the summary data back to the original dataframe
merged_data <- merged_data %>%
  left_join(category_summary, by = "likert_category")

On hover you will see the average mortality rate of each category based on the restrictiveness of the gun laws, overall the graph is skewed to the right, which supports the idea that the more restrictive the gun law, the lower the mortality rate

Creating a plot that has the average mortality rate by likert category

avg_mortality <- merged_data %>%
  group_by(likert_category) %>%
  summarize(avg_mortality_rate = mean(mortality_rate, na.rm = TRUE))

plot_ly(avg_mortality, x = ~likert_category, y = ~avg_mortality_rate, type = 'bar', color = ~likert_category) %>%
  layout(title = "Average Mortality Rate by Likert Category",
         xaxis = list(title = "Likert Category"),
         yaxis = list(title = "Average Mortality Rate"))
## Warning: Ignoring 1 observations
merged_data <- merged_data %>%
  group_by(state) %>%
  mutate(state_avg = sum(mortality_rate, na.rm = TRUE) / n())
# Define a lookup table for state abbreviations
state_abbreviations <- data.frame(
  State = c("alabama", "alaska", "arizona", "arkansas", "california", "colorado", "connecticut", "delaware", "florida", "georgia", "hawaii", "idaho", "illinois", "indiana", "iowa", "kansas", "kentucky", "louisiana", "maine", "maryland", "massachusetts", "michigan", "minnesota", "mississippi", "missouri", "montana", "nebraska", "nevada", "new hampshire", "new jersey", "new mexico", "new york", "north carolina", "north dakota", "ohio", "oklahoma", "oregon", "pennsylvania", "rhode island", "south carolina", "south dakota", "tennessee", "texas", "utah", "vermont", "virginia", "washington", "west virginia", "wisconsin", "wyoming"),
  state_abv = c("AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY")
)


# Merge the original dataframe with the lookup table to add state abbreviations
merged_data <- merge(merged_data, state_abbreviations, by.x = "state", by.y = "State", all.x = TRUE)

Create a heatmap to show the distribution of mortality rates across the US

heat_map1 <- plot_geo(merged_data, locations = ~state_abv, text = ~state, z = ~mortality_rate) %>%
  add_trace(
    type = "choropleth",
    colors = "YlOrRd",
    locationmode = "USA-states"
  ) %>%
  colorbar(title = "Mortality Rate") %>%
  layout(
    title = "2021 USA Mortality Rate",
    geo = list(
      scope = "usa",
      projection = list(type = "albers usa"),
      showlakes = TRUE,
      lakecolor = toRGB("blue")
    ),
    annotations = list(
      list(
        x = 0.5,  
        y = .95,  
        xref = "paper",
        yref = "paper",
        text = "Gun violence is a very common issue across the United States.", 
        showarrow = FALSE,  
        font = list(size = 14) 
      ),
      list(
        x = 0.05,  
        y = 0.05, 
        xref = "paper",
        yref = "paper",
        text = "The number of deaths per 100,000 total population.",  
        showarrow = FALSE  
      
      )
    )
  )
## Warning: Ignoring 204 observations
# Display the heat map
heat_map1