{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)

1. Load the required libraries

library(httr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:httr':
## 
##     config
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(tools)  # for toTitleCase()

2. Load and preview the data

Data Source

url <- "https://data.cdc.gov/resource/489q-934x.json"
res <- GET(url)
content <- fromJSON(content(res, "text", encoding = "UTF-8"))
data <- as.data.frame(content)

str(data)
## 'data.frame':    792 obs. of  69 variables:
##  $ year_and_quarter         : chr  "2022 Q1" "2022 Q1" "2022 Q1" "2022 Q1" ...
##  $ time_period              : chr  "12 months ending with quarter" "12 months ending with quarter" "12 months ending with quarter" "12 months ending with quarter" ...
##  $ cause_of_death           : chr  "All causes" "Alzheimer disease" "COVID-19" "Cancer" ...
##  $ rate_type                : chr  "Age-adjusted" "Age-adjusted" "Age-adjusted" "Age-adjusted" ...
##  $ unit                     : chr  "Deaths per 100,000" "Deaths per 100,000" "Deaths per 100,000" "Deaths per 100,000" ...
##  $ rate_overall             : chr  "873.2" "30.6" "95" "145.9" ...
##  $ rate_sex_female          : chr  "729.4" "35" "75.2" "127.4" ...
##  $ rate_sex_male            : chr  "1038" "23.8" "119.1" "170.9" ...
##  $ rate_alaska              : chr  "944.5" "28.5" "121.3" "156" ...
##  $ rate_alabama             : chr  "1109.8" "45.5" "133.6" "159.9" ...
##  $ rate_arkansas            : chr  "1097.1" "43.2" "123.6" "167.9" ...
##  $ rate_arizona             : chr  "882.5" "29.6" "113.9" "134.5" ...
##  $ rate_california          : chr  "719.5" "38.4" "62.4" "131.4" ...
##  $ rate_colorado            : chr  "808.2" "32.1" "89.9" "125" ...
##  $ rate_connecticut         : chr  "725.9" "21.6" "50.9" "134.1" ...
##  $ rate_district_of_columbia: chr  "844.4" "10.7" "54.8" "143.8" ...
##  $ rate_delaware            : chr  "868.3" "30.1" "78.4" "155.4" ...
##  $ rate_florida             : chr  "828" "19.3" "106.6" "141.3" ...
##  $ rate_georgia             : chr  "973.6" "43.4" "116.4" "150.4" ...
##  $ rate_hawaii              : chr  "647.1" "23.6" "43.9" "126.7" ...
##  $ rate_iowa                : chr  "860.8" "30.9" "78.1" "152" ...
##  $ rate_idaho               : chr  "892.8" "41.5" "118.9" "139.6" ...
##  $ rate_illinois            : chr  "839.4" "26.6" "82.1" "149.3" ...
##  $ rate_indiana             : chr  "1011.9" "29.6" "112.1" "169" ...
##  $ rate_kansas              : chr  "938.4" "22.9" "109.7" "152.8" ...
##  $ rate_kentucky            : chr  "1153.2" "32.5" "146.7" "179.6" ...
##  $ rate_louisiana           : chr  "1084.4" "42.8" "108.8" "161.9" ...
##  $ rate_massachusetts       : chr  "717" "17.5" "46.7" "136.1" ...
##  $ rate_maryland            : chr  "800.3" "15.9" "66.9" "139.2" ...
##  $ rate_maine               : chr  "910.3" "28.1" "70.9" "161.2" ...
##  $ rate_michigan            : chr  "956.2" "34.1" "115" "159.6" ...
##  $ rate_minnesota           : chr  "771.6" "34.1" "68" "143.6" ...
##  $ rate_missouri            : chr  "986.6" "33.5" "110.5" "162.4" ...
##  $ rate_mississippi         : chr  "1193.4" "51.8" "140.8" "184" ...
##  $ rate_montana             : chr  "925.3" "24.4" "111.8" "142.9" ...
##  $ rate_north_carolina      : chr  "952.8" "36" "98.3" "152.4" ...
##  $ rate_north_dakota        : chr  "810.5" "32.5" "80.2" "134.2" ...
##  $ rate_nebraska            : chr  "839.4" "29.8" "73.8" "152.2" ...
##  $ rate_new_hampshire       : chr  "791.9" "23.3" "56.5" "145.3" ...
##  $ rate_new_jersey          : chr  "723.5" "20.5" "62.9" "130.3" ...
##  $ rate_new_mexico          : chr  "1007.9" "25.4" "138.3" "135.6" ...
##  $ rate_nevada              : chr  "933.3" "26.3" "134.8" "140.8" ...
##  $ rate_new_york            : chr  "694.4" "12.8" "63.4" "125.3" ...
##  $ rate_ohio                : chr  "1019.8" "34" "128.1" "161.5" ...
##  $ rate_oklahoma            : chr  "1126.4" "37.1" "150.3" "176.7" ...
##  $ rate_oregon              : chr  "875.6" "40" "77.5" "153.7" ...
##  $ rate_pennsylvania        : chr  "893.5" "22.5" "97.4" "151.8" ...
##  $ rate_rhode_island        : chr  "771" "28.3" "55.5" "139.1" ...
##  $ rate_south_carolina      : chr  "1022" "40.1" "122.9" "154.3" ...
##  $ rate_south_dakota        : chr  "871.4" "39.1" "76.3" "148.6" ...
##  $ rate_tennessee           : chr  "1122.6" "37.2" "140.5" "165.2" ...
##  $ rate_texas               : chr  "918.3" "41.2" "126.7" "143.5" ...
##  $ rate_utah                : chr  "822.2" "41" "78.4" "120.6" ...
##  $ rate_virginia            : chr  "860.1" "26.1" "80.8" "149.8" ...
##  $ rate_vermont             : chr  "800.5" "36.2" "32.9" "155" ...
##  $ rate_washington          : chr  "811.1" "46" "66.6" "148.5" ...
##  $ rate_wisconsin           : chr  "857.1" "33.6" "77.7" "146.7" ...
##  $ rate_west_virginia       : chr  "1239.9" "35.4" "154" "183.8" ...
##  $ rate_wyoming             : chr  "956.8" "34.2" "145.1" "153.1" ...
##  $ rate_age_1_4             : chr  NA NA NA NA ...
##  $ rate_age_5_14            : chr  NA NA NA NA ...
##  $ rate_age_15_24           : chr  NA NA NA NA ...
##  $ rate_age_25_34           : chr  NA NA NA NA ...
##  $ rate_age_35_44           : chr  NA NA NA NA ...
##  $ rate_age_45_54           : chr  NA NA NA NA ...
##  $ rate_age_55_64           : chr  NA NA NA NA ...
##  $ rate_65_74               : chr  NA NA NA NA ...
##  $ rate_age_75_84           : chr  NA NA NA NA ...
##  $ rate_age_85_plus         : chr  NA NA NA NA ...
cat("\n", "\n")
gun_law_data <- read.csv("https://raw.githubusercontent.com/hawa1983/DATA-608/refs/heads/main/Story%203/strictest-gun-laws-by-state-2024.csv")
str(gun_law_data)
## 'data.frame':    50 obs. of  5 variables:
##  $ state                     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ GunLawsStrengthRank       : int  38 41 42 50 1 14 3 13 23 34 ...
##  $ GunLawsGiffordGrade       : chr  "F" "F" "F" "F" ...
##  $ GunLawsGunDeathRatePer100k: num  26.4 25.2 18.3 23.3 9 17.8 6.7 16.6 14.1 20.3 ...
##  $ GunLawsGunDeathRateRank   : int  4 6 17 8 43 18 45 23 34 14 ...
cat("\n", "\n")
population <- read.csv("https://raw.githubusercontent.com/hawa1983/DATA-608/refs/heads/main/Story%203/population.csv")
str(population)
## 'data.frame':    50 obs. of  2 variables:
##  $ state     : chr  "california" "texas" "florida" "new york" ...
##  $ population: int  38965193 30503301 22610726 19571216 12961683 12549689 11785935 11029227 10835491 10037261 ...
cat("\n", "\n")

3. Prepare the mortality rate data

# Step 1: Remove 'rate_district_of_columbia' column and filter out '2024 Q1' rows
mort_rate <- data %>%
  select(1:4, -rate_district_of_columbia, rate_alaska:rate_wyoming) %>%  # Select all except the 'rate_district_of_columbia' column
  filter(cause_of_death == "Firearm-related injury" &
         time_period == "12 months ending with quarter" &
         rate_type == "Crude" &
         year_and_quarter != "2024 Q1")  # Filter out the '2024 Q1' rows

# Step 2: Convert all rate columns (state columns) to numeric
mort_rate <- mort_rate %>%
  mutate(across(starts_with("rate_"), as.numeric))  # Convert all 'rate_' columns to numeric
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(starts_with("rate_"), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
# Step 3: Calculate the mean for each column and store it in a new dataset
mean_data <- mort_rate %>%
  select(-rate_type) %>%
  summarise(across(starts_with("rate_"), mean, na.rm = TRUE))  # Calculate mean for all columns starting with 'rate_'
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(starts_with("rate_"), mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
# Step 4: Remove 'rate_' prefix from the state column names
mean_data <- mean_data %>%
  rename_with(~ str_replace(., "rate_", ""), starts_with("rate_"))  # Remove 'rate_' prefix

# Step 5: Pivot the data to longer format, remove the 'rate_' prefix from state names, and rename the column
data_long <- mort_rate %>%
  select(1, rate_alaska:rate_wyoming) %>%  # Select year_and_quarter and all rate columns for states
  pivot_longer(cols = rate_alaska:rate_wyoming, 
               names_to = "states", 
               values_to = "mortality_rate") %>%  # Rename the value column to 'mortality_rate'
  mutate(states = str_replace_all(states, "rate_", ""),  # Remove 'rate_' prefix from state names
         states = str_replace_all(states, "_", " "))     # Replace underscores with spaces

# Step 6: Pivot the mean data to longer format and rename the column to 'mortality_rate'
mean_data_long <- mean_data %>%
  pivot_longer(cols = everything(), 
               names_to = "states", 
               values_to = "mortality_rate") %>%  # Rename the value column to 'mortality_rate'
  mutate(states = str_replace_all(states, "_", " "))     # Replace underscores with spaces

# Step 7: Print the head of the long data to check
head(mean_data_long)
## # A tibble: 6 × 2
##   states     mortality_rate
##   <chr>               <dbl>
## 1 alaska              23.3 
## 2 alabama             25.5 
## 3 arkansas            22.2 
## 4 arizona             20.0 
## 5 california           8.76
## 6 colorado            17.6

4. Prepare the gun law rating data

# 1. First, mutate gun_law_data to include state abbreviations
gun_law_data <- gun_law_data %>%
  mutate(States = state.abb[match(state, state.name)])

# 2. Ensure that the states column in both data frames is properly formatted
mean_Mort_long_data <- mean_data_long %>%
  mutate(states = tolower(states))  

# 3. Ensure that the states column in both data frames is properly formatted
gun_law_data <- gun_law_data %>%
  mutate(state = tolower(state))

# 4. Now proceed with the merge (join) by the state abbreviation
merged_data <- merge(mean_Mort_long_data, gun_law_data, by.x = "states", by.y = "state")

# Merge map data with the population for dots
population <- population %>%
  mutate(state_name = tolower(state)) %>%
  select(-state)

merged_data <- merged_data %>%
  left_join(population, by = c("states" = "state_name"))

head(merged_data)
##       states mortality_rate GunLawsStrengthRank GunLawsGiffordGrade
## 1    alabama        25.4875                  38                   F
## 2     alaska        23.2750                  41                   F
## 3    arizona        20.0125                  42                   F
## 4   arkansas        22.1750                  50                   F
## 5 california         8.7625                   1                   A
## 6   colorado        17.6375                  14                   B
##   GunLawsGunDeathRatePer100k GunLawsGunDeathRateRank States population
## 1                       26.4                       4     AL    5108468
## 2                       25.2                       6     AK     733406
## 3                       18.3                      17     AZ    7431344
## 4                       23.3                       8     AR    3067732
## 5                        9.0                      43     CA   38965193
## 6                       17.8                      18     CO    5877610

5. Plot the Heatmap of US States Map

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(patchwork)  # For arranging plots side by side
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(scales)  # For rescaling
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load US map data
states_map <- map_data("state") %>%
  filter(!region %in% c("district of columbia", "puerto rico", "guam"))

# Convert state names in the merged_data to lowercase to match with map data
merged_data$States <- tolower(merged_data$States)

# Filter out problematic names if necessary
merged_data <- merged_data %>%
  filter(!States %in% c("district of columbia", "puerto rico", "guam"))

# Step 1: Standardize GunLawsStrengthRank using min-max normalization
merged_data <- merged_data %>%
  mutate(GunLawsStrengthRank_std = (GunLawsStrengthRank - min(GunLawsStrengthRank, na.rm = TRUE)) /
                                   (max(GunLawsStrengthRank, na.rm = TRUE) - min(GunLawsStrengthRank, na.rm = TRUE)))

# Step 2: Standardize mortality_rate using min-max normalization
merged_data <- merged_data %>%
  mutate(mortality_rate_std = (mortality_rate - min(mortality_rate, na.rm = TRUE)) /
                              (max(mortality_rate, na.rm = TRUE) - min(mortality_rate, na.rm = TRUE)))

# Merge map data with the merged_data based on state names
map_data <- states_map %>%
  left_join(merged_data, by = c("region" = "states"))
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(maps)

# Prepare map data
states_map <- map_data("state")

# Calculate centroids of states for plotting points
state_centroids <- states_map %>%
  group_by(region) %>%
  summarize(long = mean(range(long)), lat = mean(range(lat)))

# Merge centroids with the merged data
merged_data_centroids <- state_centroids %>%
  left_join(merged_data, by = c("region" = "states"))

# Standardize GunLawsStrengthRank using min-max normalization

merged_data <- merged_data %>%
  mutate(GunLawsStrengthRank_std = (GunLawsStrengthRank - min(GunLawsStrengthRank, na.rm = TRUE)) /
                                   (max(GunLawsStrengthRank, na.rm = TRUE) - min(GunLawsStrengthRank, na.rm = TRUE)))

# Standardize mortality_rate using min-max normalization
merged_data <- merged_data %>%
  mutate(mortality_rate_std = (mortality_rate - min(mortality_rate, na.rm = TRUE)) /
                              (max(mortality_rate, na.rm = TRUE) - min(mortality_rate, na.rm = TRUE)))

# Merge map data with the gun law and mortality data for the map background
map_data <- states_map %>%
  left_join(merged_data, by = c("region" = "states"))

# Rescale legends for GunLawsStrengthRank and Mortality Rate without affecting the plot data
actual_gun_law_labels <- pretty(merged_data$GunLawsStrengthRank, n = 5)
actual_mortality_rate_labels <- pretty(merged_data$mortality_rate, n = 5)

# Accessible color palettes from RColorBrewer for the rescaled values
gun_law_colors <- scale_fill_distiller(palette = "Blues", direction = 1, name = "Gun Law Rank\n(Lower is Stricter)",
                                       labels = actual_gun_law_labels, 
                                       breaks = scales::rescale(actual_gun_law_labels, to = range(merged_data$GunLawsStrengthRank_std)))

mortality_colors <- scale_color_distiller(palette = "Oranges", direction = 1, name = "Mortality Rate",
                                          labels = actual_mortality_rate_labels, 
                                          breaks = scales::rescale(actual_mortality_rate_labels, to = range(merged_data$mortality_rate_std)))

# Create the combined map with accessible color palettes and actual legend values
combined_map <- ggplot() +
  # Gun law strength represented by fill color using accessible palette
  geom_polygon(data = map_data, aes(x = long, y = lat, group = group, fill = GunLawsStrengthRank_std), color = "white") +
  # Firearm mortality rate represented by dots at state centroids with another accessible palette
  geom_point(data = merged_data_centroids, aes(x = long, y = lat, color = mortality_rate_std), size = 3, alpha = 0.7) +
  coord_fixed(1.3) +  # Fix aspect ratio
  gun_law_colors +  # Accessible color scale for Gun Law Rank
  mortality_colors +  # Accessible color scale for Mortality Rate
  labs(title = "Gun Law Strength & Firearm Mortality Rate by State") +
  theme_minimal() +
  theme(legend.position = "right",
        axis.title = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.grid = element_blank())

# Show the plot
print(combined_map)

6. Plot the scatter plot of mortality rate versus gun control ranking

# Ensure that the states column in both data frames is properly formatted
merged_data <- merged_data %>%
  mutate(States = toupper(States))

# Round population to the nearest 10 million for legend labels
merged_data <- merged_data %>%
  mutate(population_rounded = round(population / 1e7) * 10)  # Round to nearest 10 million

# Scatter plot with regression line and dots sized by actual population
ggplot(merged_data, aes(x = GunLawsStrengthRank, y = mortality_rate)) +
  geom_point(aes(size = population), color = "#1a2309", alpha = 0.7) +  # Size of dot reflects actual population
  geom_smooth(method = "lm", se = TRUE, color = "red") +  # Add regression line with confidence interval
  geom_text(aes(label = States), vjust = -1, size = 3, color = "black") +  # Place state abbreviations at the centroid
  scale_size_continuous(name = "Population (in millions)", range = c(3, 8), 
                        breaks = seq(10, 40, by = 10),  # Use breaks to display 10 million increments
                        labels = function(x) paste0(x, "M")) +  # Format labels to show as '10M', '20M', etc.
  labs(#title = "Gun Law Strength vs Mortality Rate",
       x = "Gun Law Strength Rank",
       y = "Mortality Rate per 100k") +
  theme_minimal() +  # Minimal theme
  theme(
    panel.grid = element_blank()   # Remove gridlines
  )
## `geom_smooth()` using formula = 'y ~ x'

# Create a custom color scale for the Gun Laws Gifford Grades
grade_colors <- c("A" = "#1f77b4", "A-" = "#ff7f0e", "B+" = "#2ca02c", "B" = "#d62728", 
                  "B-" = "#9467bd", "C+" = "#8c564b", "C" = "#e377c2", 
                  "C-" = "#7f7f7f", "D+" = "#bcbd22", "F" = "#17becf")

# Scatter plot with regression line, annotations, and no legend
ggplot(merged_data, aes(x = GunLawsStrengthRank, y = mortality_rate, color = GunLawsGiffordGrade)) +
  geom_point(size = 2) +  # Plot the points with color based on grade
  geom_smooth(method = "lm", se = TRUE, color = "red") +  # Add regression line with confidence interval
  geom_text(aes(label = GunLawsGiffordGrade), vjust = -0.5, hjust = 0.5, size = 3, color = "black") +  # Annotate with grades
  labs(x = "Gun Law Strength Rank (Lower is Stricter)", y = "Mortality Rate per 100k") +
  scale_color_manual(values = grade_colors, guide = "none") +  # Remove the legend
  theme_minimal() +  # Minimal theme
  theme(
    panel.grid = element_blank()  # Remove gridlines
  )
## `geom_smooth()` using formula = 'y ~ x'

6. Plot the dot plot of mortality rate versus gun control ranking

# Reorder the GunLawsGiffordGrade factor levels
merged_data$GunLawsGiffordGrade <- factor(merged_data$GunLawsGiffordGrade, 
                                          levels = c("A", "A-", "B+", "B",  "B-",  "C+",  "C",  "C-",  "D+",  "F"))

# Create a consistent jitter effect for both points and labels
jitter_pos <- position_jitter(width = 0.3, height = 0)

# Swap the colors for "B" and "F" in the custom color set
custom_colors <- c("A" = "#2ca02c",      # Blue
                   "A-" = "#2ca02c",     # Orange
                   "B+" = "#7f7f7f",     # Green
                   "B" = "#7f7f7f",      # Swapped: Cyan (was originally red)
                   "B-" = "#7f7f7f",     # Purple
                   "C+" = "#7f7f7f",     # Brown
                   "C" = "#7f7f7f",      # Pink
                   "C-" = "#7f7f7f",     # Gray
                   "D+" = "#7f7f7f",     # Yellow-green
                   "F" = "#d62728")      # Swapped: Red (was originally cyan)

# Create a jittered dot plot (strip plot) with state labels and swapped custom colors
dot_plot <- ggplot(merged_data, aes(x = GunLawsGiffordGrade, y = mortality_rate, color = GunLawsGiffordGrade)) +
  geom_jitter(aes(x = GunLawsGiffordGrade, y = mortality_rate), position = jitter_pos, alpha = 0.7) +  # Jitter the dots to separate them
  geom_hline(yintercept = max(merged_data$mortality_rate[merged_data$GunLawsGiffordGrade == "A"]) + 0.5,  # Just above the highest A- dot
             linetype = "dashed", color = "lightgrey", linewidth = 1.0) +  # Dashed green line, thinner size
  scale_color_manual(values = custom_colors) +  # Use manually defined colors with swapped B and F
  labs(# title = "Firearm Control Laws vs Mortality Rates",
       x = "Giffords Gun Law Grade",
       y = "Firearm Mortality Rate per 100k",
       color = "Gun Law Grade") +
  theme_minimal() +  # Clean theme
  theme(legend.position = "none")  # Remove the legend as the color directly represents the grades

# Print the dot plot
print(dot_plot)