{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE)
library(httr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:httr':
##
## config
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
library(stringr)
library(tidyr)
library(ggplot2)
library(tools) # for toTitleCase()
url <- "https://data.cdc.gov/resource/489q-934x.json"
res <- GET(url)
content <- fromJSON(content(res, "text", encoding = "UTF-8"))
data <- as.data.frame(content)
str(data)
## 'data.frame': 792 obs. of 69 variables:
## $ year_and_quarter : chr "2022 Q1" "2022 Q1" "2022 Q1" "2022 Q1" ...
## $ time_period : chr "12 months ending with quarter" "12 months ending with quarter" "12 months ending with quarter" "12 months ending with quarter" ...
## $ cause_of_death : chr "All causes" "Alzheimer disease" "COVID-19" "Cancer" ...
## $ rate_type : chr "Age-adjusted" "Age-adjusted" "Age-adjusted" "Age-adjusted" ...
## $ unit : chr "Deaths per 100,000" "Deaths per 100,000" "Deaths per 100,000" "Deaths per 100,000" ...
## $ rate_overall : chr "873.2" "30.6" "95" "145.9" ...
## $ rate_sex_female : chr "729.4" "35" "75.2" "127.4" ...
## $ rate_sex_male : chr "1038" "23.8" "119.1" "170.9" ...
## $ rate_alaska : chr "944.5" "28.5" "121.3" "156" ...
## $ rate_alabama : chr "1109.8" "45.5" "133.6" "159.9" ...
## $ rate_arkansas : chr "1097.1" "43.2" "123.6" "167.9" ...
## $ rate_arizona : chr "882.5" "29.6" "113.9" "134.5" ...
## $ rate_california : chr "719.5" "38.4" "62.4" "131.4" ...
## $ rate_colorado : chr "808.2" "32.1" "89.9" "125" ...
## $ rate_connecticut : chr "725.9" "21.6" "50.9" "134.1" ...
## $ rate_district_of_columbia: chr "844.4" "10.7" "54.8" "143.8" ...
## $ rate_delaware : chr "868.3" "30.1" "78.4" "155.4" ...
## $ rate_florida : chr "828" "19.3" "106.6" "141.3" ...
## $ rate_georgia : chr "973.6" "43.4" "116.4" "150.4" ...
## $ rate_hawaii : chr "647.1" "23.6" "43.9" "126.7" ...
## $ rate_iowa : chr "860.8" "30.9" "78.1" "152" ...
## $ rate_idaho : chr "892.8" "41.5" "118.9" "139.6" ...
## $ rate_illinois : chr "839.4" "26.6" "82.1" "149.3" ...
## $ rate_indiana : chr "1011.9" "29.6" "112.1" "169" ...
## $ rate_kansas : chr "938.4" "22.9" "109.7" "152.8" ...
## $ rate_kentucky : chr "1153.2" "32.5" "146.7" "179.6" ...
## $ rate_louisiana : chr "1084.4" "42.8" "108.8" "161.9" ...
## $ rate_massachusetts : chr "717" "17.5" "46.7" "136.1" ...
## $ rate_maryland : chr "800.3" "15.9" "66.9" "139.2" ...
## $ rate_maine : chr "910.3" "28.1" "70.9" "161.2" ...
## $ rate_michigan : chr "956.2" "34.1" "115" "159.6" ...
## $ rate_minnesota : chr "771.6" "34.1" "68" "143.6" ...
## $ rate_missouri : chr "986.6" "33.5" "110.5" "162.4" ...
## $ rate_mississippi : chr "1193.4" "51.8" "140.8" "184" ...
## $ rate_montana : chr "925.3" "24.4" "111.8" "142.9" ...
## $ rate_north_carolina : chr "952.8" "36" "98.3" "152.4" ...
## $ rate_north_dakota : chr "810.5" "32.5" "80.2" "134.2" ...
## $ rate_nebraska : chr "839.4" "29.8" "73.8" "152.2" ...
## $ rate_new_hampshire : chr "791.9" "23.3" "56.5" "145.3" ...
## $ rate_new_jersey : chr "723.5" "20.5" "62.9" "130.3" ...
## $ rate_new_mexico : chr "1007.9" "25.4" "138.3" "135.6" ...
## $ rate_nevada : chr "933.3" "26.3" "134.8" "140.8" ...
## $ rate_new_york : chr "694.4" "12.8" "63.4" "125.3" ...
## $ rate_ohio : chr "1019.8" "34" "128.1" "161.5" ...
## $ rate_oklahoma : chr "1126.4" "37.1" "150.3" "176.7" ...
## $ rate_oregon : chr "875.6" "40" "77.5" "153.7" ...
## $ rate_pennsylvania : chr "893.5" "22.5" "97.4" "151.8" ...
## $ rate_rhode_island : chr "771" "28.3" "55.5" "139.1" ...
## $ rate_south_carolina : chr "1022" "40.1" "122.9" "154.3" ...
## $ rate_south_dakota : chr "871.4" "39.1" "76.3" "148.6" ...
## $ rate_tennessee : chr "1122.6" "37.2" "140.5" "165.2" ...
## $ rate_texas : chr "918.3" "41.2" "126.7" "143.5" ...
## $ rate_utah : chr "822.2" "41" "78.4" "120.6" ...
## $ rate_virginia : chr "860.1" "26.1" "80.8" "149.8" ...
## $ rate_vermont : chr "800.5" "36.2" "32.9" "155" ...
## $ rate_washington : chr "811.1" "46" "66.6" "148.5" ...
## $ rate_wisconsin : chr "857.1" "33.6" "77.7" "146.7" ...
## $ rate_west_virginia : chr "1239.9" "35.4" "154" "183.8" ...
## $ rate_wyoming : chr "956.8" "34.2" "145.1" "153.1" ...
## $ rate_age_1_4 : chr NA NA NA NA ...
## $ rate_age_5_14 : chr NA NA NA NA ...
## $ rate_age_15_24 : chr NA NA NA NA ...
## $ rate_age_25_34 : chr NA NA NA NA ...
## $ rate_age_35_44 : chr NA NA NA NA ...
## $ rate_age_45_54 : chr NA NA NA NA ...
## $ rate_age_55_64 : chr NA NA NA NA ...
## $ rate_65_74 : chr NA NA NA NA ...
## $ rate_age_75_84 : chr NA NA NA NA ...
## $ rate_age_85_plus : chr NA NA NA NA ...
cat("\n", "\n")
gun_law_data <- read.csv("https://raw.githubusercontent.com/hawa1983/DATA-608/refs/heads/main/Story%203/strictest-gun-laws-by-state-2024.csv")
str(gun_law_data)
## 'data.frame': 50 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ GunLawsStrengthRank : int 38 41 42 50 1 14 3 13 23 34 ...
## $ GunLawsGiffordGrade : chr "F" "F" "F" "F" ...
## $ GunLawsGunDeathRatePer100k: num 26.4 25.2 18.3 23.3 9 17.8 6.7 16.6 14.1 20.3 ...
## $ GunLawsGunDeathRateRank : int 4 6 17 8 43 18 45 23 34 14 ...
cat("\n", "\n")
population <- read.csv("https://raw.githubusercontent.com/hawa1983/DATA-608/refs/heads/main/Story%203/population.csv")
str(population)
## 'data.frame': 50 obs. of 2 variables:
## $ state : chr "california" "texas" "florida" "new york" ...
## $ population: int 38965193 30503301 22610726 19571216 12961683 12549689 11785935 11029227 10835491 10037261 ...
cat("\n", "\n")
# Step 1: Remove 'rate_district_of_columbia' column and filter out '2024 Q1' rows
mort_rate <- data %>%
select(1:4, -rate_district_of_columbia, rate_alaska:rate_wyoming) %>% # Select all except the 'rate_district_of_columbia' column
filter(cause_of_death == "Firearm-related injury" &
time_period == "12 months ending with quarter" &
rate_type == "Crude" &
year_and_quarter != "2024 Q1") # Filter out the '2024 Q1' rows
# Step 2: Convert all rate columns (state columns) to numeric
mort_rate <- mort_rate %>%
mutate(across(starts_with("rate_"), as.numeric)) # Convert all 'rate_' columns to numeric
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(starts_with("rate_"), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
# Step 3: Calculate the mean for each column and store it in a new dataset
mean_data <- mort_rate %>%
select(-rate_type) %>%
summarise(across(starts_with("rate_"), mean, na.rm = TRUE)) # Calculate mean for all columns starting with 'rate_'
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(starts_with("rate_"), mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
# Step 4: Remove 'rate_' prefix from the state column names
mean_data <- mean_data %>%
rename_with(~ str_replace(., "rate_", ""), starts_with("rate_")) # Remove 'rate_' prefix
# Step 5: Pivot the data to longer format, remove the 'rate_' prefix from state names, and rename the column
data_long <- mort_rate %>%
select(1, rate_alaska:rate_wyoming) %>% # Select year_and_quarter and all rate columns for states
pivot_longer(cols = rate_alaska:rate_wyoming,
names_to = "states",
values_to = "mortality_rate") %>% # Rename the value column to 'mortality_rate'
mutate(states = str_replace_all(states, "rate_", ""), # Remove 'rate_' prefix from state names
states = str_replace_all(states, "_", " ")) # Replace underscores with spaces
# Step 6: Pivot the mean data to longer format and rename the column to 'mortality_rate'
mean_data_long <- mean_data %>%
pivot_longer(cols = everything(),
names_to = "states",
values_to = "mortality_rate") %>% # Rename the value column to 'mortality_rate'
mutate(states = str_replace_all(states, "_", " ")) # Replace underscores with spaces
# Step 7: Print the head of the long data to check
head(mean_data_long)
## # A tibble: 6 × 2
## states mortality_rate
## <chr> <dbl>
## 1 alaska 23.3
## 2 alabama 25.5
## 3 arkansas 22.2
## 4 arizona 20.0
## 5 california 8.76
## 6 colorado 17.6
# 1. First, mutate gun_law_data to include state abbreviations
gun_law_data <- gun_law_data %>%
mutate(States = state.abb[match(state, state.name)])
# 2. Ensure that the states column in both data frames is properly formatted
mean_Mort_long_data <- mean_data_long %>%
mutate(states = tolower(states))
# 3. Ensure that the states column in both data frames is properly formatted
gun_law_data <- gun_law_data %>%
mutate(state = tolower(state))
# 4. Now proceed with the merge (join) by the state abbreviation
merged_data <- merge(mean_Mort_long_data, gun_law_data, by.x = "states", by.y = "state")
# Merge map data with the population for dots
population <- population %>%
mutate(state_name = tolower(state)) %>%
select(-state)
merged_data <- merged_data %>%
left_join(population, by = c("states" = "state_name"))
head(merged_data)
## states mortality_rate GunLawsStrengthRank GunLawsGiffordGrade
## 1 alabama 25.4875 38 F
## 2 alaska 23.2750 41 F
## 3 arizona 20.0125 42 F
## 4 arkansas 22.1750 50 F
## 5 california 8.7625 1 A
## 6 colorado 17.6375 14 B
## GunLawsGunDeathRatePer100k GunLawsGunDeathRateRank States population
## 1 26.4 4 AL 5108468
## 2 25.2 6 AK 733406
## 3 18.3 17 AZ 7431344
## 4 23.3 8 AR 3067732
## 5 9.0 43 CA 38965193
## 6 17.8 18 CO 5877610
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(patchwork) # For arranging plots side by side
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(scales) # For rescaling
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Load US map data
states_map <- map_data("state") %>%
filter(!region %in% c("district of columbia", "puerto rico", "guam"))
# Convert state names in the merged_data to lowercase to match with map data
merged_data$States <- tolower(merged_data$States)
# Filter out problematic names if necessary
merged_data <- merged_data %>%
filter(!States %in% c("district of columbia", "puerto rico", "guam"))
# Step 1: Standardize GunLawsStrengthRank using min-max normalization
merged_data <- merged_data %>%
mutate(GunLawsStrengthRank_std = (GunLawsStrengthRank - min(GunLawsStrengthRank, na.rm = TRUE)) /
(max(GunLawsStrengthRank, na.rm = TRUE) - min(GunLawsStrengthRank, na.rm = TRUE)))
# Step 2: Standardize mortality_rate using min-max normalization
merged_data <- merged_data %>%
mutate(mortality_rate_std = (mortality_rate - min(mortality_rate, na.rm = TRUE)) /
(max(mortality_rate, na.rm = TRUE) - min(mortality_rate, na.rm = TRUE)))
# Merge map data with the merged_data based on state names
map_data <- states_map %>%
left_join(merged_data, by = c("region" = "states"))
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(maps)
# Prepare map data
states_map <- map_data("state")
# Calculate centroids of states for plotting points
state_centroids <- states_map %>%
group_by(region) %>%
summarize(long = mean(range(long)), lat = mean(range(lat)))
# Merge centroids with the merged data
merged_data_centroids <- state_centroids %>%
left_join(merged_data, by = c("region" = "states"))
# Standardize GunLawsStrengthRank using min-max normalization
merged_data <- merged_data %>%
mutate(GunLawsStrengthRank_std = (GunLawsStrengthRank - min(GunLawsStrengthRank, na.rm = TRUE)) /
(max(GunLawsStrengthRank, na.rm = TRUE) - min(GunLawsStrengthRank, na.rm = TRUE)))
# Standardize mortality_rate using min-max normalization
merged_data <- merged_data %>%
mutate(mortality_rate_std = (mortality_rate - min(mortality_rate, na.rm = TRUE)) /
(max(mortality_rate, na.rm = TRUE) - min(mortality_rate, na.rm = TRUE)))
# Merge map data with the gun law and mortality data for the map background
map_data <- states_map %>%
left_join(merged_data, by = c("region" = "states"))
# Rescale legends for GunLawsStrengthRank and Mortality Rate without affecting the plot data
actual_gun_law_labels <- pretty(merged_data$GunLawsStrengthRank, n = 5)
actual_mortality_rate_labels <- pretty(merged_data$mortality_rate, n = 5)
# Accessible color palettes from RColorBrewer for the rescaled values
gun_law_colors <- scale_fill_distiller(palette = "Blues", direction = 1, name = "Gun Law Rank\n(Lower is Stricter)",
labels = actual_gun_law_labels,
breaks = scales::rescale(actual_gun_law_labels, to = range(merged_data$GunLawsStrengthRank_std)))
mortality_colors <- scale_color_distiller(palette = "Oranges", direction = 1, name = "Mortality Rate",
labels = actual_mortality_rate_labels,
breaks = scales::rescale(actual_mortality_rate_labels, to = range(merged_data$mortality_rate_std)))
# Create the combined map with accessible color palettes and actual legend values
combined_map <- ggplot() +
# Gun law strength represented by fill color using accessible palette
geom_polygon(data = map_data, aes(x = long, y = lat, group = group, fill = GunLawsStrengthRank_std), color = "white") +
# Firearm mortality rate represented by dots at state centroids with another accessible palette
geom_point(data = merged_data_centroids, aes(x = long, y = lat, color = mortality_rate_std), size = 3, alpha = 0.7) +
coord_fixed(1.3) + # Fix aspect ratio
gun_law_colors + # Accessible color scale for Gun Law Rank
mortality_colors + # Accessible color scale for Mortality Rate
labs(title = "Gun Law Strength & Firearm Mortality Rate by State") +
theme_minimal() +
theme(legend.position = "right",
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank())
# Show the plot
print(combined_map)
# Ensure that the states column in both data frames is properly formatted
merged_data <- merged_data %>%
mutate(States = toupper(States))
# Round population to the nearest 10 million for legend labels
merged_data <- merged_data %>%
mutate(population_rounded = round(population / 1e7) * 10) # Round to nearest 10 million
# Scatter plot with regression line and dots sized by actual population
ggplot(merged_data, aes(x = GunLawsStrengthRank, y = mortality_rate)) +
geom_point(aes(size = population), color = "#1a2309", alpha = 0.7) + # Size of dot reflects actual population
geom_smooth(method = "lm", se = TRUE, color = "red") + # Add regression line with confidence interval
geom_text(aes(label = States), vjust = -1, size = 3, color = "black") + # Place state abbreviations at the centroid
scale_size_continuous(name = "Population (in millions)", range = c(3, 8),
breaks = seq(10, 40, by = 10), # Use breaks to display 10 million increments
labels = function(x) paste0(x, "M")) + # Format labels to show as '10M', '20M', etc.
labs(#title = "Gun Law Strength vs Mortality Rate",
x = "Gun Law Strength Rank",
y = "Mortality Rate per 100k") +
theme_minimal() + # Minimal theme
theme(
panel.grid = element_blank() # Remove gridlines
)
## `geom_smooth()` using formula = 'y ~ x'
# Create a custom color scale for the Gun Laws Gifford Grades
grade_colors <- c("A" = "#1f77b4", "A-" = "#ff7f0e", "B+" = "#2ca02c", "B" = "#d62728",
"B-" = "#9467bd", "C+" = "#8c564b", "C" = "#e377c2",
"C-" = "#7f7f7f", "D+" = "#bcbd22", "F" = "#17becf")
# Scatter plot with regression line, annotations, and no legend
ggplot(merged_data, aes(x = GunLawsStrengthRank, y = mortality_rate, color = GunLawsGiffordGrade)) +
geom_point(size = 2) + # Plot the points with color based on grade
geom_smooth(method = "lm", se = TRUE, color = "red") + # Add regression line with confidence interval
geom_text(aes(label = GunLawsGiffordGrade), vjust = -0.5, hjust = 0.5, size = 3, color = "black") + # Annotate with grades
labs(x = "Gun Law Strength Rank (Lower is Stricter)", y = "Mortality Rate per 100k") +
scale_color_manual(values = grade_colors, guide = "none") + # Remove the legend
theme_minimal() + # Minimal theme
theme(
panel.grid = element_blank() # Remove gridlines
)
## `geom_smooth()` using formula = 'y ~ x'
# Reorder the GunLawsGiffordGrade factor levels
merged_data$GunLawsGiffordGrade <- factor(merged_data$GunLawsGiffordGrade,
levels = c("A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "F"))
# Create a consistent jitter effect for both points and labels
jitter_pos <- position_jitter(width = 0.3, height = 0)
# Swap the colors for "B" and "F" in the custom color set
custom_colors <- c("A" = "#2ca02c", # Blue
"A-" = "#2ca02c", # Orange
"B+" = "#7f7f7f", # Green
"B" = "#7f7f7f", # Swapped: Cyan (was originally red)
"B-" = "#7f7f7f", # Purple
"C+" = "#7f7f7f", # Brown
"C" = "#7f7f7f", # Pink
"C-" = "#7f7f7f", # Gray
"D+" = "#7f7f7f", # Yellow-green
"F" = "#d62728") # Swapped: Red (was originally cyan)
# Create a jittered dot plot (strip plot) with state labels and swapped custom colors
dot_plot <- ggplot(merged_data, aes(x = GunLawsGiffordGrade, y = mortality_rate, color = GunLawsGiffordGrade)) +
geom_jitter(aes(x = GunLawsGiffordGrade, y = mortality_rate), position = jitter_pos, alpha = 0.7) + # Jitter the dots to separate them
geom_hline(yintercept = max(merged_data$mortality_rate[merged_data$GunLawsGiffordGrade == "A"]) + 0.5, # Just above the highest A- dot
linetype = "dashed", color = "lightgrey", linewidth = 1.0) + # Dashed green line, thinner size
scale_color_manual(values = custom_colors) + # Use manually defined colors with swapped B and F
labs(# title = "Firearm Control Laws vs Mortality Rates",
x = "Giffords Gun Law Grade",
y = "Firearm Mortality Rate per 100k",
color = "Gun Law Grade") +
theme_minimal() + # Clean theme
theme(legend.position = "none") # Remove the legend as the color directly represents the grades
# Print the dot plot
print(dot_plot)