DATA PREPARATION
Upload API
## Install the required package with:
#install.packages("RSocrata")
library("RSocrata")
## Warning: package 'RSocrata' was built under R version 4.2.3
df <- read.socrata(
"https://data.cdc.gov/resource/489q-934x.csv",
app_token = "ojx8SGUqfbt4XTSo7NgUpmBQW",
email = "enid.roman62@spsmail.cuny.edu",
password = "Metro#310"
)
Install Packages
#install.packages("cli")
#install.packages("magrittr")
#install.packages("plotly")
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.2.2
Read Dataframe
head(df)
## year_and_quarter time_period
## 1 2020 Q1 12 months ending with quarter
## 2 2020 Q1 12 months ending with quarter
## 3 2020 Q1 12 months ending with quarter
## 4 2020 Q1 12 months ending with quarter
## 5 2020 Q1 12 months ending with quarter
## 6 2020 Q1 12 months ending with quarter
## cause_of_death rate_type unit
## 1 All causes Age-adjusted Deaths per 100,000
## 2 Alzheimer disease Age-adjusted Deaths per 100,000
## 3 COVID-19 Age-adjusted Deaths per 100,000
## 4 Cancer Age-adjusted Deaths per 100,000
## 5 Chronic liver disease and cirrhosis Age-adjusted Deaths per 100,000
## 6 Chronic lower respiratory diseases Age-adjusted Deaths per 100,000
## rate_overall rate_sex_female rate_sex_male rate_age_1_4 rate_age_5_14
## 1 718.6 605.1 851.0 NA NA
## 2 29.8 33.6 23.8 NA NA
## 3 1.7 1.2 2.3 NA NA
## 4 145.7 126.0 172.2 NA NA
## 5 11.5 8.1 15.2 NA NA
## 6 38.0 35.5 41.4 NA NA
## rate_age_15_24 rate_age_25_34 rate_age_35_44 rate_age_45_54 rate_age_55_64
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## rate_65_74 rate_age_75_84 rate_age_85_plus rate_alaska rate_alabama
## 1 NA NA NA 691.2 898.2
## 2 NA NA NA 24.1 44.6
## 3 NA NA NA NA 0.7
## 4 NA NA NA 141.3 159.9
## 5 NA NA NA 16.0 13.7
## 6 NA NA NA 29.2 54.5
## rate_arkansas rate_arizona rate_california rate_colorado rate_connecticut
## 1 870.9 664.5 602.7 651.6 646.6
## 2 41.1 32.2 37.2 33.5 17.5
## 3 NA 0.6 0.6 1.6 2.3
## 4 165.8 130.6 131.4 128.0 132.3
## 5 12.9 14.5 12.4 15.0 9.4
## 6 59.4 37.4 28.9 40.7 27.0
## rate_district_of_columbia rate_delaware rate_florida rate_georgia rate_hawaii
## 1 715.4 732.1 652.9 776.6 570.9
## 2 12.1 26.9 18.2 41.8 20.1
## 3 NA NA 0.5 1.8 NA
## 4 147.9 150.5 138.1 150.8 126.4
## 5 9.2 9.3 11.2 10.2 6.7
## 6 19.8 34.8 35.1 41.9 17.6
## rate_iowa rate_idaho rate_illinois rate_indiana rate_kansas rate_kentucky
## 1 735.5 709.1 711.1 832.0 767.1 910.7
## 2 30.4 32.5 24.7 32.4 22.4 31.2
## 3 NA NA 1.3 1.5 NA 0.6
## 4 150.9 138.4 151.7 164.1 152.1 175.5
## 5 9.7 12.8 10.2 12.9 11.2 13.9
## 6 43.7 44.6 35.1 56.0 46.6 61.6
## rate_louisiana rate_massachusetts rate_maryland rate_maine rate_michigan
## 1 870.0 656.9 704.7 756.7 782.9
## 2 42.1 17.4 14.1 25.9 33.5
## 3 7.1 1.3 0.6 NA 3.8
## 4 165.1 139.3 142.9 162.8 157.9
## 5 9.7 9.2 8.2 11.0 11.7
## 6 39.5 30.6 28.8 47.5 44.9
## rate_minnesota rate_missouri rate_mississippi rate_montana
## 1 652.9 804.1 945.4 736.2
## 2 34.7 34.1 48.6 22.2
## 3 NA 0.3 1.0 NA
## 4 141.4 157.4 178.4 138.3
## 5 11.3 10.4 13.6 15.8
## 6 31.8 47.6 57.4 49.3
## rate_north_carolina rate_north_dakota rate_nebraska rate_new_hampshire
## 1 767.0 728.0 706.1 691.0
## 2 36.9 38.6 30.3 27.5
## 3 0.2 NA NA NA
## 4 150.9 144.3 148.2 145.4
## 5 11.6 15.3 11.4 11.3
## 6 40.8 35.8 44.4 39.2
## rate_new_jersey rate_new_mexico rate_nevada rate_new_york rate_ohio
## 1 666.8 758.9 738.4 635.5 827.0
## 2 21.0 21.0 23.0 14.0 33.0
## 3 5.7 NA 0.9 10.9 0.6
## 4 135.9 129.6 145.3 133.3 161.4
## 5 7.9 26.4 13.2 7.2 11.3
## 6 25.8 39.8 47.3 27.5 46.2
## rate_oklahoma rate_oregon rate_pennsylvania rate_rhode_island
## 1 876.7 695.4 750.4 704.3
## 2 37.8 37.4 20.7 29.0
## 3 0.7 0.5 0.6 NA
## 4 171.3 145.1 154.3 151.7
## 5 15.6 12.9 8.7 11.1
## 6 61.8 37.4 34.4 34.5
## rate_south_carolina rate_south_dakota rate_tennessee rate_texas rate_utah
## 1 809.9 738.5 889.6 717.5 687.6
## 2 38.2 39.1 40.8 38.5 40.3
## 3 0.5 NA 0.4 0.3 NA
## 4 154.6 155.4 165.3 141.8 116.5
## 5 13.2 18.6 14.1 14.4 8.6
## 6 42.8 45.8 53.4 37.8 31.3
## rate_virginia rate_vermont rate_washington rate_wisconsin rate_west_virginia
## 1 709.4 685.2 663.9 729.6 950.4
## 2 27.0 32.9 41.2 31.1 32.8
## 3 0.6 NA 3.1 0.6 NA
## 4 147.1 148.6 140.8 149.7 175.4
## 5 10.3 8.9 12.2 10.3 16.4
## 6 36.0 38.4 32.6 37.3 62.1
## rate_wyoming
## 1 739.4
## 2 34.6
## 3 NA
## 4 137.9
## 5 21.0
## 6 53.8
# Select Rows by Equal Condition
df2 <- df[df$cause_of_death == 'Firearm-related injury',]
head(df2)
## year_and_quarter time_period cause_of_death
## 10 2020 Q1 12 months ending with quarter Firearm-related injury
## 32 2020 Q1 3-month period Firearm-related injury
## 54 2020 Q2 12 months ending with quarter Firearm-related injury
## 76 2020 Q2 3-month period Firearm-related injury
## 98 2020 Q3 12 months ending with quarter Firearm-related injury
## 120 2020 Q3 3-month period Firearm-related injury
## rate_type unit rate_overall rate_sex_female rate_sex_male
## 10 Age-adjusted Deaths per 100,000 12.0 3.4 21.1
## 32 Age-adjusted Deaths per 100,000 12.1 3.2 21.3
## 54 Age-adjusted Deaths per 100,000 12.3 3.4 21.6
## 76 Age-adjusted Deaths per 100,000 13.1 3.7 22.9
## 98 Age-adjusted Deaths per 100,000 13.0 3.6 22.7
## 120 Age-adjusted Deaths per 100,000 14.9 4.2 26.0
## rate_age_1_4 rate_age_5_14 rate_age_15_24 rate_age_25_34 rate_age_35_44
## 10 NA NA NA NA NA
## 32 NA NA NA NA NA
## 54 NA NA NA NA NA
## 76 NA NA NA NA NA
## 98 NA NA NA NA NA
## 120 NA NA NA NA NA
## rate_age_45_54 rate_age_55_64 rate_65_74 rate_age_75_84 rate_age_85_plus
## 10 NA NA NA NA NA
## 32 NA NA NA NA NA
## 54 NA NA NA NA NA
## 76 NA NA NA NA NA
## 98 NA NA NA NA NA
## 120 NA NA NA NA NA
## rate_alaska rate_alabama rate_arkansas rate_arizona rate_california
## 10 23.0 21.8 19.7 15.4 7.3
## 32 21.4 23.0 20.8 15.9 7.5
## 54 22.7 22.6 22.0 14.8 7.5
## 76 20.2 23.4 27.1 15.4 7.9
## 98 24.5 23.1 21.5 15.9 7.8
## 120 26.4 24.9 21.3 18.4 9.2
## rate_colorado rate_connecticut rate_district_of_columbia rate_delaware
## 10 14.3 5.4 19.3 10.4
## 32 14.2 5.6 19.7 12.8
## 54 14.7 5.2 20.0 12.2
## 76 16.3 4.2 21.5 12.7
## 98 15.5 5.5 20.3 13.6
## 120 17.9 6.8 21.9 16.2
## rate_florida rate_georgia rate_hawaii rate_iowa rate_idaho rate_illinois
## 10 12.9 15.9 4.0 9.4 15.8 10.9
## 32 13.2 15.9 3.6 9.9 17.7 9.8
## 54 12.9 16.1 3.7 10.0 16.3 12.1
## 76 12.8 16.8 2.9 11.7 18.2 15.8
## 98 13.5 16.7 3.8 11.1 16.5 13.3
## 120 14.7 18.9 4.2 13.3 15.0 17.4
## rate_indiana rate_kansas rate_kentucky rate_louisiana rate_massachusetts
## 10 14.6 14.3 15.8 22.1 3.5
## 32 15.3 14.0 17.3 20.7 3.1
## 54 15.0 15.3 15.9 22.4 3.6
## 76 15.3 17.6 17.6 23.5 3.8
## 98 16.1 16.2 18.0 24.4 3.9
## 120 19.7 17.4 22.9 30.6 4.8
## rate_maryland rate_maine rate_michigan rate_minnesota rate_missouri
## 10 12.5 10.5 12.6 8.2 20.8
## 32 10.7 12.5 12.3 7.6 19.0
## 54 13.1 10.3 12.7 8.3 21.8
## 76 14.6 8.3 13.1 8.2 24.6
## 98 13.0 10.7 13.3 8.4 23.1
## 120 13.9 10.1 15.4 9.8 28.5
## rate_mississippi rate_montana rate_north_carolina rate_north_dakota
## 10 23.5 20.2 13.6 12.8
## 32 21.9 24.8 15.1 13.8
## 54 24.4 21.5 14.1 11.2
## 76 29.0 23.2 15.4 12.0
## 98 26.6 20.6 14.9 12.4
## 120 32.3 16.8 16.7 13.7
## rate_nebraska rate_new_hampshire rate_new_jersey rate_new_mexico
## 10 9.5 10.1 4.2 22.7
## 32 7.8 7.4 3.8 20.7
## 54 8.9 9.1 4.4 22.0
## 76 9.8 5.7 4.8 20.0
## 98 9.2 9.0 4.8 22.3
## 120 11.4 12.6 5.9 26.9
## rate_nevada rate_new_york rate_ohio rate_oklahoma rate_oregon
## 10 15.5 4.0 13.4 19.2 12.2
## 32 16.3 4.0 12.5 20.7 11.4
## 54 15.6 4.5 13.6 20.2 12.3
## 76 15.8 5.5 14.5 21.7 12.5
## 98 16.2 5.1 14.5 20.0 12.1
## 120 17.2 7.1 17.9 19.9 13.1
## rate_pennsylvania rate_rhode_island rate_south_carolina rate_south_dakota
## 10 12.1 4.3 19.9 12.2
## 32 12.0 3.9 20.5 8.5
## 54 12.3 4.1 20.7 12.0
## 76 12.2 4.9 21.9 11.8
## 98 12.9 4.4 21.5 12.5
## 120 15.0 4.6 24.2 17.8
## rate_tennessee rate_texas rate_utah rate_virginia rate_vermont
## 10 18.8 12.9 12.5 12.4 9.7
## 32 18.0 13.1 11.9 13.7 10.4
## 54 18.9 13.3 13.1 12.2 10.0
## 76 19.5 14.2 14.5 11.7 10.4
## 98 19.9 13.6 13.3 12.9 10.6
## 120 22.9 14.7 13.9 15.1 11.7
## rate_washington rate_wisconsin rate_west_virginia rate_wyoming
## 10 10.8 10.4 17.7 24.5
## 32 11.0 10.2 19.4 26.8
## 54 10.9 10.6 17.4 25.4
## 76 11.2 10.9 16.2 23.3
## 98 11.0 11.7 16.3 28.2
## 120 11.4 13.9 15.8 34.1
Get All Column Names
colnames(df2)
## [1] "year_and_quarter" "time_period"
## [3] "cause_of_death" "rate_type"
## [5] "unit" "rate_overall"
## [7] "rate_sex_female" "rate_sex_male"
## [9] "rate_age_1_4" "rate_age_5_14"
## [11] "rate_age_15_24" "rate_age_25_34"
## [13] "rate_age_35_44" "rate_age_45_54"
## [15] "rate_age_55_64" "rate_65_74"
## [17] "rate_age_75_84" "rate_age_85_plus"
## [19] "rate_alaska" "rate_alabama"
## [21] "rate_arkansas" "rate_arizona"
## [23] "rate_california" "rate_colorado"
## [25] "rate_connecticut" "rate_district_of_columbia"
## [27] "rate_delaware" "rate_florida"
## [29] "rate_georgia" "rate_hawaii"
## [31] "rate_iowa" "rate_idaho"
## [33] "rate_illinois" "rate_indiana"
## [35] "rate_kansas" "rate_kentucky"
## [37] "rate_louisiana" "rate_massachusetts"
## [39] "rate_maryland" "rate_maine"
## [41] "rate_michigan" "rate_minnesota"
## [43] "rate_missouri" "rate_mississippi"
## [45] "rate_montana" "rate_north_carolina"
## [47] "rate_north_dakota" "rate_nebraska"
## [49] "rate_new_hampshire" "rate_new_jersey"
## [51] "rate_new_mexico" "rate_nevada"
## [53] "rate_new_york" "rate_ohio"
## [55] "rate_oklahoma" "rate_oregon"
## [57] "rate_pennsylvania" "rate_rhode_island"
## [59] "rate_south_carolina" "rate_south_dakota"
## [61] "rate_tennessee" "rate_texas"
## [63] "rate_utah" "rate_virginia"
## [65] "rate_vermont" "rate_washington"
## [67] "rate_wisconsin" "rate_west_virginia"
## [69] "rate_wyoming"
Remove Columns Not Needed.
columns_to_exclude <- c(
"rate_type", "unit", "rate_overall", "rate_sex_female", "rate_sex_male",
"rate_age_1_4", "rate_age_5_14", "rate_age_15_24", "rate_age_25_34",
"rate_age_35_44", "rate_age_45_54", "rate_age_55_64", "rate_65_74",
"rate_age_75_84", "rate_age_85_plus"
)
# Subset the data frame to exclude specific columns
df3 <- df2[, !names(df2) %in% columns_to_exclude]
head(df3)
## year_and_quarter time_period cause_of_death
## 10 2020 Q1 12 months ending with quarter Firearm-related injury
## 32 2020 Q1 3-month period Firearm-related injury
## 54 2020 Q2 12 months ending with quarter Firearm-related injury
## 76 2020 Q2 3-month period Firearm-related injury
## 98 2020 Q3 12 months ending with quarter Firearm-related injury
## 120 2020 Q3 3-month period Firearm-related injury
## rate_alaska rate_alabama rate_arkansas rate_arizona rate_california
## 10 23.0 21.8 19.7 15.4 7.3
## 32 21.4 23.0 20.8 15.9 7.5
## 54 22.7 22.6 22.0 14.8 7.5
## 76 20.2 23.4 27.1 15.4 7.9
## 98 24.5 23.1 21.5 15.9 7.8
## 120 26.4 24.9 21.3 18.4 9.2
## rate_colorado rate_connecticut rate_district_of_columbia rate_delaware
## 10 14.3 5.4 19.3 10.4
## 32 14.2 5.6 19.7 12.8
## 54 14.7 5.2 20.0 12.2
## 76 16.3 4.2 21.5 12.7
## 98 15.5 5.5 20.3 13.6
## 120 17.9 6.8 21.9 16.2
## rate_florida rate_georgia rate_hawaii rate_iowa rate_idaho rate_illinois
## 10 12.9 15.9 4.0 9.4 15.8 10.9
## 32 13.2 15.9 3.6 9.9 17.7 9.8
## 54 12.9 16.1 3.7 10.0 16.3 12.1
## 76 12.8 16.8 2.9 11.7 18.2 15.8
## 98 13.5 16.7 3.8 11.1 16.5 13.3
## 120 14.7 18.9 4.2 13.3 15.0 17.4
## rate_indiana rate_kansas rate_kentucky rate_louisiana rate_massachusetts
## 10 14.6 14.3 15.8 22.1 3.5
## 32 15.3 14.0 17.3 20.7 3.1
## 54 15.0 15.3 15.9 22.4 3.6
## 76 15.3 17.6 17.6 23.5 3.8
## 98 16.1 16.2 18.0 24.4 3.9
## 120 19.7 17.4 22.9 30.6 4.8
## rate_maryland rate_maine rate_michigan rate_minnesota rate_missouri
## 10 12.5 10.5 12.6 8.2 20.8
## 32 10.7 12.5 12.3 7.6 19.0
## 54 13.1 10.3 12.7 8.3 21.8
## 76 14.6 8.3 13.1 8.2 24.6
## 98 13.0 10.7 13.3 8.4 23.1
## 120 13.9 10.1 15.4 9.8 28.5
## rate_mississippi rate_montana rate_north_carolina rate_north_dakota
## 10 23.5 20.2 13.6 12.8
## 32 21.9 24.8 15.1 13.8
## 54 24.4 21.5 14.1 11.2
## 76 29.0 23.2 15.4 12.0
## 98 26.6 20.6 14.9 12.4
## 120 32.3 16.8 16.7 13.7
## rate_nebraska rate_new_hampshire rate_new_jersey rate_new_mexico
## 10 9.5 10.1 4.2 22.7
## 32 7.8 7.4 3.8 20.7
## 54 8.9 9.1 4.4 22.0
## 76 9.8 5.7 4.8 20.0
## 98 9.2 9.0 4.8 22.3
## 120 11.4 12.6 5.9 26.9
## rate_nevada rate_new_york rate_ohio rate_oklahoma rate_oregon
## 10 15.5 4.0 13.4 19.2 12.2
## 32 16.3 4.0 12.5 20.7 11.4
## 54 15.6 4.5 13.6 20.2 12.3
## 76 15.8 5.5 14.5 21.7 12.5
## 98 16.2 5.1 14.5 20.0 12.1
## 120 17.2 7.1 17.9 19.9 13.1
## rate_pennsylvania rate_rhode_island rate_south_carolina rate_south_dakota
## 10 12.1 4.3 19.9 12.2
## 32 12.0 3.9 20.5 8.5
## 54 12.3 4.1 20.7 12.0
## 76 12.2 4.9 21.9 11.8
## 98 12.9 4.4 21.5 12.5
## 120 15.0 4.6 24.2 17.8
## rate_tennessee rate_texas rate_utah rate_virginia rate_vermont
## 10 18.8 12.9 12.5 12.4 9.7
## 32 18.0 13.1 11.9 13.7 10.4
## 54 18.9 13.3 13.1 12.2 10.0
## 76 19.5 14.2 14.5 11.7 10.4
## 98 19.9 13.6 13.3 12.9 10.6
## 120 22.9 14.7 13.9 15.1 11.7
## rate_washington rate_wisconsin rate_west_virginia rate_wyoming
## 10 10.8 10.4 17.7 24.5
## 32 11.0 10.2 19.4 26.8
## 54 10.9 10.6 17.4 25.4
## 76 11.2 10.9 16.2 23.3
## 98 11.0 11.7 16.3 28.2
## 120 11.4 13.9 15.8 34.1
As per dataset, /nchs/pressroom/states/alabama/al.htm, use only the
first set of Q4 12 month ending with qtr for each year. Remove 2022 Q4
12 month, not available. Also remove 2021 because I only found data for
2020 Gun Law Score Card.
df4 <- df3[c(7), ]
head(df4)
## year_and_quarter time_period cause_of_death
## 142 2020 Q4 12 months ending with quarter Firearm-related injury
## rate_alaska rate_alabama rate_arkansas rate_arizona rate_california
## 142 23.5 23.6 22.6 16.7 8.5
## rate_colorado rate_connecticut rate_district_of_columbia rate_delaware
## 142 15.4 6 21.9 14.4
## rate_florida rate_georgia rate_hawaii rate_iowa rate_idaho rate_illinois
## 142 13.7 17.7 3.4 11.2 17.6 14.1
## rate_indiana rate_kansas rate_kentucky rate_louisiana rate_massachusetts
## 142 17.3 16.9 20.1 26.3 3.7
## rate_maryland rate_maine rate_michigan rate_minnesota rate_missouri
## 142 13.5 10.4 14.6 8.9 23.9
## rate_mississippi rate_montana rate_north_carolina rate_north_dakota
## 142 28.6 20.9 16 13.8
## rate_nebraska rate_new_hampshire rate_new_jersey rate_new_mexico
## 142 10.1 8.9 5 22.7
## rate_nevada rate_new_york rate_ohio rate_oklahoma rate_oregon
## 142 17 5.3 15.2 20.7 13
## rate_pennsylvania rate_rhode_island rate_south_carolina rate_south_dakota
## 142 13.6 5.1 22 13.6
## rate_tennessee rate_texas rate_utah rate_virginia rate_vermont
## 142 21.3 14.2 13.6 13.4 11.6
## rate_washington rate_wisconsin rate_west_virginia rate_wyoming
## 142 10.9 12.2 18.1 25.9
Remove Columns Not Needed
df5 <- df4 %>% select(-c(time_period, cause_of_death))
head(df5)
## year_and_quarter rate_alaska rate_alabama rate_arkansas rate_arizona
## 142 2020 Q4 23.5 23.6 22.6 16.7
## rate_california rate_colorado rate_connecticut rate_district_of_columbia
## 142 8.5 15.4 6 21.9
## rate_delaware rate_florida rate_georgia rate_hawaii rate_iowa rate_idaho
## 142 14.4 13.7 17.7 3.4 11.2 17.6
## rate_illinois rate_indiana rate_kansas rate_kentucky rate_louisiana
## 142 14.1 17.3 16.9 20.1 26.3
## rate_massachusetts rate_maryland rate_maine rate_michigan rate_minnesota
## 142 3.7 13.5 10.4 14.6 8.9
## rate_missouri rate_mississippi rate_montana rate_north_carolina
## 142 23.9 28.6 20.9 16
## rate_north_dakota rate_nebraska rate_new_hampshire rate_new_jersey
## 142 13.8 10.1 8.9 5
## rate_new_mexico rate_nevada rate_new_york rate_ohio rate_oklahoma
## 142 22.7 17 5.3 15.2 20.7
## rate_oregon rate_pennsylvania rate_rhode_island rate_south_carolina
## 142 13 13.6 5.1 22
## rate_south_dakota rate_tennessee rate_texas rate_utah rate_virginia
## 142 13.6 21.3 14.2 13.6 13.4
## rate_vermont rate_washington rate_wisconsin rate_west_virginia rate_wyoming
## 142 11.6 10.9 12.2 18.1 25.9
Rename Columns to their Abbreviations
df6 <- df5 %>% rename("Year" = "year_and_quarter",
"AL" = "rate_alabama",
"AK" = "rate_alaska",
"AZ" = "rate_arizona",
"AR" = "rate_arkansas",
"CA" = "rate_california",
"CO" = "rate_colorado",
"CT" = "rate_connecticut",
"DC" = "rate_district_of_columbia",
"DE" = "rate_delaware",
"FL" = "rate_florida",
"GA" = "rate_georgia",
"HI" = "rate_hawaii",
"ID" = "rate_idaho",
"IL" = "rate_illinois",
"IN" = "rate_indiana",
"IA" = "rate_iowa",
"KS" = "rate_kansas",
"KY" = "rate_kentucky",
"LA" = "rate_louisiana",
"ME" = "rate_maine",
"MD" = "rate_maryland",
"MA" = "rate_massachusetts",
"MI" = "rate_michigan",
"MN" = "rate_minnesota",
"MS" = "rate_mississippi",
"MO" = "rate_missouri",
"MT" = "rate_montana",
"NC" = "rate_north_carolina",
"ND" = "rate_north_dakota",
"NE" = "rate_nebraska",
"NV" = "rate_nevada",
"NH" = "rate_new_hampshire",
"NJ" = "rate_new_jersey",
"NM" = "rate_new_mexico",
"NY" = "rate_new_york",
"OH" = "rate_ohio",
"OK" = "rate_oklahoma",
"OR" = "rate_oregon",
"PA" = "rate_pennsylvania",
"RI" = "rate_rhode_island",
"SC" = "rate_south_carolina",
"SD" = "rate_south_dakota",
"TN" = "rate_tennessee",
"TX" = "rate_texas",
"UT" = "rate_utah",
"VT" = "rate_vermont",
"VA" = "rate_virginia",
"WA" = "rate_washington",
"WV" = "rate_west_virginia",
"WI" = "rate_wisconsin",
"WY" = "rate_wyoming")
head(df6)
## Year AK AL AR AZ CA CO CT DC DE FL GA HI IA ID
## 142 2020 Q4 23.5 23.6 22.6 16.7 8.5 15.4 6 21.9 14.4 13.7 17.7 3.4 11.2 17.6
## IL IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE
## 142 14.1 17.3 16.9 20.1 26.3 3.7 13.5 10.4 14.6 8.9 23.9 28.6 20.9 16 13.8 10.1
## NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT
## 142 8.9 5 22.7 17 5.3 15.2 20.7 13 13.6 5.1 22 13.6 21.3 14.2 13.6 13.4 11.6
## WA WI WV WY
## 142 10.9 12.2 18.1 25.9
Replace “2020 Q4” with “2020” in the “ColumnName” column
df6$Year <- sub("2020 Q4", "2020", df6$Year)
head(df6)
## Year AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL
## 142 2020 23.5 23.6 22.6 16.7 8.5 15.4 6 21.9 14.4 13.7 17.7 3.4 11.2 17.6 14.1
## IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH
## 142 17.3 16.9 20.1 26.3 3.7 13.5 10.4 14.6 8.9 23.9 28.6 20.9 16 13.8 10.1 8.9
## NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA
## 142 5 22.7 17 5.3 15.2 20.7 13 13.6 5.1 22 13.6 21.3 14.2 13.6 13.4 11.6 10.9
## WI WV WY
## 142 12.2 18.1 25.9
Summary of the List is Obtained
str(df6)
## 'data.frame': 1 obs. of 52 variables:
## $ Year: chr "2020"
## $ AK : num 23.5
## $ AL : num 23.6
## $ AR : num 22.6
## $ AZ : num 16.7
## $ CA : num 8.5
## $ CO : num 15.4
## $ CT : num 6
## $ DC : num 21.9
## $ DE : num 14.4
## $ FL : num 13.7
## $ GA : num 17.7
## $ HI : num 3.4
## $ IA : num 11.2
## $ ID : num 17.6
## $ IL : num 14.1
## $ IN : num 17.3
## $ KS : num 16.9
## $ KY : num 20.1
## $ LA : num 26.3
## $ MA : num 3.7
## $ MD : num 13.5
## $ ME : num 10.4
## $ MI : num 14.6
## $ MN : num 8.9
## $ MO : num 23.9
## $ MS : num 28.6
## $ MT : num 20.9
## $ NC : num 16
## $ ND : num 13.8
## $ NE : num 10.1
## $ NH : num 8.9
## $ NJ : num 5
## $ NM : num 22.7
## $ NV : num 17
## $ NY : num 5.3
## $ OH : num 15.2
## $ OK : num 20.7
## $ OR : num 13
## $ PA : num 13.6
## $ RI : num 5.1
## $ SC : num 22
## $ SD : num 13.6
## $ TN : num 21.3
## $ TX : num 14.2
## $ UT : num 13.6
## $ VA : num 13.4
## $ VT : num 11.6
## $ WA : num 10.9
## $ WI : num 12.2
## $ WV : num 18.1
## $ WY : num 25.9
Remove Row Names (Index Numbers)
row.names(df6) <- NULL
head(df6)
## Year AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL
## 1 2020 23.5 23.6 22.6 16.7 8.5 15.4 6 21.9 14.4 13.7 17.7 3.4 11.2 17.6 14.1
## IN KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ
## 1 17.3 16.9 20.1 26.3 3.7 13.5 10.4 14.6 8.9 23.9 28.6 20.9 16 13.8 10.1 8.9 5
## NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI
## 1 22.7 17 5.3 15.2 20.7 13 13.6 5.1 22 13.6 21.3 14.2 13.6 13.4 11.6 10.9 12.2
## WV WY
## 1 18.1 25.9
Remove Columns Name Needed
df6 <- df6 %>% select(-c(Year))
head(df6)
## AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN
## 1 23.5 23.6 22.6 16.7 8.5 15.4 6 21.9 14.4 13.7 17.7 3.4 11.2 17.6 14.1 17.3
## KS KY LA MA MD ME MI MN MO MS MT NC ND NE NH NJ NM
## 1 16.9 20.1 26.3 3.7 13.5 10.4 14.6 8.9 23.9 28.6 20.9 16 13.8 10.1 8.9 5 22.7
## NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV
## 1 17 5.3 15.2 20.7 13 13.6 5.1 22 13.6 21.3 14.2 13.6 13.4 11.6 10.9 12.2 18.1
## WY
## 1 25.9
Use Pivot_Longer to Bring Columns to Rows
MORTALITYRATE <- df6 %>%
pivot_longer(cols = everything(), # Use 'everything()' to include all columns
names_to = "State.Abrev", # Create a new variable 'Variable' for column names
values_to = "Mortality.Rate") # Name for the values column
head(MORTALITYRATE)
## # A tibble: 6 × 2
## State.Abrev Mortality.Rate
## <chr> <dbl>
## 1 AK 23.5
## 2 AL 23.6
## 3 AR 22.6
## 4 AZ 16.7
## 5 CA 8.5
## 6 CO 15.4
Added ID column to prepare to merge both MORTALITYRATE AND GUNLAW
dataset
# Add an ID column
MORTALITYRATE$ID <- seq_along(MORTALITYRATE$State.Abrev)
# Reorder the columns
MORTALITYRATE <- MORTALITYRATE[, c("ID", "State.Abrev", "Mortality.Rate")]
# Print the updated data frame
head(MORTALITYRATE)
## # A tibble: 6 × 3
## ID State.Abrev Mortality.Rate
## <int> <chr> <dbl>
## 1 1 AK 23.5
## 2 2 AL 23.6
## 3 3 AR 22.6
## 4 4 AZ 16.7
## 5 5 CA 8.5
## 6 6 CO 15.4
GUNLAW <- read.csv("https://raw.githubusercontent.com/enidroman/Data_608_Knowledge_and_Visual_Analytics/main/2020%20GUN%20LAW%20SCORECARD.csv")
head(GUNLAW)
## Gun.Law.Strength.Ranked. State Grade Grade.Rank Gun.Death.Rate.Ranked.
## 1 41 Alaska F 5 6
## 2 31 Alabama F 5 5
## 3 50 Arkansas F 5 8
## 4 42 Arizona F 5 20
## 5 1 California A 1 44
## 6 13 Colorado B 2 22
## Gun.Death.Rate.per.100K.
## 1 23.5
## 2 23.6
## 3 22.6
## 4 16.7
## 5 8.5
## 6 15.4
Remove Columns Not Needed
columns_to_exclude2 <- c(
"Gun.Law.Strength.Ranked.", "Grade", "Gun.Death.Rate.Ranked.", "Gun.Death.Rate.per.100K.", "X",
"X.1"
)
# Subset the data frame to exclude specific columns
GUNLAW1<- GUNLAW[, !names(GUNLAW) %in% columns_to_exclude2]
# Change the name of the column "Grade.Rank" to "GunLaw.Rank"
colnames(GUNLAW1)[colnames(GUNLAW1) == "Grade.Rank"] <- "GunLaw.Rank"
head(GUNLAW1)
## State GunLaw.Rank
## 1 Alaska 5
## 2 Alabama 5
## 3 Arkansas 5
## 4 Arizona 5
## 5 California 1
## 6 Colorado 2
Added ID column to prepare to merge both MORTALITYRATE AND GUNLAW
dataset
# Add an ID column
GUNLAW1$ID <- seq_along(GUNLAW1$State)
# Reorder the columns
GUNLAW1 <- GUNLAW1[, c("ID", "State", "GunLaw.Rank")]
# Print the updated data frame
head(GUNLAW1)
## ID State GunLaw.Rank
## 1 1 Alaska 5
## 2 2 Alabama 5
## 3 3 Arkansas 5
## 4 4 Arizona 5
## 5 5 California 1
## 6 6 Colorado 2
Merge the data frames based on a common column, e.g., ‘ID’
MORTALITY_GUNLAW <- merge(MORTALITYRATE, GUNLAW1, by = "ID")
head(MORTALITY_GUNLAW)
## ID State.Abrev Mortality.Rate State GunLaw.Rank
## 1 1 AK 23.5 Alaska 5
## 2 2 AL 23.6 Alabama 5
## 3 3 AR 22.6 Arkansas 5
## 4 4 AZ 16.7 Arizona 5
## 5 5 CA 8.5 California 1
## 6 6 CO 15.4 Colorado 2
Reorder The Columns
MORTALITY_GUNLAW <- MORTALITY_GUNLAW[, c("ID", "State", "State.Abrev", "GunLaw.Rank", "Mortality.Rate")]
head(MORTALITY_GUNLAW)
## ID State State.Abrev GunLaw.Rank Mortality.Rate
## 1 1 Alaska AK 5 23.5
## 2 2 Alabama AL 5 23.6
## 3 3 Arkansas AR 5 22.6
## 4 4 Arizona AZ 5 16.7
## 5 5 California CA 1 8.5
## 6 6 Colorado CO 2 15.4
ANALYSIS
Create a Choropleth Interactive Heat Map For 2020 USA Mortality
Rate(Hover over the States)
heat_map1 <- plot_geo(MORTALITY_GUNLAW, locations = ~State.Abrev, text = ~State, z = ~Mortality.Rate) %>%
add_trace(
type = "choropleth",
colors = "Purples",
locationmode = "USA-states"
) %>%
colorbar(title = "Mortality Rate") %>%
layout(
title = "2020 USA Mortality Rate",
geo = list(
scope = "usa",
projection = list(type = "albers usa"),
showlakes = TRUE,
lakecolor = toRGB("white")
),
annotations = list(
list(
x = 0.5, # X-coordinate of the note
y = .95, # Y-coordinate of the note (adjusted for the subtitle)
xref = "paper",
yref = "paper",
text = "Gun violence and mass shootings are a regular occurrence in the
United States and other countries.", # Text of the subtitle
showarrow = FALSE, # Set to TRUE if you want an arrow pointing to the subtitle
font = list(size = 14) # Adjust the font size as needed
),
list(
x = 0.05, # X-coordinate of the note
y = 0.05, # Y-coordinate of the note
xref = "paper",
yref = "paper",
text = "The number of deaths per 100,000 total population.", # Text of the note
showarrow = FALSE # Set to TRUE if you want an arrow pointing to the note
)
)
)
# Display the heat map
heat_map1
Create a Choropleth Interactive Heat Map For 2020 USA Gun Law
Rank(Hover over the States)
heat_map2 <- plot_geo(MORTALITY_GUNLAW, locations = ~State.Abrev, text = ~State, z = ~GunLaw.Rank) %>%
add_trace(
type = "choropleth",
colors = "Purples",
locationmode = "USA-states"
) %>%
colorbar(title = "Gun Law Rank") %>%
layout(
title = "2020 USA Gun Law Rank",
geo = list(
scope = "usa",
projection = list(type = "albers usa"),
showlakes = TRUE,
lakecolor = toRGB("white")
),
annotations = list(
list(
x = 0.05, # X-coordinate of the subtitle
y = 1.00, # Y-coordinate of the subtitle (adjusted for the subtitle)
xref = "paper",
yref = "paper",
text = "Can stricter gun laws reduce firearm gun deaths?", # Text of the subtitle
showarrow = FALSE, # Set to TRUE if you want an arrow pointing to the subtitle
font = list(size = 14) # Adjust the font size as needed
),
list(
x = 0.05, # X-coordinate of the note
y = 0.05, # Y-coordinate of the note
xref = "paper",
yref = "paper",
text = "Rank is from 1 being an A and 5 being an F", # Text of the note
showarrow = FALSE # Set to TRUE if you want an arrow pointing to the note
)
)
)
# Display the heat map
heat_map2