library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.3.2
# Read the mortality CSV from its local download location and preview it.
data <- read.csv("C:/Users/Chafiaa/Downloads/489q-934x.csv")
head(data)

# Keep only the rows recorded with the firearm-related cause of death.
is_firearm <- data$cause_of_death == "Firearm-related injury"
df2 <- data[is_firearm, ]
head(df2)
# Columns to discard before reshaping: the rate descriptors plus the overall,
# by-sex and by-age-group rate columns.
# NOTE(review): "rate_65_74" lacks the "age_" infix its neighbours have --
# presumably this mirrors the CSV header; confirm against the file.
columns_to_exclude <- c(
  "rate_type", "unit",
  "rate_overall", "rate_sex_female", "rate_sex_male",
  "rate_age_1_4", "rate_age_5_14", "rate_age_15_24", "rate_age_25_34",
  "rate_age_35_44", "rate_age_45_54", "rate_age_55_64", "rate_65_74",
  "rate_age_75_84", "rate_age_85_plus"
)

# Build a logical mask over the column names and keep everything not listed.
keep_column <- !(names(df2) %in% columns_to_exclude)
df3 <- df2[, keep_column]
head(df3)
# Keep a single record: the 7th firearm-related row.
# NOTE(review): the row position is hard-coded -- presumably this is the
# 2020 Q4 record; confirm against the data if the CSV is ever refreshed.
df4 <- df3[7, ]
head(df4)

# Remove the two descriptive columns that are not needed downstream.
df5 <- select(df4, -time_period, -cause_of_death)
head(df5)
# New-name = old-name lookup: the period column becomes "Year" and every
# per-state rate column is renamed to a two-letter code.
state_columns <- c(
  Year = "year_and_quarter",
  AL = "rate_alabama",
  AK = "rate_alaska",
  AZ = "rate_arizona",
  AR = "rate_arkansas",
  CA = "rate_california",
  CO = "rate_colorado",
  CT = "rate_connecticut",
  DC = "rate_district_of_columbia",
  DE = "rate_delaware",
  FL = "rate_florida",
  GA = "rate_georgia",
  HI = "rate_hawaii",
  ID = "rate_idaho",
  IL = "rate_illinois",
  IN = "rate_indiana",
  IA = "rate_iowa",
  KS = "rate_kansas",
  KY = "rate_kentucky",
  LA = "rate_louisiana",
  ME = "rate_maine",
  MD = "rate_maryland",
  MA = "rate_massachusetts",
  MI = "rate_michigan",
  MN = "rate_minnesota",
  MS = "rate_mississippi",
  MO = "rate_missouri",
  MT = "rate_montana",
  NC = "rate_north_carolina",
  ND = "rate_north_dakota",
  NE = "rate_nebraska",
  NV = "rate_nevada",
  NH = "rate_new_hampshire",
  NJ = "rate_new_jersey",
  NM = "rate_new_mexico",
  NY = "rate_new_york",
  OH = "rate_ohio",
  OK = "rate_oklahoma",
  OR = "rate_oregon",
  PA = "rate_pennsylvania",
  RI = "rate_rhode_island",
  SC = "rate_south_carolina",
  SD = "rate_south_dakota",
  TN = "rate_tennessee",
  TX = "rate_texas",
  UT = "rate_utah",
  VT = "rate_vermont",
  VA = "rate_virginia",
  WA = "rate_washington",
  WV = "rate_west_virginia",
  WI = "rate_wisconsin",
  WY = "rate_wyoming"
)

# all_of() is strict: a missing source column is an error, as it would be
# when each pair is spelled out inside rename().
df6 <- rename(df5, all_of(state_columns))
head(df6)
# Shorten the period label "2020 Q4" to "2020".
df6[["Year"]] <- sub("2020 Q4", "2020", df6[["Year"]])
head(df6)

# Reset the row names to the default numbering.
rownames(df6) <- NULL
head(df6)

# Drop the Year column.
df6 <- select(df6, -Year)
head(df6)
# Reshape from wide to long: each column of df6 becomes one row, with the
# column name stored in State.a and its value in Mortality.Rate.
MORTALITYRATE <- pivot_longer(
  df6,
  cols = everything(),
  names_to = "State.a",
  values_to = "Mortality.Rate"
)
head(MORTALITYRATE)

# Add a running row number as an ID column.
MORTALITYRATE$ID <- seq_len(nrow(MORTALITYRATE))
head(MORTALITYRATE) # data that we can work with
# Download the 2020 gun-law scorecard CSV and preview it.
glaw_url <- "https://raw.githubusercontent.com/enidroman/Data_608_Knowledge_and_Visual_Analytics/main/2020%20GUN%20LAW%20SCORECARD.csv"
GLAW <- read.csv(glaw_url)
head(GLAW)

# Scorecard columns that are not needed for the comparison.
columns_to_exclude2 <- c(
  "Gun.Law.Strength.Ranked.",
  "Grade",
  "Gun.Death.Rate.Ranked.",
  "Gun.Death.Rate.per.100K.",
  "X",
  "X.1"
)

# Drop the unwanted columns, shorten "Grade.Rank" to "Rank", number the rows,
# and fix the final column order as ID, State, Rank.
GLAW1 <- GLAW[, !(names(GLAW) %in% columns_to_exclude2)]
names(GLAW1)[names(GLAW1) == "Grade.Rank"] <- "Rank"
GLAW1$ID <- seq_len(nrow(GLAW1))
GLAW1 <- GLAW1[, c("ID", "State", "Rank")]
head(GLAW1)
# Join the mortality rates to the gun-law scorecard on the state itself.
# Both ID columns are plain row counters over tables from unrelated sources,
# so merging on ID pairs whichever states happen to share a row position.
# Instead, translate the scorecard's State label into a two-letter postal
# code and merge on State.a.

# Lookup from upper-cased full name (or an existing postal code) to postal
# code. The built-in state.name / state.abb vectors cover the 50 states; the
# District of Columbia is added by hand.
state_lookup <- c(
  setNames(state.abb, toupper(state.name)),
  setNames(state.abb, state.abb),
  "DISTRICT OF COLUMBIA" = "DC",
  "DC" = "DC"
)

# Normalise the labels (trim whitespace, ignore case) before the lookup.
glaw_label <- toupper(trimws(as.character(GLAW1$State)))
glaw_keyed <- data.frame(
  State.a = unname(state_lookup[glaw_label]),
  State = GLAW1$State,
  Rank = GLAW1$Rank,
  stringsAsFactors = FALSE
)

# Report non-blank labels that could not be translated instead of silently
# losing them, then drop every row without a usable key.
unresolved <- is.na(glaw_keyed$State.a) &
  !is.na(glaw_label) & nzchar(glaw_label)
if (any(unresolved)) {
  warning(
    "Scorecard rows with an unrecognised state were dropped: ",
    paste(unique(glaw_keyed$State[unresolved]), collapse = ", "),
    call. = FALSE
  )
}
glaw_keyed <- glaw_keyed[!is.na(glaw_keyed$State.a), ]

# Inner join, as before: states present in only one table are left out.
MORTALITY_GUNLAW <- merge(MORTALITYRATE, glaw_keyed, by = "State.a")

# Keep the column layout and ID ordering that merging on ID produced.
MORTALITY_GUNLAW <- MORTALITY_GUNLAW[
  order(MORTALITY_GUNLAW$ID),
  c("ID", "State.a", "Mortality.Rate", "State", "Rank")
]
row.names(MORTALITY_GUNLAW) <- NULL
head(MORTALITY_GUNLAW)
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
# Choropleth of the mortality rate by state.
# Locations are the two-letter codes in State.a, the hover text is the state
# label, and the fill is driven by Mortality.Rate.
heat_map1 <- plot_geo(
  MORTALITY_GUNLAW,
  locations = ~State.a,
  text = ~State,
  z = ~Mortality.Rate
) %>%
  add_trace(
    type = "choropleth",
    # "Blues" is a ColorBrewer sequential palette, so the shading can vary
    # with the rate; a single colour name gives nothing to interpolate over.
    colors = "Blues",
    locationmode = "USA-states"
  ) %>%
  colorbar(title = "Mortality Rate") %>%
  layout(
    title = "2020 USA Mortality Rate",
    geo = list(
      scope = "usa",
      projection = list(type = "albers usa"),
      showlakes = TRUE,
      lakecolor = toRGB("brown")
    ),
    # Annotation positions are fractions of the plotting area ("paper").
    annotations = list(
      list(
        x = 0.6,
        y = 0.8,
        xref = "paper",
        yref = "paper",
        text = "States where the gun violence is high",
        showarrow = FALSE,
        font = list(size = 15)
      ),
      list(
        x = 0.07,
        y = 0.06,
        xref = "paper",
        yref = "paper",
        text = "The number of deaths per 100,000 population.",
        showarrow = FALSE
      )
    )
  )

# Display the heat map
heat_map1
# Choropleth of the gun-law rank by state.
# Locations are the two-letter codes in State.a, the hover text is the state
# label, and the fill is driven by Rank.
heat_map2 <- plot_geo(
  MORTALITY_GUNLAW,
  locations = ~State.a,
  text = ~State,
  z = ~Rank
) %>%
  add_trace(
    type = "choropleth",
    # "Reds" is a ColorBrewer sequential palette, so the shading can vary
    # with the rank; a single colour name gives nothing to interpolate over.
    colors = "Reds",
    locationmode = "USA-states"
  ) %>%
  colorbar(title = "Gun Law Rank") %>%
  layout(
    title = "2020 USA Gun Law Rank",
    geo = list(
      scope = "usa",
      projection = list(type = "albers usa"),
      showlakes = TRUE,
      lakecolor = toRGB("white")
    ),
    # Annotation positions are fractions of the plotting area ("paper").
    annotations = list(
      list(
        x = 0.05,
        y = 1.00,
        xref = "paper",
        yref = "paper",
        text = "states with high rank of gun law",
        showarrow = FALSE,
        font = list(size = 14)
      ),
      list(
        x = 0.05,
        y = 0.05,
        xref = "paper",
        yref = "paper",
        text = "Rank is from 1 being an A and 5 being an F",
        showarrow = FALSE
      )
    )
  )

# Display the heat map
heat_map2
# From the heat maps I created, states whose gun laws receive a failing grade, such as Alaska and Mississippi, appear to have high mortality related to gun violence. This suggests that where gun laws are strict, states tend to be safer and gun-related mortality is lower.