# Load the four raw CSV inputs. They all live in the same directory, so the
# location is defined once here; change `data_dir` to run the script from a
# different machine. A lone "." in any file is read as a missing value (NA).
data_dir <- "C:/Users/brenn/OneDrive/Desktop/R/Capstone"

death_data <- read.csv(
  file.path(data_dir, "State Homicide Data.csv"),
  na.strings = "."
)
temps_data <- read.csv(
  file.path(data_dir, "temp_data.csv"),
  na.strings = "."
)
suicide_1 <- read.csv(
  file.path(data_dir, "suicide_data_1.csv"),
  na.strings = "."
)
suicide_2 <- read.csv(
  file.path(data_dir, "suicide_data_2.csv"),
  na.strings = "."
)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Add the columns of temps_data to death_data, matching rows on Year and
# State. Every row of death_data is kept, matched or not.
state_data <- death_data %>%
  left_join(temps_data, by = c("Year", "State"))

# Stack the two suicide extracts into one table ordered by State.
suicide_data <- rbind(suicide_1, suicide_2) %>%
  arrange(State)
# Lookup table: full state name -> two-letter postal abbreviation for the
# 50 states. Built from base R's bundled `state.name` / `state.abb` vectors,
# which are stored in the same alphabetical order and so pair up
# element-wise.
state_abbr <- stats::setNames(datasets::state.abb, datasets::state.name)
# Lookup table: two-letter state abbreviation -> region label ("Northeast",
# "Midwest", "South" or "West"). The members are written out region by
# region and then flattened into a single named character vector.
Region <- local({
  states_by_region <- list(
    Northeast = c("CT", "ME", "MA", "NH", "NJ", "NY", "PA", "RI", "VT"),
    Midwest = c(
      "IL", "IN", "IA", "KS", "MI", "MN", "MO", "NE", "ND", "OH", "SD", "WI"
    ),
    South = c(
      "AL", "AR", "DE", "FL", "GA", "KY", "LA", "MD",
      "MS", "NC", "OK", "SC", "TN", "TX", "VA", "WV"
    ),
    West = c(
      "AK", "AZ", "CA", "CO", "HI", "ID", "MT", "NV", "NM", "OR", "UT", "WA",
      "WY"
    )
  )
  # Repeat each region label once per member so labels and abbreviations
  # line up position by position.
  stats::setNames(
    rep(names(states_by_region), times = lengths(states_by_region)),
    unlist(states_by_region, use.names = FALSE)
  )
})
# Total suicide deaths for each state and year, tagged with the state's
# postal abbreviation and region, ordered by State.
# Assumes suicide_data$State holds full state names (it is used as the key
# into the name -> abbreviation table); values missing from the lookup
# tables yield NA.
suicides_only <- suicide_data %>%
  group_by(Year, State) %>%
  # Drop the grouping explicitly. By default summarise() only peels off the
  # last grouping variable, leaving the result grouped by Year (and printing
  # a message about it), which then leaks into every later verb.
  summarise(total_suicides = sum(Deaths, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # Named-vector lookups; unname() stops the lookup names from riding
    # along as an attribute on the new columns.
    state_abbr = unname(state_abbr[State]),
    region = unname(Region[state_abbr])
  ) %>%
  arrange(State)
# Combine the homicide/temperature table with the suicide totals, matching
# on year and state abbreviation, then derive the count and rate columns
# that the plots further down read.
# NOTE(review): Perc_Hom, Perc_Suicide and Perc_Deaths divide by
# Pop_Thousand while Perc_Agg and Perc_Crimes divide by Population; confirm
# both denominators give the intended, mutually comparable units.
states_deaths <- state_data %>%
  left_join(
    suicides_only,
    by = c("Year" = "Year", "State" = "state_abbr")
  ) %>%
  # After the join `State` is the key column from state_data and `State.y`
  # is the State column carried over from suicides_only.
  rename(State_Abbr = State, State = State.y) %>%
  mutate(
    Perc_Hom = Homicides / Pop_Thousand * 100,
    Perc_Suicide = total_suicides / Pop_Thousand * 100,
    Perc_Agg = Agg_Assault / Population * 100,
    Violent_Crimes = Homicides + Agg_Assault + total_suicides,
    Deaths = Homicides + total_suicides,
    Perc_Deaths = Deaths / Pop_Thousand * 100,
    Perc_Crimes = Violent_Crimes / Population * 100
  )
Plots Using Raw Values
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Build an animated plotly scatter (one frame per Year) of a single measure
# with states along the x-axis.
#
# y        One-sided formula naming the column to plot, e.g. ~Homicides.
# title    Plot title.
# y_title  y-axis title.
# data     Data frame holding State, Year and the column named in `y`.
#
# Returns the plotly object.
plot_state_measure <- function(y, title, y_title, data = states_deaths) {
  # One tick per distinct state rather than one per row of `data`.
  state_ticks <- unique(data$State[!is.na(data$State)])

  # NOTE(review): hoverinfo = "text" is requested but no `text` is mapped
  # here, so hover labels may come out empty -- confirm this is intended.
  plot_ly(
    data,
    x = ~State, y = y, frame = ~Year,
    hoverinfo = "text", type = "scatter", mode = "markers",
    showlegend = FALSE, size = 4
  ) %>%
    layout(
      title = title,
      xaxis = list(
        title = "State",
        tickvals = state_ticks,
        ticktext = state_ticks,
        tickangle = 90 # Rotate the labels to avoid overlap
      ),
      yaxis = list(title = y_title)
    )
}

Total_VC_Plot <- plot_state_measure(
  ~Violent_Crimes, "Total Violent Crimes by State", "Total Violent Crimes"
)
# The y-axis title used to read "Total Violent Crimes" (copy-paste slip).
Total_Death_Plot <- plot_state_measure(
  ~Deaths, "Total Deaths by State", "Total Deaths"
)
Homicide_Deaths_Plot <- plot_state_measure(
  ~Homicides, "Total Homicides by State", "Homicides"
)
Agg_Assault_Plot <- plot_state_measure(
  ~Agg_Assault, "Total Aggravated Assaults by State", "Aggravated Assaults"
)
Suicide_Plot <- plot_state_measure(
  ~total_suicides, "Total Suicides by State", "Total Suicides"
)
Temp_Plot <- plot_state_measure(
  ~avg_temp, "Average Temperature by State", "Average Temperature"
)
Plots Adjusted for Population Differences
library(plotly)
# Build an animated plotly scatter (one frame per Year) of a single
# population-adjusted measure with states along the x-axis.
#
# y        One-sided formula naming the column to plot, e.g. ~Perc_Hom.
# title    Plot title.
# y_title  y-axis title.
# ...      Extra plot_ly() mappings, e.g. color = ~region, text = ~region.
# data     Data frame holding State, Year and the column named in `y`.
#
# Returns the plotly object.
plot_adjusted_measure <- function(y, title, y_title, ...,
                                  data = states_deaths) {
  # One tick per distinct state rather than one per row of `data`.
  state_ticks <- unique(data$State[!is.na(data$State)])

  # NOTE(review): hoverinfo = "text" shows only the `text` mapping; plots
  # that pass no `text` through `...` may have empty hover labels -- confirm.
  plot_ly(
    data,
    x = ~State, y = y, frame = ~Year, ...,
    hoverinfo = "text", type = "scatter", mode = "markers",
    showlegend = FALSE, size = 4
  ) %>%
    layout(
      title = title,
      xaxis = list(
        title = "State",
        tickvals = state_ticks,
        ticktext = state_ticks,
        tickangle = 90 # Rotate the labels to avoid overlap
      ),
      yaxis = list(title = y_title)
    )
}

P_Total_VC_Plot <- plot_adjusted_measure(
  ~Perc_Crimes,
  "Percentage of Violent Crimes per 1000 People by State",
  "Percentage of Violent Crimes per 1000 People"
)
P_Total_Death_Plot <- plot_adjusted_measure(
  ~Perc_Deaths,
  "Percentage of Deaths per 1000 People by State",
  "Percentage of Deaths per 1000 People"
)
P_Homicide_Deaths_Plot <- plot_adjusted_measure(
  ~Perc_Hom,
  "Percentage of Homicides per 1000 People by State",
  "Percentage of Homicides per 1000 People"
)
P_Agg_Assault_Plot <- plot_adjusted_measure(
  ~Perc_Agg,
  "Percentage of Aggravated Assaults per 1000 People by State",
  "Percentage of Aggravated Assaults Per 1000 People"
)
P_Suicide_Plot <- plot_adjusted_measure(
  ~Perc_Suicide,
  "Percentage of Suicides per 1000 People by State",
  "Percentage of Suicides per 1000 People"
)
# Temperature again, this time colored by region and showing the region
# name on hover.
R_Temp_Plot <- plot_adjusted_measure(
  ~avg_temp,
  "Average Temperature by State",
  "Average Temperature",
  color = ~region, text = ~region
)
# Rows of states_deaths whose abbreviation is "CA" (despite the variable's
# name, the filter is on state, not on a year).
twothousand8 <- states_deaths[states_deaths$State_Abbr == "CA", ]
library(ggplot2)
# Draw and display avg_temp against Perc_Crimes for those rows.
ggplot(twothousand8, aes(x = Perc_Crimes, y = avg_temp)) +
  geom_line()
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
State-Level Focus
# Rows for the six states examined in detail. %in% tests each row's
# abbreviation for membership in the set. `==` would instead recycle the
# six-element vector down the column and compare position by position,
# keeping a row only when its state happens to line up with the recycled
# value, which silently drops most of the wanted rows.
state_dive <- states_deaths[
  states_deaths$State_Abbr %in% c("CA", "NY", "FL", "AK", "OK", "VA"),
]
library(ggplot2)
# Line chart of one measure over Year for the selected states, one colored
# line per state, with the shared title/axis/legend styling applied.
#
# y_col    Name (string) of the column to draw on the y-axis.
# title    Plot title.
# y_label  y-axis label.
# data     Data frame holding Year, State_Abbr and the `y_col` column.
#
# Returns the ggplot object.
plot_select_states <- function(y_col, title, y_label, data = state_dive) {
  ggplot(data) +
    # .data[[y_col]] looks the column up by its string name.
    geom_line(aes(x = Year, y = .data[[y_col]], color = State_Abbr)) +
    labs(title = title, y = y_label, color = "State") +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5, margin = margin(b = 15)),
      axis.title.x = element_text(hjust = 0.5, margin = margin(t = 15)),
      axis.title.y = element_text(hjust = 0.5, margin = margin(r = 15)),
      legend.title = element_text(size = 10),
      legend.key.size = unit(0.5, 'cm'),
      legend.text = element_text(size = 8),
      axis.text.x = element_text(vjust = 1, hjust = 0.1)
    )
}

temps_state <- plot_select_states(
  "avg_temp",
  "Average Temperature for Select States",
  "Average Temperature (F)"
)
hom_states <- plot_select_states(
  "Perc_Hom",
  "Percent Homicide per 1000 People for Select States",
  "Percent Homicide per 1000 People"
)
suistates <- plot_select_states(
  "Perc_Suicide",
  "Percent Suicides per 1000 People for Select States",
  "Percent Suicides per 1000 People"
)
assault_states <- plot_select_states(
  "Perc_Agg",
  "Percent Aggravated Assaults per 1000 People for Select States",
  "Percent Aggravated Assaults per 1000 People"
)
# Display the plots built above by evaluating each object at top level:
# first the population-adjusted plotly charts and the region-colored
# temperature chart, then the four select-state ggplot line charts.
# The `##` lines are the warnings recorded when each plot was rendered.
# NOTE(review): the "Ignoring N observations" / "Removed N rows" warnings
# presumably come from NA values in the plotted columns -- confirm that
# losing those rows is acceptable.
P_Total_VC_Plot
## Warning: Ignoring 457 observations
P_Total_Death_Plot
## Warning: Ignoring 457 observations
P_Homicide_Deaths_Plot
## Warning: Ignoring 457 observations
P_Agg_Assault_Plot
## Warning: Ignoring 456 observations
P_Suicide_Plot
## Warning: Ignoring 450 observations
R_Temp_Plot
## Warning: Ignoring 450 observations
temps_state
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).
hom_states
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).
suistates
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).
assault_states
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).