Capstone

# Load the four raw CSV inputs. Cells containing "." are read in as NA.
# Every input sits in the same folder, so the folder is named once here;
# change `data_dir` to run the analysis from another location.
data_dir <- "C:/Users/brenn/OneDrive/Desktop/R/Capstone"

death_data <- read.csv(file.path(data_dir, "State Homicide Data.csv"), na.strings = ".")
temps_data <- read.csv(file.path(data_dir, "temp_data.csv"), na.strings = ".")
suicide_1 <- read.csv(file.path(data_dir, "suicide_data_1.csv"), na.strings = ".")
suicide_2 <- read.csv(file.path(data_dir, "suicide_data_2.csv"), na.strings = ".")

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Attach the temperature table to the homicide table, matching rows on the
# shared Year and State columns; every row of death_data is retained.
state_data <- death_data %>%
  left_join(temps_data, by = c("Year", "State"))

# Stack the two suicide extracts and order the combined rows by State.
suicide_data <- rbind(suicide_1, suicide_2) %>%
  arrange(State)

# Lookup from full state name to two-letter postal abbreviation for the 50
# states, in alphabetical order by name. Built from R's bundled `state.name`
# and `state.abb` vectors (datasets package), which are index-aligned.
state_abbr <- setNames(state.abb, state.name)

# Lookup from state abbreviation to one of four regions
# (Northeast, Midwest, South, West). Entries are laid out region by region.
Region <- local({
  members <- list(
    Northeast = c("CT", "ME", "MA", "NH", "NJ", "NY", "PA", "RI", "VT"),
    Midwest = c("IL", "IN", "IA", "KS", "MI", "MN", "MO", "NE", "ND", "OH",
                "SD", "WI"),
    South = c("AL", "AR", "DE", "FL", "GA", "KY", "LA", "MD", "MS", "NC",
              "OK", "SC", "TN", "TX", "VA", "WV"),
    West = c("AK", "AZ", "CA", "CO", "HI", "ID", "MT", "NV", "NM", "OR",
             "UT", "WA", "WY")
  )
  # Repeat each region label once per member and name it by the abbreviation.
  setNames(
    rep(names(members), lengths(members)),
    unlist(members, use.names = FALSE)
  )
})

# Total suicide deaths for each Year/State pair, tagged with the state's
# postal abbreviation and its region.
suicides_only <- suicide_data %>%
  group_by(Year, State) %>%
  # .groups = "drop" returns an ungrouped table. Without it summarise() leaves
  # the result grouped by Year, and later verbs would silently work per year.
  summarise(total_suicides = sum(Deaths, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # On the right-hand side `state_abbr` is the name -> abbreviation lookup
    # vector (no column of that name exists yet). unname() keeps the lookup's
    # names from being carried along as an attribute of the new column.
    state_abbr = unname(state_abbr[State]),
    # Here `state_abbr` is the column created on the previous line.
    region = unname(Region[state_abbr])
  )

## `summarise()` has grouped output by 'Year'. You can override using the
## `.groups` argument.

# Order the yearly suicide totals by state name.
suicides_only <- suicides_only %>%
  arrange(State)

# Combine the homicide/temperature table with the yearly suicide totals and
# derive the count and rate columns used by the plots.
# The join matches state_data$State against suicides_only$state_abbr. Both
# tables carry a State column, so after the join the left-hand one is renamed
# State_Abbr and the right-hand one (State.y) takes over the name State.
states_deaths <- state_data %>%
  left_join(suicides_only, by = c("Year", "State" = "state_abbr")) %>%
  rename(State_Abbr = State, State = State.y) %>%
  mutate(
    # NOTE(review): the Perc_* columns mix two denominators (Pop_Thousand for
    # homicides/suicides/deaths, Population for assaults/crimes), so they are
    # not on a common scale -- confirm this is intended.
    Perc_Hom = (Homicides / Pop_Thousand) * 100,
    Perc_Suicide = (total_suicides / Pop_Thousand) * 100,
    Perc_Agg = (Agg_Assault / Population) * 100,
    # Combined counts; NA in any component makes the sum NA.
    Violent_Crimes = Homicides + Agg_Assault + total_suicides,
    Deaths = Homicides + total_suicides,
    Perc_Deaths = (Deaths / Pop_Thousand) * 100,
    Perc_Crimes = (Violent_Crimes / Population) * 100
  )

Plots Using Raw Values

library(plotly)

## Loading required package: ggplot2

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

# Animated scatter (one frame per Year) of the combined Violent_Crimes count
# for each state.
Total_VC_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Violent_Crimes, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", Violent_Crimes),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Total Violent Crimes by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Total Violent Crimes")
  )

# Animated scatter (one frame per Year) of the Deaths column
# (homicides plus suicides) for each state.
Total_Death_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Deaths, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", Deaths),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Total Deaths by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    # The y axis shows Deaths, so label it as such (was "Total Violent Crimes").
    yaxis = list(title = "Total Deaths")
  )

# Animated scatter (one frame per Year) of the homicide count for each state.
Homicide_Deaths_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Homicides, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", Homicides),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Total Homicides by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Homicides")
  )


# Animated scatter (one frame per Year) of the aggravated-assault count for
# each state.
Agg_Assault_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Agg_Assault, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", Agg_Assault),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Total Aggravated Assaults by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Aggravated Assaults")
  )


# Animated scatter (one frame per Year) of the suicide total for each state.
Suicide_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~total_suicides, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", total_suicides),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Total Suicides by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Total Suicides")
  )


# Animated scatter (one frame per Year) of the average temperature for each
# state.
Temp_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~avg_temp, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(avg_temp, 1)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Average Temperature by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Average Temperature")
  )

Plots Adjusted for Population Differences

library(plotly)

# Animated scatter (one frame per Year) of the population-adjusted
# Perc_Crimes column for each state.
P_Total_VC_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Perc_Crimes, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(Perc_Crimes, 2)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Percentage of Violent Crimes per 1000 People by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Percentage of Violent Crimes per 1000 People")
  )

# Animated scatter (one frame per Year) of the population-adjusted
# Perc_Deaths column for each state.
P_Total_Death_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Perc_Deaths, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(Perc_Deaths, 2)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Percentage of Deaths per 1000 People by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Percentage of Deaths per 1000 People")
  )

# Animated scatter (one frame per Year) of the population-adjusted Perc_Hom
# column for each state.
P_Homicide_Deaths_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Perc_Hom, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(Perc_Hom, 2)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Percentage of Homicides per 1000 People by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Percentage of Homicides per 1000 People")
  )


# Animated scatter (one frame per Year) of the population-adjusted Perc_Agg
# column for each state.
P_Agg_Assault_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Perc_Agg, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(Perc_Agg, 2)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Percentage of Aggravated Assaults per 1000 People by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Percentage of Aggravated Assaults Per 1000 People")
  )


# Animated scatter (one frame per Year) of the population-adjusted
# Perc_Suicide column for each state.
P_Suicide_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~Perc_Suicide, frame = ~Year,
  # hoverinfo = "text" displays only the `text` mapping, so supply one;
  # without it the tooltips are empty.
  text = ~paste0(State, ": ", round(Perc_Suicide, 2)),
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Percentage of Suicides per 1000 People by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Percentage of Suicides per 1000 People")
  )


# Animated scatter (one frame per Year) of the average temperature for each
# state, with markers coloured by region; hovering shows the region name.
R_Temp_Plot <- plot_ly(
  states_deaths,
  x = ~State, y = ~avg_temp,
  frame = ~Year, color = ~region, text = ~region,
  hoverinfo = "text", type = "scatter", mode = "markers",
  showlegend = FALSE, size = 4
) %>%
  layout(
    title = "Average Temperature by State",
    xaxis = list(
      title = "State",
      # One tick per distinct state name (State holds full names here);
      # sort() also drops the NA left by unmatched join rows.
      tickvals = sort(unique(states_deaths$State)),
      ticktext = sort(unique(states_deaths$State)),
      tickangle = 90  # rotate the labels to avoid overlap
    ),
    yaxis = list(title = "Average Temperature")
  )

Single State Focus: California (not completed yet)

# Keep only the California rows, matched on the state abbreviation.
# NOTE(review): despite its name, this object is filtered by state, not year.
twothousand8 <- states_deaths[states_deaths$State_Abbr == "CA", ]

library(ggplot2)
# Line chart of average temperature (y) against Perc_Crimes (x) for California.
ggplot(twothousand8, mapping = aes(x = Perc_Crimes, y = avg_temp)) +
  geom_line()

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

Multi-State Focus

# Rows for the six states compared in the select-state charts.
# %in% tests each row's abbreviation against the whole set. `==` would instead
# recycle the six values down the column and keep a row only when its
# abbreviation happened to line up with the recycled position.
state_dive <- states_deaths[
  states_deaths$State_Abbr %in% c("CA", "NY", "FL", "AK", "OK", "VA"),
]

## Warning in states_deaths$State_Abbr == c("CA", "NY", "FL", "AK", "OK", "VA"):
## longer object length is not a multiple of shorter object length

library(ggplot2)

# Yearly average temperature for the selected states, one coloured line per
# state abbreviation.
temps_state <- ggplot(
  state_dive,
  mapping = aes(x = Year, y = avg_temp, color = State_Abbr)
) +
  geom_line() +
  labs(
    title = "Average Temperature for Select States",
    y = "Average Temperature (F)",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    # Centre the title and axis titles and pad them away from the panel.
    plot.title = element_text(hjust = 0.5, margin = margin(b = 15)),
    axis.title.x = element_text(hjust = 0.5, margin = margin(t = 15)),
    axis.title.y = element_text(hjust = 0.5, margin = margin(r = 15)),
    # Compact legend.
    legend.title = element_text(size = 10),
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8),
    axis.text.x = element_text(vjust = 1, hjust = 0.1)
  )

# Yearly Perc_Hom for the selected states, one coloured line per state
# abbreviation.
hom_states <- ggplot(
  state_dive,
  mapping = aes(x = Year, y = Perc_Hom, color = State_Abbr)
) +
  geom_line() +
  labs(
    title = "Percent Homicide per 1000 People for Select States",
    y = "Percent Homicide per 1000 People",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    # Centre the title and axis titles and pad them away from the panel.
    plot.title = element_text(hjust = 0.5, margin = margin(b = 15)),
    axis.title.x = element_text(hjust = 0.5, margin = margin(t = 15)),
    axis.title.y = element_text(hjust = 0.5, margin = margin(r = 15)),
    # Compact legend.
    legend.title = element_text(size = 10),
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8),
    axis.text.x = element_text(vjust = 1, hjust = 0.1)
  )

# Yearly Perc_Suicide for the selected states, one coloured line per state
# abbreviation.
suistates <- ggplot(
  state_dive,
  mapping = aes(x = Year, y = Perc_Suicide, color = State_Abbr)
) +
  geom_line() +
  labs(
    title = "Percent Suicides per 1000 People for Select States",
    y = "Percent Suicides per 1000 People",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    # Centre the title and axis titles and pad them away from the panel.
    plot.title = element_text(hjust = 0.5, margin = margin(b = 15)),
    axis.title.x = element_text(hjust = 0.5, margin = margin(t = 15)),
    axis.title.y = element_text(hjust = 0.5, margin = margin(r = 15)),
    # Compact legend.
    legend.title = element_text(size = 10),
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8),
    axis.text.x = element_text(vjust = 1, hjust = 0.1)
  )

# Yearly Perc_Agg for the selected states, one coloured line per state
# abbreviation.
assault_states <- ggplot(
  state_dive,
  mapping = aes(x = Year, y = Perc_Agg, color = State_Abbr)
) +
  geom_line() +
  labs(
    title = "Percent Aggravated Assaults per 1000 People for Select States",
    y = "Percent Aggravated Assaults per 1000 People",
    color = "State"
  ) +
  theme_minimal() +
  theme(
    # Centre the title and axis titles and pad them away from the panel.
    plot.title = element_text(hjust = 0.5, margin = margin(b = 15)),
    axis.title.x = element_text(hjust = 0.5, margin = margin(t = 15)),
    axis.title.y = element_text(hjust = 0.5, margin = margin(r = 15)),
    # Compact legend.
    legend.title = element_text(size = 10),
    legend.key.size = unit(0.5, "cm"),
    legend.text = element_text(size = 8),
    axis.text.x = element_text(vjust = 1, hjust = 0.1)
  )

# Display the population-adjusted plotly figures and the region-coloured
# temperature figure. A bare object name at top level auto-prints it; the
# "## Warning" lines are the output captured when each figure was rendered.
P_Total_VC_Plot

## Warning: Ignoring 457 observations

P_Total_Death_Plot

## Warning: Ignoring 457 observations

P_Homicide_Deaths_Plot

## Warning: Ignoring 457 observations

P_Agg_Assault_Plot

## Warning: Ignoring 456 observations

P_Suicide_Plot

## Warning: Ignoring 450 observations

R_Temp_Plot

## Warning: Ignoring 450 observations

# Display the ggplot line charts built from state_dive.
temps_state

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

hom_states

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

suistates

## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_line()`).

assault_states

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

Capstone

Brenna Peterman

2025-02-25