#1 Load required libraries

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3

#2 Locate the data directory
# The data folder is held in a variable and joined to the file name instead
# of calling setwd(): the script no longer changes the session's working
# directory as a side effect, and only this one path has to be edited when
# the script is run on another machine.

data_dir <- "~/Desktop/BDATA 200"

#3 Load the dataset
# Fail early with a readable message if the CSV is not where it is expected.

crime_path <- file.path(data_dir, "state_crime.csv")
if (!file.exists(crime_path)) {
  stop("Cannot find data file: ", crime_path, call. = FALSE)
}
crime <- read.csv(crime_path, stringsAsFactors = FALSE)

#4 Inspect the column names
# Lists every column of the loaded data frame; the printed result follows.

names(crime)
##  [1] "State"                         "Year"                         
##  [3] "Data.Population"               "Data.Rates.Property.All"      
##  [5] "Data.Rates.Property.Burglary"  "Data.Rates.Property.Larceny"  
##  [7] "Data.Rates.Property.Motor"     "Data.Rates.Violent.All"       
##  [9] "Data.Rates.Violent.Assault"    "Data.Rates.Violent.Murder"    
## [11] "Data.Rates.Violent.Rape"       "Data.Rates.Violent.Robbery"   
## [13] "Data.Totals.Property.All"      "Data.Totals.Property.Burglary"
## [15] "Data.Totals.Property.Larceny"  "Data.Totals.Property.Motor"   
## [17] "Data.Totals.Violent.All"       "Data.Totals.Violent.Assault"  
## [19] "Data.Totals.Violent.Murder"    "Data.Totals.Violent.Rape"     
## [21] "Data.Totals.Violent.Robbery"

#5 Average homicide rate per state (District of Columbia excluded)
# Rows for DC are dropped first; the remaining rows are collapsed to one row
# per state holding the mean of the murder-rate column. Missing rates are
# skipped via na.rm = TRUE.

state_data <-
  crime |>
  filter(State != "District of Columbia") |>
  group_by(State) |>
  summarise(Avg_Homicide_Rate = mean(Data.Rates.Violent.Murder, na.rm = TRUE))

#6 Ten states with the highest average homicide rate
# Sort from highest to lowest average, then keep the first ten rows.

top10 <- state_data |>
  arrange(desc(Avg_Homicide_Rate)) |>
  head(10)

print(top10)
## # A tibble: 10 × 2
##    State          Avg_Homicide_Rate
##    <chr>                      <dbl>
##  1 Louisiana                  12.7 
##  2 Mississippi                10.4 
##  3 Georgia                    10.3 
##  4 Alabama                    10.1 
##  5 Nevada                      9.83
##  6 South Carolina              9.81
##  7 Texas                       9.44
##  8 Maryland                    9.04
##  9 Florida                     8.93
## 10 New Mexico                  8.69
# Horizontal bar chart of the top 10 states, highest rate at the top.
# - geom_col() is the direct equivalent of geom_bar(stat = "identity").
# - The bars are added before the labels so the text layer is drawn on top.
# - Labels use sprintf("%.1f") so every value shows exactly one decimal;
#   round(x, 1) would print a value such as 9.04 as "9".
# - expand_limits() stretches the value axis 10% past the largest bar so
#   its label is not clipped.
ggplot(top10, aes(x = reorder(State, Avg_Homicide_Rate),
                  y = Avg_Homicide_Rate)) +
  geom_col(fill = "skyblue") +
  geom_text(aes(label = sprintf("%.1f", Avg_Homicide_Rate)),
            hjust = -0.1, size = 4) +
  coord_flip() +
  labs(
    title = "Top 10 States by Average Homicide Rate",
    x = "State",
    y = "Average Homicide Rate"
  ) +
  theme_minimal() +
  expand_limits(y = max(top10$Avg_Homicide_Rate) * 1.1)

# Vertical bar chart of the top 10 states, ordered from lowest to highest.
# - geom_col() is the direct equivalent of geom_bar(stat = "identity").
# - Labels use sprintf("%.1f") so every value shows exactly one decimal;
#   round(x, 1) would print a value such as 9.04 as "9".
# - vjust = -0.3 places each label just above its bar.
# - expansion(mult = c(0, 0.1)) removes the gap below the bars and adds 10%
#   headroom above them so the top label is not clipped.
ggplot(top10, aes(x = reorder(State, Avg_Homicide_Rate),
                  y = Avg_Homicide_Rate)) +
  geom_col(fill = "skyblue") +
  geom_text(aes(label = sprintf("%.1f", Avg_Homicide_Rate)),
            vjust = -0.3, size = 4) +
  labs(
    title = "Top 10 States by Average Homicide Rate",
    x = "State",
    y = "Average Homicide Rate"
  ) +
  theme_minimal() +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1)))