library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
# 2. Locate the data directory.
# The path is built explicitly instead of calling setwd(), so running this
# script does not change the session's working directory as a side effect.
data_dir <- "~/Desktop/BDATA 200"
# 3. Load the dataset.
# stringsAsFactors = FALSE keeps text columns as character, not factor.
crime <- read.csv(
  file.path(data_dir, "state_crime.csv"),
  stringsAsFactors = FALSE
)
# 4. List the column names of the loaded data frame
names(crime)
## [1] "State" "Year"
## [3] "Data.Population" "Data.Rates.Property.All"
## [5] "Data.Rates.Property.Burglary" "Data.Rates.Property.Larceny"
## [7] "Data.Rates.Property.Motor" "Data.Rates.Violent.All"
## [9] "Data.Rates.Violent.Assault" "Data.Rates.Violent.Murder"
## [11] "Data.Rates.Violent.Rape" "Data.Rates.Violent.Robbery"
## [13] "Data.Totals.Property.All" "Data.Totals.Property.Burglary"
## [15] "Data.Totals.Property.Larceny" "Data.Totals.Property.Motor"
## [17] "Data.Totals.Violent.All" "Data.Totals.Violent.Assault"
## [19] "Data.Totals.Violent.Murder" "Data.Totals.Violent.Rape"
## [21] "Data.Totals.Violent.Robbery"
# 5. Average homicide rate per state, excluding the District of Columbia.
# Rows are grouped by State and Data.Rates.Violent.Murder is averaged within
# each group, ignoring missing values; the result has one row per state.
state_data <- crime %>%
  filter(State != "District of Columbia") %>%
  group_by(State) %>%
  summarise(
    Avg_Homicide_Rate = mean(Data.Rates.Violent.Murder, na.rm = TRUE),
    .groups = "drop"
  )
# 6. Top 10 states: sort by average homicide rate, highest first, and keep
# the first ten rows of the sorted table.
top10 <- state_data %>%
  arrange(desc(Avg_Homicide_Rate)) %>%
  slice_head(n = 10)
# Print the resulting table
top10
## # A tibble: 10 × 2
## State Avg_Homicide_Rate
## <chr> <dbl>
## 1 Louisiana 12.7
## 2 Mississippi 10.4
## 3 Georgia 10.3
## 4 Alabama 10.1
## 5 Nevada 9.83
## 6 South Carolina 9.81
## 7 Texas 9.44
## 8 Maryland 9.04
## 9 Florida 8.93
## 10 New Mexico 8.69
# Horizontal bar chart of the top 10 states; reorder() sorts the bars by rate
# so that, after coord_flip(), the largest value sits at the top.
ggplot(
  top10,
  aes(x = reorder(State, Avg_Homicide_Rate), y = Avg_Homicide_Rate)
) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity"). The bars
  # are added before the labels so the text layer is drawn on top of them.
  geom_col(fill = "skyblue") +
  # Value labels rounded to one decimal, nudged just past the end of each bar
  geom_text(
    aes(label = round(Avg_Homicide_Rate, 1)),
    hjust = -0.1,
    size = 4
  ) +
  coord_flip() +
  labs(
    title = "Top 10 States by Average Homicide Rate",
    x = "State",
    y = "Average Homicide Rate"
  ) +
  theme_minimal() +
  # Extend the value axis 10% past the largest bar to leave room for labels
  expand_limits(y = max(top10$Avg_Homicide_Rate) * 1.1)
# Vertical bar chart of the same top-10 table, bars ordered by ascending rate.
ggplot(
  data = top10,
  mapping = aes(x = reorder(State, Avg_Homicide_Rate), y = Avg_Homicide_Rate)
) +
  geom_col(fill = "skyblue") +
  # Value labels rounded to one decimal, placed just above each bar
  geom_text(
    mapping = aes(label = round(Avg_Homicide_Rate, 1)),
    vjust = -0.3,
    size = 4
  ) +
  # No padding below the bars; 10% headroom above the tallest for its label
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Top 10 States by Average Homicide Rate",
    x = "State",
    y = "Average Homicide Rate"
  ) +
  theme_minimal()