The dataset comprises hate crime offenses reported in the United States from 1991 to 2022. The analysis aims to predict the likelihood of hate crime occurrences in 2023 based on historical data.
Show the top 6 rows of the dataset from CSV file data source.
# Read data
data <- read.csv("hate_crime.csv")
head(data)
## incident_id data_year ori pug_agency_name pub_agency_unit
## 1 43 1991 AR0350100 Pine Bluff
## 2 44 1991 AR0350100 Pine Bluff
## 3 45 1991 AR0600300 North Little Rock
## 4 46 1991 AR0600300 North Little Rock
## 5 47 1991 AR0670000 Sevier
## 6 3015 1991 AR0040200 Rogers
## agency_type_name state_abbr state_name division_name region_name
## 1 City AR Arkansas West South Central South
## 2 City AR Arkansas West South Central South
## 3 City AR Arkansas West South Central South
## 4 City AR Arkansas West South Central South
## 5 County AR Arkansas West South Central South
## 6 City AR Arkansas West South Central South
## population_group_code population_group_description incident_date
## 1 3 Cities from 50,000 thru 99,999 1991-07-04
## 2 3 Cities from 50,000 thru 99,999 1991-12-24
## 3 3 Cities from 50,000 thru 99,999 1991-07-10
## 4 3 Cities from 50,000 thru 99,999 1991-10-06
## 5 8D Non-MSA counties under 10,000 1991-10-14
## 6 5 Cities from 10,000 thru 24,999 1991-08-31
## adult_victim_count juvenile_victim_count total_offender_count
## 1 NA NA 1
## 2 NA NA 1
## 3 NA NA 1
## 4 NA NA 2
## 5 NA NA 1
## 6 NA NA 1
## adult_offender_count juvenile_offender_count offender_race
## 1 NA NA Black or African American
## 2 NA NA Black or African American
## 3 NA NA Black or African American
## 4 NA NA Black or African American
## 5 NA NA White
## 6 NA NA White
## offender_ethnicity victim_count
## 1 Not Specified 1
## 2 Not Specified 2
## 3 Not Specified 2
## 4 Not Specified 1
## 5 Not Specified 1
## 6 Not Specified 1
## offense_name
## 1 Aggravated Assault
## 2 Aggravated Assault;Destruction/Damage/Vandalism of Property
## 3 Aggravated Assault;Murder and Nonnegligent Manslaughter
## 4 Intimidation
## 5 Intimidation
## 6 Intimidation
## total_individual_victims location_name
## 1 1 Residence/Home
## 2 1 Highway/Road/Alley/Street/Sidewalk
## 3 2 Residence/Home
## 4 1 Residence/Home
## 5 1 School/College
## 6 1 Highway/Road/Alley/Street/Sidewalk
## bias_desc victim_types multiple_offense multiple_bias
## 1 Anti-Black or African American Individual S S
## 2 Anti-White Individual M S
## 3 Anti-White Individual M S
## 4 Anti-White Individual S S
## 5 Anti-Black or African American Individual S S
## 6 Anti-Black or African American Individual S S
Aggregate and sort the dataset. Looks at the top three offenses. Show the 2022 occurance of offenses. The psuedo code to do this
Extract records from the yearly counts for the year 2022. *Predict Next Year’s Counts
Define a function to:
Display the predictions for 2023 alongside the historical data and visualizations.
# Step 1: Get the top 3 offenses based on total offender count
top_offenses <- data %>%
group_by(offense_name) %>%
summarize(total_offender_count = sum(total_offender_count, na.rm = TRUE)) %>%
arrange(desc(total_offender_count)) %>%
slice_head(n = 3)
top_offenses
## # A tibble: 3 × 2
## offense_name total_offender_count
## <chr> <int>
## 1 Simple Assault 65440
## 2 Intimidation 63191
## 3 Aggravated Assault 42291
# Extract the names of the top 3 offenses
top_offenses_names <- top_offenses$offense_name
# Step 2: Filter the dataset to include only the top 3 offenses
filtered_data <- data %>%
filter(offense_name %in% top_offenses_names)
head(filtered_data)
## incident_id data_year ori pug_agency_name pub_agency_unit
## 1 43 1991 AR0350100 Pine Bluff
## 2 46 1991 AR0600300 North Little Rock
## 3 47 1991 AR0670000 Sevier
## 4 3015 1991 AR0040200 Rogers
## 5 3016 1991 AR0290100 Hope
## 6 3017 1991 AR0350100 Pine Bluff
## agency_type_name state_abbr state_name division_name region_name
## 1 City AR Arkansas West South Central South
## 2 City AR Arkansas West South Central South
## 3 County AR Arkansas West South Central South
## 4 City AR Arkansas West South Central South
## 5 City AR Arkansas West South Central South
## 6 City AR Arkansas West South Central South
## population_group_code population_group_description incident_date
## 1 3 Cities from 50,000 thru 99,999 1991-07-04
## 2 3 Cities from 50,000 thru 99,999 1991-10-06
## 3 8D Non-MSA counties under 10,000 1991-10-14
## 4 5 Cities from 10,000 thru 24,999 1991-08-31
## 5 6 Cities from 2,500 thru 9,999 1991-09-19
## 6 3 Cities from 50,000 thru 99,999 1991-12-23
## adult_victim_count juvenile_victim_count total_offender_count
## 1 NA NA 1
## 2 NA NA 2
## 3 NA NA 1
## 4 NA NA 1
## 5 NA NA 1
## 6 NA NA 1
## adult_offender_count juvenile_offender_count offender_race
## 1 NA NA Black or African American
## 2 NA NA Black or African American
## 3 NA NA White
## 4 NA NA White
## 5 NA NA Black or African American
## 6 NA NA Black or African American
## offender_ethnicity victim_count offense_name total_individual_victims
## 1 Not Specified 1 Aggravated Assault 1
## 2 Not Specified 1 Intimidation 1
## 3 Not Specified 1 Intimidation 1
## 4 Not Specified 1 Intimidation 1
## 5 Not Specified 1 Simple Assault 1
## 6 Not Specified 1 Aggravated Assault 1
## location_name bias_desc
## 1 Residence/Home Anti-Black or African American
## 2 Residence/Home Anti-White
## 3 School/College Anti-Black or African American
## 4 Highway/Road/Alley/Street/Sidewalk Anti-Black or African American
## 5 Highway/Road/Alley/Street/Sidewalk Anti-White
## 6 Service/Gas Station Anti-White
## victim_types multiple_offense multiple_bias
## 1 Individual S S
## 2 Individual S S
## 3 Individual S S
## 4 Individual S S
## 5 Individual S S
## 6 Individual S S
# Step 3: Calculate yearly counts for each of the top 3 offenses
yearly_counts <- filtered_data %>%
group_by(offense_name, data_year) %>%
summarize(yearly_offender_count = sum(total_offender_count, na.rm = TRUE)) %>%
arrange(offense_name, data_year)
## `summarise()` has grouped output by 'offense_name'. You can override using the
## `.groups` argument.
# Step 4: Filter the data to get counts for the year 2022
year_2022_counts <- yearly_counts %>%
filter(data_year == 2022)
year_2022_counts
## # A tibble: 3 × 3
## # Groups: offense_name [3]
## offense_name data_year yearly_offender_count
## <chr> <int> <int>
## 1 Aggravated Assault 2022 1389
## 2 Intimidation 2022 2860
## 3 Simple Assault 2022 2943
# Function to fit linear model and predict next year's count
predict_next_year <- function(data) {
model <- lm(yearly_offender_count ~ data_year, data = data)
next_year <- max(data$data_year) + 1
prediction <- predict(model, newdata = data.frame(data_year = next_year))
return(data.frame(offense_name = unique(data$offense_name),
data_year = next_year,
predicted_count = prediction))
}
# Apply the prediction function to each of the top offenses
predictions <- yearly_counts %>%
group_by(offense_name) %>%
do(predict_next_year(.))
predictions
## # A tibble: 3 × 3
## # Groups: offense_name [3]
## offense_name data_year predicted_count
## <chr> <dbl> <dbl>
## 1 Aggravated Assault 2023 882.
## 2 Intimidation 2023 1760.
## 3 Simple Assault 2023 2020.
# Step 5: Visualize each offense's counts per year and the prediction
ggplot(yearly_counts, aes(x = data_year, y = yearly_offender_count, color = offense_name, group = offense_name)) +
geom_line(size = 1) +
geom_point(size = 2) +
geom_point(data = predictions, aes(x = data_year, y = predicted_count), shape = 17, size = 3, color = "red") +
geom_text(data = predictions, aes(x = data_year, y = predicted_count, label = round(predicted_count, 0)), vjust = -0.5) +
labs(title = "Yearly Counts of Top 3 Offenses with Predictions",
x = "Year",
y = "Offender Count",
color = "Offense Name") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.