# Filter data
# Coerce both coordinate columns to numeric, then keep only the rows in which
# neither coordinate is NA after coercion.
df <- df %>%
  mutate(
    Longitude = as.numeric(Longitude),
    Latitude = as.numeric(Latitude)
  ) %>%
  filter(!(is.na(Longitude) | is.na(Latitude)))

# Point layer in EPSG:4326 built from the coordinate-cleaned rows.
# Note: this snapshot is taken BEFORE the Age filter below, so df_sf still
# contains records whose Age is missing, while df does not.
df_sf <- st_as_sf(df, coords = c("Longitude", "Latitude"), crs = 4326)

# Coerce Age to numeric and drop the rows where it is NA afterwards.
df <- df %>%
  mutate(Age = as.numeric(Age)) %>%
  filter(!is.na(Age))
# Collapse the Race column into a single label per record.
# case_when() evaluates the branches top to bottom and the first match wins,
# so a value mentioning several groups receives the earliest label listed here.
# Everything that matches none of the patterns -- including values containing
# "Race unspecified" and rows where Race is NA (an NA condition never matches)
# -- falls through to the final "Race unspecified" default, which is why no
# separate branch for that pattern is needed.
df <- df %>%
  mutate(
    Race_Categorized = case_when(
      str_detect(Race, "Black") ~ "Black",
      str_detect(Race, "White") ~ "White",
      str_detect(Race, "Hispanic") ~ "Hispanic/Latino",
      str_detect(Race, "Middle Eastern") ~ "Middle Eastern",
      str_detect(Race, "Native American") ~ "Native American",
      str_detect(Race, "Asian") ~ "Asian",
      TRUE ~ "Race unspecified"
    )
  )
# Age density curves, one per gender, restricted to records whose Gender is
# exactly "Female" or "Male".
df %>%
  filter(Gender %in% c("Female", "Male")) %>%
  ggplot(aes(x = Age, colour = Gender)) +
  geom_density() +
  scale_color_manual(values = c("Female" = "orange", "Male" = "dodgerblue")) +
  labs(title = "Age Distribution by Gender", x = "Age", y = "Density") +
  theme_minimal()
Younger individuals, particularly those in their 20s, are the most prevalent in the dataset, with males slightly overrepresented compared to females.
# Stacked bar chart of record counts per gender, with each bar split by the
# categorized race label. Only "Female" and "Male" records are included.
df %>%
  filter(Gender %in% c("Female", "Male")) %>%
  ggplot(aes(x = Gender, fill = Race_Categorized)) +
  geom_bar(position = "stack") +
  labs(title = "Gender and Race Breakdown", x = "Gender", y = "Count") +
  theme_minimal()
This plot reveals that male victims overwhelmingly outnumber female victims. Among cases where race is specified, White individuals make up the largest share for both genders, but Black and Hispanic/Latino men are also highly represented.
# The five most frequent values of Highest.level.of.force (ties at the cut-off
# are kept, so more than five values can be returned).
# Geometry is dropped before tallying: only the attribute column is needed,
# and counting on the sf object would carry the point geometries through the
# grouped summary for no benefit.
top_five_forces <- df_sf %>%
  st_drop_geometry() %>%
  count(Highest.level.of.force, sort = TRUE) %>%
  slice_max(n, n = 5) %>%
  pull(Highest.level.of.force)

# Number of records per state whose force category is in the top-five list.
# NOTE(review): the top-five list above is derived from df_sf, whereas these
# counts come from df. If df has been filtered further since df_sf was built,
# the two are based on different row sets -- confirm this is intended.
fe_data_state <- df %>%
  filter(Highest.level.of.force %in% top_five_forces) %>%
  group_by(State) %>%
  summarise(Total_Force_Count = n())
# Load state boundaries.
states <- states(cb = TRUE)

# Drop Alaska, Hawaii and the territories, then attach the per-state counts by
# matching the boundary layer's STUSPS code to fe_data_state's State column.
# States with no matching row keep an NA Total_Force_Count.
states_merged <- states %>%
  filter(!(STUSPS %in% c("AK", "HI", "AS", "MP", "VI", "PR", "GU"))) %>%
  left_join(fe_data_state, by = c("STUSPS" = "State"))

# Interactive choropleth of the per-state counts.
tmap_mode("view")

tm_shape(states_merged) +
  tm_polygons("Total_Force_Count", title = "Total Force Count (Top 5)") +
  tm_layout(title = "Total Top 5 Forces Used by State")
California stands out with the highest number of incidents involving the top five force types. Texas follows closely, with a count of 2,442. Other states, such as Florida, Georgia, and some Northeastern states, show moderate counts of 1,001 to 2,000 incidents.
# Boundaries of Fulton, DeKalb and Clayton counties (Georgia), reprojected to
# EPSG:4326 so they share a CRS with the point layer.
counties_atlanta <- counties(state = "GA", cb = TRUE) %>%
  filter(NAME %in% c("Fulton", "DeKalb", "Clayton")) %>%
  st_transform(crs = 4326)

# Put the point layer in the same CRS as the county polygons.
df_sf <- df_sf %>%
  st_transform(crs = 4326)

# Points whose force category is one of the top-five values.
df_sf_top5 <- df_sf %>%
  filter(Highest.level.of.force %in% top_five_forces)

# For each point, the indices of the county polygons that contain it; a point
# is kept when that list is non-empty, i.e. it lies within one of the counties.
within_atlanta <- st_within(df_sf_top5, counties_atlanta)
df_sf_atlanta <- df_sf_top5[lengths(within_atlanta) != 0, ]

# County outlines with the retained points on top, coloured by force category.
tm_shape(counties_atlanta) +
  tm_polygons() +
  tm_shape(df_sf_atlanta) +
  tm_dots(col = "Highest.level.of.force", palette = "Set1", title = "Top 5 Forces") +
  tm_layout(title = "Top 5 Forces in Atlanta Region")
Gunshot incidents are by far the most common, with a dense concentration across the region, especially in the central parts of Fulton, DeKalb, and Clayton counties.