Load Libraries

library(tidyverse)
library(lubridate)
library(janitor)
library(ggthemes)
library(ggrepel)
library(maps)
library(mapdata)
library(sf)
library(tmap)
library(tigris)
library(viridis)
library(scales)
library(dplyr)
library(knitr)

Load and Clean Data

# Import the raw Fatal Encounters CSV and pass it through janitor's
# clean_names() so the column headers can be used as plain R identifiers.
fe <- clean_names(read_csv("C://Intro to UA//fatal_encounters.csv"))

# Quick structural check: column types/preview first, then the column names.
glimpse(fe)
names(fe)

Select relevant columns and clean

# Build the analysis table used by every section below: keep only the needed
# fields, parse the injury date, and derive display-ready demographic and
# location columns. Rows without a usable year or race are removed.
fe_clean <- fe %>%
  select(
    name,
    date_of_injury_resulting_in_death_month_day_year,
    race,
    gender,
    location_of_death_city,
    state,
    location_of_death_county,
    cause_of_death = highest_level_of_force,
    latitude,
    longitude
  ) %>%
  mutate(
    # mdy() yields NA for values it cannot parse; the filter below drops them.
    date = mdy(date_of_injury_resulting_in_death_month_day_year),
    year = year(date),
    subject_gender = str_to_title(gender),
    subject_race = str_to_title(race),
    # State values are two-letter codes, so upper-case them: str_to_title()
    # would turn "CA" into "Ca".
    state = str_to_upper(state),
    county = str_to_title(location_of_death_county)
  ) %>%
  filter(!is.na(year), !is.na(subject_race))

Gender Breakdown

# Bar chart of the share (in percent) of records per gender category,
# bars ordered from the largest share to the smallest.
fe_clean %>%
  count(subject_gender) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(aes(x = reorder(subject_gender, -pct), y = pct, fill = subject_gender)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Gender Distribution of Victims",
    x = "Gender",
    y = "Percentage"
  ) +
  # "Tableau 10" is a ggthemes palette, not a ColorBrewer one, so it has to be
  # requested through scale_fill_tableau() rather than scale_fill_brewer().
  scale_fill_tableau(palette = "Tableau 10")

Plot description:

Bar chart showing the percentage of victims by gender.

Inferences:

The overwhelming majority of victims are male, which is consistent with national data on fatal encounters.

Female victims represent a small proportion, indicating a gendered pattern in police encounters.

Policies and interventions may need to be gender-aware, but the focus remains primarily on male populations at higher risk.

Racial Disparities

# Tally records per race category (ignoring missing race), order the
# categories from most to fewest records, and attach each category's
# percentage of the total.
race_data <- fe_clean %>%
  filter(!is.na(subject_race)) %>%
  count(subject_race, name = "count") %>%
  arrange(desc(count)) %>%
  mutate(pct = count / sum(count) * 100)




# Horizontal bar chart of record counts per race category; each bar carries
# its percentage of the total as a text label just past the bar end.
ggplot(
  race_data,
  aes(x = reorder(subject_race, count), y = count, fill = subject_race)
) +
  geom_col(width = 0.7, show.legend = FALSE) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    hjust = -0.2,
    size = 3.5,
    color = "black"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 9),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = "Number of Fatal Police Encounters by Race",
    subtitle = "Showing proportions (%) alongside counts",
    x = NULL,
    y = "Count of Encounters"
  ) +
  scale_fill_brewer(palette = "Set3") +
  # Extra 10% of room on the upper end keeps the percentage labels inside the panel.
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.1))) +
  coord_flip()

Plot description:

Horizontal bar chart showing counts of fatalities by race with percentages annotated.

Inferences:

African-American/Black individuals are disproportionately affected compared to their population share.

White/European-American fatalities are high in absolute counts but lower in proportion to population in some areas.

Other groups (Hispanic/Latino, Asian/Pacific Islander, Native American) account for fewer recorded fatalities.

This plot highlights racial disparities and can be used to inform equity-focused policy interventions.

Geographic Distribution (US Map)

# Sets the global `tigris_class` option to "sf".
# NOTE(review): no tigris function is called in the visible code, so this
# presumably only matters for code outside this section — confirm it is
# still needed.
options(tigris_class = "sf")

Prepare incident coordinates and load US state outlines

# Records that can be placed on a map: drop rows with a missing coordinate,
# coerce both coordinate columns to numeric, then drop any rows where the
# coercion produced NA.
fe_map <- fe_clean %>%
  drop_na(latitude, longitude) %>%
  mutate(across(c(latitude, longitude), as.numeric)) %>%
  drop_na(latitude, longitude)

# State outline polygons as a data frame, used as the basemap below
usa <- map_data("state")

# Static overview map: the state outlines are drawn first, then one
# semi-transparent point per row of fe_map, coloured by subject_race.
# NOTE(review): if `usa` only holds the contiguous states, Alaska/Hawaii
# points will fall outside the outlines — confirm this is acceptable.
ggplot() +
  geom_polygon(
    data = usa,
    mapping = aes(x = long, y = lat, group = group),
    fill = "gray95",
    color = "gray70"
  ) +
  geom_point(
    data = fe_map,
    mapping = aes(x = longitude, y = latitude, color = subject_race),
    alpha = 0.5,
    size = 1
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5)
  ) +
  labs(
    title = "Geographic Spread of Fatal Police Encounters in the US",
    subtitle = "Each point represents a fatal encounter",
    x = "Longitude",
    y = "Latitude"
  ) +
  scale_color_brewer(palette = "Set1", name = "Race of Victim") +
  # Fixed y/x aspect ratio of 1.3
  coord_fixed(1.3)

Plot description:

Each point represents a fatal encounter overlaid on a US map.

Inferences:

High concentration of incidents in urban areas, especially in states like California, Texas, Florida, and Georgia.

Sparse coverage in rural areas or less populated states.

This suggests that population density and urban policing patterns influence fatal encounters.

Mapping by race (via color) can show regional racial disparities visually.

Interactive Map

# Promote the point table to an sf object; the longitude/latitude columns
# become the geometry, declared as EPSG:4326 (WGS84).
fe_map_sf <- fe_map %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)

# Switch tmap to its interactive "view" mode
tmap_mode("view")

# One dot per record, coloured by race; the popup lists the record's
# name, race and state.
tm_shape(fe_map_sf) +
  tm_dots(
    col = "subject_race",
    palette = "Set1",
    alpha = 0.6,
    size = 0.3,
    popup.vars = c(Name = "name", Race = "subject_race", State = "state")
  ) +
  tm_layout(title = "Geographic Spread of Fatal Police Encounters in the US")

Plot description:

Interactive point map: each dot is a fatal encounter, colored by the victim's race, with a popup showing the victim's name, race, and state.

County Focus: Atlanta, Georgia (Fulton County Region)

Filter to incidents in Fulton County or the city of Atlanta

# Incidents for the Atlanta, Georgia focus area: Georgia records whose county
# name contains "fulton" or whose city name contains "atlanta". The county and
# city columns are lower-cased in the result.
atlanta_data <- fe_clean %>%
  mutate(
    location_of_death_county = tolower(location_of_death_county),
    location_of_death_city = tolower(location_of_death_city)
  ) %>%
  filter(
    # Restrict to Georgia: "Fulton" counties and "Atlanta" cities also exist in
    # other states, so name matching alone would pull in unrelated records.
    # Compared case-insensitively so "GA", "Ga" and "ga" are all accepted.
    toupper(state) == "GA",
    # Both columns were lower-cased above, so a fixed lower-case match suffices.
    grepl("fulton", location_of_death_county, fixed = TRUE) |
      grepl("atlanta", location_of_death_city, fixed = TRUE)
  )

Summarize by race

# Collapse the raw `race` labels into a fixed set of display categories
# (first matching pattern wins, anything unmatched becomes "Other/Unknown"),
# then count records per category, sort largest first, and add each
# category's percentage of the total.
atlanta_race <- atlanta_data %>%
  count(
    race_clean = case_when(
      grepl("African|Black", race, ignore.case = TRUE) ~ "African-American/Black",
      grepl("White|European", race, ignore.case = TRUE) ~ "White/European-American",
      grepl("Hispanic|Latino", race, ignore.case = TRUE) ~ "Hispanic/Latino",
      grepl("Asian|Pacific", race, ignore.case = TRUE) ~ "Asian/Pacific Islander",
      grepl("Native|Alaska", race, ignore.case = TRUE) ~ "Native American/Alaskan",
      grepl("Middle", race, ignore.case = TRUE) ~ "Middle Eastern",
      grepl("Unknown|unspecified|Unk", race, ignore.case = TRUE) ~ "Race Unspecified",
      TRUE ~ "Other/Unknown"
    ),
    name = "count"
  ) %>%
  arrange(desc(count)) %>%
  mutate(pct = count / sum(count) * 100)

Plot

# Horizontal bar chart of record counts per recoded race category for the
# Atlanta subset, with each bar labelled by its percentage of the total.
ggplot(
  atlanta_race,
  aes(x = reorder(race_clean, count), y = count, fill = race_clean)
) +
  geom_col(width = 0.7, show.legend = FALSE) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    hjust = -0.2,
    color = "black",
    size = 3.5
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5, margin = margin(b = 10)),
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank()
  ) +
  labs(
    title = "County Focus: Atlanta, Georgia (Fulton County Region)",
    subtitle = "Racial Breakdown of Fatal Police Encounters",
    x = NULL,
    y = "Count of Fatal Encounters"
  ) +
  scale_fill_brewer(palette = "Set2") +
  # Extra 10% of room on the upper end keeps the percentage labels inside the panel.
  scale_y_continuous(labels = comma, expand = expansion(mult = c(0, 0.1))) +
  coord_flip()

Plot description:

Bar chart showing racial breakdown of fatal encounters in Atlanta metro counties.

Inferences:

The category ‘Race Unspecified’ is retained in the analysis to maintain transparency and accurately reflect gaps in the data. Including it highlights that some incidents lack recorded racial information, which is important for understanding the completeness and limitations of the dataset.

African-American/Black individuals are disproportionately represented in fatal encounters in Fulton County and Atlanta.

White/European-American and Hispanic/Latino populations are present but at lower counts.

This localized view helps identify hotspots and informs regional interventions.

State-wise Summary

# Number of records per state, largest count first
state_summary <- count(fe_clean, state, sort = TRUE)

# Render the summary as a captioned table with readable column headers
kable(
  state_summary,
  col.names = c("State", "Number of Fatal Encounters"),
  caption = "State-wise Summary of Fatal Police Encounters"
)
State-wise Summary of Fatal Police Encounters
State Number of Fatal Encounters
Ca 4969
Tx 2797
Fl 1894
Ga 1216
Il 1117
Oh 1022
Pa 992
Mo 945
Az 914
Mi 903
Ny 725
Ok 721
Al 700
Tn 695
Co 693
Wa 675
Nc 673
Va 663
La 655
Sc 620
In 609
Md 553
Nj 549
Wi 482
Ms 446
Nm 444
Ky 437
Ks 428
Nv 423
Ar 391
Mn 366
Or 358
Ia 296
Ut 269
Ne 217
Ma 215
Wv 190
Id 161
Ct 142
Ak 129
Hi 116
Mt 116
Me 109
Dc 91
Nh 66
Wy 64
De 63
Sd 61
Nd 43
Vt 42
Ri 32

Table description: Counts of fatalities per state.

Inferences:

Confirms what the US maps show: high counts in highly populated states.

Useful for quick reference and reporting, especially in policy discussions.

Adding percentages relative to population could highlight disproportionate impacts more clearly.

Summary

Between 2000 and 2024, the United States witnessed 31,497 police-involved fatalities. The Fatal Encounters data shows a steady rise in recorded deaths after 2013, likely reflecting improved reporting and awareness following movements like Black Lives Matter.

Males constitute about 89.8% of all victims, a striking imbalance that underscores gendered dynamics in law enforcement interactions. Racially, White and Black/African American individuals represent the largest groups, but when adjusted for population, the risk of being killed by police is disproportionately higher for Black individuals.

Spatially, incidents are concentrated in urbanized regions — notably California, Texas, and Florida, which together account for a significant share of deaths. Mapping reveals dense clusters along the West Coast and major metropolitan corridors in the South and Northeast.

These findings suggest that while fatalities occur across all demographics and geographies, race and region remain key predictors of police violence. The data underscores systemic inequalities that persist in the justice system and highlights the value of transparent, publicly available datasets like Fatal Encounters for civic accountability.