Load Libraries

library(tidyverse)
library(lubridate)
library(janitor)
library(ggthemes)
library(ggrepel)
library(maps)
library(mapdata)
library(sf)
library(tmap)
library(tigris)
library(viridis)
library(scales)
library(dplyr)
library(knitr)

Load and Clean Data

# Read the Fatal Encounters CSV and run the result through clean_names().
fe <- clean_names(read_csv("C://Intro to UA//fatal_encounters.csv"))

# Print the structure and the column names of the loaded data for inspection.
glimpse(fe)
names(fe)

Select relevant columns and clean

# Keep only the columns used by the later sections, derive a parsed date and
# year plus display-friendly labels, then drop rows lacking a year or a race.
fe_clean <- fe %>%
  select(
    name,
    date_of_injury_resulting_in_death_month_day_year,
    race,
    gender,
    location_of_death_city,
    state,
    location_of_death_county,
    cause_of_death = highest_level_of_force,
    latitude,
    longitude
  ) %>%
  mutate(
    # The raw date column is parsed as month/day/year.
    date = mdy(date_of_injury_resulting_in_death_month_day_year),
    year = year(date),
    subject_gender = str_to_title(gender),
    subject_race = str_to_title(race),
    # `state` holds two-letter codes; title case would turn "CA" into "Ca",
    # so normalise to upper case instead.
    state = str_to_upper(state),
    county = str_to_title(location_of_death_county)
  ) %>%
  filter(!is.na(year), !is.na(subject_race))

Annual Trends in Police Killings

# Line chart of the number of records per year.
fe_clean %>%
  count(year) %>%
  ggplot(aes(x = year, y = n)) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using
  # `size` here triggers a deprecation warning.
  geom_line(color = "red", linewidth = 1) +
  geom_point() +
  theme_minimal() +
  labs(
    title = "Yearly Trend in Fatal Police Encounters (US)",
    x = "Year",
    y = "Number of Fatalities"
  )

Plot description:

Line chart showing total fatalities per year across the US.

Inferences:

There is an overall upward trend from early years to recent years, indicating more reported fatal encounters over time.

Some years may show spikes or drops, which could reflect either reporting inconsistencies or the actual effects of policies and events.

This plot highlights the importance of yearly monitoring to understand patterns and trends in fatal encounters.

Gender Breakdown

# Bar chart of the percentage of records per gender, tallest bar first.
fe_clean %>%
  count(subject_gender) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(aes(x = reorder(subject_gender, -pct), y = pct, fill = subject_gender)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Gender Distribution of Victims",
    x = "Gender",
    y = "Percentage"
  ) +
  # "Tableau10" is not a ColorBrewer palette, so scale_fill_brewer() would warn
  # and fall back to a different palette. The Tableau palette is provided by
  # ggthemes, which this script already loads.
  scale_fill_tableau(palette = "Tableau 10")

Plot description:

Bar chart showing the percentage of victims by gender.

Inferences:

The overwhelming majority of victims are male, which is consistent with national data on fatal encounters.

Female victims represent a small proportion, indicating a gendered pattern in police encounters.

Policies and interventions may need to be gender-aware, but the focus remains primarily on male populations at higher risk.

Racial Disparities

# Records per race (missing race excluded), largest group first, with each
# group's share of the total as a percentage.
race_data <- fe_clean %>%
  filter(!is.na(subject_race)) %>%
  count(subject_race, name = "count") %>%
  arrange(-count) %>%
  mutate(pct = count / sum(count) * 100)




# Horizontal bar chart of counts by race; each bar is annotated with the
# group's percentage share.
ggplot(
  data = race_data,
  mapping = aes(
    x = reorder(subject_race, count),
    y = count,
    fill = subject_race
  )
) +
  geom_col(show.legend = FALSE, width = 0.7) +
  geom_text(
    mapping = aes(label = paste0(round(pct, 1), "%")),
    color = "black",
    size = 3.5,
    hjust = -0.2
  ) +
  coord_flip() +
  # Extra room at the upper end of the count axis for the text labels.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.1)),
    labels = comma
  ) +
  scale_fill_brewer(palette = "Set3") +
  theme_minimal(base_size = 12) +
  labs(
    x = NULL,
    y = "Count of Encounters",
    title = "Number of Fatal Police Encounters by Race",
    subtitle = "Showing proportions (%) alongside counts"
  ) +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 10),
    plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
    plot.title = element_text(face = "bold", size = 14)
  )

Plot description:

Horizontal bar chart showing counts of fatalities by race with percentages annotated.

Inferences:

African-American/Black individuals are disproportionately affected compared to their population share.

White/European-American fatalities are high in absolute counts but lower in proportion to population in some areas.

Other races (Hispanic, Asian/Pacific Islander, Native) are affected less frequently.

This plot highlights racial disparities and can be used to inform equity-focused policy interventions.

Geographic Distribution (US Map)

# Set the `tigris_class` option to "sf".
# NOTE(review): no tigris function is called in the visible code, so this
# option may be unused — confirm before keeping it.
options(tigris_class = "sf")

Load US counties shapefile

# Coerce both coordinate columns to numeric and keep only the rows where both
# are available after the conversion.
fe_map <- fe_clean %>%
  mutate(across(c(latitude, longitude), as.numeric)) %>%
  filter(!is.na(latitude), !is.na(longitude))

# State outlines used as the base layer.
usa <- map_data("state")

# Static map: grey state polygons with one point per record, coloured by race.
ggplot() +
  geom_polygon(
    data = usa,
    mapping = aes(x = long, y = lat, group = group),
    color = "gray70",
    fill = "gray95"
  ) +
  geom_point(
    data = fe_map,
    mapping = aes(x = longitude, y = latitude, color = subject_race),
    size = 1,
    alpha = 0.5
  ) +
  scale_color_brewer(name = "Race of Victim", palette = "Set1") +
  coord_fixed(1.3) +
  theme_minimal() +
  labs(
    x = "Longitude",
    y = "Latitude",
    title = "Geographic Spread of Fatal Police Encounters in the US",
    subtitle = "Each point represents a fatal encounter"
  ) +
  theme(
    plot.subtitle = element_text(size = 11, hjust = 0.5),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    legend.position = "right"
  )

Plot description:

Each point represents a fatal encounter overlaid on a US map.

Inferences:

High concentration of incidents in urban areas, especially in states like California, Texas, Florida, and Georgia.

Sparse coverage in rural areas or less populated states.

This suggests that population density and urban policing patterns influence fatal encounters.

Mapping by race (via color) can show regional racial disparities visually.

Interactive Map

# Convert fe_map to sf object
fe_map_sf <- st_as_sf(
  fe_map,
  coords = c("longitude", "latitude"),  # specify coordinates
  crs = 4326                             # WGS84 coordinate reference system
)

# Interactive tmap
tmap_mode("view")

tm_shape(fe_map_sf) +
  tm_dots(
    col = "subject_race",        # color by race
    palette = "Set1",
    size = 0.3,
    alpha = 0.6,
    popup.vars = c(
      "Name" = "name",
      "Race" = "subject_race",
      "State" = "state"
    )
  ) +
  tm_layout(
    title = "Geographic Spread of Fatal Police Encounters in the US"
  )

Plot description:

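Interactive dot map in which each point is a fatal encounter, coloured by the victim's race; clicking a point shows the victim's name, race, and state.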

County Focus: Atlanta, Georgia (Fulton County Region)

Filter Atlanta metro counties (Fulton, DeKalb, etc.)

# Records from Georgia whose county mentions "fulton" or whose city mentions
# "atlanta". County and city are lower-cased first so the match ignores case.
atlanta_data <- fe_clean %>%
  mutate(
    location_of_death_county = tolower(location_of_death_county),
    location_of_death_city = tolower(location_of_death_city)
  ) %>%
  filter(
    # Without a state restriction, Fulton counties and Atlanta cities in
    # other states would be counted too. The comparison is case-insensitive so
    # it works however `state` was cased upstream.
    toupper(state) %in% c("GA", "GEORGIA"),
    grepl("fulton", location_of_death_county, fixed = TRUE) |
      grepl("atlanta", location_of_death_city, fixed = TRUE)
  )

Summarize by race

# Collapse the raw `race` text into a fixed set of labels (first matching
# pattern wins, case-insensitive), then count each label and compute its
# percentage share, largest group first.
atlanta_race <- atlanta_data %>%
  count(
    race_clean = case_when(
      grepl("African|Black", race, ignore.case = TRUE) ~ "African-American/Black",
      grepl("White|European", race, ignore.case = TRUE) ~ "White/European-American",
      grepl("Hispanic|Latino", race, ignore.case = TRUE) ~ "Hispanic/Latino",
      grepl("Asian|Pacific", race, ignore.case = TRUE) ~ "Asian/Pacific Islander",
      grepl("Native|Alaska", race, ignore.case = TRUE) ~ "Native American/Alaskan",
      grepl("Middle", race, ignore.case = TRUE) ~ "Middle Eastern",
      grepl("Unknown|unspecified|Unk", race, ignore.case = TRUE) ~ "Race Unspecified",
      # Anything that matched none of the patterns above.
      TRUE ~ "Other/Unknown"
    ),
    name = "count"
  ) %>%
  mutate(pct = count / sum(count) * 100) %>%
  arrange(desc(count))

Plot

# Horizontal bar chart of the Atlanta-area counts by race; each bar is
# annotated with the group's percentage share.
ggplot(
  data = atlanta_race,
  mapping = aes(
    x = reorder(race_clean, count),
    y = count,
    fill = race_clean
  )
) +
  geom_col(show.legend = FALSE, width = 0.7) +
  geom_text(
    mapping = aes(label = paste0(round(pct, 1), "%")),
    size = 3.5,
    color = "black",
    hjust = -0.2
  ) +
  coord_flip() +
  # Extra room at the upper end of the count axis for the text labels.
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.1)),
    labels = comma
  ) +
  scale_fill_brewer(palette = "Set2") +
  theme_minimal(base_size = 12) +
  labs(
    x = NULL,
    y = "Count of Fatal Encounters",
    title = "County Focus: Atlanta, Georgia (Fulton County Region)",
    subtitle = "Racial Breakdown of Fatal Police Encounters"
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(size = 10),
    plot.subtitle = element_text(size = 11, hjust = 0.5, margin = margin(b = 10)),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )

Plot description:

Bar chart showing the racial breakdown of fatal encounters recorded in Fulton County or the city of Atlanta.

Inferences:

The category ‘Race Unspecified’ is retained in the analysis to maintain transparency and accurately reflect gaps in the data. Including it highlights that some incidents lack recorded racial information, which is important for understanding the completeness and limitations of the dataset.

African-American/Black individuals are disproportionately represented in fatal encounters in Fulton County and Atlanta.

White/European-American and Hispanic/Latino populations are present but at lower counts.

This localized view helps identify hotspots and informs regional interventions.

State-wise Summary

# Create state-wise summary
state_summary <- fe_clean %>%
  count(state) %>%
  arrange(desc(n))

# Display as table
kable(state_summary, 
      col.names = c("State", "Number of Fatal Encounters"),
      caption = "State-wise Summary of Fatal Police Encounters")

State-wise Summary of Fatal Police Encounters
State	Number of Fatal Encounters
Ca	4969
Tx	2797
Fl	1894
Ga	1216
Il	1117
Oh	1022
Pa	992
Mo	945
Az	914
Mi	903
Ny	725
Ok	721
Al	700
Tn	695
Co	693
Wa	675
Nc	673
Va	663
La	655
Sc	620
In	609
Md	553
Nj	549
Wi	482
Ms	446
Nm	444
Ky	437
Ks	428
Nv	423
Ar	391
Mn	366
Or	358
Ia	296
Ut	269
Ne	217
Ma	215
Wv	190
Id	161
Ct	142
Ak	129
Hi	116
Mt	116
Me	109
Dc	91
Nh	66
Wy	64
De	63
Sd	61
Nd	43
Vt	42
Ri	32

Table description: Counts of fatalities per state.

Inferences:

Confirms what the US maps show: high counts in highly populated states.

Useful for quick reference and reporting, especially in policy discussions.

Adding percentages relative to population could highlight disproportionate impacts more clearly.

Summary

Between 2000 and 2024, the United States witnessed 31,497 police-involved fatalities. The Fatal Encounters data shows a steady rise in recorded deaths after 2013, likely reflecting improved reporting and awareness following movements like Black Lives Matter.

Males constitute nearly 89.8% of all victims, a striking imbalance that underscores gendered dynamics in law enforcement interactions. Racially, White and Black/African American individuals represent the largest groups, but when adjusted for population, the risk of being killed by police is disproportionately higher for Black individuals.

Spatially, incidents are concentrated in urbanized regions — notably California, Texas, and Florida, which together account for a significant share of deaths. Mapping reveals dense clusters along the West Coast and major metropolitan corridors in the South and Northeast.

These findings suggest that while fatalities occur across all demographics and geographies, race and region remain key predictors of police violence. The data underscores systemic inequalities that persist in the justice system and highlights the value of transparent, publicly available datasets like Fatal Encounters for civic accountability.

Fatal Encounters Analysis

Rasmitha Reddy Mundla

2025-10-14

Load Libraries

Load and Clean Data

Select relevant columns and clean

Annual Trends in Police Killings

Plot description:

Inferences:

Gender Breakdown

Plot description:

Inferences:

Racial Disparities

Plot description:

Inferences:

Geographic Distribution (US Map)

Load US counties shapefile

Plot description:

Inferences:

Interactive Map

Plot description:

County Focus: Atlanta, Georgia (Fulton County Region)

Filter Atlanta metro counties (Fulton, DeKalb, etc.)

Summarize by race

Plot

Plot description:

Inferences:

State-wise Summary

Table description: Counts of fatalities per state.

Inferences:

Summary