library(tidyverse)
library(lubridate)
library(janitor)
library(ggthemes)
library(ggrepel)
library(maps)
library(mapdata)
library(sf)
library(tmap)
library(tigris)
library(viridis)
library(scales)
library(dplyr)
library(knitr)
# Load the raw Fatal Encounters export and let janitor normalise the column
# names so later steps can refer to them as plain identifiers.
fe <- clean_names(read_csv("C://Intro to UA//fatal_encounters.csv"))

# Quick structural checks: column types with sample values, then the cleaned
# column names.
glimpse(fe)
colnames(fe)
# Build the analysis table: keep the identifying, demographic and location
# fields, then derive a parsed date, the year, and title-cased labels.
fe_clean <- fe %>%
  select(
    name,
    date_of_injury_resulting_in_death_month_day_year,
    race,
    gender,
    location_of_death_city,
    state,
    location_of_death_county,
    cause_of_death = highest_level_of_force, # renamed while selecting
    latitude,
    longitude
  ) %>%
  mutate(
    # The raw date column is parsed as month/day/year.
    date = lubridate::mdy(date_of_injury_resulting_in_death_month_day_year),
    year = lubridate::year(date),
    subject_gender = stringr::str_to_title(gender),
    subject_race = stringr::str_to_title(race),
    # NOTE(review): if `state` holds two-letter codes, title case turns "CA"
    # into "Ca" -- confirm against the data before relying on this label.
    state = stringr::str_to_title(state),
    county = stringr::str_to_title(location_of_death_county)
  ) %>%
  # Drop rows that lack either a usable year or a race value.
  filter(!(is.na(year) | is.na(subject_race)))
# Yearly trend: one row per year with its number of records, drawn as a red
# line with a point marker on every year.
# NOTE(review): ggplot2 3.4.0 deprecated `size` for line width in favour of
# `linewidth`; switch once the minimum ggplot2 version is confirmed.
ggplot(data = count(fe_clean, year), mapping = aes(x = year, y = n)) +
  geom_line(color = "red", size = 1) +
  geom_point() +
  labs(
    title = "Yearly Trend in Fatal Police Encounters (US)",
    x = "Year",
    y = "Number of Fatalities"
  ) +
  theme_minimal()
Line chart showing total fatalities per year across the US.
There is an overall upward trend from early years to recent years, indicating more reported fatal encounters over time.
Some years may show spikes or drops, which could reflect reporting inconsistencies or the real effects of policy changes and events.
This plot highlights the importance of yearly monitoring to understand patterns and trends in fatal encounters.
# Gender distribution: each gender's share of all records, with bars ordered
# from the largest share to the smallest.
fe_clean %>%
  count(subject_gender) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ggplot(
    aes(x = reorder(subject_gender, -pct), y = pct, fill = subject_gender)
  ) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Gender Distribution of Victims",
    x = "Gender",
    y = "Percentage"
  ) +
  # "Tableau10" is not a ColorBrewer palette, so scale_fill_brewer() only
  # warned and fell back to a default palette. The Tableau palettes are
  # provided by ggthemes, where this one is named "Tableau 10".
  ggthemes::scale_fill_tableau(palette = "Tableau 10")
Bar chart showing the percentage of victims by gender.
The overwhelming majority of victims are male, which is consistent with national data on fatal encounters.
Female victims represent a small proportion, indicating a gendered pattern in police encounters.
Policies and interventions may need to be gender-aware, but the focus remains primarily on male populations at higher risk.
# Per-race summary: number of records and share of the total, largest group
# first. Missing race values are excluded before counting.
race_data <- fe_clean %>%
  filter(!is.na(subject_race)) %>%
  count(subject_race, name = "count") %>%
  arrange(desc(count)) %>%
  mutate(pct = count / sum(count) * 100)
# Horizontal bar chart of record counts per race, largest count at the top,
# with each bar labelled by its share of all records.
ggplot(
  data = race_data,
  mapping = aes(
    x = reorder(subject_race, count),
    y = count,
    fill = subject_race
  )
) +
  geom_col(width = 0.7, show.legend = FALSE) +
  # A negative hjust pushes the percentage label just past the end of its bar.
  geom_text(
    mapping = aes(label = paste0(round(pct, 1), "%")),
    hjust = -0.2,
    size = 3.5,
    color = "black"
  ) +
  # 10% of extra room at the upper end of the count axis leaves space for the
  # labels drawn beyond the bars.
  scale_y_continuous(
    labels = comma,
    expand = expansion(mult = c(0, 0.1))
  ) +
  scale_fill_brewer(palette = "Set3") +
  coord_flip() +
  labs(
    title = "Number of Fatal Police Encounters by Race",
    subtitle = "Showing proportions (%) alongside counts",
    x = NULL,
    y = "Count of Encounters"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(size = 11, margin = margin(b = 10)),
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 10),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )
Horizontal bar chart showing counts of fatalities by race with percentages annotated.
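African-American/Black individuals are disproportionately affected compared to their share of the population (the chart itself shows only counts and within-dataset percentages, so this comparison relies on external population figures).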
White/European-American fatalities are high in absolute counts but lower in proportion to population in some areas.
Other groups (Hispanic/Latino, Asian/Pacific Islander, Native American) appear less frequently in the data.
This plot highlights racial disparities and can be used to inform equity-focused policy interventions.
# Session option read by the tigris package; no tigris call is visible in this
# section of the script.
options(tigris_class = "sf")

# Ensure latitude/longitude are numeric, then keep only the rows whose
# coordinates are both present after the conversion.
fe_map <- fe_clean %>%
  mutate(
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude)
  ) %>%
  filter(!(is.na(latitude) | is.na(longitude)))
# State outline polygons used as the base layer of the static map
usa <- map_data("state")

# Static map: state outlines underneath, one semi-transparent point per record
# on top, coloured by race
ggplot() +
  geom_polygon(
    data = usa,
    mapping = aes(x = long, y = lat, group = group),
    fill = "gray95",
    color = "gray70"
  ) +
  geom_point(
    data = fe_map,
    mapping = aes(x = longitude, y = latitude, color = subject_race),
    alpha = 0.5,
    size = 1
  ) +
  scale_color_brewer(palette = "Set1", name = "Race of Victim") +
  # Fixed y/x aspect ratio so the outlines are not stretched with the window
  coord_fixed(ratio = 1.3) +
  labs(
    title = "Geographic Spread of Fatal Police Encounters in the US",
    subtitle = "Each point represents a fatal encounter",
    x = "Longitude",
    y = "Latitude"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 11, hjust = 0.5)
  )
Each point represents a fatal encounter overlaid on a US map.
Incidents are highly concentrated in urban areas, especially in states such as California, Texas, Florida, and Georgia.
Coverage is sparse in rural areas and in less populated states.
This suggests that population density and urban policing patterns influence fatal encounters.
Mapping by race (via color) can show regional racial disparities visually.
# Turn the coordinate columns into sf point geometry. The x coordinate
# (longitude) is listed first, and EPSG:4326 is the WGS84 longitude/latitude
# reference system.
fe_map_sf <- fe_map %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
# Interactive tmap: switch to "view" mode, then draw one dot per record,
# coloured by race, with name, race and state shown in the popup
tmap_mode("view")

tm_shape(fe_map_sf) +
  tm_dots(
    col = "subject_race",
    palette = "Set1",
    size = 0.3,
    alpha = 0.6,
    # Names are the popup labels; values are the columns they display
    popup.vars = c(Name = "name", Race = "subject_race", State = "state")
  ) +
  tm_layout(title = "Geographic Spread of Fatal Police Encounters in the US")