Summary of Findings

The number of fatal encounters has increased between 2000-2020 (Figure 1). This could be a true increase or it could be that it was harder to find this data at the beginning of the data collection period. The majority of fatal encounters occurred in the South, followed by the West (Figure 2). Within these regions, California and Texas had the highest number of fatal encounters (Figure 3). When accounting for the total population of the state, New Mexico has the highest percentage of fatal encounters, followed by Oklahoma and Alaska (Figure 4). When looking at fatal encounters by race for the entire data set, we see that white residents had the highest number of fatal encounters (Figures 5 and 6). However, this is likely indicative of the higher percentage of the population that is white. Therefore, in bringing in Census data with the total number of residents who self-identify as each race category, we see that the proportion of fatal encounters in Black residents is the highest, followed by Native American residents and Hispanic residents (Figure 7). Fatal encounters in White residents is only higher than in Asian residents. I have also summarized these by state, though it’s a bit hard to understand in that way (Figure 8). Unsurprisingly, the number of fatal encounters experienced by women is much less than the number experienced by men (Figure 9). Lastly, looking at fatal encounters by age, the median age of a fatal encounter is 33, which is expected. The minimum age is around 10 months old, which is incredibly sad if that’s not a typo. The majority of fatal encounters occurred between the ages of 26-50, with a high number occurring between 16-25 as well (Figure 10). The proportion of fatal encounters by race within each age group looks relatively steady, though the proportion of white residents experiencing fatal encounters seems to be slightly higher in the 36-50 age group, while the proportion of black residents experiencing a fatal encounter seems to be slightly higher in the 16-25 age group.

library(tidyverse)
library(lubridate)
library(tidycensus)
library(plotly)
FE <- read_csv("Fatal_Encounters.csv") %>%
  rename(Race_Imp = "Race with imputations", Race_NP = "Race")

 
How have the number of fatal encounters changed over the years of data collection?

## change date column to Date class and pull out year
FE$Date <- as.Date(FE$`Date of injury resulting in death (month/day/year)`,  format = "%m/%d/%Y")

FE$Year <- year(FE$Date)

## plot FEs by year
plot1 <- ggplot(data = FE) + 
  geom_bar(mapping = aes(x = Year), fill = "steelblue") +
             labs(title = "Fig 1. Number of Fatal Encounters by Year")+
  theme_minimal()

plot1

 
Where were these encounters most likely to happen?

## Look at Region first 
## create region variable based on census regions 
FE <- FE %>%
  mutate(Region = case_when(
    State %in% c("ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA") ~ "Northeast",
    State %in% c("DE", "DC", "MD", "VA", "WV", "NC", "SC", "GA", "FL", "AL", "MS", "TN", "KY", "TX", "OK", "AR", "LA") ~ "South",
    State %in% c("OH", "IN", "IL", "MI", "WI", "MN", "IA", "MO", "ND", "SD", "NE", "KS") ~ "Midwest",
    State %in% c("CA", "NV", "UT", "AZ", "CO", "NM", "HI","AK","MT", "ID", "WY", "OR", "WA") ~ "West"
  ))

FE <- FE %>%
  filter(!is.na(Region))

plot2 <- ggplot(data = FE, aes(x = Region)) + 
  geom_bar()+
             labs(title = "Fig 2. Number of Fatal Encounters by Region")+
  theme_minimal()

plot2

 

# look at states within these two regions
state_sub <- FE %>%
  filter(Region %in% c("South", "West"))

plot3 <- ggplot(data = state_sub, aes(x = State)) + 
  geom_bar(fill = "steelblue")+
             labs(title = "Fig 3. Number of Fatal Encounters by State in the South and West Regions")+
  theme_minimal()

plot3

 

# pull in census data for total population of each state in 2020
population_data <- get_acs(
  geography = "state",
  variables = "B01003_001",
  year = 2020,                
  survey = "acs5"           
) %>%
  rename(State = NAME, Population = estimate)
## Getting data from the 2016-2020 5-year ACS
# convert state names to abbreviations
state_abbreviations <- data.frame(
  State = c("Alabama", "Alaska", "Arizona", "Arkansas", "California", 
            "Colorado", "Connecticut", "Delaware", "District of Columbia",
            "Florida", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
            "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
            "Massachusetts", "Michigan", "Minnesota", "Mississippi", 
            "Missouri", "Montana",  "Nebraska", "Nevada", "New Hampshire", 
            "New Jersey", "New Mexico", "New York", "North Carolina", 
            "North Dakota", "Ohio", "Oklahoma", 
            "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", 
            "South Dakota", "Tennessee", "Texas", "Utah", "Vermont", 
            "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming"),
  Abbreviation = c("AL", "AK", "AZ", "AR", "CA", 
                   "CO", "CT", "DE", "DC", "FL", "GA", 
                   "HI", "ID", "IL", "IN", "IA", 
                   "KS", "KY", "LA", "ME", "MD", 
                   "MA", "MI", "MN", "MS", "MO", 
                   "MT", "NE", "NV", "NH", "NJ", 
                   "NM", "NY", "NC", "ND", "OH", 
                   "OK", "OR", "PA", "RI", "SC", 
                   "SD", "TN", "TX", "UT", "VT", 
                   "VA", "WA", "WV", "WI", "WY")
)

population_data <- population_data %>%
  left_join(state_abbreviations, by = "State") 

population_data <- population_data %>%
  rename(State_Full = State, State = Abbreviation)

# join with the FE data
FE <- FE %>%
  left_join(population_data, by = "State") 

# create a percent variable
state_deaths <- FE %>%
  group_by(State) %>%
  summarise(Death_Count = n()) 

deaths_pop <- state_deaths %>%
  left_join(population_data, by = "State") %>%
  mutate(Death_Percent = (Death_Count/Population))
  
plot4 <- ggplot(deaths_pop, aes(x = State, y = Death_Percent)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(title = "Fig 4. Percentage of Fatal Encounters by State Population",
       x = "State",
       y = "Percentage of Fatal Encounters (%)") +
  theme_minimal() +  
  theme(axis.text.x = element_text(angle = 90, hjust = 2, size = 7))

ggplotly(plot4)

 

How do these Fatal Encounters differ by Race?

## fix the messed up Race text and create categories to match census names
unique(FE$Race_Imp)
##  [1] "African-American/Black"  NA                       
##  [3] "European-American/White" "Hispanic/Latino"        
##  [5] "Asian/Pacific Islander"  "Native American/Alaskan"
##  [7] "Middle Eastern"          "Race unspecified"       
##  [9] "european-American/White" "HIspanic/Latino"
FE <- FE %>%
    mutate(Race_Imp = ifelse(Race_Imp == "european-American/White", "European-American/White", Race_Imp)) %>%
    mutate(Race_Imp = ifelse(Race_Imp == "HIspanic/Latino", "Hispanic/Latino", Race_Imp))

FE <- FE %>%
  mutate(Race_Imp = case_when(
    Race_Imp == "European-American/White" ~ "White",
    Race_Imp == "Hispanic/Latino" ~ "Hispanic",
    Race_Imp == "African-American/Black" ~ "Black",
    Race_Imp == "Native American/Alaskan" ~ "Native_American",
    Race_Imp == "Asian/Pacific Islander" ~ "Asian",
    TRUE ~ "Other/Unspecified"  
  )) 

unique(FE$Race_Imp)
## [1] "Black"             "Other/Unspecified" "White"            
## [4] "Hispanic"          "Asian"             "Native_American"
## Look at FEs by Race for the entire data set 
plot5 <- ggplot(FE, aes(x = Race_Imp)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Fig 5. Fatal Encounters by Race",
    x = "Race") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme_minimal()

ggplotly(plot5)

 

## Get count of FEs by state 
summary_df <- FE %>%
  group_by(State, Race_Imp) %>%
  summarise(Count = n(), .groups = "drop")

## Plot by state and race
plot6 <- ggplot(summary_df, aes(x = State, y = Count, fill = Race_Imp)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Fig 6. Fatal Encounters by State and Race",
    x = "State",
    y = "Fatal Encounters",
    fill = "Race"
  ) +
  theme_minimal() +  
  theme(axis.text.x = element_text(angle = 90, hjust = 2, size = 7))

ggplotly(plot6)

 

# Look at Race by proportion of FEs to Total Count of residents of each race
# pull in census data for total population of each state in 2020
race_data <- get_acs(
  geography = "state",
  variables = c(
    total = "B01003_001", 
    White = "B02001_002", 
    Black = "B02001_003", 
    Asian = "B02001_005", 
    Hispanic = "B03002_012",
    Native_American = "B02001_004" 
  ),
  year = 2020,                
  survey = "acs5"           
) %>%
  rename(State = NAME, Population = estimate, Race = variable)
## Getting data from the 2016-2020 5-year ACS
race_data <- race_data %>%
  left_join(state_abbreviations, by = "State") 

# Look at proportion of FEs by race for US as a whole
## Calculate total population for each race
total_population_by_race <- race_data %>%
  group_by(Race) %>%
  summarise(Total_Population = sum(Population), .groups = "drop")

## Calculate total U.S. population
total_population_us <- sum(total_population_by_race$Total_Population)

# summarize FE by race 
summary_race <- FE %>%
  group_by(Race_Imp) %>%
  summarise(Count = n(), .groups = "drop")

## join census data to FEs data 
race_pop <- summary_race %>%
  left_join(total_population_by_race, by = c("Race_Imp" = "Race")) %>%
  mutate(deaths_percentage = (Count/Total_Population)) %>%
  filter(Race_Imp != "Other/Unspecified")

plot7 <- ggplot(race_pop, aes(x = Race_Imp, y = deaths_percentage)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  labs(
    title = "Fig 7. Proportion of Fatal Encounters by Race",
    y = "Fatal Encounters Percentage",
    x = "Race"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme_minimal()

ggplotly(plot7)

 

# look at distribution by race for each state 
# summarize FE by race 
summary_race_state <- FE %>%
  group_by(State, Race_Imp) %>%
  summarise(Count = n(), .groups = "drop") 

## join census data to FEs data 
race_pop_state <- summary_race_state %>%
  left_join(race_data, by = c("State" = "Abbreviation", "Race_Imp" = "Race")) %>%
  mutate(deaths_percentage = (Count/Population))

## Plot 
plot8 <- ggplot(race_pop_state, aes(x = State, y = deaths_percentage, fill = Race_Imp)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Fig 8. Death Percentage by State and Race",
    x = "State",
    y = "Death Percentage",
    fill = "Race"
  ) +
  theme_minimal() +  
  theme(axis.text.x = element_text(angle = 90, hjust = 2, size = 7)) 

ggplotly(plot8)
## Warning: Removed 50 rows containing missing values (`position_stack()`).

 
How do fatal encounters differ by gender?

plot9 <- ggplot(FE, aes(x = Gender)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Fig 9. Fatal Encounters by Gender") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme_minimal()

plot9

 
How do fatal encounters differ by age?

## create age groups 
class(FE$Age)
## [1] "numeric"
max(FE$Age)
## [1] NA
FE2 <- FE %>%
  filter(!is.na(Age))
summary(FE2$Age) # I think 107 is probably a typo... 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.08   25.00   33.00   35.28   44.00  107.00
FE2 <- FE2 %>%
  mutate(Age_Group = cut(Age, 
                         breaks = c(-Inf, 15, 25, 35, 50, 65, Inf), 
                         labels = c("0-15", "16-25", "26-35", "36-50", "51-65", "66+")))

# plot FEs by age and race
plot10 <- ggplot(FE2, aes(x = Age_Group, fill = Race_Imp)) +
  geom_bar() +
  labs(
    title = "Fig 10. Fatal Encounters by Age Group",
    fill = "Race") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme_minimal()

plot10