1 Overview

We provide here the technical code and methods to accompany our analysis of data on lynching and policing. We examine some of the historical and contemporary contexts of state violence and social control using a media frames analysis regarding the state’s means of production towards social control – we then situate this idea using Goodwin’s (1992) analysis around professional visions and we broadly frame our analysis within the research in Black and African American studies. We also consider some of the structures, assumptions, and quantitative models related to the analysis of the historical data. We make use of original source data from Ida B. Wells-Barnett’s The Red Record and the Washington Post Fatal Force database.

2 Data

2.1 The Washington Post Fatal Force database

# fatal database
fatal <- read.csv("https://raw.githubusercontent.com/washingtonpost/data-police-shootings/refs/heads/master/v2/fatal-police-shootings-data.csv")
str(fatal)
## 'data.frame':    10430 obs. of  19 variables:
##  $ id                        : int  3 4 5 8 9 11 13 15 16 17 ...
##  $ date                      : chr  "2015-01-02" "2015-01-02" "2015-01-03" "2015-01-04" ...
##  $ threat_type               : chr  "point" "point" "move" "point" ...
##  $ flee_status               : chr  "not" "not" "not" "not" ...
##  $ armed_with                : chr  "gun" "gun" "unarmed" "replica" ...
##  $ city                      : chr  "Shelton" "Aloha" "Wichita" "San Francisco" ...
##  $ county                    : chr  "Mason" "Washington" "Sedgwick" "San Francisco" ...
##  $ state                     : chr  "WA" "OR" "KS" "CA" ...
##  $ latitude                  : num  47.2 45.5 37.7 37.8 40.4 ...
##  $ longitude                 : num  -123.1 -122.9 -97.3 -122.4 -104.7 ...
##  $ location_precision        : chr  "not_available" "not_available" "not_available" "not_available" ...
##  $ name                      : chr  "Tim Elliot" "Lewis Lee Lembke" "John Paul Quintero" "Matthew Hoffman" ...
##  $ age                       : int  53 47 23 32 39 18 22 35 34 47 ...
##  $ gender                    : chr  "male" "male" "male" "male" ...
##  $ race                      : chr  "A" "W" "H" "W" ...
##  $ race_source               : chr  "not_available" "not_available" "not_available" "not_available" ...
##  $ was_mental_illness_related: chr  "True" "False" "False" "True" ...
##  $ body_camera               : chr  "False" "False" "False" "False" ...
##  $ agency_ids                : chr  "73" "70" "238" "196" ...
# fix vars
# change vars to more appropriate formats
fatal$date <- as.Date(fatal$date) # check/change to date format

fatal$year <- as.numeric(format(fatal$date, "%Y"))

fatal$age <- as.numeric(fatal$age)

fatal$gender[fatal$gender == ""] <- NA
fatal$gender <- as.factor(fatal$gender)
fatal$gender <- droplevels(fatal$gender)

fatal <- fatal %>%
  mutate(
    race_category = case_when(
      race == ""       ~ NA_character_,
      race == "A"      ~ "Asian",
      race == "B"      ~ "Black",
      race == "H"      ~ "Hispanic",
      race == "N"      ~ "Native American",
      race == "O"      ~ "Other",
      race == "W"      ~ "White",
      race == "B;H"    ~ "Black, Hispanic",
      race == "N;H"    ~ "Native, Hispanic",
      race == "W;A"    ~ "White, Asian",
      race == "W;B"    ~ "White, Black",
      race == "W;B;N"  ~ "White, Black, Native",
      race == "W;H"    ~ "White, Hispanic",
      TRUE             ~ "Other"
    )
  )
fatal$race_category <- as.factor(fatal$race_category)

library(dplyr)

fatal <- fatal %>%
  mutate(
    black = case_when(
      race == "B" ~ "Black",
      grepl(";", race) & grepl("B", race) ~ "Black Other",  # Multiracial with Black
      TRUE ~ "Non-Black"
    )
  )

# convert to factor for ordered levels
fatal$black <- factor(fatal$black, 
                                 levels = c("Black", "Black Other", "Non-Black"))

fatal$was_mental_illness_related <- as.logical(fatal$was_mental_illness_related)

fatal$body_camera <- as.logical(fatal$body_camera)

str(fatal)
## 'data.frame':    10430 obs. of  22 variables:
##  $ id                        : int  3 4 5 8 9 11 13 15 16 17 ...
##  $ date                      : Date, format: "2015-01-02" "2015-01-02" ...
##  $ threat_type               : chr  "point" "point" "move" "point" ...
##  $ flee_status               : chr  "not" "not" "not" "not" ...
##  $ armed_with                : chr  "gun" "gun" "unarmed" "replica" ...
##  $ city                      : chr  "Shelton" "Aloha" "Wichita" "San Francisco" ...
##  $ county                    : chr  "Mason" "Washington" "Sedgwick" "San Francisco" ...
##  $ state                     : chr  "WA" "OR" "KS" "CA" ...
##  $ latitude                  : num  47.2 45.5 37.7 37.8 40.4 ...
##  $ longitude                 : num  -123.1 -122.9 -97.3 -122.4 -104.7 ...
##  $ location_precision        : chr  "not_available" "not_available" "not_available" "not_available" ...
##  $ name                      : chr  "Tim Elliot" "Lewis Lee Lembke" "John Paul Quintero" "Matthew Hoffman" ...
##  $ age                       : num  53 47 23 32 39 18 22 35 34 47 ...
##  $ gender                    : Factor w/ 3 levels "female","male",..: 2 2 2 2 2 2 2 2 1 2 ...
##  $ race                      : chr  "A" "W" "H" "W" ...
##  $ race_source               : chr  "not_available" "not_available" "not_available" "not_available" ...
##  $ was_mental_illness_related: logi  TRUE FALSE FALSE TRUE FALSE FALSE ...
##  $ body_camera               : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ agency_ids                : chr  "73" "70" "238" "196" ...
##  $ year                      : num  2015 2015 2015 2015 2015 ...
##  $ race_category             : Factor w/ 12 levels "Asian","Black",..: 1 8 4 8 4 8 4 8 8 2 ...
##  $ black                     : Factor w/ 3 levels "Black","Black Other",..: 3 3 3 3 3 3 3 3 3 1 ...
# view a summary of the data
summary(fatal)
##        id             date            threat_type        flee_status       
##  Min.   :    3   Min.   :2015-01-02   Length:10430       Length:10430      
##  1st Qu.: 2912   1st Qu.:2017-08-25   Class :character   Class :character  
##  Median : 5752   Median :2020-04-12   Mode  :character   Mode  :character  
##  Mean   : 5721   Mean   :2020-03-06                                        
##  3rd Qu.: 8527   3rd Qu.:2022-10-02                                        
##  Max.   :11295   Max.   :2024-12-31                                        
##                                                                            
##   armed_with            city              county             state          
##  Length:10430       Length:10430       Length:10430       Length:10430      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##     latitude       longitude       location_precision     name          
##  Min.   :19.50   Min.   :-160.01   Length:10430       Length:10430      
##  1st Qu.:33.45   1st Qu.:-111.95   Class :character   Class :character  
##  Median :36.06   Median : -94.19   Mode  :character   Mode  :character  
##  Mean   :36.63   Mean   : -96.86                                        
##  3rd Qu.:40.04   3rd Qu.: -83.12                                        
##  Max.   :71.30   Max.   : -67.87                                        
##  NA's   :1140    NA's   :1141                                           
##       age               gender         race           race_source       
##  Min.   : 0.00   female    : 462   Length:10430       Length:10430      
##  1st Qu.:28.00   male      :9943   Class :character   Class :character  
##  Median :35.00   non-binary:   5   Mode  :character   Mode  :character  
##  Mean   :37.46   NA's      :  20                                        
##  3rd Qu.:45.00                                                          
##  Max.   :92.00                                                          
##  NA's   :372                                                            
##  was_mental_illness_related body_camera      agency_ids             year     
##  Mode :logical              Mode :logical   Length:10430       Min.   :2015  
##  FALSE:8373                 FALSE:8631      Class :character   1st Qu.:2017  
##  TRUE :2057                 TRUE :1799      Mode  :character   Median :2020  
##                                                                Mean   :2020  
##                                                                3rd Qu.:2022  
##                                                                Max.   :2024  
##                                                                              
##          race_category          black     
##  White          :4659   Black      :2486  
##  Black          :2486   Black Other:   8  
##  Hispanic       :1717   Non-Black  :7936  
##  Asian          : 184                     
##  Native American: 146                     
##  (Other)        :  50                     
##  NA's           :1188
# create a two-column transfer df to match state to abb
transfer <- tibble(state = state.name) %>% 
  tibble(abb = state.abb) %>% 
  bind_rows(tibble(state = "District Of Columbia", abb = "DC")) # add DC
transfer
## # A tibble: 51 × 2
##    state       abb  
##    <chr>       <chr>
##  1 Alabama     AL   
##  2 Alaska      AK   
##  3 Arizona     AZ   
##  4 Arkansas    AR   
##  5 California  CA   
##  6 Colorado    CO   
##  7 Connecticut CT   
##  8 Delaware    DE   
##  9 Florida     FL   
## 10 Georgia     GA   
## # ℹ 41 more rows
tail(transfer)
## # A tibble: 6 × 2
##   state                abb  
##   <chr>                <chr>
## 1 Virginia             VA   
## 2 Washington           WA   
## 3 West Virginia        WV   
## 4 Wisconsin            WI   
## 5 Wyoming              WY   
## 6 District Of Columbia DC
# add a state name variable to the fatal df
fatal$state.name <- state.name[match(fatal$state, transfer$abb)]
fatal %>% 
  mutate(state.abb = state) %>% 
  relocate(id, date, state.name, state.abb) -> fatal

# create a year column
# format to 20YY
fatal.year <- format(fatal$date, format="20%y") 
fatal$year <- fatal.year # add column to df
fatal$year <- as.numeric(fatal$year)
fatal %>% relocate(id, date, year, state.name, state.abb) -> fatal
tail(fatal)
##          id       date year state.name state.abb  threat_type flee_status
## 10425 11282 2024-12-30 2024    Indiana        IN        shoot         car
## 10426 11283 2024-12-30 2024 California        CA undetermined            
## 10427 11284 2024-12-30 2024    Florida        FL       threat        foot
## 10428 11286 2024-12-30 2024       Utah        UT undetermined            
## 10429 11285 2024-12-31 2024  Tennessee        TN       threat         not
## 10430 11287 2024-12-31 2024       Ohio        OH       attack            
##       armed_with         city     county state latitude  longitude
## 10425        gun Indianapolis     Marion    IN 39.81110  -86.05558
## 10426      knife   Sacramento Sacramento    CA 38.49416 -121.35566
## 10427        gun      Orlando     Orange    FL 28.58045  -81.44503
## 10428    unknown   West Point      Davis    UT 41.12282 -112.07058
## 10429        gun  Chattanooga   Hamilton    TN 35.04413  -85.14967
## 10430      knife       Celina     Mercer    OH 40.54681  -84.57087
##       location_precision                         name age gender race
## 10425              block Kenneth Thaddeus Roberts Jr.  48   male    B
## 10426              block                               38   male     
## 10427              block                Timothy Woods  26   male    B
## 10428            address                  Nathan Paul  43   male     
## 10429            address          James Junior Holder  41   male    W
## 10430            address                   Moses Alik  22   male     
##         race_source was_mental_illness_related body_camera agency_ids
## 10425 public_record                      FALSE       FALSE    2003;77
## 10426                                     TRUE        TRUE       1069
## 10427         photo                      FALSE       FALSE        482
## 10428                                     TRUE       FALSE       1450
## 10429         photo                      FALSE       FALSE        750
## 10430                                    FALSE       FALSE      17378
##       race_category     black
## 10425         Black     Black
## 10426          <NA> Non-Black
## 10427         Black     Black
## 10428          <NA> Non-Black
## 10429         White Non-Black
## 10430          <NA> Non-Black

Subsetting data for 2023 and 2024.

df2023 <- fatal %>% 
  filter(year == 2023)

df2024 <- fatal %>% 
  filter(year == 2024)

2.2 Ida B. Wells-Barnett’s The Red Record data

We then load data from The Red Record. Data are gathered from two sources. We conduct a set of cross references to confirm the final selection of case studies for the analysis.

# data from forked repo on IdaBWellsProject
df1893 <- read.csv("https://raw.githubusercontent.com/quant-shop/IdaBWellsProject/master/RedRecord/redrecord1893.csv")

df1894 <- read.csv("https://raw.githubusercontent.com/quant-shop/IdaBWellsProject/master/RedRecord/redrecord1894.csv")
# records from quant shop entry
df1892 <- read_csv("../data/Red Record Lynching Record - 1892.csv")
## New names:
## Rows: 26 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): State, 1892, ...5 dbl (1): count lgl (1): ...3
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...3`
## • `` -> `...5`
df1893b <- read_csv("../data/Red Record Lynching Record - 1893.csv")
## New names:
## Rows: 150 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Accusation, State, Name, 1893, ...6 lgl (1): ...4
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...4`
## • `` -> `...6`
df1894b <- read_csv("../data/Red Record Lynching Record - 1893.csv")
## New names:
## Rows: 150 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Accusation, State, Name, 1893, ...6 lgl (1): ...4
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...4`
## • `` -> `...6`

2.3 Standardizing data frames

# standardize data frames
# --- Fix date columns for 1893 and 1894 ---
df1893$Date <- as.Date(df1893$Date, format="%b. %d, %Y")
df1894$Date <- as.Date(df1894$Date, format="%b. %d, %Y")

# when months are not abbreviated with a dot, use this alternative:
# df1893$Date <- as.Date(df1893$Date, format="%b %d, %Y")
# df1894$Date <- as.Date(df1894$Date, format="%b %d, %Y")

# --- standardize column names for 1893 and 1894 ---
names(df1893) <- c("name", "location", "date", "alleged_crime", "latitude", "longitude")
names(df1894) <- c("name", "location", "date", "alleged_crime", "latitude", "longitude")

# --- check and fix types in 2023 and 2024 ---
# factorize categorical variables if needed:
factor_cols <- c("state.name", "state.abb", "threat_type", "flee_status", "armed_with",
                 "city", "county", "state", "gender", "race", "race_source", "location_precision")

df2023[factor_cols] <- lapply(df2023[factor_cols], as.factor)
df2024[factor_cols] <- lapply(df2024[factor_cols], as.factor)

# --- add a 'year' column to 1893 and 1894 for consistency ---
df1893$year <- 1893
df1894$year <- 1894

# --- add missing columns to 1893/1894 with NA values for merging ---
missing_cols <- setdiff(names(df2023), names(df1893))
for(col in missing_cols) df1893[[col]] <- NA
df1893 <- df1893[, names(df2023)] # Reorder columns

missing_cols <- setdiff(names(df2023), names(df1894))
for(col in missing_cols) df1894[[col]] <- NA
df1894 <- df1894[, names(df2023)]

all_years <- rbind(df1893, df1894, df2023, df2024)

3 Maps

library(ggplot2)
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
#install.packages("maps")

# get map data for US states
states_map <- map_data("state")

3.1 Fatal Force

# lowercase to match maps of state names
fatal <- fatal %>%
  mutate(region = tolower(state))  # or whatever your state column is called
# states to higlight from wells 1892 data - top five states
states1892 <- c(
  "louisiana", 
  "tennessee", 
  "arkansas",
  "alabama",
  "georgia",
  "mississippi",
  "texas",
  "florida",
  "kentucky",
  "wyoming"
  )
states_map_1892 <- states_map %>%
  mutate(highlight = ifelse(region %in% states1892, "highlight", "normal"))

Add dots for each fatal incident.

ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    color = "white", size = 0.2
  ) +
  scale_fill_manual(values = c("highlight" = "red", "normal" = "grey80")) +
  geom_point(
    data = fatal,
    aes(x = longitude, y = latitude),
    color = "blue", alpha = 0.6, size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(title = "US Map with Highlighted States and Incident Dots")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1141 rows containing missing values or values outside the scale range
## (`geom_point()`).

3.1.1 Black men in 2022

fatal_black_men_2022 <- fatal %>%
  filter(
    race_category == "Black" | race == "B",
    gender == "male" | gender == "M", # Adjust based on your gender coding
    year == 2022
  )

fatal_black_men_2022 <- fatal_black_men_2022 %>%
  mutate(state_lower = tolower(state)) 
ggplot() +
  geom_polygon(
    data = states_map,
    aes(x = long, y = lat, group = group),
    fill = "gray90",
    color = "white"
  ) +
  geom_point(
    data = fatal_black_men_2022,
    aes(x = longitude, y = latitude),
    color = "black",
    alpha = 0.7,
    size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Fatal Police Shootings of Black Men in the US, 2022",
    caption = "Data source: Washington Post Fatal Force Database"
  )
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).

states_map_1892 <- states_map_1892 %>%
  mutate(
    highlight = ifelse(region %in% states1892, "highlight", "normal")
  )

3.2 Integreted maps

Here, we integrate the fatal force 2022 data for Black men with the 1892 records.

ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    color = "white", size = 0.2
  ) +
  scale_fill_manual(
    values = c("highlight" = "red", "normal" = "gray80"),
    guide = "none"
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Top 10 States, Lynching in 1892"
  )

ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    color = "white", size = 0.2
  ) +
  scale_fill_manual(
    values = c("highlight" = "red", "normal" = "gray80"),
    guide = "none"
  ) +
  geom_point(
    data = fatal_black_men_2022,
    aes(x = longitude, y = latitude),
    color = "black", alpha = 0.7, size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Top 10 States - Lynching, 1892 (red); Black Male Fatal Shootings, 2022 (dots)"
  )
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).


  1. Howard University↩︎

  2. Morehouse College↩︎