We provide here the technical code and methods that accompany our analysis of data on lynching and policing. We examine some of the historical and contemporary contexts of state violence and social control using a media frames analysis of the state’s means of production towards social control. We then situate this idea using Goodwin’s (1992) analysis around professional visions, and we broadly frame our analysis within the research in Black and African American studies. We also consider some of the structures, assumptions, and quantitative models related to the analysis of the historical data. We make use of original source data from Ida B. Wells-Barnett’s The Red Record and the Washington Post Fatal Force database.
# Washington Post fatal police shootings data (v2), read straight from GitHub
fatal <- read.csv(
  file = "https://raw.githubusercontent.com/washingtonpost/data-police-shootings/refs/heads/master/v2/fatal-police-shootings-data.csv"
)
# inspect the column names and types of the raw import
str(object = fatal)
## 'data.frame': 10430 obs. of 19 variables:
## $ id : int 3 4 5 8 9 11 13 15 16 17 ...
## $ date : chr "2015-01-02" "2015-01-02" "2015-01-03" "2015-01-04" ...
## $ threat_type : chr "point" "point" "move" "point" ...
## $ flee_status : chr "not" "not" "not" "not" ...
## $ armed_with : chr "gun" "gun" "unarmed" "replica" ...
## $ city : chr "Shelton" "Aloha" "Wichita" "San Francisco" ...
## $ county : chr "Mason" "Washington" "Sedgwick" "San Francisco" ...
## $ state : chr "WA" "OR" "KS" "CA" ...
## $ latitude : num 47.2 45.5 37.7 37.8 40.4 ...
## $ longitude : num -123.1 -122.9 -97.3 -122.4 -104.7 ...
## $ location_precision : chr "not_available" "not_available" "not_available" "not_available" ...
## $ name : chr "Tim Elliot" "Lewis Lee Lembke" "John Paul Quintero" "Matthew Hoffman" ...
## $ age : int 53 47 23 32 39 18 22 35 34 47 ...
## $ gender : chr "male" "male" "male" "male" ...
## $ race : chr "A" "W" "H" "W" ...
## $ race_source : chr "not_available" "not_available" "not_available" "not_available" ...
## $ was_mental_illness_related: chr "True" "False" "False" "True" ...
## $ body_camera : chr "False" "False" "False" "False" ...
## $ agency_ids : chr "73" "70" "238" "196" ...
# coerce the raw columns to more appropriate types
fatal[["date"]] <- as.Date(fatal[["date"]]) # character -> Date
fatal[["year"]] <- as.numeric(format(fatal[["date"]], "%Y")) # calendar year
fatal[["age"]] <- as.numeric(fatal[["age"]])
# blank gender strings become NA before the column is turned into a factor
fatal[["gender"]][fatal[["gender"]] %in% ""] <- NA
fatal[["gender"]] <- droplevels(as.factor(fatal[["gender"]]))
# dplyr supplies `%>%`, mutate() and case_when(); attach it before first use
library(dplyr)
# recode the single- and multi-race codes in `race` into readable labels,
# stored as a factor; a blank code becomes NA
fatal <- fatal %>%
  mutate(
    race_category = case_when(
      race == "" ~ NA_character_,
      race == "A" ~ "Asian",
      race == "B" ~ "Black",
      race == "H" ~ "Hispanic",
      race == "N" ~ "Native American",
      race == "O" ~ "Other",
      race == "W" ~ "White",
      race == "B;H" ~ "Black, Hispanic",
      race == "N;H" ~ "Native, Hispanic",
      race == "W;A" ~ "White, Asian",
      race == "W;B" ~ "White, Black",
      race == "W;B;N" ~ "White, Black, Native",
      race == "W;H" ~ "White, Hispanic",
      TRUE ~ "Other" # any code not listed above
    ),
    race_category = as.factor(race_category)
  )
# classify each record as Black alone, Black in combination with another race,
# or Non-Black; a blank or missing race code stays NA (as in race_category)
# instead of being counted as Non-Black
fatal <- fatal %>%
  mutate(
    black = case_when(
      is.na(race) | race == "" ~ NA_character_, # race not recorded
      race == "B" ~ "Black",
      grepl(";", race) & grepl("B", race) ~ "Black Other", # Multiracial with Black
      TRUE ~ "Non-Black"
    ),
    # convert to factor with a fixed level order
    black = factor(black, levels = c("Black", "Black Other", "Non-Black"))
  )
# these two flags arrive as "True"/"False" text; convert both to logical
fatal[c("was_mental_illness_related", "body_camera")] <- lapply(
  fatal[c("was_mental_illness_related", "body_camera")],
  as.logical
)
# re-inspect the structure after all conversions
str(object = fatal)
## 'data.frame': 10430 obs. of 22 variables:
## $ id : int 3 4 5 8 9 11 13 15 16 17 ...
## $ date : Date, format: "2015-01-02" "2015-01-02" ...
## $ threat_type : chr "point" "point" "move" "point" ...
## $ flee_status : chr "not" "not" "not" "not" ...
## $ armed_with : chr "gun" "gun" "unarmed" "replica" ...
## $ city : chr "Shelton" "Aloha" "Wichita" "San Francisco" ...
## $ county : chr "Mason" "Washington" "Sedgwick" "San Francisco" ...
## $ state : chr "WA" "OR" "KS" "CA" ...
## $ latitude : num 47.2 45.5 37.7 37.8 40.4 ...
## $ longitude : num -123.1 -122.9 -97.3 -122.4 -104.7 ...
## $ location_precision : chr "not_available" "not_available" "not_available" "not_available" ...
## $ name : chr "Tim Elliot" "Lewis Lee Lembke" "John Paul Quintero" "Matthew Hoffman" ...
## $ age : num 53 47 23 32 39 18 22 35 34 47 ...
## $ gender : Factor w/ 3 levels "female","male",..: 2 2 2 2 2 2 2 2 1 2 ...
## $ race : chr "A" "W" "H" "W" ...
## $ race_source : chr "not_available" "not_available" "not_available" "not_available" ...
## $ was_mental_illness_related: logi TRUE FALSE FALSE TRUE FALSE FALSE ...
## $ body_camera : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ agency_ids : chr "73" "70" "238" "196" ...
## $ year : num 2015 2015 2015 2015 2015 ...
## $ race_category : Factor w/ 12 levels "Asian","Black",..: 1 8 4 8 4 8 4 8 8 2 ...
## $ black : Factor w/ 3 levels "Black","Black Other",..: 3 3 3 3 3 3 3 3 3 1 ...
# per-column summary of the cleaned data
summary(object = fatal)
## id date threat_type flee_status
## Min. : 3 Min. :2015-01-02 Length:10430 Length:10430
## 1st Qu.: 2912 1st Qu.:2017-08-25 Class :character Class :character
## Median : 5752 Median :2020-04-12 Mode :character Mode :character
## Mean : 5721 Mean :2020-03-06
## 3rd Qu.: 8527 3rd Qu.:2022-10-02
## Max. :11295 Max. :2024-12-31
##
## armed_with city county state
## Length:10430 Length:10430 Length:10430 Length:10430
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## latitude longitude location_precision name
## Min. :19.50 Min. :-160.01 Length:10430 Length:10430
## 1st Qu.:33.45 1st Qu.:-111.95 Class :character Class :character
## Median :36.06 Median : -94.19 Mode :character Mode :character
## Mean :36.63 Mean : -96.86
## 3rd Qu.:40.04 3rd Qu.: -83.12
## Max. :71.30 Max. : -67.87
## NA's :1140 NA's :1141
## age gender race race_source
## Min. : 0.00 female : 462 Length:10430 Length:10430
## 1st Qu.:28.00 male :9943 Class :character Class :character
## Median :35.00 non-binary: 5 Mode :character Mode :character
## Mean :37.46 NA's : 20
## 3rd Qu.:45.00
## Max. :92.00
## NA's :372
## was_mental_illness_related body_camera agency_ids year
## Mode :logical Mode :logical Length:10430 Min. :2015
## FALSE:8373 FALSE:8631 Class :character 1st Qu.:2017
## TRUE :2057 TRUE :1799 Mode :character Median :2020
## Mean :2020
## 3rd Qu.:2022
## Max. :2024
##
## race_category black
## White :4659 Black :2486
## Black :2486 Black Other: 8
## Hispanic :1717 Non-Black :7936
## Asian : 184
## Native American: 146
## (Other) : 50
## NA's :1188
# two-column lookup table pairing each state name with its abbreviation
transfer <- bind_rows(
  tibble(state = state.name, abb = state.abb),
  tibble(state = "District Of Columbia", abb = "DC") # add DC
)
transfer
## # A tibble: 51 × 2
## state abb
## <chr> <chr>
## 1 Alabama AL
## 2 Alaska AK
## 3 Arizona AZ
## 4 Arkansas AR
## 5 California CA
## 6 Colorado CO
## 7 Connecticut CT
## 8 Delaware DE
## 9 Florida FL
## 10 Georgia GA
## # ℹ 41 more rows
# show the last six rows to confirm the DC row was appended
tail(transfer, n = 6L)
## # A tibble: 6 × 2
## state abb
## <chr> <chr>
## 1 Virginia VA
## 2 Washington WA
## 3 West Virginia WV
## 4 Wisconsin WI
## 5 Wyoming WY
## 6 District Of Columbia DC
# add a state name variable to the fatal df
# look the name up in `transfer`, which includes the DC row; indexing the
# 50-element base vector `state.name` returned NA for every "DC" record
fatal$state.name <- transfer$state[match(fatal$state, transfer$abb)]
# keep the abbreviation under an explicit name and move the key columns first
fatal <- fatal %>%
  mutate(state.abb = state) %>%
  relocate(id, date, state.name, state.abb)
# create a year column
# "%Y" gives the four-digit year for any date, with no hard-coded "20" century
fatal.year <- format(fatal$date, format = "%Y")
fatal$year <- as.numeric(fatal.year) # add column to df as a number
# move the key columns to the front
fatal <- fatal %>% relocate(id, date, year, state.name, state.abb)
tail(fatal)
## id date year state.name state.abb threat_type flee_status
## 10425 11282 2024-12-30 2024 Indiana IN shoot car
## 10426 11283 2024-12-30 2024 California CA undetermined
## 10427 11284 2024-12-30 2024 Florida FL threat foot
## 10428 11286 2024-12-30 2024 Utah UT undetermined
## 10429 11285 2024-12-31 2024 Tennessee TN threat not
## 10430 11287 2024-12-31 2024 Ohio OH attack
## armed_with city county state latitude longitude
## 10425 gun Indianapolis Marion IN 39.81110 -86.05558
## 10426 knife Sacramento Sacramento CA 38.49416 -121.35566
## 10427 gun Orlando Orange FL 28.58045 -81.44503
## 10428 unknown West Point Davis UT 41.12282 -112.07058
## 10429 gun Chattanooga Hamilton TN 35.04413 -85.14967
## 10430 knife Celina Mercer OH 40.54681 -84.57087
## location_precision name age gender race
## 10425 block Kenneth Thaddeus Roberts Jr. 48 male B
## 10426 block 38 male
## 10427 block Timothy Woods 26 male B
## 10428 address Nathan Paul 43 male
## 10429 address James Junior Holder 41 male W
## 10430 address Moses Alik 22 male
## race_source was_mental_illness_related body_camera agency_ids
## 10425 public_record FALSE FALSE 2003;77
## 10426 TRUE TRUE 1069
## 10427 photo FALSE FALSE 482
## 10428 TRUE FALSE 1450
## 10429 photo FALSE FALSE 750
## 10430 FALSE FALSE 17378
## race_category black
## 10425 Black Black
## 10426 <NA> Non-Black
## 10427 Black Black
## 10428 <NA> Non-Black
## 10429 White Non-Black
## 10430 <NA> Non-Black
We subset the data into separate data frames for 2023 and 2024.
# one data frame per year of interest
df2023 <- filter(fatal, year == 2023)
df2024 <- filter(fatal, year == 2024)
We then load data from The Red Record. Data are gathered from two sources. We conduct a set of cross references to confirm the final selection of case studies for the analysis.
# 1893 and 1894 records, read from the forked IdaBWellsProject repo
df1893 <- read.csv(
  file = "https://raw.githubusercontent.com/quant-shop/IdaBWellsProject/master/RedRecord/redrecord1893.csv"
)
df1894 <- read.csv(
  file = "https://raw.githubusercontent.com/quant-shop/IdaBWellsProject/master/RedRecord/redrecord1894.csv"
)
# 1892 records from the quant shop entry (local file)
df1892 <- read_csv(file = "../data/Red Record Lynching Record - 1892.csv")
## New names:
## Rows: 26 Columns: 5
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): State, 1892, ...5 dbl (1): count lgl (1): ...3
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...3`
## • `` -> `...5`
# 1893 records from the quant shop entry (local file)
df1893b <- read_csv(file = "../data/Red Record Lynching Record - 1893.csv")
## New names:
## Rows: 150 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Accusation, State, Name, 1893, ...6 lgl (1): ...4
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...4`
## • `` -> `...6`
# 1894 records from the quant shop entry: read the 1894 sheet, not the 1893
# one, which is already held in df1893b
# NOTE(review): assumes the 1894 file follows the naming pattern of the 1892
# and 1893 files; confirm it exists in ../data
df1894b <- read_csv("../data/Red Record Lynching Record - 1894.csv")
## New names:
## Rows: 150 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (5): Accusation, State, Name, 1893, ...6 lgl (1): ...4
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...4`
## • `` -> `...6`
# standardize the 1893 and 1894 Red Record data frames
# --- parse dates: abbreviated month with a dot, day, comma, four-digit year ---
df1893$Date <- as.Date(df1893$Date, format = "%b. %d, %Y")
df1894$Date <- as.Date(df1894$Date, format = "%b. %d, %Y")
# when months are not abbreviated with a dot, use this alternative:
# df1893$Date <- as.Date(df1893$Date, format="%b %d, %Y")
# df1894$Date <- as.Date(df1894$Date, format="%b %d, %Y")
# --- give both frames the same column names, assigned by position ---
df1893 <- setNames(
  df1893,
  c("name", "location", "date", "alleged_crime", "latitude", "longitude")
)
df1894 <- setNames(
  df1894,
  c("name", "location", "date", "alleged_crime", "latitude", "longitude")
)
# --- convert the categorical columns of the 2023 and 2024 subsets to factors ---
factor_cols <- c(
  "state.name", "state.abb", "threat_type", "flee_status", "armed_with",
  "city", "county", "state", "gender", "race", "race_source",
  "location_precision"
)
df2023[factor_cols] <- lapply(
  factor_cols,
  function(col_name) as.factor(df2023[[col_name]])
)
df2024[factor_cols] <- lapply(
  factor_cols,
  function(col_name) as.factor(df2024[[col_name]])
)
# --- tag the 1893/1894 frames with a 'year' column, as in the 2023/2024 frames ---
df1893$year <- 1893
df1894$year <- 1894
# --- pad 1893 with an all-NA column for every df2023 column it lacks ---
missing_cols <- setdiff(names(df2023), names(df1893))
df1893[missing_cols] <- NA
# keep df2023's columns in df2023's order; columns that exist only in the
# 1893 frame (location, alleged_crime) are dropped by this selection
df1893 <- df1893[, names(df2023)]
# --- same padding and reordering for 1894 ---
missing_cols <- setdiff(names(df2023), names(df1894))
df1894[missing_cols] <- NA
df1894 <- df1894[, names(df2023)]
# stack the four years into a single data frame
all_years <- rbind(df1893, df1894, df2023, df2024)
library(ggplot2)
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
#install.packages("maps")
# get map data for US states
states_map <- map_data("state")
# lowercase the full state name so `region` is comparable with the lowercase
# state names used for the map; lowercasing the two-letter code in `state`
# (e.g. "wa") can never match a full name
fatal <- fatal %>%
  mutate(region = tolower(state.name))
# states to highlight from the Wells 1892 data - the ten states listed below,
# in lowercase to match the map's `region` values
states1892 <- c(
  "louisiana", "tennessee", "arkansas", "alabama", "georgia",
  "mississippi", "texas", "florida", "kentucky", "wyoming"
)
# flag every map row: "highlight" when its region is in states1892, else "normal"
states_map_1892 <- states_map %>%
  mutate(
    highlight = c("normal", "highlight")[(region %in% states1892) + 1L]
  )
Add dots for each fatal incident.
# state polygons (1892 states in red) with one blue dot per fatal incident
ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    # `linewidth` sets the outline width; `size` for lines is deprecated
    # since ggplot2 3.4.0
    color = "white", linewidth = 0.2
  ) +
  scale_fill_manual(values = c("highlight" = "red", "normal" = "grey80")) +
  geom_point(
    data = fatal,
    aes(x = longitude, y = latitude),
    color = "blue", alpha = 0.6, size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(title = "US Map with Highlighted States and Incident Dots")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1141 rows containing missing values or values outside the scale range
## (`geom_point()`).
# 2022 records for Black men, plus a lowercased copy of `state`
fatal_black_men_2022 <- fatal %>%
  filter(
    race_category %in% "Black" | race %in% "B",
    gender %in% c("male", "M"), # Adjust based on your gender coding
    year == 2022
  ) %>%
  mutate(state_lower = tolower(state))
# light-gray state outlines with one black dot per 2022 incident
ggplot(data = states_map, mapping = aes(x = long, y = lat)) +
  geom_polygon(aes(group = group), fill = "gray90", color = "white") +
  geom_point(
    data = fatal_black_men_2022,
    mapping = aes(x = longitude, y = latitude),
    inherit.aes = FALSE, # the incident data has no `long`/`lat` columns
    color = "black", alpha = 0.7, size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Fatal Police Shootings of Black Men in the US, 2022",
    caption = "Data source: Washington Post Fatal Force Database"
  )
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).
# recompute the highlight flag on the existing map data from states1892
states_map_1892$highlight <- ifelse(
  states_map_1892$region %in% states1892, "highlight", "normal"
)
Here, we integrate the fatal force 2022 data for Black men with the 1892 records.
# map of the 1892 states only: highlighted states in red, no legend
ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    # `linewidth` sets the outline width; `size` for lines is deprecated
    # since ggplot2 3.4.0
    color = "white", linewidth = 0.2
  ) +
  scale_fill_manual(
    values = c("highlight" = "red", "normal" = "gray80"),
    guide = "none"
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Top 10 States, Lynching in 1892"
  )
# 1892 states in red, overlaid with one black dot per 2022 incident
ggplot() +
  geom_polygon(
    data = states_map_1892,
    aes(x = long, y = lat, group = group, fill = highlight),
    # `linewidth` sets the outline width; `size` for lines is deprecated
    # since ggplot2 3.4.0
    color = "white", linewidth = 0.2
  ) +
  scale_fill_manual(
    values = c("highlight" = "red", "normal" = "gray80"),
    guide = "none"
  ) +
  geom_point(
    data = fatal_black_men_2022,
    aes(x = longitude, y = latitude),
    color = "black", alpha = 0.7, size = 2
  ) +
  coord_fixed(1.3) +
  theme_void() +
  labs(
    title = "Top 10 States - Lynching, 1892 (red); Black Male Fatal Shootings, 2022 (dots)"
  )
## Warning: Removed 54 rows containing missing values or values outside the scale range
## (`geom_point()`).