library(ggplot2)
library(kableExtra)
library(dplyr)
library(forcats)
library(here)
library(leaflet)
library(sf)

# Load the saved .rda data files this report works from
# (presumably the source of `finaldata_2015` used below -- confirm).
load(here("data-outputs", "WA2015.rda"))
load(here("data-outputs", "USpop.rda"))

## Select Clark County

# Geography type and name that identify the area covered by this report
level <- "county"
location <- "Clark"

# Keep only the records whose county matches the selected location
all.fatalities <- filter(finaldata_2015, county == location)
  
# Select homicides
# Remove suicides and some final cases that don't belong
## 25804-5 are the Tad Norman case in Lake City. 
## Not victims of police violence.
## they were killed by Norman

# Build the analysis set: drop suicides and three specific records, then
# sort the remaining cases chronologically. Conditions passed separately to
# filter() are combined with a logical AND.
homicides <- all.fatalities %>%
  filter(
    circumstances.fe != "Suicide",
    feID != 28698, # deputy crash while having stroke
    feID != 25804, # next 2 killed by suspect, not police
    feID != 25805
  ) %>%
  arrange(date)


# for geocoded LDs
# Write the case id and coordinates of each retained case to a CSV file
write.csv(
  with(homicides, cbind(feID, latitude, longitude)),
  here::here("data-outputs", "geocodes2015.csv")
)

# Number of records before and after the exclusions
all.cases <- nrow(all.fatalities)
last.case <- nrow(homicides)

# Details of the most recent case. `last.case` indexes the final row of
# `homicides`, which is the latest case because the data are sorted by date.
last.date <- max(homicides$date)
last.name <- ifelse(homicides$lname[last.case] == "Unknown",
                    "(Name not released)",
                    paste(homicides$fname[last.case],
                          homicides$lname[last.case]))

last.age <- homicides$age.fe[last.case]
last.agency <- homicides$agency[last.case]
last.cod <- homicides$cod.fe[last.case]

# Cases per year; the final entry of the table is the most recent year
tot.by.yr <- table(homicides$year)
tot.this.yr <- tot.by.yr[[length(tot.by.yr)]]

# Ordinal suffix for this year's running count. Counts ending in 11, 12 or
# 13 take "th" (11th, 112th); otherwise the last digit decides (1st, 22nd,
# 23rd). Checking only for the exact values 1, 2 and 3 would print "21th".
num.suffix <- if ((tot.this.yr %% 100) %in% 11:13) {
  "th"
} else {
  switch(as.character(tot.this.yr %% 10),
         "1" = "st",
         "2" = "nd",
         "3" = "rd",
         "th")
}

# HLE stats (homicides by law enforcement)

# Annual averages over completed years only: the last entry of `tot.by.yr`
# is treated as the partial current year and left out of both statistics.
# seq_len() is used because `1:length(x) - 1` parses as `(1:n) - 1` and only
# works by accident (index 0 is silently dropped).
mean.HLE <- round(mean(tot.by.yr[seq_len(length(tot.by.yr) - 1)]))

# Derive the current (partial) year from the data instead of hard-coding it,
# so the numerator and the denominator always exclude the same year.
curr.yr <- as.numeric(names(tot.by.yr)[length(tot.by.yr)])

# NOTE(review): `[[2]]` takes the second category of the gender table;
# presumably that is the male count (alphabetical order) -- confirm, and note
# that it fails if only one gender is present.
mean.male.HLE <-
  table(homicides$gender[homicides$year < curr.yr])[[2]] /
  (length(tot.by.yr) - 1)

# # All homicide stats
# all.homicides = pop.by.sy %>% filter(st == "WA") %>% select(homicides)
# mean.all.homicides = round(mean(unlist(all.homicides)))
# 
# all.male.homicides = males.by.sy %>% filter(st == "WA") %>% select(homicides)
# mean.all.male.homicides = round(mean(unlist(all.male.homicides)))
# 
# all.by.race <- pop.by.syr %>% 
#   filter(st == "WA") %>% 
#   select(Year, race.eth, pop, homicides, homrate) %>%
#   group_by(Year) %>%
#   mutate(pop.pct = pop / sum(pop),
#          hom.pct = homicides / sum(homicides)) %>%
#   ungroup

# WA DOH reported homicide deaths
#https://www.doh.wa.gov/DataandStatisticalReports/HealthDataVisualization/MortalityDashboards/ACHInjuryDeathsDashboards
# Yearly homicide counts entered by hand; NA marks a year with no value yet
wadoh.all.homicides <- c(12, 15, 17, 14, NA) # 2015-2019 SW Washington counts, all
wadoh.male.homicides <- c(10, 11, 13, NA, NA)

# Averages over the years that have a value, rounded to one decimal place
mean.wadoh.all <- round(mean(wadoh.all.homicides, na.rm = TRUE), 1)
mean.wadoh.male <- round(mean(wadoh.male.homicides, na.rm = TRUE), 1)

Introduction

This report tracks the number of persons killed by police in Clark County, WA since January 1, 2015.

MOST RECENT DATA UPDATE

  • Total homicides by police since Jan 1, 2015: 16

  • Last reported case: Jenoah Donald, 30 years old, on 2021-02-04 by Clark County Sheriff’s Office

    Jenoah Donald is the 1st person killed by police in 2021. The cause of death is reported as Gunshot.

Last year, 3 people were killed by law enforcement officers in Clark County.
The average number killed each year since 2015 is 3, and 2.6 of these are men.

The average total number of homicide deaths each year in Clark County during this period was 14.5, as reported by the WA state Department of Health.

This means that 20.7% of all homicide victims in Clark County are killed by a law enforcement officer (23.0% of men).


Where the data come from

The data in this report are updated at least once each week, pulling and merging from two online sources: Fatal Encounters (https://fatalencounters.org/), and the Washington Post (https://www.washingtonpost.com/graphics/investigations/police-shootings-database/).

  • Fatal Encounters includes all deaths during encounters with police;

  • Washington Post only includes fatal shootings by police.

  • Neither dataset includes deaths in custody after booking.

A comparison of the cases found in each dataset is found in the last section of this report.

This report is restricted to the cases that can be classified as homicides by police. It excludes cases identified in the Fatal Encounters dataset as suicides, and a handful of other cases (see the last section for details).


What is a homicide?

The deadly force incidents in this report are homicides. A homicide is simply defined as the killing of one person by another. In the context of this report it refers to any encounter with law enforcement officers that results in a fatality. Homicides normally result in a criminal investigation or inquest, but the word does not imply a crime has been committed.

  • The word homicide means only that the death was caused in some way by the officer.

  • It does not mean the officer’s actions that led to the death were justified, or that they were unjustified.

There are many different types of homicides. In the U.S., these types and definitions vary across states, but there are some general similarities. The definitions below are taken from a useful online summary found here, based on California State laws.

Homicide
Homicide is the killing of one person by another. This is a broad term that includes both legal and illegal killings. For example, a soldier may kill another soldier in battle, but that is not a crime. The situation in which the killing happened determines whether it is a crime.

  • Murder is the illegal and intentional killing of another person. Under California Penal Code Section 187, for example, murder is defined as one person killing another person with malice aforethought. Malice is defined as the knowledge and intention or desire to do evil. Malice aforethought is found when one person kills another person with the intention to do so.

    In California, for example, a defendant may be charged with first-degree murder, second-degree murder, or capital murder.

    • First-degree murder is the most serious and includes capital murder – first-degree murder with “special circumstances” that make the crime even more egregious. These cases can be punishable by life in prison without the possibility of parole, or death.

    • Second-degree murder is murder without premeditation, but with intent that is typically rooted in pre-existing circumstances. The penalty for second-degree murder may be up to 15 years to life in prison in California.

    • Felony murder is a subset of first-degree murder and is charged when a person is killed during the commission of a felony, such as a robbery or rape.

  • Manslaughter is the illegal killing of another person without premeditation, and in some cases without the intent to kill. These cases are treated as less severe crimes than murder. Manslaughter can also be categorized as voluntary or involuntary.

    • Voluntary manslaughter occurs when a person kills another without premeditation, typically in the heat of passion. The provocation must be such that a reasonable person under the same circumstances would have acted the same way. Penalties for voluntary manslaughter include up to 11 years in prison in California.

    • Involuntary manslaughter is when a person is killed by actions that involve a wanton disregard for life by another. Involuntary manslaughter is committed without premeditation and without the true intent to kill, but the death of another person still occurs as a result. Penalties for involuntary manslaughter include up to four years in prison in California.

    • Vehicular manslaughter occurs when a person dies in a car accident due to another driver’s gross negligence or even simple negligence, in certain circumstances.

Interactive Map

You can click down to the map pointers for each person killed by police.

  • Hovering over the pointer brings up the name of the person killed, the agency of the officer who killed them and the date;

  • Clicking the pointer will bring up a url to a news article on the case (if available).

# Read in WA county boundaries
# Data: https://geo.wa.gov/datasets/wadnr::wa-county-boundaries-1?geometry=-127.755%2C45.975%2C-113.880%2C48.583
# Read in WA county boundaries
# Data: https://geo.wa.gov/datasets/wadnr::wa-county-boundaries-1?geometry=-127.755%2C45.975%2C-113.880%2C48.583
wa.boundaries <- st_read(
  here::here("data-raw", "WA_County_Boundaries", "WA_County_Boundaries.shp")
)

# select my county
tmp <- wa.boundaries %>%
  filter(COUNTY_LAB == location)

# Read in KC municipal boundaries to get standard projection that works with Leaflet
# Data: https://data-seattlecitygis.opendata.arcgis.com/datasets/municipal-boundaries?geometry=-123.631%2C47.148%2C-120.162%2C47.797
kc.muni <- st_read(
  here::here("data-raw", "KC_Municipal_Boundaries", "Municipal_Boundaries.shp")
)

# transform from Pseudo-Mercator to plain WGS84
# (re-project the county outline into the CRS of the municipal layer)
my.boundaries <- st_transform(tmp, st_crs(kc.muni))

# draw map: county outline plus one clustered marker per case; the popup
# shows `url_click` and the hover label shows name, agency and date
map1 <- leaflet(data = homicides, width = "100%") %>%
  addTiles() %>%
  addPolygons(data = my.boundaries, fillOpacity = 0.1, weight = 1.2) %>%
  addMarkers(
    lng = ~ longitude,
    lat = ~ latitude,
    popup = ~ url_click,
    label = ~ as.character(paste(name, "by", agency, "on", date)),
    clusterOptions = markerClusterOptions()
  )
map1

Breakdowns

By race

Table

The race of the victim is missing in 25% of the original data. Fatal Encounters employs an algorithm to try to impute these cases. Over half of the missing values are successfully imputed. These imputations are included in the analysis here.

# Tabulate cases by imputed race, spelling out the race codes and appending
# a "Total" row. The total percent is the sum of the rounded percents, which
# is why the table carries a rounding footnote.
tab <- homicides %>%
  mutate(
    raceImp = recode(
      raceImp,
      "API" = "Asian/Pacific Islander",
      "BAA" = "Black/African American",
      "HL" = "Hispanic/Latinx",
      "NA" = "Native American/Indigenous",
      "WEA" = "White/European American"
    )
  ) %>%
  count(raceImp, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  bind_rows(
    data.frame(
      raceImp = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  ) %>%
  rename(Race = raceImp)

# Render the table with the final (total) row in bold
tab %>%
  kable(caption = "Breakdown by Race") %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
Breakdown by Race
Race Number Percent
Asian/Pacific Islander 1 6.2
Black/African American 2 12.5
Hispanic/Latinx 2 12.5
White/European American 10 62.5
Unknown 1 6.2
Total 16 99.9
* Percents may not sum to 100 due to rounding

Plots

# Bar chart of the share of cases by imputed race (short labels); each bar
# is annotated with its case count.
homicides %>%
  mutate(
    raceImp = recode(
      raceImp,
      "API" = "Asian",
      "BAA" = "Black",
      "HL" = "Latinx",
      "NA" = "Native",
      "WEA" = "White"
    )
  ) %>%
  count(raceImp) %>%
  mutate(perc = n / nrow(homicides)) %>%
  ggplot(aes(x = raceImp, y = perc, label = n)) +
  # label = round(100*perc, 1))) +
  geom_col(fill = "blue", alpha = .5) +
  geom_text(size = 3, nudge_y = .025) +
  labs(
    title = "Fatalities by Race",
    caption = "Clark County since 2015; y-axis=pct, bar label=count",
    x = "Reported Race",
    y = "Percent of Total"
  )

# Same chart collapsed to White / BIPOC / Unknown, with "Unknown" moved to
# the end of the axis. Any value other than "WEA" or "Unknown" (including a
# missing value) falls into "BIPOC".
homicides %>%
  mutate(
    raceb = case_when(
      raceImp == "WEA" ~ "White",
      raceImp == "Unknown" ~ "Unknown",
      TRUE ~ "BIPOC"
    ),
    raceb = fct_relevel(raceb, "Unknown", after = Inf)
  ) %>%
  count(raceb) %>%
  mutate(perc = n / nrow(homicides)) %>%
  ggplot(aes(x = raceb, y = perc, label = n)) +
  geom_text(size = 3, nudge_y = .025) +
  geom_col(fill = "blue", alpha = .5) +
  labs(
    title = "Fatalities by Race",
    caption = "Clark County since 2015; y-axis=pct, bar label=count",
    x = "Reported Race",
    y = "Percent of Total"
  )

Discussion

Racial disparities in the risk of being killed by police are one of the most important factors driving the public demand for police accountability and reform. For that reason it is important to understand how these numbers can, and cannot, be used.

There are several things to keep in mind when interpreting the breakdown of cases by the race of the person killed in this report.

  1. Many case reports are missing data on race

These cases are denoted “Unknown” in the tables and plots in this report.

For the Fatal Encounters dataset, about 25% of the cases for Clark County since 2015 do not have information that explicitly identifies the race of the person killed. The Fatal Encounters team uses an “imputation” model to try to predict race for these cases. A brief description of the methodology is online here. They are able to impute just over half of the missing cases with reasonable confidence, and we include these imputations in the breakdowns reported here. After imputations, about 6% of cases are still missing race.

  2. We are reporting the counts here, not per capita rates

Breaking the total count down by race, the largest single group of persons killed by police, among those whose race is known, are identified as White/European-American: 46% of all cases, and 53% of the cases with known race.

This might be used to support a statement like, “The majority of persons killed by police in Clark County are white, there aren’t any racial disparities”. That would be incorrect, for two reasons.

  • First, the fraction of cases with unknown race, 6%, is large enough that it is possible that the majority of persons killed in Clark County are BIPOC. If two-thirds of the unknown cases are persons of color, that would mean the majority of victims are BIPOC. Using the case counts, we are not able to say with certainty whether the majority of persons killed by police in Clark County are White.

  • Second, the question of disparity can only be answered after controlling for the size of the Clark County populations by race. The population of Clark County is overwhelmingly white – almost 80% as of 2020 (source: Clark County OFM). On that basis alone, even if all of the unknown race cases were White, their rate of fatal encounters with police would still be 23% lower than expected, given their population share. On a per capita basis, the rate of persons killed by police is therefore higher for BIPOC. In particular, Black/African Americans comprise only 4% of the WA population, but 15% of the persons killed by police: they are 3.7 times more likely to be killed by police on a per capita basis.

  3. In addition, there are other sources of uncertainty that should be kept in mind:
  • Racial self-identification in Clark County includes about 5% of people who report two or more races when asked. This multiple-race classification does not exist in the data on persons killed by police, and it complicates the detailed calculation of per capita rates by race.

  • Hispanic/Latinx is an ethnicity classification that crosses several racial groups, primarily White, Black and Native American. These cases are identified in the Fatal Encounters and WaPo datasets as a distinct racial group, rather than as a separate ethnicity classification. This also complicates the calculation of detailed per capita rates by race.

  • Because the race classifications here do not represent what the officer perceived the person’s race to be, we can’t answer the question of intention, or implicit bias, with certainty.

By cause of death

Plot

# Bar chart of the share of cases by cause of death, collapsed to
# "Gunshot" versus everything else (missing values count as "Other").
homicides %>%
  mutate(codb = ifelse(cod.fe %in% "Gunshot", "Gunshot", "Other")) %>%
  count(codb) %>%
  mutate(perc = n / nrow(homicides)) %>%
  ggplot(aes(x = codb, y = perc, label = n)) +
  geom_text(size = 3, nudge_y = .025) +
  geom_col(fill = "blue", alpha = .5) +
  labs(
    title = "Fatalities by Cause of Death",
    caption = "Clark County since 2015; y-axis=pct, bar label=count",
    x = "Reported Weapon Used by Police",
    y = "Percent of Total"
  )

Table

# Tabulate cases by cause of death, most frequent first, with a "Total" row
tab <- homicides %>%
  count(cod.fe, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  arrange(desc(Number)) %>%
  bind_rows(
    data.frame(
      cod.fe = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  )

# Render the table with the final (total) row in bold
tab %>%
  kable(
    caption = "Breakdown by Cause of Death",
    col.names = c("Cause of Death", "Number", "Percent")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
Breakdown by Cause of Death
Cause of Death Number Percent
Gunshot 14 87.5
Asphyxiated/Restrained 1 6.2
Vehicle 1 6.2
Total 16 99.9
* Percents may not sum to 100 due to rounding

By Victim armed or not

This information comes from the Post dataset, so it is not available for the additional cases in Fatal Encounters. The additional cases in Fatal Encounters are included in the “Unknown” category in the plot and table below.

Plot

# Bar chart of the share of cases by the weapon the victim was reported to
# have, grouped into broad categories and ordered by share. Missing and
# "undetermined" values are shown as "Unknown"; unlisted values as "Other".
homicides %>%
  mutate(
    armed = case_when(
      is.na(armed.wapo) ~ "Unknown",
      armed.wapo == "undetermined" ~ "Unknown",
      armed.wapo %in% c("gun", "gun and car") ~ "Gun",
      armed.wapo %in% c("knife", "machete", "sword") ~ "Edged Weapon",
      armed.wapo == "unarmed" ~ "Unarmed",
      TRUE ~ "Other"
    )
  ) %>%
  count(armed) %>%
  mutate(perc = n / nrow(homicides)) %>%
  ggplot(aes(reorder(armed, perc), y = perc, label = n)) +
  geom_text(size = 3, nudge_y = .025) +
  geom_col(fill = "blue", alpha = .5) +
  labs(
    title = "Fatalities by Report of Victim Weapon",
    caption = "Clark County since 2015; y-axis=pct, bar label=count",
    x = "Reported Weapon Used by Victim",
    y = "Percent of Total"
  )

Table

# Tabulate cases by the reported victim weapon. Missing and "undetermined"
# values are merged into "Unknown", which is placed last among the levels
# before sorting by frequency; a "Total" row is appended.
tab <- homicides %>%
  mutate(
    armed.wapo = case_when(
      is.na(armed.wapo) | armed.wapo == "undetermined" ~ "Unknown",
      TRUE ~ armed.wapo
    ),
    armed.wapo = fct_relevel(armed.wapo, "Unknown", after = Inf)
  ) %>%
  count(armed.wapo, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  arrange(desc(Number)) %>%
  bind_rows(
    data.frame(
      armed.wapo = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  )

# Render the table with the final (total) row in bold
tab %>%
  kable(
    caption = "Breakdown by Report of Victim Weapon (data from WaPo only)",
    col.names = c("Armed", "Number", "Percent")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
Breakdown by Report of Victim Weapon (data from WaPo only)
Armed Number Percent
gun 5 31.2
Unknown 5 31.2
knife 2 12.5
hammer 1 6.2
sharp object 1 6.2
unarmed 1 6.2
vehicle 1 6.2
Total 16 99.7
* Percents may not sum to 100 due to rounding

By Bodycam

This information comes from the Post dataset, so is missing for the additional cases in Fatal Encounters. These additional cases are listed as “Unknown” below.

# Tabulate cases by whether a body camera was reported. The logical flag is
# turned into "Yes"/"No", missing values become "Unknown" (placed last), and
# a "Total" row is appended.
tab <- homicides %>%
  mutate(bodycam.wapo = ifelse(bodycam.wapo, "Yes", "No"),
         bodycam.wapo = tidyr::replace_na(bodycam.wapo, "Unknown"),
         bodycam.wapo = fct_relevel(bodycam.wapo, "Unknown",
                                    after = Inf)) %>%
  group_by(bodycam.wapo) %>%
  summarize(Number = n(),
            Percent = round(100 * Number / nrow(homicides), 1)
  ) %>%
  bind_rows(data.frame(bodycam.wapo = "Total",
                       Number = sum(.$Number),
                       Percent = sum(.$Percent)))

# Bold the total row by position of the LAST row, as the other tables do.
# A fixed row number would highlight the wrong row as soon as the number of
# categories changes (e.g. when both "Yes" and "No" are present).
tab %>%
  kable(caption = "Breakdown by Bodycam",
        col.names = c("Bodycam", "Number", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(label = "Percents may not sum to 100 due to rounding",
               notation = "symbol")
Breakdown by Bodycam
Bodycam Number Percent
No 13 81.2
Unknown 3 18.8
Total 16 100.0
* Percents may not sum to 100 due to rounding

Agency/PD involved

This information comes from Fatal Encounters.

# tab <- homicides %>%
#   group_by(agency) %>%
#   summarize(Number = n(),
#             Percent = round(100*Number/nrow(homicides), 1)
#             ) %>%
#   arrange(desc(Number)) %>%
#   bind_rows(data.frame(agency ="Total", 
#                        Number = sum(.$Number), 
#                        Percent = sum(.$Percent))) 
# 
# tab %>%
#   kable(caption = "Breakdown by Agency/PD of Involved Officer",
#         col.names = c("Agency/PD", "Number", "Percent")) %>%
#   kable_styling(bootstrap_options = c("striped","hover")) %>%
#   row_spec(row=dim(tab)[1], bold = T) %>%
#   add_footnote(label = "Percents may not sum to 100 due to rounding",
#                notation = "symbol")

# Interactive table of case counts and percentages by agency
homicides %>%
  count(agency, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  DT::datatable(
    rownames = FALSE,
    caption = "Breakdown by Agency/PD of Involved Officer"
  )

Online information availability

This information comes from Fatal Encounters. It takes the form of a single url to a news article that is available online.

There may be multiple news articles available online for a case, and they may report conflicting details of the event, as well as conflicting perspectives on whether the death was justifiable. So the link provided here should be used as a starting place for research, not as a definitive description of the event.

The clickable urls are available in this report in the Interactive Map and Say their names sections.

# Tabulate whether a news-article url is recorded for each case: an empty
# or missing `url_info` counts as "No", anything else as "Yes".
tab <- homicides %>%
  mutate(
    url_info = case_when(
      is.na(url_info) | url_info == "" ~ "No",
      TRUE ~ "Yes"
    )
  ) %>%
  count(url_info, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  bind_rows(
    data.frame(
      url_info = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  )

# Render the table with the final (total) row in bold
tab %>%
  kable(
    caption = "URL for news article in Fatal Encounters",
    col.names = c("Availability", "Number", "Percent")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
URL for news article in Fatal Encounters
Availability Number Percent
Yes 16 100
Total 16 100
* Percents may not sum to 100 due to rounding

By date

By Year

Note that 2021 is not complete; it is year to date as of 2021-05-02. There is also a lag in reporting of up to two weeks as information becomes available for a case.

# Stacked bar chart of cases per year, split into gunshot ("shot") and all
# other causes. `percent` is each cause's share within its year and is
# carried as the label aesthetic for the interactive plot.
p <- homicides %>%
  mutate(
    cod = ifelse(cod.fe == "Gunshot", "shot", "other"),
    year = as.character(year)
  ) %>%
  count(year, cod) %>%
  group_by(year) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  ggplot(aes(x = year, y = n, label = percent, fill = cod)) +
  #geom_text(aes(y = n), size = 3, nudge_y = .025) +
  geom_bar(stat = "identity", alpha = .5) +
  labs(
    title = "Year",
    caption = "Clark County since 2015",
    x = "Year",
    y = "Number",
    fill = "Cause of\nDeath"
  )
plotly::ggplotly(p)

Cumulative Totals by Month

The lines show the cumulative total fatalities by month as the year progresses, for each year. For the current year, we only plot months that have ended, to get the full monthly count.

# we will only plot after current month has finished
# we will only plot after current month has finished
curr_mo <- lubridate::month(Sys.Date())

# First day of the current month. Taking it from today's date (rather than
# pasting the month onto a fixed year) keeps the cutoff correct in any year.
curr_mo_start <- lubridate::floor_date(Sys.Date(), unit = "month")

# Cumulative case count by month within each year, one line per year.
# summarize() drops the `month` grouping, so cumsum() runs within each year.
homicides %>%
  filter(date < curr_mo_start) %>%
  group_by(year, month) %>%
  summarize(count = n()) %>%
  mutate(cumulative = cumsum(count)) %>%
  ggplot(aes(x = month,
             y = cumulative,
             color = factor(year),
             group = year)) +
  geom_line(size = 1.5, alpha = 0.5) +
  geom_point(size = 1, alpha = 0.5) +
  labs(title = "Cumulative fatalities by Month and Year",
       caption = "Clark County since 2015",
       color = "Year") +
  xlab("Month") +
  ylab("Number") +
  scale_color_brewer(palette = "YlOrRd")

By Month

The points represent monthly averages, across all years. For the current year, we only plot months that have ended, to get the full monthly count.

# Average number of cases per calendar month across years, using only months
# that have ended. The cutoff is the first day of the current month, taken
# from today's date so it stays correct in any year and does not rely on a
# variable set in another chunk.
# NOTE(review): a month/year pair with no cases produces no row here, so the
# mean is taken only over years that had at least one case in that month.
homicides %>%
  filter(date < lubridate::floor_date(Sys.Date(), unit = "month")) %>%
  group_by(month, year) %>%
  summarize(count = n()) %>%
  group_by(month) %>%
  summarize(avg.per.mo = mean(count)) %>%
  ggplot(aes(x = month,
             y = avg.per.mo)) +
  geom_bar(stat = "identity", fill = "blue", alpha = 0.5) +
  labs(title = "Average Fatalities by Month",
       caption = "Clark County since 2015") +
  xlab("Month") +
  ylab("Average Number")

Say their names

Name known

Of the 16 persons killed by police, the names of 15 victims are known at this time.

# Interactive, filterable table of the cases with a known name, newest
# first. escape = FALSE leaves the cell contents unescaped so that the
# `url_click` column can render as html.
homicides %>%
  filter(name != "Unknown") %>%
  arrange(desc(date)) %>%
  select(name, date, age = age.fe, county, agency, url_click) %>%
  DT::datatable(
    rownames = FALSE,
    caption = paste("The Names We Know:  as of", scrape.date),
    filter = "top",
    escape = FALSE
  )

Name Unknown

The name of the remaining 1 victim is not known at this time.

# Interactive, filterable table of the cases whose name is recorded as
# "Unknown", newest first.
homicides %>%
  filter(name == "Unknown") %>%
  arrange(desc(date)) %>%
  select(name, date, age = age.fe, county, agency, url_click) %>%
  DT::datatable(
    rownames = FALSE,
    caption = paste("The Names We Don't Know:  as of", scrape.date),
    filter = "top",
    escape = FALSE
  )

Dataset comparison

Fatal Encounters includes more cases than the Washington Post, because the Post dataset is restricted to fatal shootings.


Among the cases missing from the Washington Post data are Manny Ellis (in WA) and George Floyd (at the national level), because their deaths were caused by asphyxiation, not gunshots.


The Fatal Encounters data also includes some cases that we have excluded from the above report:

  1. cases described as “suicides” that occur during an encounter with police

  2. a handful of other cases clearly not homicides by police. Examples include a vehicle crash caused by a deputy who had a stroke (and later died), and two people killed by a suspect before police could apprehend him.

The total number of cases in the Fatal Encounters dataset for Clark County since 2015, including both homicides and the cases we’ve excluded as described above, is 19. These are classified as follows:

# Tabulate every record for the county (before exclusions) by circumstance
# type, most frequent first, with a "Total" row.
tab <- all.fatalities %>%
  count(circumstances.fe, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(all.fatalities), 1)) %>%
  arrange(desc(Number)) %>%
  bind_rows(
    data.frame(
      circumstances.fe = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  )

# Render the table with the final (total) row in bold
tab %>%
  kable(
    caption = "All Fatal Encounters dataset cases, broken down by type",
    col.names = c("Type", "Number", "Percent")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
All Fatal Encounters dataset cases, broken down by type
Type Number Percent
Deadly force 14 73.7
Suicide 3 15.8
Less-than-lethal force 1 5.3
Vehicle hot pursuits 1 5.3
Total 19 100.1
* Percents may not sum to 100 due to rounding

In total we excluded 3 of these cases in the report and analysis.

Note that there are often competing narratives about how a fatal encounter with an officer transpired. These narratives are embedded in the news reports that both Fatal Encounters and the Washington Post rely on when classifying cases into the categories above. We, too, rely on these categories for selecting cases that can be classified as homicides.

Homicide cases

Restricting the Fatal Encounters dataset to the homicide cases, it still has 3 more cases than found in the Post dataset.

# Tabulate the homicide cases by dataset membership: a case with a missing
# `wapoID` is labelled as found in Fatal Encounters only.
tab <- homicides %>%
  mutate(
    in.wapo = ifelse(is.na(wapoID), "In FE only", "In both FE and WaPo")
  ) %>%
  count(in.wapo, name = "Number") %>%
  mutate(Percent = round(100 * Number / nrow(homicides), 1)) %>%
  bind_rows(
    data.frame(
      in.wapo = "Total",
      Number = sum(.$Number),
      Percent = sum(.$Percent)
    )
  )

# Render the table with the final (total) row in bold
tab %>%
  kable(
    caption = "Cases found in each dataset",
    col.names = c("Case is:", "Number", "Percent")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(row = nrow(tab), bold = TRUE) %>%
  add_footnote(
    label = "Percents may not sum to 100 due to rounding",
    notation = "symbol"
  )
Cases found in each dataset
Case is: Number Percent
In both FE and WaPo 13 81.2
In FE only 3 18.8
Total 16 100.0
* Percents may not sum to 100 due to rounding

Missing cases in Washington Post

By cause of death

Among the cases missing from the Post dataset, 33% are identified as fatal shootings in the Fatal Encounters dataset.

# Cause of death for the cases with no `wapoID` (i.e. not matched to the
# Post dataset), most frequent first, with a "Total" row.
tab <- homicides %>%
  filter(is.na(wapoID)) %>%
  count(cod.fe, name = "Number") %>%
  arrange(desc(Number)) %>%
  bind_rows(
    data.frame(
      cod.fe = "Total",
      Number = sum(.$Number)
    )
  )

# Render the table with the "Gunshot" row (if any) and the total row in bold
tab %>%
  kable(
    caption = "Cause of death for cases missing from WaPo dataset",
    col.names = c("Cause:", "Number")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover")) %>%
  row_spec(
    row = c(which(tab$cod.fe == "Gunshot"), nrow(tab)),
    bold = TRUE
  )
Cause of death for cases missing from WaPo dataset
Cause: Number
Asphyxiated/Restrained 1
Gunshot 1
Vehicle 1
Total 3

By name, for fatal shootings

Below is the list of cases identified as gunshot fatalities in Fatal Encounters that are missing from the Washington Post dataset.

# Interactive table of the gunshot cases that have no `wapoID`
homicides %>%
  filter(is.na(wapoID), cod.fe == "Gunshot") %>%
  select(name, date, city, county, agency, url_click) %>%
  DT::datatable(
    rownames = FALSE,
    caption = "Gunshot cases missing from WaPo dataset",
    escape = FALSE
  )