Introduction

The 2014 killing of Michael Brown in Ferguson, Missouri, began the protest movement culminating in Black Lives Matter and an increased focus on police accountability nationwide.

Since Jan. 1, 2015, The Washington Post has been compiling a database of every fatal shooting in the US by a police officer in the line of duty. Reliable data from before this period is difficult to find: police killings were not comprehensively documented, and statistics on police brutality were far less available. As a result, a vast number of cases went unreported.

Key research questions of this exercise are as follows:

  1. Are police shootings more likely to happen for certain races in certain states, compared to the percentage of each race in the total population?

  2. In the case of police shootings, is there a difference in race within gender or vice versa?

  3. Were the people in the police shooting cases attempting to flee the scene or not?

Data

The data for this homework is the classroom dataset from Kaggle. Data source: https://www.kaggle.com/kwullum/fatal-police-shootings-in-the-us

files:

  1. PoliceKillingsUS.csv
  2. ShareRaceByCity.csv

Additional Census data can be found at:

https://www.infoplease.com/us/race-population/population-united-states-race-and-hispaniclatino-origin-census-2000-and-2010

Results

library(ggplot2)
library(ggthemes)
library(babynames)
library(Zelig)
library(ggrepel)
library(HistData)
library(tidyverse)
library(HistData)
library(ggpubr)

# Load the two input CSVs and normalise the columns used by the plots below.
options(dplyr.show_progress = FALSE)
hw9_shooting <- read.csv("PoliceKillingsUS.csv", stringsAsFactors = FALSE)
hw9_race <- read.csv("ShareRaceByCity.csv", stringsAsFactors = FALSE)

# Drop records with no recorded race, then expand the one-letter codes into
# readable labels.
hw9_shooting <- hw9_shooting %>%
  filter(race != "") %>%
  mutate(
    # Any race code not listed here is grouped under "Other".
    race = case_when(
      race == "W" ~ "White",
      race == "B" ~ "Black",
      race == "A" ~ "Asian",
      race == "H" ~ "Hispanic",
      race == "N" ~ "Native American",
      TRUE ~ "Other"
    ),
    # Map "F" explicitly: previously every non-"M" value (blank or unexpected
    # codes included) was silently labelled "Female".
    gender = case_when(
      gender == "M" ~ "Male",
      gender == "F" ~ "Female",
      TRUE ~ "Unknown"
    ),
    # A blank flee status becomes an explicit "Unknown" category.
    flee = case_when(
      flee == "" ~ "Unknown",
      TRUE ~ flee
    )
  )

# Reshape the share columns (positions 3-7) into long form: one row per
# original row and race. as.numeric() turns non-numeric entries into NA.
hw9_race_long <- hw9_race %>%
  gather(race, share_pct, 3:7) %>%
  mutate(share_pct = as.numeric(share_pct))

# Strip the "share_" prefix so the race column holds only the race name, and
# rename the first column to "state" to match hw9_shooting.
hw9_race_long$race <- gsub("share_", "", hw9_race_long$race)
colnames(hw9_race_long)[1] <- "state"
### Shooting and population heatmaps by state and race

# Share of each state's police-shooting cases by race. summarize() drops the
# innermost grouping (race), so sum(total) is computed within each state.
hw9_shooting_heatmap <- hw9_shooting %>%
  group_by(state, race) %>%
  summarize(total = n()) %>%
  mutate(total_pct = total / sum(total))

ggplot(hw9_shooting_heatmap, aes(x = race, y = state, fill = total_pct)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  ggtitle("Police Shooting Heatmap By State, Race")

# hw9_race_long can hold many rows per state/race pair. Plotting it directly
# stacks one tile per row in the same cell, so the visible colour is whichever
# row is drawn last. Collapse to one value per cell first.
# NOTE(review): this is an unweighted mean of the rows' shares, not a
# population-weighted state share -- confirm that is acceptable.
hw9_race_long %>%
  group_by(state, race) %>%
  summarize(share_pct = mean(share_pct, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(x = race, y = state, fill = share_pct)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  ggtitle("Population Heatmap By State and Race")

####### Race % in total population vs. in Police shooting cases

# National population share by race. These values are the Census 2000 figures
# (75.1% White, 12.5% Hispanic, 12.3% Black, 3.6% Asian, 0.9% Native American,
# 8.0% Other), so the plot is titled accordingly.
# The shares sum to more than 1.
# NOTE(review): presumably because Hispanic origin overlaps the race
# categories -- confirm. The 2010 figures would be closer in time to the
# shooting data; swap them in if preferred.
race_pct <- c(0.751, 0.125, 0.123, 0.036, 0.009, 0.08)
race <- c("White", "Hispanic", "Black", "Asian", "Native American", "Other")
population <- data.frame(race = race, race_pct = race_pct)

total_pop <- ggplot(population, aes(x = race, y = race_pct, fill = race)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = round(race_pct, 2)),
    vjust = 1.6, color = "black", position = position_dodge(0.9), size = 3.5
  ) +
  ggtitle("2000 US Census") +
  xlab("Race") +
  ylab("Race % of Total Population")

# Share of all police-shooting cases by race. summarize() leaves the result
# ungrouped here, so sum(total) is the overall number of cases.
hw9_shooting_race <- hw9_shooting %>%
  group_by(race) %>%
  summarize(total = n()) %>%
  mutate(total_pct = total / sum(total))

total_shooting <- ggplot(
  hw9_shooting_race,
  aes(x = race, y = total_pct, fill = race)
) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(label = round(total_pct, 2)),
    vjust = 1.6, color = "black", position = position_dodge(0.9), size = 3.5
  ) +
  ggtitle("Police Shooting Cases") +
  xlab("Race") +
  ylab("Race % of Police Shooting")

# Show the two bar charts side by side.
ggarrange(total_pop, total_shooting)

### Ratio of race % in population and in police shooting

# Convert any factor columns to character so the join key has the same type
# in both tables.
hw9_shooting_race <- mutate_if(hw9_shooting_race, is.factor, as.character)
population <- mutate_if(population, is.factor, as.character)

# Keep only races present in both tables, then divide each race's share of
# shooting cases by its share of the population.
hw9_shooting_ratio <- inner_join(hw9_shooting_race, population, by = "race")
hw9_shooting_ratio <- mutate(
  hw9_shooting_ratio,
  killing_ratio = total_pct / race_pct
)

# Race is on both axes, so only the diagonal cells are filled.
ggplot(hw9_shooting_ratio, aes(x = race, y = race, fill = killing_ratio)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(
    title = "Killing Ratio Heatmap By Race : Killing Ratio = race % of police shooting / race % of total population",
    x = "Race",
    y = "Race"
  )

#### Gender Difference within race
# Count cases per race/gender pair. summarise() drops the gender grouping, so
# total_pct is each gender's share within its race.
hw9_shooting_2 <- hw9_shooting %>%
  group_by(race, gender) %>%
  summarise(total = n()) %>%
  mutate(total_pct = total / sum(total))

# One panel per race, one bar per gender.
ggplot(hw9_shooting_2, aes(x = gender, y = total_pct, fill = gender)) +
  geom_col() +
  facet_wrap(~race) +
  labs(
    title = "Police Shooting: Gender difference within race",
    x = "Gender",
    y = "Percentage of Shooting Cases"
  )

#### Race difference within Gender

# Count cases per gender/race pair. summarize() drops the race grouping, so
# total_pct is each race's share within its gender.
hw9_shooting_3 <- hw9_shooting %>%
  group_by(gender, race) %>%
  summarize(total = n()) %>%
  mutate(total_pct = total / sum(total))

# One panel per gender, one bar per race. The x axis shows race, so it is
# labelled "Race" (it was previously mislabelled "Gender").
ggplot(hw9_shooting_3) +
  geom_col(aes(x = race, y = total_pct, fill = race)) +
  facet_wrap(~gender) +
  ggtitle("Police Shooting: Race difference within gender") +
  xlab("Race") +
  ylab("Percentage of Shooting Cases")

###### Whether or not the people involved in shooting cases flee
# Count cases per race/gender/flee combination. summarise() drops the flee
# grouping, so total_pct is each flee status's share within a race/gender pair.
hw9_shooting_flee <- hw9_shooting %>%
  group_by(race, gender, flee) %>%
  summarise(total = n()) %>%
  mutate(total_pct = total / sum(total))

# One panel per race/gender pair, one bar per flee status.
ggplot(hw9_shooting_flee, aes(x = flee, y = total_pct, fill = flee)) +
  geom_col() +
  facet_wrap(~race + gender) +
  labs(
    title = "Police Shooting: Fleeing or Not by Race and Gender",
    y = "Percentage of Shooting Cases"
  )