Data source: https://www.washingtonpost.com/graphics/investigations/police-shootings-database/
library(tidyverse)
## -- Attaching packages ---------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Work from the folder that holds the raw data so the CSV can be read by its
# bare file name.
# NOTE(review): this absolute path is machine-specific; the script only runs
# as-is on a machine where this folder exists.
setwd(dir = "C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets")

# Load the shootings data; read_csv() guesses the column types and prints the
# column specification it used.
data <- read_csv(file = "fatal_police_shootings.csv")
## Parsed with column specification:
## cols(
## id = col_double(),
## name = col_character(),
## date = col_character(),
## manner_of_death = col_character(),
## armed = col_character(),
## age = col_double(),
## gender = col_character(),
## race = col_character(),
## city = col_character(),
## state = col_character(),
## signs_of_mental_illness = col_logical(),
## threat_level = col_character(),
## flee = col_character(),
## body_camera = col_logical()
## )
Drop the rows with NA in the variable “armed”, then create a new variable, weapon, that classifies each case as either armed or unarmed.
library(tidyr)

# Discard the rows where `armed` was not recorded.
data <- drop_na(data, armed)

# Collapse `armed` into a two-level flag: "unarmed" stays "unarmed", and every
# other recorded value is treated as "armed".
data <- mutate(
  data,
  weapon = if_else(armed == "unarmed", "unarmed", "armed")
)

# Print the tibble to check the new `weapon` column.
data
## # A tibble: 5,148 x 15
## id name date manner_of_death armed age gender race city state
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 3 Tim ~ 1/2/~ shot gun 53 M A Shel~ WA
## 2 4 Lewi~ 1/2/~ shot gun 47 M W Aloha OR
## 3 5 John~ 1/3/~ shot and Taser~ unar~ 23 M H Wich~ KS
## 4 8 Matt~ 1/4/~ shot toy ~ 32 M W San ~ CA
## 5 9 Mich~ 1/4/~ shot nail~ 39 M H Evans CO
## 6 11 Kenn~ 1/4/~ shot gun 18 M W Guth~ OK
## 7 13 Kenn~ 1/5/~ shot gun 22 M H Chan~ AZ
## 8 15 Broc~ 1/6/~ shot gun 35 M W Assa~ KS
## 9 16 Autu~ 1/6/~ shot unar~ 34 F W Burl~ IA
## 10 17 Lesl~ 1/6/~ shot toy ~ 47 M B Knox~ PA
## # ... with 5,138 more rows, and 5 more variables:
## # signs_of_mental_illness <lgl>, threat_level <chr>, flee <chr>,
## # body_camera <lgl>, weapon <chr>
Replace the single-letter race codes (A, B, W, H, N, and O) with descriptive labels.
# Lookup from the single-letter race codes to the labels used in the tables
# and charts.
race_labels <- c(
  A = "Asian",
  B = "Black non-hispanic",
  W = "White non-hispanic",
  H = "Hispanic",
  N = "Native American",
  O = "Other"
)

# Translate only the values that are known codes; NA and any value that is not
# one of the six codes are left exactly as they are.
is_coded <- data$race %in% names(race_labels)
data$race[is_coded] <- unname(race_labels[data$race[is_coded]])

# Print the tibble to check the relabelled `race` column.
data
## # A tibble: 5,148 x 15
## id name date manner_of_death armed age gender race city state
## <dbl> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <chr> <chr>
## 1 3 Tim ~ 1/2/~ shot gun 53 M Asian Shel~ WA
## 2 4 Lewi~ 1/2/~ shot gun 47 M Whit~ Aloha OR
## 3 5 John~ 1/3/~ shot and Taser~ unar~ 23 M Hisp~ Wich~ KS
## 4 8 Matt~ 1/4/~ shot toy ~ 32 M Whit~ San ~ CA
## 5 9 Mich~ 1/4/~ shot nail~ 39 M Hisp~ Evans CO
## 6 11 Kenn~ 1/4/~ shot gun 18 M Whit~ Guth~ OK
## 7 13 Kenn~ 1/5/~ shot gun 22 M Hisp~ Chan~ AZ
## 8 15 Broc~ 1/6/~ shot gun 35 M Whit~ Assa~ KS
## 9 16 Autu~ 1/6/~ shot unar~ 34 F Whit~ Burl~ IA
## 10 17 Lesl~ 1/6/~ shot toy ~ 47 M Blac~ Knox~ PA
## # ... with 5,138 more rows, and 5 more variables:
## # signs_of_mental_illness <lgl>, threat_level <chr>, flee <chr>,
## # body_camera <lgl>, weapon <chr>
# Save the cleaned data (NA `armed` rows dropped, `weapon` added, race
# relabelled) next to the raw file.
# NOTE(review): absolute, machine-specific path.
clean_csv_path <- "C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets/fatal_clean.csv"
write_csv(data, clean_csv_path)
# Cross-tabulate race against weapon status; table() leaves out rows where
# either value is NA.
proptable <- table(race = data$race, weapon = data$weapon)

# Turn the counts into shares of the grand total, rounded to three decimals.
x <- round(prop.table(proptable), digits = 3)
x
## weapon
## race armed unarmed
## Asian 0.018 0.002
## Black non-hispanic 0.236 0.027
## Hispanic 0.171 0.014
## Native American 0.015 0.001
## Other 0.009 0.001
## White non-hispanic 0.475 0.032
# Race-by-weapon contingency table, built without dimension names.
p <- table(data[["race"]], data[["weapon"]])

# Calling plot() on a two-way table draws a mosaic plot of the counts.
plot(p)
Use position = “stack” to make a stacked bar chart. Remove the rows where race is NA before plotting.
# Stacked bar chart: number of fatal shootings per race, with each bar split
# by whether the person was armed.
#
# drop_na(race) removes only the rows whose race is missing. na.omit() is not
# suitable here: its data-frame method ignores any column argument and drops
# every row that has an NA in *any* column (age, name, gender, ...), which
# would undercount the bars.
p1 <- data %>%
  drop_na(race) %>%
  ggplot(aes(x = race, fill = weapon)) +
  geom_bar(position = "stack")
p1
Use position = “dodge” to make the bars side-by-side rather than stacked.
# Side-by-side (dodged) bar chart: armed and unarmed counts next to each other
# for every race.
#
# drop_na(race) removes only the rows whose race is missing. na.omit() is not
# suitable here: its data-frame method ignores any column argument and drops
# every row that has an NA in *any* column (age, name, gender, ...), which
# would undercount the bars.
p2 <- data %>%
  drop_na(race) %>%
  ggplot(aes(x = race, fill = weapon)) +
  geom_bar(position = "dodge")
p2
Change the order of the bars so they go from smallest to largest. Add a title, use “alpha” to change the transparency level of the color, flip the bar chart horizontally, and change the theme from the default style.
# Fix the bar order by turning race into an ordered factor with explicit
# levels. Values not listed in `levels` would become NA.
data$race <- ordered(
  data$race,
  levels = c(
    "White non-hispanic",
    "Black non-hispanic",
    "Hispanic",
    "Asian",
    "Native American",
    "Other"
  )
)

# Dodged, semi-transparent, horizontal bar chart with a title and the classic
# theme.
#
# drop_na(race) removes only the rows whose race is missing. na.omit() is not
# suitable here: its data-frame method ignores any column argument and drops
# every row that has an NA in *any* column (age, name, gender, ...), which
# would undercount the bars.
p3 <- data %>%
  drop_na(race) %>%
  ggplot(aes(x = race, fill = weapon)) +
  geom_bar(position = "dodge", alpha = 0.6) +  # alpha < 1 makes bars translucent
  coord_flip() +                               # swap axes: horizontal bars
  ggtitle("Race of Fatal Police Shooting and Whether Armed or Unarmed") +
  theme_classic()
p3