This report consists of a bunch of visualisations concerning U.S. Police Shootings statistics.

Data obtained via Kaggle, dataset presents observations for years 2015-2020.

 

Packages that I used during the analysis:

* tidyverse
* lubridate
* usmap
* plotly
* sunburstR
* htmltools
* d3r
* treemapify
* DT
* prettydoc

 

Let’s take a look at the dataset:

dk <- data %>%
  select(name, date, manner_of_death, armed, age, gender, race, city, state, signs_of_mental_illness, flee, body_camera, arms_category)
dk <- dk[1:10,]


datatable(dk, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )

 

Timeline of shooting cases quantity:

# let's get rid of warning results: 
options(dplyr.summarise.inform = FALSE)

data %>%
  group_by(YM) %>%
  summarize(counts = n()) %>%
  ggplot(aes(y=counts, x = YM, fill = counts)) +
  geom_bar(stat="identity") +
  theme_minimal() + 
  labs(x = "", y = "shootings", title = "Shootings Counts - Monthly:") +
  scale_x_discrete(breaks = c("2015-01","2016-01","2017-01","2018-01","2019-01","2020-01"))

 

How many cases registered in each year?

summary(data$Y)
## 2015 2016 2017 2018 2019 2020 
##  965  904  906  888  858  374

 

data %>%
  group_by(Y) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = Y, y = counts, fill = Y)) + 
  geom_bar(stat="identity") + 
  labs(x = "", y = "shootings", title = "Shooting cases in each year:") +
  theme_classic() + 
  theme(legend.position = "none") + 
  geom_text(aes(label = counts), vjust = 2, size = 5, color = "white")

How many cases registered in each quarter?

summary(data$Q)
##    1    2    3    4 
## 1470 1247 1112 1066

 

data %>%
  group_by(Q) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = Q, y = counts, fill = Q)) + 
  geom_bar(stat="identity") + 
  labs(x = "", y = "shootings", title = "Shooting cases in each quarter:") +
  theme_classic() + 
  theme(legend.position = "none") + 
  geom_text(aes(label = counts), vjust = 2, size = 5, color = "white")

How many cases in each month?

summary(data$M)
##   1   2   3   4   5   6   7   8   9  10  11  12 
## 505 462 503 430 418 399 413 371 328 367 354 345

 

data %>%
  group_by(M) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = M, y = counts, fill = M)) + 
  geom_bar(stat="identity") + 
  labs(x = "", y = "shootings", title = "Shooting cases in each month:") +
  theme_classic() + 
  theme(legend.position = "none") + 
  geom_text(aes(label = counts), vjust = 2, size = 4, color = "white")

 

What is the age distribution among the shot people?

summary(data$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   35.00   36.55   45.00   91.00

 

What are the shooting counts distribution concerning age?

data %>%
  ggplot(aes(age, fill = "black")) + 
  geom_bar(width = 1) + 
  theme_classic() + 
  theme(legend.position = "none") + 
  labs(y = "shootings", title = "Shooting frequency ~ age:")
## Warning: position_stack requires non-overlapping x intervals

 

Let’s check the boxplots for age distribution ~ gender:

data %>%
  ggplot(aes(y=age, fill = gender)) +
  geom_boxplot() + 
  stat_boxplot(geom = 'errorbar') +
  facet_grid(~gender) +
  scale_x_discrete() +
  theme_minimal() +
  ggtitle("Age distribution ~ gender:") + 
  theme(legend.position = "none")

 

How differ age distributions across years and gender?

# In order to get a split in violin plot, I am using a function geom_split_violin(), 
# cc: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, 
                           draw_group = function(self, data, ..., draw_quantiles = NULL) {
  data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
  grp <- data[1, "group"]
  newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
  newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
  newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])

  if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
    stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
      1))
    quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
    aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
    aesthetics$alpha <- rep(1, nrow(quantiles))
    both <- cbind(quantiles, aesthetics)
    quantile_grob <- GeomPath$draw_panel(both, ...)
    ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
  }
  else {
    ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
  }
})

geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ..., 
                              draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE, 
                              show.legend = NA, inherit.aes = TRUE) {
  layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin, 
        position = position, show.legend = show.legend, inherit.aes = inherit.aes, 
        params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}

data %>%
  ggplot(aes(x = Y, y = age, fill = gender)) +
  geom_split_violin() +
  theme_minimal() +
  ggtitle("Age distribution ~ gender throughout years") + 
  xlab("year")

 

Let’s check the boxplots for age distribution among races:

data %>%
  ggplot(aes(y=age, fill = race)) +
  geom_boxplot() +
  stat_boxplot(geom = 'errorbar') +
  facet_grid(~race) +
  scale_x_discrete() +
  theme_minimal() +
  ggtitle("Age distribution ~ race:") +
  xlab("race") +
  theme(legend.position = "none")

 

What are the shooting counts for each gender?

summary(data$gender)
## Female   Male 
##    222   4673

 

data %>%
  group_by(gender) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = gender, y = counts, fill = counts)) + 
  geom_bar(stat="identity") + 
  labs(x = "", y = "shootings", title = "Shooting cases in each quarter:") +
  scale_fill_continuous(low = "steelblue", high = "purple") +
  theme_classic() + 
  geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
  theme(legend.position = "none")

 

How many shootings were recorded?

summary(data$body_camera)
##   No  Yes 
## 4317  578

 

data %>%
  group_by(body_camera) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = body_camera, y = counts, fill = counts)) + 
  geom_bar(stat="identity") + 
  labs(x = "", y = "counts", title = "Shooting recorded?") +
  scale_fill_continuous(low = "pink", high = "red") +
  theme_classic() + 
  geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
  theme(legend.position = "none")

 

What are the shooting counts distribution concerning race?

summary(data$race)
##    Asian    Black Hispanic   Native    Other    White 
##       93     1298      902       78       48     2476

 

data %>%
  group_by(race) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = race, y = counts, fill = race)) + 
  geom_bar(stat="identity") + 
  labs(x = "race", y = "shootings", title = "Shooting cases ~ race:") +
  theme_classic() + 
  theme(legend.position = "none") + 
  geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black")

 

What are the causes of death?

summary(data$manner_of_death)
##             shot shot and Tasered 
##             4647              248

 

data %>%
  group_by(manner_of_death) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = manner_of_death, y = counts, fill = counts)) +
  scale_fill_continuous(low = "lightgreen", high = "darkgreen") +
  geom_bar(stat="identity") + 
  labs(x = "", y = "counts", title = "Death causes:") +
  theme_classic() + 
  geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
  theme(legend.position = "none")

 

What are the states, where occured more than 120 shooting cases?

data %>% 
  group_by(state) %>%
  summarize(counts = n()) %>%
  filter(counts > 120) %>%
  mutate(state = factor(state, state)) %>%
  ggplot(aes(y=reorder(state, counts), x = counts, fill = counts)) +
  geom_bar(stat = "identity") + 
  scale_fill_continuous(low = "pink", high = "purple", label = scales::comma) + 
  labs(x = "shootings", y = "state", title = "States where occured more than 120 shootings:") +
  theme_classic() + 
  geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
  theme(legend.position = "none")

 

Let’s present the shootings’ counts in each state in a form of a treemap:

data$state <- as.factor(data$state)

data %>%
  group_by(state) %>%
  summarize(counts = n()) %>%
  ggplot(aes(fill = state, area = counts, label = state)) +
  geom_treemap() +
  geom_treemap_text(color="white") + 
  theme(legend.position = "none")

 

Let’s present this data on the U.S. map:

val <- data %>% 
  group_by(state) %>%
  summarize(counts = n())

plot_usmap(regions = "states", data = val, values = "counts", color = "black") +
  scale_fill_continuous(low = "white", high = "black", name = "shooting cases", label = scales::comma) +
  ggtitle("Shooting cases in each state:") +
  theme(legend.position = "right", plot.title = element_text(size = 14))

 

What are the cities, where occured more than 20 shootings?

data %>% 
  group_by(city) %>%
  summarize(counts = n()) %>%
  filter(counts > 20) %>%
  mutate(city = factor(city, city)) %>%
  ggplot(aes(y=reorder(city, counts), x = counts, fill = counts)) +
  scale_fill_continuous(low = "orange", high = "brown") +
  geom_bar(stat = "identity") + 
  labs(y = "city", title = "Cities where occured more than 20 shootings:") + 
  theme_classic() + 
  geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
  theme(legend.position = "none")

 

How many subjects wanted to flee and in which way?

summary(data$flee)
##         Car        Foot Not fleeing       Other 
##         820         642        3073         360

 

data %>%
  group_by(flee) %>%
  summarize(counts = n()) %>%
  mutate(flee = factor(flee, flee)) %>%
  ggplot(aes(y=reorder(flee, counts), x = counts, fill = counts)) +
  scale_fill_continuous(low = "lightgrey", high = "darkgrey") +
  geom_bar(stat = "identity") + 
  labs(x = "counts", y = "flee manner", title = "How shot people tried to flee:") +
  theme_classic() + 
  geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
  theme(legend.position = "none")

 

How many subjects showed signs of mental illness?

summary(data$signs_of_mental_illness)
##   No  Yes 
## 3792 1103

 

data %>%
  group_by(signs_of_mental_illness) %>%
  summarize(counts = n()) %>%
  ggplot(aes(x = signs_of_mental_illness, y = counts, fill = counts)) +
  geom_bar(stat="identity") + 
  labs(x = "", y = "counts", title = "How many shot people show signs of mental illness:") +
  scale_fill_continuous(low = "lightblue", high = "lightgreen") +
  theme_classic() + 
  geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
  theme(legend.position = "none")

 

A simple sunburst plot to present cases proportions given:

1st layer - year

2nd layer - gender

3rd layer - race

sb3 <- data[,c("Y", "gender", "race")] %>%
   group_by(Y,gender,race) %>%
   summarize(counts=n())
  
dat <- data.frame(
  level1 = sb3$Y,
  level2 = sb3$gender,
  level3 = sb3$race,
  size = sb3$counts,
  stringsAsFactors = F
)

tree <- d3_nest(dat, value_cols = "size")

sunburst(tree, width="100%", height=400, legend = T)
Legend