This report consists of a bunch of visualisations concerning U.S. Police Shootings statistics.
Data obtained via Kaggle, dataset presents observations for years 2015-2020.
Packages that I used during the analysis:
* tidyverse
* lubridate
* usmap
* plotly
* sunburstR
* htmltools
* d3r
* treemapify
* DT
Let’s take a look at the dataset:
dk <- data %>%
select(name, date, manner_of_death, armed, age, gender, race, city, state, signs_of_mental_illness, flee, body_camera, arms_category)
dk <- dk[1:10,]
datatable(dk, rownames = FALSE, filter="top", options = list(pageLength = 5, scrollX=T) )
Timeline of shooting cases quantity:
# let's get rid of warning results:
options(dplyr.summarise.inform = FALSE)
data %>%
group_by(YM) %>%
summarize(counts = n()) %>%
ggplot(aes(y=counts, x = YM, fill = counts)) +
geom_bar(stat="identity") +
theme_minimal() +
labs(x = "", y = "shootings", title = "Shootings Counts - Monthly:") +
scale_x_discrete(breaks = c("2015-01","2016-01","2017-01","2018-01","2019-01","2020-01"))
How many cases registered in each year?
summary(data$Y)
## 2015 2016 2017 2018 2019 2020
## 965 904 906 888 858 374
data %>%
group_by(Y) %>%
summarize(counts = n()) %>%
ggplot(aes(x = Y, y = counts, fill = Y)) +
geom_bar(stat="identity") +
labs(x = "", y = "shootings", title = "Shooting cases in each year:") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(label = counts), vjust = 2, size = 5, color = "white")

How many cases registered in each quarter?
summary(data$Q)
## 1 2 3 4
## 1470 1247 1112 1066
data %>%
group_by(Q) %>%
summarize(counts = n()) %>%
ggplot(aes(x = Q, y = counts, fill = Q)) +
geom_bar(stat="identity") +
labs(x = "", y = "shootings", title = "Shooting cases in each quarter:") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(label = counts), vjust = 2, size = 5, color = "white")

How many cases in each month?
summary(data$M)
## 1 2 3 4 5 6 7 8 9 10 11 12
## 505 462 503 430 418 399 413 371 328 367 354 345
data %>%
group_by(M) %>%
summarize(counts = n()) %>%
ggplot(aes(x = M, y = counts, fill = M)) +
geom_bar(stat="identity") +
labs(x = "", y = "shootings", title = "Shooting cases in each month:") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(label = counts), vjust = 2, size = 4, color = "white")

What is the age distribution among the shot people?
summary(data$age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.00 27.00 35.00 36.55 45.00 91.00
What are the shooting counts distribution concerning age?
data %>%
ggplot(aes(age, fill = "black")) +
geom_bar(width = 1) +
theme_classic() +
theme(legend.position = "none") +
labs(y = "shootings", title = "Shooting frequency ~ age:")
## Warning: position_stack requires non-overlapping x intervals

Let’s check the boxplots for age distribution ~ gender:
data %>%
ggplot(aes(y=age, fill = gender)) +
geom_boxplot() +
stat_boxplot(geom = 'errorbar') +
facet_grid(~gender) +
scale_x_discrete() +
theme_minimal() +
ggtitle("Age distribution ~ gender:") +
theme(legend.position = "none")

How differ age distributions across years and gender?
# In order to get a split in violin plot, I am using a function geom_split_violin(),
# cc: https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2
GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin,
draw_group = function(self, data, ..., draw_quantiles = NULL) {
data <- transform(data, xminv = x - violinwidth * (x - xmin), xmaxv = x + violinwidth * (xmax - x))
grp <- data[1, "group"]
newdata <- plyr::arrange(transform(data, x = if (grp %% 2 == 1) xminv else xmaxv), if (grp %% 2 == 1) y else -y)
newdata <- rbind(newdata[1, ], newdata, newdata[nrow(newdata), ], newdata[1, ])
newdata[c(1, nrow(newdata) - 1, nrow(newdata)), "x"] <- round(newdata[1, "x"])
if (length(draw_quantiles) > 0 & !scales::zero_range(range(data$y))) {
stopifnot(all(draw_quantiles >= 0), all(draw_quantiles <=
1))
quantiles <- ggplot2:::create_quantile_segment_frame(data, draw_quantiles)
aesthetics <- data[rep(1, nrow(quantiles)), setdiff(names(data), c("x", "y")), drop = FALSE]
aesthetics$alpha <- rep(1, nrow(quantiles))
both <- cbind(quantiles, aesthetics)
quantile_grob <- GeomPath$draw_panel(both, ...)
ggplot2:::ggname("geom_split_violin", grid::grobTree(GeomPolygon$draw_panel(newdata, ...), quantile_grob))
}
else {
ggplot2:::ggname("geom_split_violin", GeomPolygon$draw_panel(newdata, ...))
}
})
geom_split_violin <- function(mapping = NULL, data = NULL, stat = "ydensity", position = "identity", ...,
draw_quantiles = NULL, trim = TRUE, scale = "area", na.rm = FALSE,
show.legend = NA, inherit.aes = TRUE) {
layer(data = data, mapping = mapping, stat = stat, geom = GeomSplitViolin,
position = position, show.legend = show.legend, inherit.aes = inherit.aes,
params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...))
}
data %>%
ggplot(aes(x = Y, y = age, fill = gender)) +
geom_split_violin() +
theme_minimal() +
ggtitle("Age distribution ~ gender throughout years") +
xlab("year")

Let’s check the boxplots for age distribution among races:
data %>%
ggplot(aes(y=age, fill = race)) +
geom_boxplot() +
stat_boxplot(geom = 'errorbar') +
facet_grid(~race) +
scale_x_discrete() +
theme_minimal() +
ggtitle("Age distribution ~ race:") +
xlab("race") +
theme(legend.position = "none")

What are the shooting counts for each gender?
summary(data$gender)
## Female Male
## 222 4673
data %>%
group_by(gender) %>%
summarize(counts = n()) %>%
ggplot(aes(x = gender, y = counts, fill = counts)) +
geom_bar(stat="identity") +
labs(x = "", y = "shootings", title = "Shooting cases in each quarter:") +
scale_fill_continuous(low = "steelblue", high = "purple") +
theme_classic() +
geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
theme(legend.position = "none")

How many shootings were recorded?
summary(data$body_camera)
## No Yes
## 4317 578
data %>%
group_by(body_camera) %>%
summarize(counts = n()) %>%
ggplot(aes(x = body_camera, y = counts, fill = counts)) +
geom_bar(stat="identity") +
labs(x = "", y = "counts", title = "Shooting recorded?") +
scale_fill_continuous(low = "pink", high = "red") +
theme_classic() +
geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
theme(legend.position = "none")

What are the shooting counts distribution concerning race?
summary(data$race)
## Asian Black Hispanic Native Other White
## 93 1298 902 78 48 2476
data %>%
group_by(race) %>%
summarize(counts = n()) %>%
ggplot(aes(x = race, y = counts, fill = race)) +
geom_bar(stat="identity") +
labs(x = "race", y = "shootings", title = "Shooting cases ~ race:") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black")

Which weapons were the most popular among the shot people?
summary(data$arms_category)
## Blunt instruments Electrical devices Explosives
## 122 24 4
## Guns Hand tools Multiple
## 2764 1 54
## Other unusual objects Piercing objects Sharp objects
## 192 29 818
## Unarmed Unknown Vehicles
## 348 418 121
data %>%
group_by(arms_category) %>%
summarize(counts = n()) %>%
ggplot(aes(x = counts, y = reorder(arms_category, counts), fill = arms_category)) +
geom_bar(stat="identity") +
labs(y="weapon", title= "Weapons used by shot people:") +
theme_classic() +
theme(legend.position = "none") +
geom_text(aes(label = counts), hjust = -0.1, size = 3, color = "black")

What are the causes of death?
summary(data$manner_of_death)
## shot shot and Tasered
## 4647 248
data %>%
group_by(manner_of_death) %>%
summarize(counts = n()) %>%
ggplot(aes(x = manner_of_death, y = counts, fill = counts)) +
scale_fill_continuous(low = "lightgreen", high = "darkgreen") +
geom_bar(stat="identity") +
labs(x = "", y = "counts", title = "Death causes:") +
theme_classic() +
geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
theme(legend.position = "none")

What are the states, where occured more than 120 shooting cases?
data %>%
group_by(state) %>%
summarize(counts = n()) %>%
filter(counts > 120) %>%
mutate(state = factor(state, state)) %>%
ggplot(aes(y=reorder(state, counts), x = counts, fill = counts)) +
geom_bar(stat = "identity") +
scale_fill_continuous(low = "pink", high = "purple", label = scales::comma) +
labs(x = "shootings", y = "state", title = "States where occured more than 120 shootings:") +
theme_classic() +
geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
theme(legend.position = "none")

Let’s present this data on the U.S. map:
val <- data %>%
group_by(state) %>%
summarize(counts = n())
plot_usmap(regions = "states", data = val, values = "counts", color = "black") +
scale_fill_continuous(low = "white", high = "black", name = "shooting cases", label = scales::comma) +
ggtitle("Shooting cases in each state:") +
theme(legend.position = "right", plot.title = element_text(size = 14))

What are the cities, where occured more than 20 shootings?
data %>%
group_by(city) %>%
summarize(counts = n()) %>%
filter(counts > 20) %>%
mutate(city = factor(city, city)) %>%
ggplot(aes(y=reorder(city, counts), x = counts, fill = counts)) +
scale_fill_continuous(low = "orange", high = "brown") +
geom_bar(stat = "identity") +
labs(y = "city", title = "Cities where occured more than 20 shootings:") +
theme_classic() +
geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
theme(legend.position = "none")

How many subjects wanted to flee and in which way?
summary(data$flee)
## Car Foot Not fleeing Other
## 820 642 3073 360
data %>%
group_by(flee) %>%
summarize(counts = n()) %>%
mutate(flee = factor(flee, flee)) %>%
ggplot(aes(y=reorder(flee, counts), x = counts, fill = counts)) +
scale_fill_continuous(low = "lightgrey", high = "darkgrey") +
geom_bar(stat = "identity") +
labs(x = "counts", y = "flee manner", title = "How shot people tried to flee:") +
theme_classic() +
geom_text(aes(label = counts), hjust = -0.1, size = 4, color = "black") +
theme(legend.position = "none")

How many subjects showed signs of mental illness?
summary(data$signs_of_mental_illness)
## No Yes
## 3792 1103
data %>%
group_by(signs_of_mental_illness) %>%
summarize(counts = n()) %>%
ggplot(aes(x = signs_of_mental_illness, y = counts, fill = counts)) +
geom_bar(stat="identity") +
labs(x = "", y = "counts", title = "How many shot people show signs of mental illness:") +
scale_fill_continuous(low = "lightblue", high = "lightgreen") +
theme_classic() +
geom_text(aes(label = counts), vjust = -0.5, size = 4, color = "black") +
theme(legend.position = "none")

A simple sunburst plot to present cases proportions given:
1st layer - year
2nd layer - gender
3rd layer - race
sb3 <- data[,c("Y", "gender", "race")] %>%
group_by(Y,gender,race) %>%
summarize(counts=n())
dat <- data.frame(
level1 = sb3$Y,
level2 = sb3$gender,
level3 = sb3$race,
size = sb3$counts,
stringsAsFactors = F
)
tree <- d3_nest(dat, value_cols = "size")
sunburst(tree, width="100%", height=400, legend = T)