Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
The original SAPS data visualisation appears to be an attempt to communicate two things: the proportional distribution of serious crimes in the Reporting Period April 2019-March 2020, with prior period comparative and a snapshot of the proportion of active policing relative to passive policing in combating serious crimes in the Reporting Period April 2019 - March 2020, with prior period comparative.
The audience, as outlined on the SAPS website, are policy-makers.
The visualisation chosen had the following three main issues:
Based on the definitions by Pandey et al. (2015) and Kirk(2012) on what constitutes deceptive data visualisation, we can conclude that the SAPS data visualisation is deceptive.
Reference
The following code was used to fix the issues identified in the original.
# call relevant packages
library(tidyr)
library(dplyr)
library(magrittr)
library(knitr)
library(openxlsx)
library(ggplot2)
library(ggpubr)
library(RColorBrewer)
library(colorBlindness)
# establish current working directory
getwd()
## [1] "/Users/pndunguokech/Desktop/Data Visualisation_Assign2"
# specify working directory
setwd("~/Desktop")
# import SAPS crime statistics dataset.
SAPS_DATA <- read.xlsx("~/Desktop/Data Visualisation_Assign2/Crime-Statistics 2019_2020.xlsx", sheet = "National Crime stats 2019_20", startRow = 13)
# inspect imported dataset
head(SAPS_DATA)
## CRIME.CATEGORY 2010/2011 2011/2012
## 1 CONTACT CRIMES ( CRIMES AGAINST THE PERSON) NA NA
## 2 Murder 15893 15554
## 3 Sexual Offences 64921 60539
## 4 Attempted murder 15360 14730
## 5 Assault with the intent to inflict grievous bodily harm 197470 191612
## 6 Common assault 184103 180165
## 2012/2013 2013/2014 2014/2015 2015/2016 2016/2017 2017/2018 2018/2019
## 1 NA NA NA NA NA NA NA
## 2 16213 17023 17805 18673 19016 20336 21022
## 3 60888 56680 53617 51895 49660 50108 52420
## 4 16236 16989 17537 18127 18205 18233 18980
## 5 185050 182333 182556 182933 170616 167352 170979
## 6 171653 166081 161486 164958 156450 156243 162012
## 2019/2020 Case.Diff %.Change
## 1 NA NA NA
## 2 21325 303 0.01441347
## 3 53293 873 0.01665395
## 4 18635 -345 -0.01817703
## 5 166720 -4259 -0.02490949
## 6 165494 3482 0.02149224
# create a list of row titles starting with the word "Total"
Trim_list <- c(grep("Total", SAPS_DATA$CRIME.CATEGORY))
# create a subset of SAPS_DATA consisting of CRIME.CATEGORY filtered first word Total
df1 <- slice(SAPS_DATA, Trim_list)
# inspect new subset
head(df1)
## CRIME.CATEGORY 2010/2011 2011/2012
## 1 Total Contact Crimes ( Crimes Against The Person) 633228 615935
## 2 Total Sexual Offences NA NA
## 3 Total Sexual Offences 64921 60539
## 4 Total Contact-Related Crimes 128971 125903
## 5 Total Property-Related Crimes 528957 530624
## 6 Total Other Serious Crimes 525194 528296
## 2012/2013 2013/2014 2014/2015 2015/2016 2016/2017 2017/2018 2018/2019
## 1 608724 611574 616973 623223 608321 601366 617210
## 2 NA NA NA NA NA NA NA
## 3 60888 56680 53617 51895 49660 50108 52420
## 4 124691 123441 125789 124804 120730 115361 117172
## 5 558334 558228 553487 543524 540653 507975 495161
## 6 517252 510748 499698 479075 469276 438113 444447
## 2019/2020 Case.Diff %.Change
## 1 621282 4072 0.00659743
## 2 NA NA NA
## 3 53293 873 0.01665395
## 4 112244 -4928 -0.04205783
## 5 469224 -25937 -0.05238094
## 6 426569 -17878 -0.04022527
# subset df1 further to create a subset of crime incidents relating to community reporting & rename crime categories
df2 <- df1[-c(2,3,7,8),]
df2$CRIME.CATEGORY <- c("Contact crime", "Contact-related crimes", "Property-related crimes", "Other serious crimes")
# create a subset of crime incidents relating to Police action
df3 <- SAPS_DATA[41:44,]
# combine df2 and df3 to create a subset of aggregated categories of serious crimes
df4 <- rbind(df2,df3) %>% select(CRIME.CATEGORY,`2018/2019`:`2019/2020`)
# create vector for new variable, Policing
Policing <- cbind(c((rep("Community reporting", times = 4)),(rep("Police action", times = 4))))
# remove rows not required, add Policing variable then transform years to a variable column
df5 <- cbind(df4,Policing) %>% gather(`2018/2019`, `2019/2020`, key = "year", value = "incidents")
# inspect transformed dataset
head(df5)
## CRIME.CATEGORY Policing year
## 1 Contact crime Community reporting 2018/2019
## 2 Contact-related crimes Community reporting 2018/2019
## 3 Property-related crimes Community reporting 2018/2019
## 4 Other serious crimes Community reporting 2018/2019
## 5 Illegal possession of firearms and ammunition Police action 2018/2019
## 6 Drug-related crime Police action 2018/2019
## incidents
## 1 617210
## 2 117172
## 3 495161
## 4 444447
## 5 15736
## 6 232657
# Convert crime category variable to factors
fct_list <- unique(df5$CRIME.CATEGORY)
df5$CRIME.CATEGORY<- factor(df5$CRIME.CATEGORY, levels=c(fct_list), ordered=TRUE)
levels(df5$CRIME.CATEGORY)
## [1] "Contact crime"
## [2] "Contact-related crimes"
## [3] "Property-related crimes"
## [4] "Other serious crimes"
## [5] "Illegal possession of firearms and ammunition"
## [6] "Drug-related crime"
## [7] "Driving under the influence of alcohol or drugs"
## [8] "Sexual Offences detected as a result of police action"
str(df5$CRIME.CATEGORY)
## Ord.factor w/ 8 levels "Contact crime"<..: 1 2 3 4 5 6 7 8 1 2 ...
# change incidents to report in millions in order give a better sense of the scale of crime
df5$incidents <- (df5$incidents/1000000)
# create visualisation bar plot
p <- ggplot(data=df5, aes(fill=CRIME.CATEGORY, y=incidents, x=Policing))
# Fill Bar plot showing proportions of serious crime reported by category
p1 <- p + geom_bar(position="fill", stat="identity") +
theme(legend.position = "bottom",legend.title=element_blank(),plot.margin = margin(0.15, 0.15, 0.15, 0.15, "cm")) +
scale_y_continuous(labels = scales::percent_format()) +
labs(title = "Serious Crimes: April 2019-March 2020",subtitle = "Proportional Distribution of Serious Crimes, by Category", y = "Proportion", x = "Policing") +
facet_wrap(~year, ncol = 2) + scale_fill_brewer(palette = "Set3") +
theme(plot.title = element_text(size = 10),plot.subtitle = element_text(size = 8), axis.title.x = element_text(size = 8), axis.title.y = element_text(size = 8), legend.text = element_text(size = 6), axis.text = element_text(size = 6))
# Stacked Bar plot showing total reported serious crime incidents
p2 <- p + geom_bar(stat="identity") +
theme(legend.position = "none",legend.title=element_blank(),plot.margin = margin(0.15, 0.15, 0.15, 0.15, "cm")) +
labs(title = "Serious Crimes:April 2019-March 2020",subtitle = "Total Incidents reported", y = "Incidents (millions)", x = "Policing", caption = "Source:https://www.saps.gov.za/services/crimestats.php") +
facet_wrap(~year, ncol = 2) + scale_fill_brewer(palette = "Set3") +
theme(plot.title = element_text(size = 10),plot.subtitle = element_text(size = 8), axis.title.x = element_text(size = 8), axis.title.y = element_text(size = 8), legend.text = element_text(size = 6), axis.text = element_text(size = 6))
# create single plot by combining p1 and p2
p_SAPS <- ggarrange(p1,p2,common.legend = TRUE,nrow = 2,legend = "bottom")
# inspect plot colours for colour blindness
# *cvdPlot(p_SAPS)*
# print visualisation
#*p_SAPS*
Data Reference
The following plot fixes the main issues in the original.