We will have a look at the crime statistics in South Africa from 2005-2006 to 2014-2015. The dataset includes information about province, station and crime type.
An official presentation with the various category definitions is available in the “Crime situation in South Africa” PDF.
We will start by checking the “17 Community-Reported Serious Crimes”. These 17 crimes fall into 4 categories: Contact crimes, Contact related crimes, Property related crimes and Other serious crimes. We will check later what each category is made up of.
These 17 crime types don’t include what is labelled as “4 Crimes Detected as a result of Police Action”:
Illegal Possession of firearm and ammunition
Drug Related Crimes
Driving under the influence of alcohol and drugs
Sexual offences detected as a result of police action
library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(ggmap)
library(ggrepel)
# Loading: read the raw crime statistics CSV.
data.source <- read.csv(
  "C:/Users/marc/Desktop/Data/161022_south africa crime/SouthAfricaCrimeStats.csv",
  header = TRUE,
  sep = ","
)
# Cleaning: work on a copy so the raw import stays untouched.
data.cleaned <- data.source
# Columns 8 to 17 hold the yearly counts; strip the "X" from their names so
# that they read like "2005.2006".
names(data.cleaned)[8:17] <- gsub("X", "", colnames(data.cleaned)[8:17], fixed = TRUE)
# Reshape to long format: the year columns become a `year` key column and a
# `freq` value column.
data.cleaned <- gather(data.cleaned, year, freq, 8:17)
# Focus on the "17 Community Reported Crimes" category and total it per
# province and year.
province.overview <- aggregate(
  freq ~ Province + year,
  data = data.cleaned[data.cleaned$Crime.Category == "17 Community Reported Crimes", ],
  FUN = sum
)
# One colour per province.
my.palette <- c(
  "#5bc0eb", "#fde74c", "#9bc53d",
  "#e55934", "#fa7921", "#b6174b",
  "#205921", "#e99f4e", "#999999"
)
ggplot(
  province.overview,
  aes(x = year, y = freq / 1000, colour = Province, group = Province)
) +
  geom_point(size = 5) +
  geom_line(size = 1.2) +
  theme_bw() +
  theme(legend.position = "top") +
  # The y axis is drawn on a log2 scale.
  coord_trans(y = "log2") +
  scale_colour_manual(values = my.palette) +
  # Smaller white dots drawn on top of the coloured points and lines.
  geom_point(colour = "white", size = 3) +
  labs(
    y = "Number of crimes in thousands",
    title = "Number of crimes in thousands per province by year with log2 y coordinate"
  )
Gauteng province is clearly above the other provinces in terms of number of crimes. Northern Cape seems to be the safest province.
The number of crimes decreased in most provinces between 2005-2006 and 2014-2015.
# Keep only the first and the last season, then put them side by side (one row
# per province) so the change between the two can be computed.
province.overview <- filter(province.overview, year %in% c("2005.2006", "2014.2015"))
# Only the key and value columns are passed: the fourth positional argument of
# spread() is `fill`, not a column selection, so nothing else belongs here.
province.overview <- spread(province.overview, year, freq)
# Signed change in the number of crimes between the two seasons.
province.overview <- mutate(province.overview, Size = `2014.2015` - `2005.2006`)
# Label the direction of the change explicitly instead of rewriting the
# stringified sign; a province with no change is labelled "Unchanged".
province.overview <- mutate(
  province.overview,
  `Crime evolution` = case_when(
    Size < 0 ~ "Improved",
    Size > 0 ~ "Deteriorated",
    Size == 0 ~ "Unchanged"
  )
)
# The point size on the map only encodes the magnitude of the change.
province.overview$Size <- abs(province.overview$Size)
# Get geo codes: look up each province by name, suffixed with the country.
province.overview$full.province <- paste(province.overview$Province, "South Africa")
latlong <- geocode(province.overview$full.province)
province.overview <- cbind(province.overview, latlong)
# Get the background map.
# NOTE(review): geocode() and get_map() call external services; availability of
# the "stamen" source should be confirmed for the installed ggmap version.
SA.map <- get_map(
  "South Africa",
  zoom = 5,
  source = "stamen",
  maptype = "watercolor" # toner
)
# Chart: one point per province, sized by the magnitude of the change and
# coloured by its direction.
ggmap(SA.map, extent = "device") +
  geom_point(
    data = province.overview,
    aes(x = lon, y = lat, size = Size, colour = `Crime evolution`)
  ) +
  scale_size_continuous(range = c(4, 10)) +
  # Named values keep each label tied to its colour even when one of the
  # labels does not occur in the data.
  scale_color_manual(
    values = c(
      "Deteriorated" = "#cc0000",
      "Improved" = "#00d27f",
      "Unchanged" = "#999999"
    )
  ) +
  geom_text_repel(
    data = province.overview,
    force = 10,
    aes(label = Province)
  ) +
  ggtitle("Crime number evolution between 2005-2006 and 2014-2015")
Gauteng and Eastern Cape strongly improved between 2005-2006 and 2014-2015.
Western Cape and Limpopo’s crime situation deteriorated.
17 Community Reported Crimes is made up of four categories:
Contact Crimes
Contact-Related Crimes
Property-Related Crimes
Other Serious Crimes
# Keep the four totals that together make up the 17 community reported crimes.
categories.17 <- data.cleaned[data.cleaned$Crime.Category %in%
  c(
    "Total Property Related Crimes",
    "Total Other Serious Crimes",
    "Total Contact Related Crimes",
    "Total Contact Crimes"
  ), ]
# Drop the "Total " prefix including its trailing space, so the labels do not
# start with a blank.
categories.17$Crime.Category <- sub("^Total ", "", categories.17$Crime.Category)
# Country-wide total per category and year.
categories.17.evol <- aggregate(freq ~ Crime.Category + year, data = categories.17, FUN = sum)
# One colour per category.
my.palette <- c("#f74902", "#60605b", "#00b5d6", "#ffbf0d")
ggplot(
  categories.17.evol,
  aes(x = year, y = freq / 1000, colour = Crime.Category, group = Crime.Category)
) +
  geom_point(size = 5) +
  geom_line(size = 1.2) +
  theme_bw() +
  theme(legend.position = "top") +
  scale_colour_manual(values = my.palette) +
  ylab("Number of crimes in thousands") +
  # Smaller white dots drawn on top of the coloured points and lines.
  geom_point(aes(x = year, y = freq / 1000), colour = "white", size = 3) +
  ggtitle("Number of crimes in thousands per category by year") +
  # Value labels; `show.legend` replaces the deprecated `show_guide` and keeps
  # this layer out of the legend.
  geom_label_repel(
    data = categories.17.evol,
    force = 10,
    aes(label = freq / 1000),
    box.padding = unit(0.25, "lines"),
    show.legend = FALSE
  )
The contact crimes category decreased by almost 18% from 2005-2006 to 2014-2015.
Property related crimes stayed fairly stable over time despite a small increase since 2010-2011.
Contact related crimes is the lowest crime category. It is made up of Arson and Malicious damage to property.
Other serious crimes include: Other Theft, Commercial Crime, Shoplifting.
We are now going to check the evolution by Province.
# Compare only the first and the last season, totalled per province and
# category.
categories.17 <- filter(categories.17, year %in% c("2005.2006", "2014.2015"))
categories.17 <- aggregate(
  freq ~ Province + Crime.Category + year,
  data = categories.17,
  FUN = sum
)
# Stacked bars normalised to 1 (position "fill"), one panel per province.
ggplot(categories.17, aes(x = year, y = freq, fill = Crime.Category)) +
  geom_bar(stat = "identity", position = "fill") +
  facet_wrap(~Province) +
  scale_fill_manual(values = my.palette) +
  theme_bw() +
  theme(legend.position = "top") +
  labs(
    y = "share",
    title = "Share of Crime categories for 2005-2006 and 2014-2015 by province"
  )
Those 4 categories are pretty similar in terms of proportion from one province to another.
We can notice that the share of contact crimes decreased in each province while the property related crimes increased.
Let’s now check all crime types.
# 2014-2015 only, without the aggregate ("Total ..." / "17 Community Reported
# Crimes") rows, so that only the individual crime types remain.
sa.crime <- filter(
  data.cleaned,
  year == "2014.2015",
  !(Crime.Category %in% c(
    "Total TRIO Crimes",
    "Total Subcategories Of Aggravated Robbery",
    "Total Sexual Offences",
    "Total Property Related Crimes",
    "Total Other Serious Crimes",
    "Total Crimes Dependent on Police Action for Detection",
    "Total Contact Related Crimes",
    "Total Contact Crimes",
    "17 Community Reported Crimes"
  ))
)
# Country-wide total per crime type.
sa.crime <- aggregate(freq ~ Crime.Category, data = sa.crime, FUN = sum)
# Horizontal bars ordered by frequency, each labelled with its count.
ggplot(sa.crime, aes(x = reorder(Crime.Category, freq), y = freq, label = freq)) +
  geom_bar(stat = "identity", position = "dodge", fill = "#222f5b") +
  coord_flip() +
  theme_bw() +
  labs(
    x = "",
    y = "Number of crimes in 2014-2015",
    title = "Number of crimes in South Africa in 2014-2015"
  ) +
  geom_text(hjust = -0.5) +
  scale_y_continuous(
    breaks = seq(0, 400000, 100000),
    limits = c(0, 400000)
  )
Moving forward, we will focus on Western Cape, mostly because crime is increasing there and it is the province with the second-highest number of crimes.
# Western Cape, 2014-2015 only, without the aggregate ("Total ..." / "17
# Community Reported Crimes") rows.
western.cape <- filter(data.cleaned, Province == "Western Cape", year == "2014.2015")
western.cape <- filter(western.cape, !(Crime.Category %in%
  c(
    "Total TRIO Crimes",
    "Total Subcategories Of Aggravated Robbery",
    "Total Sexual Offences",
    "Total Property Related Crimes",
    "Total Other Serious Crimes",
    "Total Crimes Dependent on Police Action for Detection",
    "Total Contact Related Crimes",
    "Total Contact Crimes",
    "17 Community Reported Crimes"
  )))
# Total number of crimes per police station.
western.cape <- aggregate(freq ~ Station, data = western.cape, FUN = sum)
# Build a search string per station and geocode it.
western.cape$full.station <- paste(western.cape$Station, "South Africa Western Cape Police station")
latlong <- geocode(western.cape$full.station)
# 10 stations weren't found and have to be added manually.
# NOTE(review): the row numbers below depend on the row order of `western.cape`
# and the values are assumed to be given as (lon, lat) -- verify both whenever
# the input data or the geocoding results change.
latlong[25, ] <- c(22.117842, -34.176171)
latlong[30, ] <- c(18.465467, -34.031147)
latlong[31, ] <- c(18.237770, -31.812266)
latlong[33, ] <- c(22.435730, -33.575786)
latlong[36, ] <- c(18.565269, -33.924385)
latlong[47, ] <- c(18.978046, -33.866305)
latlong[76, ] <- c(18.660040, -34.042960)
latlong[116, ] <- c(18.872605, -33.351015)
latlong[131, ] <- c(18.836733, -34.114233)
latlong[136, ] <- c(18.425732, -33.910530)
western.cape <- cbind(western.cape, latlong)
# Flag each station as above or below the provincial median. This is
# vectorised, so the median is computed once rather than once per row, and an
# empty table is handled without error.
western.cape$Crimes.median <- ifelse(
  western.cape$freq > median(western.cape$freq),
  "Above",
  "Below"
)
# Get the background map.
western.cape.map <- get_map(
  location = c(lon = 20.580408, lat = -32.768878),
  zoom = 7,
  source = "stamen",
  maptype = "toner"
)
# Chart: one semi-transparent point per station, coloured by its position
# relative to the median.
ggmap(western.cape.map, extent = "device") +
  geom_point(
    data = western.cape, size = 4, alpha = 0.6,
    aes(x = lon, y = lat, colour = Crimes.median)
  ) +
  # Named values keep each label tied to its colour even when only one of the
  # two labels occurs.
  scale_color_manual(values = c("Above" = "#cc0000", "Below" = "#00d27f")) +
  ggtitle("Western Cape Police stations in 2014-2015")
Colours indicate whether each station is above or below the Western Cape crimes’ median.
The “toner” map style has been chosen instead of the “watercolor” style (that you can see at the beginning of the notebook) in order to show the main roads as well as the borders of the Western Cape province (delimited by the dotted line).
Most of the stations which are above the crime median are located in Cape Town. This is most likely due to the population density, which is probably higher there.
# Aggregate rows ("Total ..." / "17 Community Reported Crimes") that must be
# left out so that only the individual crime types are compared.
excluded.categories <- c(
  "Total TRIO Crimes",
  "Total Subcategories Of Aggravated Robbery",
  "Total Sexual Offences",
  "Total Property Related Crimes",
  "Total Other Serious Crimes",
  "Total Crimes Dependent on Police Action for Detection",
  "Total Contact Related Crimes",
  "Total Contact Crimes",
  "17 Community Reported Crimes"
)
# Western Cape: total per crime type in 2014-2015.
western.cape.cat <- filter(data.cleaned, Province == "Western Cape", year == "2014.2015")
western.cape.cat <- filter(western.cape.cat, !(Crime.Category %in% excluded.categories))
western.cape.cat <- aggregate(freq ~ Crime.Category, data = western.cape.cat, FUN = sum)
western.cape.cat <- arrange(western.cape.cat, Crime.Category)
colnames(western.cape.cat) <- c("Crime.Category", "Western Cape")
# South Africa: total per province and crime type first, then the mean of
# those provincial totals per crime type.
south.africa.cat <- filter(data.cleaned, year == "2014.2015")
south.africa.cat <- filter(south.africa.cat, !(Crime.Category %in% excluded.categories))
south.africa.cat <- aggregate(freq ~ Province + Crime.Category, data = south.africa.cat, FUN = sum)
south.africa.cat <- aggregate(freq ~ Crime.Category, data = south.africa.cat, FUN = mean)
colnames(south.africa.cat) <- c("Crime.Category", "South Africa")
# Match the two tables on the crime type instead of binding them by row
# position, so a missing or differently ordered category cannot misalign the
# values.
western.cape.cat <- inner_join(western.cape.cat, south.africa.cat, by = "Crime.Category")
# Long format: one row per crime type and area.
western.cape.cat <- gather(western.cape.cat, area, freq, -Crime.Category)
# One point per area for each crime type, joined by a grey line.
ggplot(
  western.cape.cat,
  aes(x = Crime.Category, y = freq, colour = area, group = Crime.Category)
) +
  geom_line(size = 1.2, colour = "grey") +
  geom_point(size = 5, alpha = 0.9) +
  # Named values make the area-to-colour assignment explicit.
  scale_colour_manual(values = c("South Africa" = "#70bae7", "Western Cape" = "#222f5b")) +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "top") +
  ggtitle("2014-2015 Western Cape crimes vs average crimes in South Africa") +
  ylab("Number of crimes")