Summary

We will look at the crime statistics in South Africa from 2005-2006 to 2014-2015. The dataset includes information about province, station and crime type.

17 Community Reported Crimes

We will start by checking the “17 Community-Reported Serious Crimes”. These 17 crimes fall into four categories: contact crimes, contact-related crimes, property-related crimes and other serious crimes. We will look later at what each category is made of.

These 17 crime types don’t include what is labelled as “4 Crimes Detected as a result of Police Action”:

library(dplyr)
library(tidyr)
library(ggplot2)
library(gridExtra)
library(ggmap)
library(ggrepel)

# Read the raw crime statistics CSV (comma separated, with a header row).
data.source <- read.csv(
  "C:/Users/marc/Desktop/Data/161022_south africa crime/SouthAfricaCrimeStats.csv"
)

# Columns 8 to 17 hold the yearly counts. Remove the "X" from their names
# (presumably the prefix read.csv adds to names that start with a digit),
# then reshape to long format: one `year` / `freq` pair per row.
data.cleaned <- data.source
names(data.cleaned)[8:17] <- gsub("X", "", colnames(data.cleaned)[8:17])
data.cleaned <- data.cleaned %>%
  gather(year, freq, 8:17)

# Province totals per year, restricted to the "17 Community Reported Crimes"
# category rows.
province.overview <- data.cleaned %>%
  filter(Crime.Category == "17 Community Reported Crimes")
province.overview <- aggregate(
  freq ~ Province + year,
  data = province.overview,
  FUN = sum
)

# Nine colours, used for the Province colour scale of the chart below.
my.palette <- c(
  "#5bc0eb", "#fde74c", "#9bc53d",
  "#e55934", "#fa7921", "#b6174b",
  "#205921", "#e99f4e", "#999999"
)

# Yearly crime counts (in thousands) per province, one line per province.
# The y axis is drawn on a log2-transformed coordinate system.
ggplot(
  province.overview,
  aes(x = year, y = freq / 1000, colour = Province, group = Province)
) +
  geom_point(size = 5) +
  geom_line(size = 1.2) +
  # Smaller white point drawn last, on top of each coloured point; x and y
  # are inherited from the plot-level aesthetics.
  geom_point(colour = "white", size = 3) +
  scale_colour_manual(values = my.palette) +
  coord_trans(y = "log2") +
  ylab("Number of crimes in thousands") +
  ggtitle("Number of crimes in thousands per province by year with log2 y coordinate") +
  theme_bw() +
  theme(legend.position = "top")

# Compare the first and the last reporting period for every province and show
# the change on a map of South Africa.

# Keep only the two periods being compared.
province.overview <- filter(province.overview,
                            year %in% c("2005.2006", "2014.2015"))

# One row per province, one column per period. Only the key and value columns
# are passed: the fourth positional argument of spread() is `fill`, not a
# column selection, so nothing else belongs there.
province.overview <- spread(province.overview, year, freq)

# Signed change between the two periods (positive = more crimes reported).
province.overview <- mutate(province.overview,
                            Size = `2014.2015` - `2005.2006`)

# Label the direction of the change explicitly, including the "no change"
# case, instead of pattern-replacing the text of sign().
province.overview <- mutate(
  province.overview,
  `Crime evolution` = ifelse(Size < 0, "Improved",
                             ifelse(Size > 0, "Deteriorated", "Unchanged"))
)

# The point size on the map encodes the magnitude of the change only.
province.overview$Size <- abs(province.overview$Size)

#get geo code
# NOTE(review): geocode() and get_map() query external services; confirm that
# the required credentials are configured and that the "stamen" source is
# still supported by the installed ggmap version.
province.overview$full.province <- paste(province.overview$Province, "South Africa")

latlong <- geocode(province.overview$full.province)

province.overview <- cbind(province.overview, latlong)

#get map
SA.map <- get_map("South Africa",
                  zoom = 5,
                  source = "stamen",
                  maptype = "watercolor") #toner

#chart
# Colours are matched to the labels by name, so each label keeps its colour
# regardless of which labels are present in the data.
ggmap(SA.map, extent = "device") +
      geom_point(data = province.overview,
                 aes(x = lon, y = lat,
                     size = Size,
                     colour = `Crime evolution`)) +
      scale_size_continuous(range = c(4, 10)) +
      scale_color_manual(values = c("Deteriorated" = "#cc0000",
                                    "Improved" = "#00d27f",
                                    "Unchanged" = "#999999")) +
      geom_text_repel(data = province.overview,
                      force = 10,
                      aes(label = Province)) +
      ggtitle("Crime number evolution between 2005-2006 and 2014-2015")

Four crime categories

17 Community Reported Crimes is made up of four categories:

# Yearly evolution of the four category totals that make up the
# "17 Community Reported Crimes".

# Keep only the four "Total ..." category rows.
categories.17 <- data.cleaned[data.cleaned$Crime.Category %in%
                                    c("Total Property Related Crimes",
                                      "Total Other Serious Crimes",
                                      "Total Contact Related Crimes",
                                      "Total Contact Crimes"),]

# Drop the word "Total" from the category labels.
categories.17$Crime.Category <- gsub("Total", "", categories.17$Crime.Category)

# Sum over all rows per category and year.
categories.17.evol <- aggregate(data = categories.17,
                                freq ~ Crime.Category + year,
                                FUN = "sum")

# Four colours, one per category.
my.palette <- c("#f74902", "#60605b", "#00b5d6", "#ffbf0d")

ggplot(categories.17.evol,
       aes(x = year, y = freq / 1000,
           colour = Crime.Category, group = Crime.Category)) +
      geom_point(size = 5) +
      geom_line(size = 1.2) +
      theme_bw() +
      theme(legend.position = "top") +
      scale_colour_manual(values = my.palette) +
      ylab("Number of crimes in thousands") +
      # Smaller white point drawn on top of each coloured point.
      geom_point(aes(x = year, y = freq / 1000), colour = "white", size = 3) +
      ggtitle("Number of crimes in thousands per category by year") +
      # Value labels; `show.legend` is the current name of the argument that
      # keeps a layer out of the legend (`show_guide` is deprecated).
      geom_label_repel(data = categories.17.evol,
                       force = 10,
                       aes(label = freq / 1000),
                       box.padding = unit(0.25, "lines"),
                       show.legend = FALSE)

# Share of each category within a province, first period versus last period.
categories.17 <- categories.17 %>%
  filter(year %in% c("2005.2006", "2014.2015"))

# Totals per province, category and period.
categories.17 <- aggregate(
  freq ~ Province + Crime.Category + year,
  data = categories.17,
  FUN = sum
)

# position = "fill" stacks the bars and rescales each one to a total of 1,
# so the y axis shows shares rather than counts.
ggplot(categories.17, aes(x = year, y = freq, fill = Crime.Category)) +
  geom_bar(stat = "identity", position = "fill") +
  facet_wrap(~Province) +
  scale_fill_manual(values = my.palette) +
  ylab("share") +
  ggtitle("Share of Crime categories for 2005-2006 and 2014-2015 by province") +
  theme_bw() +
  theme(legend.position = "top")

# Country-wide number of crimes per crime category in 2014-2015.
# The "Total ..." rows and the "17 Community Reported Crimes" row are left out
# (presumably because they aggregate the other categories -- verify against
# the data description).
sa.crime <- data.cleaned %>%
  filter(
    year == "2014.2015",
    !Crime.Category %in% c(
      "Total TRIO Crimes",
      "Total Subcategories Of Aggravated Robbery",
      "Total Sexual Offences",
      "Total Property Related Crimes",
      "Total Other Serious Crimes",
      "Total Crimes Dependent on Police Action for Detection",
      "Total Contact Related Crimes",
      "Total Contact Crimes",
      "17 Community Reported Crimes"
    )
  )

sa.crime <- aggregate(freq ~ Crime.Category, data = sa.crime, FUN = sum)

# Horizontal bars ordered by count, each annotated with its value.
ggplot(
  sa.crime,
  aes(x = reorder(Crime.Category, freq), y = freq, label = freq)
) +
  geom_bar(stat = "identity", position = "dodge", fill = "#222f5b") +
  geom_text(hjust = -0.5) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(0, 400000, by = 100000),
    limits = c(0, 400000)
  ) +
  xlab("") +
  ylab("Number of crimes in 2014-2015") +
  ggtitle("Number of crimes in South Africa in 2014-2015") +
  theme_bw()

Focus on Western Cape

From here on, we will focus on Western Cape, mostly because crime there is increasing and it is the province with the second-highest number of crimes.

# Total number of crimes per Western Cape police station in 2014-2015,
# together with the coordinates of each station.
# The "Total ..." rows and the "17 Community Reported Crimes" row are left out.
western.cape <- data.cleaned %>%
  filter(
    Province == "Western Cape",
    year == "2014.2015",
    !Crime.Category %in% c(
      "Total TRIO Crimes",
      "Total Subcategories Of Aggravated Robbery",
      "Total Sexual Offences",
      "Total Property Related Crimes",
      "Total Other Serious Crimes",
      "Total Crimes Dependent on Police Action for Detection",
      "Total Contact Related Crimes",
      "Total Contact Crimes",
      "17 Community Reported Crimes"
    )
  )

western.cape <- aggregate(freq ~ Station, data = western.cape, FUN = sum)

# Search string handed to the geocoder for each station.
western.cape$full.station <- paste(
  western.cape$Station,
  "South Africa Western Cape Police station"
)

latlong <- geocode(western.cape$full.station)

# Ten stations were not found by the geocoder; their coordinates are entered
# by hand. Rows are addressed by position, so these overrides are only valid
# for the station order produced by the aggregate() call above.
# Assumes the columns of latlong are (lon, lat), in that order -- TODO confirm.
latlong[25, ] <- c(22.117842, -34.176171)
latlong[30, ] <- c(18.465467, -34.031147)
latlong[31, ] <- c(18.237770, -31.812266)
latlong[33, ] <- c(22.435730, -33.575786)
latlong[36, ] <- c(18.565269, -33.924385)
latlong[47, ] <- c(18.978046, -33.866305)
latlong[76, ] <- c(18.660040, -34.042960)
latlong[116, ] <- c(18.872605, -33.351015)
latlong[131, ] <- c(18.836733, -34.114233)
latlong[136, ] <- c(18.425732, -33.910530)

# Attach the coordinates to the station totals (row order is unchanged).
western.cape <- cbind(western.cape, latlong)

# Flag every station as "Above" or "Below" the median of the station totals.
# Stations exactly at the median are labelled "Below".
# The comparison is vectorised, so the median is computed once and the
# statement also works for a table with zero rows.
western.cape$Crimes.median <- ifelse(
      western.cape$freq > median(western.cape$freq),
      "Above",
      "Below"
)
      
# Base map: fixed centre point and zoom level, Stamen "toner" tiles.
western.cape.map <- get_map(
  location = c(lon = 20.580408, lat = -32.768878),
  zoom = 7,
  source = "stamen",
  maptype = "toner"
)

# One semi-transparent point per station, coloured by its position relative
# to the median station total.
ggmap(western.cape.map, extent = "device") +
  geom_point(
    data = western.cape,
    aes(x = lon, y = lat, colour = Crimes.median),
    size = 4,
    alpha = 0.6
  ) +
  scale_color_manual(values = c("#cc0000", "#00d27f")) +
  ggtitle("Western Cape Police stations in 2014-2015")

# Western Cape totals per crime category for 2014-2015.
# The "Total ..." rows and the "17 Community Reported Crimes" row are left out.
western.cape.cat <- data.cleaned %>%
  filter(
    Province == "Western Cape",
    year == "2014.2015",
    !Crime.Category %in% c(
      "Total TRIO Crimes",
      "Total Subcategories Of Aggravated Robbery",
      "Total Sexual Offences",
      "Total Property Related Crimes",
      "Total Other Serious Crimes",
      "Total Crimes Dependent on Police Action for Detection",
      "Total Contact Related Crimes",
      "Total Contact Crimes",
      "17 Community Reported Crimes"
    )
  )

western.cape.cat <- aggregate(
  freq ~ Crime.Category,
  data = western.cape.cat,
  FUN = sum
)

# Sort by category and rename the value column after the area it describes.
western.cape.cat <- western.cape.cat %>%
  arrange(Crime.Category)
names(western.cape.cat) <- c("Crime.Category", "Western Cape")



# National reference for 2014-2015: the average, over provinces, of each
# province's total per crime category.
# The "Total ..." rows and the "17 Community Reported Crimes" row are left out.
south.africa.cat <- data.cleaned %>%
  filter(
    year == "2014.2015",
    !Crime.Category %in% c(
      "Total TRIO Crimes",
      "Total Subcategories Of Aggravated Robbery",
      "Total Sexual Offences",
      "Total Property Related Crimes",
      "Total Other Serious Crimes",
      "Total Crimes Dependent on Police Action for Detection",
      "Total Contact Related Crimes",
      "Total Contact Crimes",
      "17 Community Reported Crimes"
    )
  )

# Step 1: total per province and category.
south.africa.cat <- aggregate(
  freq ~ Province + Crime.Category,
  data = south.africa.cat,
  FUN = sum
)

# Step 2: mean of those provincial totals per category.
south.africa.cat <- aggregate(
  freq ~ Crime.Category,
  data = south.africa.cat,
  FUN = mean
)

# Sort by category and rename the value column after the area it describes.
south.africa.cat <- south.africa.cat %>%
  arrange(Crime.Category)
names(south.africa.cat) <- c("Crime.Category", "South Africa")



# Put the Western Cape totals and the national averages side by side.
# The tables are matched on Crime.Category rather than bound by row position,
# so the values stay aligned even if the two tables differ in row order or in
# the set of categories. all = TRUE keeps a category that is present in only
# one table (its missing value becomes NA).
western.cape.cat <- merge(western.cape.cat, south.africa.cat,
                          by = "Crime.Category", all = TRUE)

# Long format for plotting: one row per category and area.
western.cape.cat <- gather(western.cape.cat, area, freq, -Crime.Category)

# Dumbbell-style comparison: for every crime category a grey segment joins
# the Western Cape value and the South Africa value; the points are coloured
# by area.
ggplot(
  western.cape.cat,
  aes(x = Crime.Category, y = freq, colour = area, group = Crime.Category)
) +
  geom_line(size = 1.2, colour = "grey") +
  geom_point(size = 5, alpha = 0.9) +
  coord_flip() +
  scale_colour_manual(values = c("#70bae7", "#222f5b")) +
  ylab("Number of crimes") +
  ggtitle("2014-2015 Western Cape crimes vs average crimes in South Africa") +
  theme_bw() +
  theme(legend.position = "top")