Load Packages Required for Data Manipulation, Analytics and Visualisation

library(ggplot2) # Useful for creating plots
library(dplyr)  # Useful for data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr) # Useful for creating nice tables
library(esquisse) # Useful for data visualization
library(lubridate) # for date manipulation
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(hrbrthemes) # for tooltips
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(plotly) # for tooltips
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gapminder) # for tooltips
library(tidyverse) # for data manipulation
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.4     v purrr   0.3.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x plotly::filter()         masks dplyr::filter(), stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(tibble)
library(colorspace)
library(githubinstall)
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
## 
##     wind
library(devtools)
## Loading required package: usethis

Introduction

To complete Assignment #2 for Math2024 - Data Visualisation and Communication I have sourced a Crime Data Set from Data Gov called:

Crime statistics (authored by the South Australia Police - 15/9/2020) - https://data.gov.au/dataset/ds-sa-860126f7-eeb5-4fbc-be44-069aa0467d11/details?q=crime

This data set records all crimes that occurred in South Australia between July 2019 and June 2020. Using this data it was possible to investigate the following review areas and questions:

Review Area 1 - Total Crime Numbers

Question 1 - How many crimes were committed in South Australia in total through the period 1st of July 2019 to 30 June 2020?

Question 2 - What type of crimes were committed during that time period? - Assess by Category, Class and Type

Review Area 2 - Plotting Crime against Dates

Question 1 - What months had the highest crime rates?

Question 2 - Did the type of crimes committed vary across different months?

Review Area 3 - Plotting Crime by Region

Question 1 - What areas within Adelaide have the highest crime levels?

# Data Import

# Read the comma-separated crime extract, keeping text columns as character.
# The argument name is written out in full (`stringsAsFactors`) rather than
# relying on R's partial argument matching.
CrimeData <- read.table(
  "SA Crime Data 19-20 3.csv",
  header = TRUE,
  stringsAsFactors = FALSE,
  sep = ","
)

# Check Data

# View() opens a data viewer, so it is only called in an interactive session
# and is skipped when the document is rendered non-interactively.
if (interactive()) {
  View(CrimeData)
}

# Clean the Data

# Drop every row that has a missing value in any column, then report the
# dimensions of what is left.
CrimeData2 <- CrimeData %>%
  na.omit()
dim(CrimeData2)
## [1] 93206     9
# Per-column count of remaining missing values.
CrimeData2 %>%
  is.na() %>%
  colSums()
##               Reported.Date           Suburb...Incident 
##                           0                           0 
##         Postcode...Incident  Offence.Level1.Description 
##                           0                           0 
## Offence.Level.2.Description Offence.Level.3.Description 
##                           0                           0 
##               Offence.count                         lat 
##                           0                           0 
##                         lon 
##                           0
# Give the offence-level, suburb and postcode columns shorter names
# (new name = old name); column positions are unchanged.
CrimeData2 <- rename(
  CrimeData2,
  Level_1 = `Offence.Level1.Description`,
  Level_2 = `Offence.Level.2.Description`,
  Level_3 = `Offence.Level.3.Description`,
  Suburb = `Suburb...Incident`,
  PC = `Postcode...Incident`
)

# Inspect the column types after renaming.
str(CrimeData2)
## 'data.frame':    93206 obs. of  9 variables:
##  $ Reported.Date: chr  "1/07/2019" "1/07/2019" "1/07/2019" "1/07/2019" ...
##  $ Suburb       : chr  "ADELAIDE" "ADELAIDE" "ADELAIDE" "ADELAIDE" ...
##  $ PC           : int  5000 5000 5000 5000 5000 5000 5000 5000 5000 5950 ...
##  $ Level_1      : chr  "Property" "Property" "Property" "Property" ...
##  $ Level_2      : chr  "Fraud" "Fraud" "Damage" "Damage" ...
##  $ Level_3      : chr  "Obtain benefit by deception" "Other fraud, deception and related offences" "Graffiti" "Other property damage and environmental" ...
##  $ Offence.count: int  1 1 1 2 6 2 5 3 1 1 ...
##  $ lat          : num  -34.9 -34.9 -34.9 -34.9 -34.9 ...
##  $ lon          : num  139 139 139 139 139 ...
# Tally the number of records in each Level 1 offence category.
table(CrimeData2$Level_1)
## 
##   Person Property 
##    16569    76637

Review Area 1 - Total Crime Numbers

#Total offence Count by Level 1 offence category

# Bar chart with one bar per Level 1 offence category. geom_bar() counts
# records (no weight aesthetic is supplied), and the same count is printed
# in white inside each bar.
gg1 <- ggplot(
  data = CrimeData2,
  mapping = aes(as.factor(Level_1), fill = as.factor(Level_1))
) +
  geom_bar() +
  scale_fill_manual(
    name = "Offence Category",
    values = c("#ED3323", "#3333FF"),
    breaks = c("Person", "Property"),
    labels = c("Person", "Property")
  ) +
  labs(title = "Count by Offence Category", x = "Offence Category", y = "Count") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", colour = "Black", size = 16),
    axis.text.x = element_text(angle = 0, hjust = 0.5, size = 8, vjust = 0.5)
  ) +
  geom_text(
    aes(label = ..count..),
    stat = "count",
    position = position_dodge(width = 0.9),
    vjust = 2,
    colour = "White"
  )
gg1

Based on this review of the data, it can be observed that the majority of crimes were against Property (76,637 offences), compared with 16,569 offences against the Person.

#Total offence Count by Level 2 Offence Class

# Bar chart of record counts per Level 2 offence class, coloured by the
# Level 1 category, with the count printed above each bar.
# No attach() is needed: ggplot() looks up the aes() columns in `data`, and
# attaching would leave a copy of the data frame on the search path.
gg2 <- ggplot(
  data = CrimeData2,
  mapping = aes(as.factor(Level_2), fill = as.factor(Level_1))
) +
  geom_bar() +
  scale_fill_manual(
    values = c("#ED3323", "#3333FF"),
    name = "Offence Category",
    breaks = c("Person", "Property"),
    labels = c("Person", "Property")
  ) +
  ggtitle("Count by Offence Class") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", colour = "Black", size = 16)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = 8, vjust = 0.5)) +
  labs(x = "Offence Class", y = "Count") +
  geom_text(
    aes(label = ..count..),
    stat = "count",
    position = position_dodge(width = 0.9),
    vjust = 0
  )
gg2

It can be observed that theft is the most commonly recorded criminal offence, with 42,372 counts (55% of all property offences), followed by damage with 17,542 counts. Assault registered the highest count for offences against the person, accounting for 87% of offences within that category.

# In Grid View to Compare Crime Categories

# Same Level 2 class counts as the previous chart, split into one facet row
# per Level 1 category.
# The facet variable is given with vars() so it is looked up in the plot
# data. The earlier form, facet_grid(as.factor(Level_1)), was evaluated
# outside the data and only worked because the data frame had been
# attach()ed; that attach() call is no longer needed and has been removed.
gg3 <- ggplot(
  data = CrimeData2,
  mapping = aes(as.factor(Level_2), fill = as.factor(Level_1))
) +
  geom_bar() +
  scale_fill_manual(
    values = c("#ED3323", "#3333FF"),
    name = "Offence Type",
    breaks = c("Person", "Property"),
    labels = c("Person", "Property")
  ) +
  facet_grid(rows = vars(Level_1)) +
  ggtitle("Count by Offence Class") +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", colour = "Black", size = 16)) +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5, size = 8, vjust = 0.5)) +
  labs(x = "Offence Class", y = "Count") +
  geom_text(
    aes(label = ..count..),
    stat = "count",
    position = position_dodge(width = 0.9),
    vjust = 0
  )
gg3

This facet chart was used to clearly show the distinction between offence categories. It did not provide particular insights beyond the previous chart, but it showcased a new charting technique.

# Creating a Heat Map to plot Level 3 Offences Types against the Level 2 Classes

library(tidyverse)

# Total the offence counts for every Level 1 / Level 2 / Level 3 combination.
# `.groups = "drop"` returns an ungrouped table, so no grouping carries over
# into later steps and summarise() no longer prints its regrouping message.
Grouped_Level <- CrimeData2 %>%
  dplyr::group_by(Level_1, Level_2, Level_3) %>%
  dplyr::summarise(Total = sum(Offence.count), .groups = "drop")

# Create groups by Offence Category (Level 1) to allow for charting

# dplyr::filter is called explicitly because several attached packages
# export a function named filter().
Property_Filter <- dplyr::filter(Grouped_Level, Level_1 == "Property")
Person_Filter <- dplyr::filter(Grouped_Level, Level_1 == "Person")

# Review Property Category Crimes

# Heatmap of Property offences: Level 2 class on x, Level 3 type on y, tile
# colour showing the summed offence count.
# Columns are referenced by bare name inside aes() so they are taken from
# the `data` argument rather than via `Property_Filter$...`.
# The `text` aesthetic builds the hover label; "\n" starts each new field.
HeatMap2 <- ggplot(
  Property_Filter,
  aes(
    x = Level_2,
    y = Level_3,
    fill = Total,
    text = paste(
      "Category:", Level_1,
      "\nClass:", Level_2,
      "\nType:", Level_3,
      "\nCount:", Total
    )
  )
) +
  geom_tile() +
  scale_fill_distiller(palette = "Spectral", name = "Offence Count") +
  labs(
    title = "Heatmap of Level 3 Offence Types Against Level 2 Class",
    y = "Level 3 Offence Type",
    x = "Level 2 Offence Class"
  )

library(plotly) # for tooltips

# Convert to an interactive plot whose tooltip shows only the `text` label.
ggplotly(HeatMap2, tooltip = "text")

This heatmap focuses only on property-based offences to make it more readable. The chart breaks the offence classes down into types, confirming that theft (other theft) was the most common type of offence, followed by other property damage. This chart is interactive, so it is possible to hover over each tile to see the offence count.

# Review Person Category Crimes

# Heatmap of Person offences: Level 2 class on x, Level 3 type on y, tile
# colour showing the summed offence count.
# Columns are referenced by bare name inside aes() so they are taken from
# the `data` argument rather than via `Person_Filter$...`.
# The `text` aesthetic builds the hover label; "\n" starts each new field.
HeatMap3 <- ggplot(
  Person_Filter,
  aes(
    x = Level_2,
    y = Level_3,
    fill = Total,
    text = paste(
      "Category:", Level_1,
      "\nClass:", Level_2,
      "\nType:", Level_3,
      "\nCount:", Total
    )
  )
) +
  geom_tile() +
  scale_fill_distiller(palette = "Spectral", name = "Offence Count") +
  labs(
    title = "Heatmap of Level 3 Offence Types Against Level 2 Class",
    y = "Level 3 Offence Type",
    x = "Level 2 Offence Class"
  )
# Convert to an interactive plot whose tooltip shows only the `text` label.
ggplotly(HeatMap3, tooltip = "text")

This heatmap is focussed on only person based offences to make it more readable. Using this person category drilldown it is possible to see that serious assault not resulting in injury was the highest offence type.

# Review Area 2 - Crime plotted across the year

library(dplyr)  # Useful for data manipulation

# Split the day/month/year text in Reported.Date into its three parts, then
# build a Date column from those parts as numbers. This avoids pasting a
# "year/month/day" string back together and leaving the parser to work out
# its format.
# The day, month and year columns are kept (as character), as before.
CrimeData3 <- CrimeData2 %>%
  separate(Reported.Date, c("day", "month", "year"), sep = "/") %>%
  mutate(
    Date = lubridate::make_date(
      year = as.integer(year),
      month = as.integer(month),
      day = as.integer(day)
    )
  )

# Plot by Month

# Histogram of records over time in 30-day bins (an approximation of
# calendar months), stacked by Level 1 category. The `text` aesthetic
# supplies the bin count for the interactive tooltip.
MonthPlot <- ggplot(
  CrimeData3,
  aes(x = Date, text = paste("Count:", ..count..))
) +
  geom_histogram(
    aes(fill = as.factor(Level_1)),
    binwidth = 30,
    colour = "white"
  ) +
  scale_fill_manual(
    name = "Offence Category",
    values = c("#ED3323", "#3333FF"),
    breaks = c("Person", "Property"),
    labels = c("Person", "Property")
  ) +
  labs(title = "Crime Count by Month by Category", x = "Month", y = "Count")
ggplotly(MonthPlot, tooltip = "text")

It can be observed that crime rates were gradually increasing between the months July 19 and Feb 2020. Following that there was a severe drop in crime. It can also be observed that the data for July 2020 is unlikely to be complete.

# Plot by Week

# Histogram of records over time in 7-day bins, stacked by Level 1 category.
# The `text` aesthetic supplies the bin count for the interactive tooltip.
WeekPlot <- ggplot(
  CrimeData3,
  aes(x = Date, text = paste("Count:", ..count..))
) +
  geom_histogram(
    aes(fill = as.factor(Level_1)),
    binwidth = 7,
    colour = "white"
  ) +
  scale_fill_manual(
    name = "Offence Category",
    values = c("#ED3323", "#3333FF"),
    breaks = c("Person", "Property"),
    labels = c("Person", "Property")
  ) +
  labs(title = "Crime Count by Week by Category", x = "Month", y = "Count")
ggplotly(WeekPlot, tooltip = "text")

It can be seen that the general weekly crime rates were relatively consistent with no major outliers compared with the monthly view.

# Plot by Type by Month

# Histogram of records over time in 30-day bins, stacked by Level 2 offence
# class using the default discrete fill palette.
L2Plot <- ggplot(
  CrimeData3,
  aes(x = Date, text = paste("Count:", ..count..))
) +
  geom_histogram(
    aes(fill = as.factor(Level_2)),
    binwidth = 30,
    colour = "white"
  ) +
  scale_fill_discrete(name = "Offence Type") +
  labs(title = "Crime Count by Month by Class", x = "Month", y = "Count")
ggplotly(L2Plot, tooltip = "text")

It can be seen that the crime rate peaked in Feb 2020 and then dropped off dramatically in April. There is no real change in the percentage breakdown of offence types across the period; only the total count moved up into summer, which could be attributed to the fact that more people spend time outside in summer, resulting in more opportunities to offend.

# Plot by Type by Week

# Histogram of records over time in 7-day bins, stacked by Level 2 offence
# class using the default discrete fill palette.
L2PlotWk <- ggplot(
  CrimeData3,
  aes(x = Date, text = paste("Count:", ..count..))
) +
  geom_histogram(
    aes(fill = as.factor(Level_2)),
    binwidth = 7,
    colour = "white"
  ) +
  scale_fill_discrete(name = "Offence Type") +
  labs(title = "Crime Count by Week by Class", x = "Month", y = "Count")
ggplotly(L2PlotWk, tooltip = "text")

It can be seen here that the level of crime dropped dramatically from April 2020 onwards. My hypothesis is that crime levels fell because of the lockdown measures put in place in response to Covid-19.

Review Area 3 - Plotting Crime by Region

# Review Property Category Crimes by Suburb

library(dplyr) 

# Expand the data to one row per offence: each record's suburb and
# coordinates are repeated Offence.count times.
Expanded_Suburb <- with(
  CrimeData2,
  data.frame(
    Suburb = rep(Suburb, times = Offence.count),
    lon = rep(lon, times = Offence.count),
    lat = rep(lat, times = Offence.count)
  )
)
# Add the Mapping packages

library(ggmap)
library(devtools)
# Obtain the map API key

library(ggmap)

# Read the Google Maps API key from the GOOGLE_MAPS_API_KEY environment
# variable instead of writing it into the document, where anyone who can
# read the file could copy it. A key that was previously written here should
# be treated as exposed and revoked.
google_maps_key <- Sys.getenv("GOOGLE_MAPS_API_KEY")
if (!nzchar(google_maps_key)) {
  stop(
    "Set the GOOGLE_MAPS_API_KEY environment variable before requesting the map.",
    call. = FALSE
  )
}
register_google(key = google_maps_key)

# Download the base map centred on Adelaide, then wrap it as a ggplot object
# so that layers can be added to it.
ADL1 <- get_googlemap("Adelaide", zoom = 10)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Adelaide&zoom=10&size=640x640&scale=2&maptype=terrain&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Adelaide&key=xxx
ADL1 <- ggmap(ADL1)

# Overlay a semi-transparent 2-D binned count of offences (75 bins per axis)
# on the Adelaide base map. The `text` aesthetic is supplied for the
# interactive tooltip.
ADL2 <- ADL1 +
  stat_bin2d(
    data = Expanded_Suburb,
    mapping = aes(x = lon, y = lat, text = paste("Count:", ..count..)),
    alpha = .5,
    bins = 75
  ) +
  labs(title = "Adelaide Crime by Area", x = "", y = "")
## Warning: Ignoring unknown aesthetics: text
library(plotly) # for tooltips

ggplotly(ADL2, tooltip = "text")
## Warning: Removed 20020 rows containing non-finite values (stat_bin2d).
## Warning: 'heatmap' objects don't have these attributes: 'mode'
## Valid attributes include:
## 'type', 'visible', 'legendgroup', 'opacity', 'name', 'uid', 'ids', 'customdata', 'meta', 'hoverinfo', 'hoverlabel', 'stream', 'transforms', 'uirevision', 'z', 'x', 'x0', 'dx', 'y', 'y0', 'dy', 'text', 'hovertext', 'transpose', 'xtype', 'ytype', 'zsmooth', 'hoverongaps', 'connectgaps', 'xgap', 'ygap', 'zhoverformat', 'hovertemplate', 'showlegend', 'zauto', 'zmin', 'zmax', 'zmid', 'colorscale', 'autocolorscale', 'reversescale', 'showscale', 'colorbar', 'coloraxis', 'xcalendar', 'ycalendar', 'xaxis', 'yaxis', 'idssrc', 'customdatasrc', 'metasrc', 'hoverinfosrc', 'zsrc', 'xsrc', 'ysrc', 'textsrc', 'hovertextsrc', 'hovertemplatesrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'

This chart shows the locations across Adelaide (based on postcode geocodes) where crimes occurred most frequently. It can be seen that the highest level of crime occurs in the CBD, which is consistent with the level of activity typical of a city centre.

Thank you - Jake Harry - Student ID s3875020