Assignment Summary

This assignment visualises the crime rate of Victoria and investigates whether there is any correlation between crime rate and income, using income and crime statistics data from the Victorian government for the years 2014 to 2017.

This assignment first visualises the relationship between crime rate and average income for each Victorian local government area (LGA). Many of us may assume that the lower the average income, the higher the crime rate. However, as the data visualisation shows, that is not exactly the case. As the graphic illustrates, most Victorian LGAs have an average income between 40K and 60K, and their crime rates vary from 2K to 9K incidents per 100K population per year. The LGAs with very high crime rates (above 10K) actually have average incomes between 50K and 75K, which is above the average income of Victorian LGAs.

This assignment then continues by visualising the crime rate on a map of Victoria, where users can highlight each LGA to view the details of its crime rate.

Setup

Load the necessary packages for this assignment:

library("tidyr")
library("gdata")
library("dplyr")
library("plotly")
library("ggplot2")
library("rgeos")
library("maptools")
library("ggmap")
library("broom")
library("leaflet")
library("htmlwidgets")
library("htmltools")

Read Data

Read the necessary source data.

# Location of the Perl interpreter handed to read.xls() for every Excel import
# below; kept in one place so it only has to be edited once per machine.
perl_exe <- "C:/Strawberry/perl/bin/perl.exe"

# Step 1: Read Income Statistics Data
# Sheet "Individuals Table 8" is read with its first row skipped.
url1 <- "http://data.gov.au/data/dataset/540e3eac-f2df-48d1-9bc0-fbe8dfec641f/resource/8a7bfe9e-ad35-4a0a-be52-52058fea1bca/download/ts17individual08medianaveragetaxableincomestatepostcode.xlsx"

IncomeData <- read.xls(
  url1,
  sheet = "Individuals Table 8",
  skip = 1,
  method = "csv",
  perl = perl_exe
)

# Step 2: Read Crime Statistics Data
# The third sheet of the workbook is read with read.xls() defaults.
url2 <- "http://www.crimestatistics.vic.gov.au/sites/default/files/embridge_cache/emshare/original/public/users/201909/f7/c0119bb69/Data_tables_Criminal_Incidents_Visualisation_year_ending_June_2019.xlsx"

CrimeData <- read.xls(
  url2,
  sheet = 3,
  perl = perl_exe
)

# Step 3: Read Location Data for Postcode and LGA Mapping
# The first two rows of the sheet are skipped.
url3 <- "http://www.vec.vic.gov.au/Files/LocalityFinder.xls"

LGAPC <- read.xls(
  url3,
  skip = 2,
  method = "csv",
  perl = perl_exe
)

# Step 4: Read Spatial Shape Data
# Loaded from a local shapefile; the path is machine specific.
VicShp <- readShapeSpatial(
  "C:/Program Files/R/WorkingDirectory/vmlite_lga_cm/vmlite_lga_cm.shp"
)

Tidy Data

# Step 5: Tidy Income Data - Change Variable Names
# Columns are renamed by position. Columns 8, 11, 14 and 17 are labelled with
# the year they stand for in the rest of the analysis.
year_cols <- c("2014", "2015", "2016", "2017")
names(IncomeData)[c(1, 2)] <- c("State", "Postcode")
names(IncomeData)[c(8, 11, 14, 17)] <- year_cols

# Step 6: Tidy Income Data - Select only needed Variables
# Keep the state, postcode and yearly income columns for Victorian rows only.
IncomeTidy <- IncomeData %>%
  select(State, Postcode, "2014", "2015", "2016", "2017") %>%
  filter(State == "VIC")

# Step 7: Tidy Income Data - Format Data for Operations Later On
# Strip thousands separators and convert every yearly column to numeric.
# gsub() is already vectorised, so each column is converted in a single call;
# as.character() guards against the column having been read as a factor.
# Values that are not numbers become NA (with a coercion warning).
IncomeTidy[year_cols] <- lapply(
  IncomeTidy[year_cols],
  function(col) as.numeric(gsub(",", "", as.character(col), fixed = TRUE))
)

# Step 8: Tidy Income Data - Subset and Keep Only Complete Observations
IncomeTidy <- IncomeTidy[complete.cases(IncomeTidy), ]

# Step 9: Tidy Income Data - Transform Data for Mapping Later On
# Reshape from one column per year to one row per postcode and year.
# gather() is superseded by pivot_longer() but is kept so the script still
# runs with older tidyr releases.
IncomeTidy <- IncomeTidy %>%
  gather("2014", "2015", "2016", "2017", key = "Year", value = "Income")

# Step 10: Tidy Income Data - Change the Income Unit to K
# Step 11: Tidy Income Data - Format Data
# Year becomes numeric; Income is expressed in thousands and truncated to a
# whole number.
IncomeTidy$Year <- as.numeric(IncomeTidy$Year)
IncomeTidy$Income <- as.integer(IncomeTidy$Income / 1000)

# Step 12: Tidy Crime Data - Change Variable Names
# Columns are renamed by position (1, 3, 4 and 5).
names(CrimeData)[c(1, 3, 4, 5)] <-
  c("Year", "lga_name", "Incidents", "Crime.Rate")

# Step 13: Tidy Crime Data - Subset and Keep Only Data for Relevant Years
# Compare Year as a number. Comparing against the strings "2014"/"2017" is a
# lexicographic comparison and yields NA when Year is a factor.
# The Year column itself is left unchanged.
CrimeTidy <- CrimeData %>%
  filter(
    as.numeric(as.character(Year)) >= 2014,
    as.numeric(as.character(Year)) <= 2017
  )

# Step 14: Tidy Crime Data - Format Data for Operations Later On
# Remove thousands separators and convert to integer (any fractional part is
# truncated). gsub() is vectorised, so no per-element sapply() is needed.
CrimeTidy$Incidents <- as.integer(
  gsub(",", "", as.character(CrimeTidy$Incidents), fixed = TRUE)
)
CrimeTidy$Crime.Rate <- as.integer(
  gsub(",", "", as.character(CrimeTidy$Crime.Rate), fixed = TRUE)
)

# Step 15: Tidy Crime Data - Change lga_name to Uppercase
# Referenced by name (it is column 3) so the step does not depend on position.
CrimeTidy$lga_name <- toupper(CrimeTidy$lga_name)

# Step 16: Tidy Location Data - Select only needed Variables
# Keep one row per postcode; distinct() retains the first municipality listed
# for each postcode.
LGAPCTidy <- LGAPC %>%
  select(Post.Code, Municipality.Name) %>%
  distinct(Post.Code, .keep_all = TRUE)

# Step 17: Tidy Location Data - Change Variable Names
names(LGAPCTidy)[1:2] <- c("Postcode", "lga_name")

# Step 18: Tidy Location Data - Format Data for Operations Later On
# Drop the council suffixes so only the LGA name remains. gsub() is vectorised,
# so the whole column is processed in one call per suffix.
# NOTE(review): only " City Council" and " Shire Council" are stripped; a name
# with any other suffix (for example "... Rural City Council") would be left
# partly intact - confirm against the LGA names used in the crime data.
LGAPCTidy$lga_name <- gsub(
  " City Council", "", as.character(LGAPCTidy$lga_name), fixed = TRUE
)
LGAPCTidy$lga_name <- gsub(
  " Shire Council", "", LGAPCTidy$lga_name, fixed = TRUE
)

# Step 19: Tidy Location Data - Change lga_name to Uppercase
LGAPCTidy$lga_name <- toupper(LGAPCTidy$lga_name)

# Step 20: Tidy Shape Data - Factorise the Data
# Convert the spatial object into a data frame with tidy(), using the
# lga_name attribute as the region identifier. VicShp is overwritten with
# the result.
VicShp <- tidy(VicShp, region = "lga_name")

# Step 21: Tidy Shape Data - Keep Only Distinct LGA
# Step 22: Tidy Shape Data - Select only needed Variables
# Step 23: Tidy Shape Data - Change Variable Name
# Keep the first row found for each id, retain only its coordinates and
# expose the id under the name lga_name.
VicShpDistinct <- VicShp %>%
  distinct(id, .keep_all = TRUE) %>%
  select(long, lat, lga_name = id)

# Step 24: Join Data - Add LGA to Income Dataset
# Step 25: Join Data - Select only needed Variables
# Step 26: Group by LGA and Calculate Average Income from Suburb to LGA Level
# Step 27: Change Format to Integer to remove decimal points
# The join key is spelled out rather than inferred from shared column names.
# The mean income per LGA and year is truncated to a whole number, and the
# result is ungrouped so later steps do not inherit the lga_name grouping.
IncomeLGA <- IncomeTidy %>%
  left_join(LGAPCTidy, by = "Postcode") %>%
  select(Year, Income, lga_name) %>%
  group_by(lga_name, Year) %>%
  summarise(Income = as.integer(mean(Income, na.rm = TRUE))) %>%
  ungroup()

# Step 28: Join Data - Add Crime Data to IncomeLGA Dataset
# Matched on LGA name and year.
IncomeLGACrime <- IncomeLGA %>%
  left_join(CrimeTidy, by = c("lga_name", "Year"))

# Step 29: Join Data - Add Long/Lat Data to IncomeLGACrime Dataset
# Step 30: Final Dataset - Select only needed Variables
DataAll <- IncomeLGACrime %>%
  left_join(VicShpDistinct, by = "lga_name") %>%
  select(Year, lga_name, Income, Crime.Rate, long, lat)

# Step 31: Final Dataset - Keep Only Complete Observations
# Rows that found no match in any of the joins carry NA and are dropped here.
DataAll <- DataAll[complete.cases(DataAll), ]

Visualise the Data 1

Visualise the crime rate on a spatial map of Victoria to explore its geographic pattern.

# Step 32: Read Spatial Shape Data
# sp::merge() below needs the spatial object itself, so the shapefile is read
# into its own variable here.
vic.lga.shp <- readShapeSpatial(
  "C:/Program Files/R/WorkingDirectory/vmlite_lga_cm/vmlite_lga_cm.shp"
)

# Step 33: Merge spatial shape data with Crime Dataset of DataAll
# DataAll can hold several rows per LGA (one per year); duplicateGeoms = TRUE
# allows the geometry to be repeated for each matching row.
merge.lga.crime <- sp::merge(
  vic.lga.shp, DataAll,
  by = "lga_name", duplicateGeoms = TRUE
)

# Step 34: Create spatial graphic
# Quintile break points of the crime rate; used both as reference lines on the
# histogram and as the colour bins of the map.
bins <- quantile(
  DataAll$Crime.Rate,
  probs = seq(0, 1, 0.2), names = FALSE, na.rm = TRUE
)

# Histogram of the crime rate with the quintile breaks marked in red.
ggplot(data = DataAll, aes(x = Crime.Rate)) +
  geom_histogram(colour = "white", bins = 40) +
  geom_vline(xintercept = bins, colour = "red", lwd = 1, lty = 2)

pal <- colorBin("YlOrRd", domain = DataAll$Crime.Rate, bins = bins)

p1 <- leaflet(merge.lga.crime) %>%
  setView(lng = 147, lat = -36.5, zoom = 6)

# One HTML hover label per polygon: LGA name and its crime rate.
labels <- sprintf(
  "<strong>%s</strong><br/>%g Crime Incidents/100K People",
  merge.lga.crime$lga_name, merge.lga.crime$Crime.Rate
) %>% lapply(htmltools::HTML)

title <- tags$div(HTML('<h3>Victorian LGA Crime Rate - 2014-2017</h3>'))

# Argument names are written out in full (highlightOptions, position) instead
# of relying on R's partial argument matching.
p1 %>%
  addPolygons(
    fillColor = ~pal(Crime.Rate),
    weight = 2, opacity = 1, color = "grey", dashArray = "3",
    fillOpacity = 0.8,
    highlightOptions = highlightOptions(
      weight = 6, color = "#666", dashArray = "",
      fillOpacity = 0.8, bringToFront = TRUE
    ),
    label = labels,
    labelOptions = labelOptions(
      style = list("font-weight" = "normal", padding = "3px 8px"),
      textsize = "15px", direction = "auto"
    )
  ) %>%
  addLegend(
    pal = pal, values = ~Crime.Rate, opacity = 0.7,
    title = "Crime Incidents/100K People", position = "bottomright"
  ) %>%
  addControl(title, position = "topright")

Visualise the Data 2

Second, visualise the relationship between average income and crime rate from 2014 to 2017.

# Step 35: Using ggplot to visualise Average Income v.s. Crime Rate in Victorian LGAs from 2014 to 2017
# One scatter panel per year, with points coloured by LGA.
p2 <- ggplot(
  data = DataAll,
  aes(x = Income, y = Crime.Rate, colour = lga_name)
) +
  geom_point() +
  facet_grid(. ~ Year) +
  ggtitle("Average Income v.s. Crime Rate") +
  xlab("Average Income (Unit: $1K AUD)") +
  ylab("Crime Rate Per 100K Population") +
  theme(
    plot.title = element_text(color = "red", size = 14, face = "bold.italic"),
    axis.title.x = element_text(color = "blue", size = 12, face = "bold"),
    axis.title.y = element_text(color = "#993333", size = 12, face = "bold")
  )

# Step 36: Using ggplotly to wrap and add interactivities
# The static plot is replaced by its interactive plotly version.
p2 <- ggplotly(p2, width = 1000, height = 600)

p2

Reference