This is an Manchester Criminal Rate Analysis, aiming to find out the relationship between criminal rate and unemployment rate, and visualise the locations of high criminal rate. Data source:

http://data.police.uk/about/

https://borders.ukdataservice.ac.uk/bds.html https://www.ons.gov.uk/peoplepopulationandcommunity/populationandmigration/populationestimates/datasets/lowersuperoutputareamidyearpopulationestimates

(All the data on this site is made available under the Open Government Licence v3.0.)

1. Cleaning data

# load dplyr and data.table already 
library(ggplot2)
library(stringr)
mancr1611 <- fread("2016-11-greater-manchester-street.csv")
glimpse(mancr1611)
## Observations: 31,122
## Variables: 12
## $ Crime ID              <chr> "", "e48e3df211c8c551ae0284884e9fcabd2d2...
## $ Month                 <chr> "2016-11", "2016-11", "2016-11", "2016-1...
## $ Reported by           <chr> "Greater Manchester Police", "Greater Ma...
## $ Falls within          <chr> "Greater Manchester Police", "Greater Ma...
## $ Longitude             <dbl> -2.462774, -2.462774, -2.441554, -2.4411...
## $ Latitude              <dbl> 53.62210, 53.62210, 53.61113, 53.63305, ...
## $ Location              <chr> "On or near Scout Road", "On or near Sco...
## $ LSOA code             <chr> "E01012628", "E01012628", "E01004768", "...
## $ LSOA name             <chr> "Blackburn with Darwen 018D", "Blackburn...
## $ Crime type            <chr> "Anti-social behaviour", "Criminal damag...
## $ Last outcome category <chr> "", "Investigation complete; no suspect ...
## $ Context               <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
names(mancr1611)
##  [1] "Crime ID"              "Month"                
##  [3] "Reported by"           "Falls within"         
##  [5] "Longitude"             "Latitude"             
##  [7] "Location"              "LSOA code"            
##  [9] "LSOA name"             "Crime type"           
## [11] "Last outcome category" "Context"
#filter the dataset
mancr1611 %>%
  select(Month, Longitude, Latitude, `Crime type`) %>%   
  rename( Crimetype =`Crime type`) %>%  
  group_by(Crimetype) %>%
  summarise(nCrime = n())  -> crime1611

2. Plot the categories of crimes

ggplot(crime1611, aes(Crimetype, nCrime, fill = Crimetype)) + 
  geom_bar(stat = "identity") +
  scale_x_discrete(labels = function(Crimetype) str_wrap(Crimetype, width = 10)) +
  ggtitle("Criminal Numbers in Manchester, 2016 Nov") +    
  theme(plot.title = element_text(hjust = 0.5)) + 
  xlab("Crime type") + 
  ylab("Number of Crimes") 

3. Different time-series plots

#FIREST: Data cleaning 
mancrs <- fread("mergedatabymonth.csv")
## 
Read 0.0% of 1978064 rows
Read 18.7% of 1978064 rows
Read 29.3% of 1978064 rows
Read 37.9% of 1978064 rows
Read 50.6% of 1978064 rows
Read 65.2% of 1978064 rows
Read 76.8% of 1978064 rows
Read 82.9% of 1978064 rows
Read 1978064 rows and 13 (of 13) columns from 0.436 GB file in 00:00:14
mancrs %>%
  select(Month, Longitude, Latitude, Crime.type, LSOA.name) %>%   # ` ` used to select the column name which has space
  filter(str_detect(LSOA.name, 'Manchester')) %>%
  group_by(Crime.type, Month) %>%
  summarise(nCrime = n()) -> mancrsgroup

3.1 First Line graph – Over all crimes in timeseries:

#use scales package to break the date from 4 month
library(ggplot2)
library(scales)
ggplot(crimestimeseries, aes(Month, nCrimeintotals)) + 
  geom_line(colour = "#00A4E6", size = 1.5) + scale_x_date(labels = date_format("%Y-%m"), breaks = date_breaks("4 month")) + 
  theme(axis.text.x = element_text(size=9)) + xlab(" ") + 
  ylab("Number of Total Crimes") +  ggtitle("Figure.1 Crimes of all kinds in Manchester") + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_line(color = "gray50", size = 0.5),
        panel.grid.major.x = element_blank(), 
        panel.background = element_blank(),
        axis.text.y = element_text(colour="#666666", size = 10),
        axis.text.x = element_text(size = 10), axis.ticks.length = unit(.25, "cm"),
        axis.ticks.y = element_blank(),
        plot.title = element_text(hjust = 0.5, vjust=2.12, colour="#666666", size = 18),
        axis.title.y = element_text(size = 14, colour = "#666666")) 

3.2 Second Line graph – umeployment rate displays in quarterly :

library(grid)
#change to date type 
uneply <- fread("unemploy_rate_ of_manchester.csv")
uneply$Month <- as.yearmon(uneply$Month, "%b-%y") #change to date type
uneply$Month <- as.Date(uneply$Month)
# umeployment rate displays in quarterly  
ggplot(uneply, aes(Month, umeply_rate)) + 
  geom_line(colour = "#FF6666", size = 1.5) + scale_x_date(labels = date_format("%Y-%m"), breaks = date_breaks("4 month")) + 
  theme(axis.text.x = element_text(size=9)) + xlab(" ") + 
  ylab("Unemployment (%)") +  ggtitle("Figure.2 Unemployment Rate in Manchester") + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_line(color = "gray50", size = 0.5),
        panel.grid.major.x = element_blank(), 
        panel.background = element_blank(),
        axis.text.y = element_text(colour="#FF6666", size = 12),
        axis.text.x = element_text(size = 10), axis.ticks.length = unit(.25, "cm"),
        axis.ticks.y = element_blank(),
        plot.title = element_text(hjust = 0.5, vjust=2.12, colour="#666666", size = 18),
        axis.title.y = element_text(size = 14, colour = "#666666"))

#transfer the Overall Crimes into quarlterly
library(stringr)
library(tidyr)
crimes_quarterly <- fread("crimes_quarterly.csv")
crimes_quarterly_cls <- separate(crimes_quarterly, Month, c("Year", "Month"))
crimes_quarterly_cls %>%
  select(Year, Quarter, nCrimeintotals) %>%
  group_by(Year, Quarter) %>%
  summarise(nCrimequaterly = sum(nCrimeintotals)) -> crimes_qualt
#create a seq for quarter for each year 
Q <- c("Mar", "Jun", "Sep", "Nov" , "Mar", "Jun", "Sep", "Nov" , "Mar", "Jun", "Sep", "Nov" , "Mar", "Jun", "Sep", "Nov" , "Mar", "Jun", "Sep", "Nov" , "Mar", "Jun", "Sep", "Nov")
crimes_qualt$Qua <- Q
crimes_qualt_plt <- unite(crimes_qualt, Year_qualt, Year, Qua, sep = "-")

crimes_qualt_plt$Year_qualt <- as.yearmon(crimes_qualt_plt$Year_qualt, "%Y-%b")
crimes_qualt_plt$Year_qualt <- as.Date(crimes_qualt_plt$Year_qualt)

3.3 Combine two tables in order to compare the unemployment rate and criminal rate over time

library(gtable)
ggplot(uneply, aes(Month, umeply_rate)) + 
  geom_line(colour = "#FF6666", size = 1.5) + scale_x_date(labels = date_format("%b%y"), breaks = date_breaks("4 month")) + 
  theme(axis.text.x = element_text(size=9)) + xlab(" ") + 
  ylab("Figure.2 unemployment in Manchester by quarterly") + 
  ggtitle("Figure.3 Crimes and unemployment in Manchester by quarterly") + 
  theme(panel.grid.minor = element_blank(), 
        panel.grid.major = element_line(color = "gray50", size = 0.5),
        panel.grid.major.x = element_blank(), legend.position="top") + 
  theme(panel.background = element_blank()) +
  theme(axis.text.y = element_text(colour="#FF6666", size = 12),
        axis.text.x = element_text(size = 10)) +
  theme(axis.ticks.length = unit(.25, "cm"),
        axis.ticks.y = element_blank()) +
  ggtitle("Figure.3 Crimes and Unemployment in Manchester, Quarterly\n") + labs(x=NULL, y= NULL) +
  theme(plot.title = element_text(hjust = 0.5, vjust=2.12, colour="#666666", size = 18)) -> p1

ggplot(crimes_qualt_plt, aes(Year_qualt, nCrimequaterly)) +
  geom_line(colour = "#00A4E6", size = 1.5) +
  ggtitle("Total Crimes Number\n") +
  labs(x=NULL,y=NULL) + 
  scale_x_date(labels = date_format("%b%y"), breaks = date_breaks("4 month")) + 
  theme(axis.text.x = element_text(size=9)) + xlab(" ") +  
  theme(panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text.y = element_text(colour="#00A4E6", size=12),
    axis.text.x = element_text(size = 10),
    axis.ticks.length = unit(.25, "cm"),
    axis.ticks.y = element_blank(),
    plot.title = element_text(hjust = 0.6, vjust=2.12, colour = "#00a4e6", size = 14)) -> p2

# Combine two tables
g1 <- ggplot_gtable(ggplot_build(p1))
g2 <- ggplot_gtable(ggplot_build(p2))

pp <- c(subset(g1$layout, name == "panel", se = t:r))

g <- gtable_add_grob(g1, g2$grobs[[which(g2$layout$name == "panel")]], pp$t, 
                     pp$l, pp$b, pp$l)

ia <- which(g2$layout$name == "axis-l")
ga <- g2$grobs[[ia]]
ax <- ga$children[[2]]

ax$widths <- rev(ax$widths)
ax$grobs <- rev(ax$grobs)
#ax$grobs[[1]]$x <- ax$grobs[[1]]$x - unit(1, "npc") + unit(0.15, "cm")

g <- gtable_add_cols(g, g2$widths[g2$layout[ia, ]$l], length(g$widths) - 1)
g <- gtable_add_grob(g, ax, pp$t, length(g$widths) - 1, pp$b)

g$grobs[[8]]$children$GRID.text.1767$label <- c("Unemployment Rate\n", "Total Crimes Number\n")

# change color
g$grobs[[8]]$children$GRID.text.1767$gp$col <- c("#FF6666","#00A4E6")

# change x-coordinate
g$grobs[[8]]$children$GRID.text.1767$x <- unit(c(1.2, 2.12), "npc")
grid.draw(g)

4. choroplath map of crimes of each LOSA

4.1 Anti-Social Behavior Crime Occurrence in Manchester, 2016

mancr <- read.csv("2016-11-greater-manchester-street.csv") 
crimes <- filter(mancr, str_detect(LSOA.name, 'Manchester'))  # alternative method: grep() 

#filter(crimes, str_detect(Crime.type, 'Burglary')) -> crimes  
coords <- cbind(Longitude = as.numeric(as.character(crimes$Longitude)), Latitude = as.numeric(as.character(crimes$Latitude)))
crime.pts <- SpatialPointsDataFrame(coords, crimes[,-(5:6)], proj4string = CRS("+init=epsg:4326"))
plot(crime.pts,pch='.',col='darkred')

#unzip BoundaryData.zip first
#read shapefile
manch.lsoa <- readShapeSpatial('manchester2011LSOA/england_lsoa_2011.shp')
#reproject the crime.pts data so that it can be display with Manchester LSOA data
crime.pts <- spTransform(crime.pts, CRS("+init=epsg:27700"))
# display all crimes in mancherster
plot(manch.lsoa)
plot(crime.pts,pch='.',col='red',add=TRUE)

# focus on antisocial behaviour
asb.pts <- crime.pts[crime.pts$Crime.type == "Anti-social behaviour", ]
#bur.pts <- crime.pts[crime.pts$Crime.type == "Burglary", ]
# preload regeos packages, which use for overlay operations in R


# This defines a new R function - it counts how many points fall into each polygon
poly.counts <- function (pts, polys) colSums(gContains(polys, pts, byid = TRUE))

#counts the number of crimes in each LSOA
#bur.count <- poly.counts(crime.pts ,manch.lsoa)
asb.count <- poly.counts(asb.pts, manch.lsoa)
## Warning in RGEOSBinPredFunc(spgeom1, spgeom2, byid, func): spgeom1 and
## spgeom2 have different proj4 strings
#crime.count <- poly.counts(crime.pts,manch.lsoa)

# draw choropleth map of asb counts for each LSOA
library(classInt)
library(RColorBrewer)
# First, add an ASB event count column to the 'manch.lsoa' SpatialPolygonsDataFrame
manch.lsoa@data$asb.count <- asb.count
# manch.lsoa@data$crime.count <- crime.count

var <- manch.lsoa@data[,"asb.count"]
breaks <- classIntervals(var, n = 6, style = "fisher")$brk
my_colours <- brewer.pal(6, "Greens")
plot(manch.lsoa, col = my_colours[findInterval(var, breaks, all.inside = TRUE)],   axes = FALSE, border = rgb(0.8,0.8,0.8))
breaks <- list(b=breaks,c=my_colours)
title(main="Figure.4 Anti-Social Behavior Crime Occurrence in Manchester, 11-2016", col.main="grey50", font.main= 2, cex.main= 1.5)

4.2 Rate of Anti-Social behaviour(ASBs) Crime Occurrence

pop <- read.csv("pop.csv")

#match the location in pop that eacht LSOA code in manche.lsoa
#see code above for creating manch.lsoa file 
lsoa.lookup <- match(manch.lsoa$code,pop$lsoa)
head(lsoa.lookup)
## [1] 3401 3415 3405 3213 3397 3394
head(manch.lsoa@data)
##                         label            name      code asb.count
## 0 E08000003E02006912E01033659 Manchester 055D E01033659        27
## 1 E08000003E02006916E01033660 Manchester 059B E01033660         3
## 2 E08000003E02006913E01033665 Manchester 056C E01033665         4
## 3 E08000003E02001061E01033666 Manchester 017F E01033666        10
## 4 E08000003E02006902E01033667 Manchester 054E E01033667        41
## 5 E08000003E02006902E01033651 Manchester 054B E01033651         8
#add a column with the appropriate population estimates to the manchester (SpatialPolygonsDataFrame)
manch.lsoa@data$pop <- pop[lsoa.lookup,"pop"]
head(manch.lsoa@data)
##                         label            name      code asb.count  pop
## 0 E08000003E02006912E01033659 Manchester 055D E01033659        27 1736
## 1 E08000003E02006916E01033660 Manchester 059B E01033660         3 1402
## 2 E08000003E02006913E01033665 Manchester 056C E01033665         4 1341
## 3 E08000003E02001061E01033666 Manchester 017F E01033666        10 2768
## 4 E08000003E02006902E01033667 Manchester 054E E01033667        41 1175
## 5 E08000003E02006902E01033651 Manchester 054B E01033651         8 1607
#add column contains the rate of ASBs per head of population
#Note the multiplication by 10000 - this means rates are per 10000 heads of population.
manch.lsoa@data$asb.rate <- 10000 * (manch.lsoa@data$asb.count / manch.lsoa@data$pop)
head(manch.lsoa@data) 
##                         label            name      code asb.count  pop
## 0 E08000003E02006912E01033659 Manchester 055D E01033659        27 1736
## 1 E08000003E02006916E01033660 Manchester 059B E01033660         3 1402
## 2 E08000003E02006913E01033665 Manchester 056C E01033665         4 1341
## 3 E08000003E02001061E01033666 Manchester 017F E01033666        10 2768
## 4 E08000003E02006902E01033667 Manchester 054E E01033667        41 1175
## 5 E08000003E02006902E01033651 Manchester 054B E01033651         8 1607
##    asb.rate
## 0 155.52995
## 1  21.39800
## 2  29.82849
## 3  36.12717
## 4 348.93617
## 5  49.78220
var <- manch.lsoa@data[,"asb.rate"]
breaks <- classIntervals(var, n = 6, style = "fisher")$brk
my_colours <- brewer.pal(6, "Greens")
par(xpd=T, mar=par()$mar+c(0,0,0,3))
plot(manch.lsoa, col = my_colours[findInterval(var, breaks, all.inside = TRUE)],   axes = FALSE, border = rgb(0.8,0.8,0.8))
breaks <- list(b=breaks,c=my_colours)
#cex is the whole scale of legend
legend("bottomright", legend = leglabs(round(breaks$b,1)), fill = breaks$c, cex = 0.6, bty = "n", title="Times per 10,000 people", title.adj=0.3)
title(main="Figure.5 Rate of Anti-Social behaviour in Manchester By LSOA", col.main="grey50", font.main=2,cex.main=1.5)

5. Indicators of Deprivatior

#First: cleaning data
deprivation <- read.csv("2015_All_ranks__deciles_and_scores_for_the_Indices_of_Deprivation__and_population_denominators.csv")

deprivation <- deprivation[, c(1,5,8,11,17,14,23,20,26)]
colnames(deprivation) <- c("LSOA","Overall","Income","Employment","Health","Education","Housing","Crime","Environment")
head(deprivation)
##        LSOA Overall Income Employment Health Education Housing Crime
## 1 E01031349  12.389  0.096      0.083 -0.718    20.055  10.586 0.242
## 2 E01031350  28.619  0.187      0.162  0.722    34.653  24.689 0.244
## 3 E01031351  11.713  0.065      0.066 -0.616    26.334  13.631 0.495
## 4 E01031352  16.446  0.117      0.113 -0.234    25.779  17.803 0.026
## 5 E01031370  18.265  0.102      0.115  0.082    32.056  15.482 0.182
## 6 E01031374  12.529  0.072      0.084 -0.183    14.261  15.696 0.128
##   Environment
## 1      15.180
## 2      17.837
## 3      11.150
## 4      10.677
## 5      15.378
## 6      22.053
lsoa.lookup <- match(manch.lsoa$zonecode,deprivation$LSOA)

draw a choropleth map

#var <- manch.lsoa@data[,"Overall"]
#breaks <- classIntervals(var, n = 6, style = "fisher")$brk
#my_colours <- brewer.pal(6, "Greens")
#par(xpd=T, mar=par()$mar+c(0,0,0,3))
#plot(manch.lsoa, col = my_colours[findInterval(var, breaks, all.inside = TRUE)],   axes = FALSE, border = rgb(0.8,0.8,0.8))
#breaks <- list(b=breaks,c=my_colours)
#legend("bottomright", legend = leglabs(round(breaks$b,1)), fill = breaks$c, cex = 0.6, title="Index of Deprivation", title.adj=0.3, bty ="n")
#title(main="Figure.10 Overall Deprivation in Mancheser (By LSOA)", col.main="grey50", font.main=4,cex.main=1.5)

6. Incidence and density maps of perticular place

Plotting the locations of different crimes in downtown of Manchester

mancr <- read.csv("2016-11-greater-manchester-street.csv") 
crimes <- filter(mancr, str_detect(LSOA.name, 'Manchester'))  # alternative method: grep() 
#filter targerts of crimes type
targets <- c('Anti-social behaviour', 'Violence and sexual offences', 'Burglary', 'Other theft')
crimes <- filter(crimes, Crime.type %in% targets)

#rank different crimes
crimes$Crime.type <- factor(crimes$Crime.type, levels = c("Burglary", "Other theft", "Violence and sexual offences", "Anti-social behaviour"))

# restrict to downtown
# longtitude & latitude get from google earth
crimes <- subset(crimes, ((-2.270859 <= Longitude)
                           & (Longitude <= -2.213429)
                           & (53.463590 <= Latitude)
                           & (Latitude <= 53.498749)))
          
map <- get_map( location = 'Manchester', zoom = 14, maptype = "roadmap", color = "bw")
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Manchester&zoom=14&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Manchester&sensor=false
#use ggmap to plot the map
p <- ggmap(map)
p <- p + geom_point(data = crimes, aes(x = Longitude, y = Latitude, size = Crime.type, colour = Crime.type), size = 2)

# legend positioning, removing grid and axis labeling
p <- p + theme( legend.position = c(0.0, 0.7) # put the legend inside the plot area
                , legend.justification = c(0, 0)
                , legend.background = element_rect(colour = F, fill = "white")
                , legend.key = element_rect(fill = F, colour = F)
                , panel.grid.major = element_blank()
                , panel.grid.minor = element_blank()
                , axis.text = element_blank()
                , axis.title = element_blank()
                , axis.ticks = element_blank())

print(p)
## Warning: Removed 31 rows containing missing values (geom_point).

2D density plot

# 2D density plot
p <- ggmap(map)
overlay <- stat_density2d(data = crimes
                          , aes(x = Longitude, y = Latitude, fill = ..level..*2 , alpha = ..level..)
                          , size = 2, bins = 4, geom = "polygon")
p <- p + overlay
p <- p + scale_fill_gradient("Violent\nCrime\nDensity")
p <- p + scale_alpha(range = c(0.4, 0.75), guide = FALSE)
p <- p + guides(fill = guide_colorbar(barwidth = 1.5, barheight = 10))
#p <- p + inset(grob = ggplotGrob(ggplot() + overlay + theme_inset())
# , xmin = -95.35836, xmax = Inf, ymin = -Inf, ymax = 29.75062) print(p)                           
print(p)
## Warning: Removed 31 rows containing non-finite values (stat_density2d).