This RPub demonstrates how to import, clean, manipulate and visualise police recorded crime data using the following packages, most of them developed by Hadley Wickham: readr, lubridate, dplyr, tidyr, ggplot2 and ggvis, with leaflet used for mapping.

Metropolitan and City of London Police recorded crime data for April 2015 were downloaded from data.police.uk.



read_csv() readr

Read comma-separated files downloaded from data.police.uk
library(readr)
# First attempt: let readr guess every column type from the file contents.
mps <- read_csv(file = "2015-04-metropolitan-street.csv")
## Warning: 75161 problems parsing '2015-04-metropolitan-street.csv'. See
## problems(...) for more details.
# List the parsing failures recorded during the import above.
problems(mps)
## Source: local data frame [75,161 x 4]
## 
##    row col           expected  actual
## 1    1   2 date like %Y-%m-%d 2015-04
## 2    2   2 date like %Y-%m-%d 2015-04
## 3    3   2 date like %Y-%m-%d 2015-04
## 4    4   2 date like %Y-%m-%d 2015-04
## 5    5   2 date like %Y-%m-%d 2015-04
## 6    6   2 date like %Y-%m-%d 2015-04
## 7    7   2 date like %Y-%m-%d 2015-04
## 8    8   2 date like %Y-%m-%d 2015-04
## 9    9   2 date like %Y-%m-%d 2015-04
## 10  10   2 date like %Y-%m-%d 2015-04
## .. ... ...                ...     ...
# readr expected dates like "%Y-%m-%d", so the "%Y-%m" values in 'Month'
# fail to parse and are returned as NA. readr also does not convert strings
# to factors automatically.
mps[1:2, "Month"]
## Source: local data frame [2 x 1]
## 
##   Month
## 1  <NA>
## 2  <NA>
# Both police force extracts share the same crime categories and column
# overrides, so define them once instead of repeating the level vector in
# every read_csv() call.
crime_types <- c(
  "Anti-social behaviour", "Bicycle theft", "Burglary",
  "Criminal damage and arson", "Drugs", "Other crime", "Other theft",
  "Possession of weapons", "Public order", "Robbery", "Shoplifting",
  "Theft from the person", "Vehicle crime", "Violence and sexual offences"
)
# Read the 'Month' variable as a character and 'Crime type' as a factor.
street_col_types <- list(
  Month = col_character(),
  `Crime type` = col_factor(crime_types)
)
mps <- read_csv(
  "2015-04-metropolitan-street.csv",
  col_types = street_col_types
)
head(mps, 4)
## Source: local data frame [4 x 12]
## 
##                                                           Crime ID   Month
## 1 b8f736dbfc310cb6f7c61df07d29bc2e784b81ceb5817e7afa8d3fc8d83ef670 2015-04
## 2 1b45a6d7ef4248668c433597bdc573221733699b3b1d53fb23dd19ccf8ead800 2015-04
## 3 e8217fc99ae63f04809dbf2efb8cc9127b9e0a2261b0eb832806971398f28bcc 2015-04
## 4                                                                  2015-04
## Variables not shown: Reported by (chr), Falls within (chr), Longitude
##   (dbl), Latitude (dbl), Location (chr), LSOA code (chr), LSOA name (chr),
##   Crime type (fctr), Last outcome category (chr), Context (lgl)
colp <- read_csv(
  "2015-04-city-of-london-street.csv",
  col_types = street_col_types
)

lubridate

Optional: Use the lubridate package to convert ‘Month’ to a date variable.
library(lubridate)
# Parse the year-month strings in 'Month' for each force's data frame.
mps[["Month"]] <- parse_date_time(mps[["Month"]], orders = "ym")
colp[["Month"]] <- parse_date_time(colp[["Month"]], orders = "ym")

bind_rows() dplyr

Join the data frames together by appending rows
# Name the argument in full: `warn` only works through partial argument
# matching against library()'s `warn.conflicts` parameter.
library(dplyr, warn.conflicts = FALSE)
# Stack the Metropolitan and City of London records into one data frame.
crimes <- bind_rows(mps, colp)

separate() tidyr

Create two new columns by splitting the ‘LSOA name’ variable
crimes[1:2, "LSOA name"]
## Source: local data frame [2 x 1]
## 
##        LSOA name
## 1 Allerdale 001B
## 2   Ashford 005D
library(tidyr)
# Split at the last space so that neither piece keeps the separator. A fixed
# character offset left a trailing space on 'borough', which breaks exact
# comparisons against borough names.
crimes <- separate(
  crimes,
  `LSOA name`,
  into = c("borough", "code"),
  sep = " (?=\\S+$)"
)
crimes[1:2, c("borough", "code")]
## Source: local data frame [2 x 2]
## 
##      borough code
## 1 Allerdale  001B
## 2   Ashford  005D

select() dplyr

Choose and rename the variables
# Keep the coordinates, borough and crime type, giving them shorter names.
crimes <- select(
  crimes,
  long = Longitude,
  lat = Latitude,
  borough,
  category = `Crime type`
)

filter() dplyr

Filter out observations with missing coordinates
# A record can only be mapped when both coordinates are present, so test
# latitude as well as longitude.
crimes <- crimes %>% filter(!is.na(long), !is.na(lat))

count() dplyr

Check the data by counting the number of offences recorded by borough
# Number of recorded offences per borough, before restricting to London.
crimes %>% count(borough)
## Source: local data frame [82 x 2]
## 
##                          borough    n
## 1                     Allerdale     1
## 2                       Ashford     1
## 3                       Babergh     1
## 4          Barking and Dagenham  1708
## 5                        Barnet  2516
## 6  Bath and North East Somerset     1
## 7                       Bedford     2
## 8                        Bexley  1253
## 9                    Birmingham     1
## 10                    Blackpool     1
## ..                           ...  ...

filter() dplyr

Retain only local authorities in London
# Match borough names exactly rather than as an unanchored regular
# expression: a substring pattern such as "Brent" would also retain
# "Brentwood". trimws() guards against surrounding whitespace in 'borough'.
london_boroughs <- c(
  "Barking and Dagenham", "Barnet", "Bexley", "Brent", "Bromley", "Camden",
  "City of London", "Croydon", "Ealing", "Enfield", "Greenwich", "Hackney",
  "Hammersmith and Fulham", "Haringey", "Harrow", "Havering", "Hillingdon",
  "Hounslow", "Islington", "Kensington and Chelsea", "Kingston upon Thames",
  "Lambeth", "Lewisham", "Merton", "Newham", "Redbridge",
  "Richmond upon Thames", "Southwark", "Sutton", "Tower Hamlets",
  "Waltham Forest", "Wandsworth", "Westminster"
)
crimes <- filter(crimes, trimws(borough) %in% london_boroughs)

count() dplyr

Check the results
# Recount offences per borough to confirm which boroughs remain.
crimes %>% count(borough)
## Source: local data frame [33 x 2]
## 
##                  borough    n
## 1  Barking and Dagenham  1708
## 2                Barnet  2516
## 3                Bexley  1253
## 4                 Brent  2560
## 5               Bromley  2209
## 6                Camden  3146
## 7        City of London   451
## 8               Croydon  2926
## 9                Ealing  2830
## 10              Enfield  2288
## ..                   ...  ...

ggplot2

Visualise the count of crime by borough in a bar chart with ggplot2
library(ggplot2)
# Tally offences per borough, order the boroughs from most to fewest
# offences, and draw one bar per borough.
crimes %>%
  group_by(borough) %>%
  summarise(count = n()) %>%
  mutate(borough = reorder(borough, -count)) %>%
  ggplot(aes(x = borough, y = count)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  theme_bw() +
  labs(
    x = "",
    y = "",
    title = "Police recorded offences by borough during April 2015"
  ) +
  # Rotate the borough labels and blank out all line and rect elements.
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    line = element_blank(),
    rect = element_blank()
  )

ggvis

Visualise the count of crime by category in a bar chart with ggvis
# Name the argument in full: `warn` only works through partial argument
# matching against library()'s `warn.conflicts` parameter.
library(ggvis, warn.conflicts = FALSE)
# Bar chart of the number of offences in each crime category.
crimes %>%
  group_by(category) %>%
  summarize(count = n()) %>%
  ggvis(~category, ~count, fill = ~category) %>%
  # Order the categories by descending count.
  mutate(category = reorder(category, -count)) %>%
  layer_bars(stroke := "white") %>%
  add_axis(
    "x",
    title = "",
    properties = axis_props(labels = list(angle = 270, align = "right"))
  ) %>%
  add_axis("y", title = "") %>%
  hide_legend("fill")

ggvis

Visualise the count of crime interactively by borough and category in a bar chart with ggvis
# Interactive bar chart: one bar per borough showing the number of offences
# for whichever crime type is chosen in the "Crime type" input.
crimes %>%
  # One row per category/borough pair holding its number of offences.
  group_by(category, borough) %>%
  summarize(count = n()) %>%
  ggvis(~borough, ~count, fill = ~category) %>%
  # Keep only the rows of the selected category. The input offers every level
  # of crimes$category and starts on "Burglary".
  filter(category == eval(input_select(
    choices = c(levels(crimes$category)),
    selected = "Burglary",
    label = "Crime type"))) %>%
  # Order the boroughs by descending count.
  mutate(borough = reorder(borough, -count)) %>%
  layer_bars(stroke := "white") %>%
  add_axis("x", title = "", properties = axis_props(labels=list(angle=270, align="right")) ) %>%
  add_axis("y", title = "") %>%
  # The fill colour only mirrors the selected category, so hide its legend.
  hide_legend("fill")
You cannot visualise interactive ggvis plots on rpubs.com. However, the bar chart can be viewed in a Shiny app here


Leaflet

Plot the data on a map with the leaflet package by Joe Cheng and Yihui Xie
library(leaflet)

# Count the offences of one crime type at each distinct coordinate pair.
#
# offences:   data frame with 'category', 'long' and 'lat' columns.
# crime_type: the single category label to keep, e.g. "Robbery".
# Returns an ungrouped data frame with one row per long/lat pair and the
# number of offences there in 'count'.
count_by_location <- function(offences, crime_type) {
  offences %>%
    filter(category == crime_type) %>%
    group_by(long, lat) %>%
    summarize(count = n()) %>%
    ungroup()
}

# Build the HTML popup text for a layer: a bold label followed by the count.
location_popup <- function(label, counts) {
  paste0("<strong>", label, " offences: </strong>", counts)
}

robbery <- count_by_location(crimes, "Robbery")
robbery_popup <- location_popup("Robbery", robbery$count)
burglary <- count_by_location(crimes, "Burglary")
burglary_popup <- location_popup("Burglary", burglary$count)
cycle <- count_by_location(crimes, "Bicycle theft")
cycle_popup <- location_popup("Bicycle theft", cycle$count)

# Map with one toggleable circle-marker layer per crime type. Each circle's
# radius is the number of offences recorded at that location.
leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -0.142770, lat = 51.539184, zoom = 15) %>%
  addCircleMarkers(
    data = robbery, lng = ~long, lat = ~lat, radius = ~count,
    color = "red", popup = ~robbery_popup, group = "Robbery"
  ) %>%
  addCircleMarkers(
    data = burglary, lng = ~long, lat = ~lat, radius = ~count,
    color = "blue", popup = ~burglary_popup, group = "Burglary"
  ) %>%
  addCircleMarkers(
    data = cycle, lng = ~long, lat = ~lat, radius = ~count,
    color = "orange", popup = ~cycle_popup, group = "Bicycle theft"
  ) %>%
  addLayersControl(
    overlayGroups = c("Robbery", "Burglary", "Bicycle theft"),
    options = layersControlOptions(collapsed = FALSE)
  ) %>%
  # The robbery layer starts hidden; it can be switched on from the control.
  hideGroup("Robbery")