Introduction

In another post, we used the widely available “chicago_crime” dataset to predict the classification of different crimes.

The data can be found here.

Here, we will visualize select aspects of the data on a map of the city to gain a better understanding of Chicago’s overall crime picture.


Mapping murders in 2017

# Filter the data: keep only homicides recorded in 2017
murders <- dt3[primarytype == "HOMICIDE" & year == 2017]

# Count the murders at each distinct location.
# Grouping with `.N` only visits coordinate pairs that actually occur, instead
# of cross-tabulating every longitude against every latitude and then
# discarding the empty cells. Rows with a missing coordinate are dropped
# (table() ignored them as well), and the coordinates keep their original
# type, so no renaming or character-to-numeric conversion is needed.
locs <- murders[
  !is.na(longitude) & !is.na(latitude),
  .N,
  by = .(longitude, latitude)
]

# Get a map of Chicago
# NOTE(review): recent ggmap releases appear to require a registered Google
# API key (register_google()) before this call succeeds - confirm.
chi <- get_map(location = "Chicago", zoom = 11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
# Plot points: one point per location, coloured and sized by murder count
ggmap(chi) +
  geom_point(
    data = locs,
    aes(x = longitude, y = latitude, color = N, size = N)
  )

From this we can see that the number of murders was noticeably higher on Chicago’s South Side than in other areas.

More importantly, we were able to successfully subset and plot the data onto a map with just a few short lines of code. From here, the possibilities are endless.


Full code

library(knitr)
library(data.table)
library(maps)
library(ggmap)
library(mapproj)
library(lubridate)

# Echo the source code of every chunk in the rendered document
knitr::opts_chunk$set(echo = TRUE)

# Set the root directory for RMarkdown
# ("/PATH" is presumably a placeholder for the real project directory)
knitr::opts_knit$set(root.dir = "/PATH")

# Prefer fixed over scientific notation unless it is more than 3 digits wider
options(scipen = 3)

# Fix the random number generator seed
set.seed(100)

# Use "fread" to load the data; it already returns a data.table
dt <- fread("chicago_crime.csv")

# Clean the column names by reference: remove spaces, then lower-case
setnames(dt, tolower(gsub(" ", "", names(dt))))

# Fix the dates: parse the leading "month/day/year hour:min:sec" part of the
# text and keep only the calendar date
date_convert <- as.Date(strptime(dt$date, "%m/%d/%Y %H:%M:%S"))
dt[, date := date_convert]

# Create columns for month name and weekday name.
# `year` is not derived here; it must already be a column of the CSV because
# it is listed in `factors` below.
dt[, c("month", "day") := list(months(date_convert), weekdays(date_convert))]

# Convert factor columns
factors <- c(
  "block", "iucr", "primarytype", "description", "locationdescription",
  "arrest", "domestic", "ward", "communityarea", "fbicode", "year", "month",
  "day"
)

# `:=` updates `dt` in place, so `dt2` refers to the same converted table
dt2 <- dt[, (factors) := lapply(.SD, as.factor), .SDcols = factors]

# Combine non-criminal offenses to reduce number of levels of "primarytype".
# The variants are matched by label ("NON - CRIMINAL", "NON-CRIMINAL",
# "NON-CRIMINAL (SUBJECT SPECIFIED)", ...) rather than by hard-coded level
# positions, which silently shift whenever the set of crime types changes.
is_non_criminal <- grepl("^NON ?- ?CRIMINAL", levels(dt2$primarytype))
levels(dt2$primarytype)[is_non_criminal] <- "NON-CRIMINAL"

# Remove non-criminal offenses from the data
dt3 <- dt2[primarytype != "NON-CRIMINAL"]

# Drop the now-unused "NON-CRIMINAL" level
dt3[, primarytype := droplevels(primarytype)]

# Filter the data: keep only homicides recorded in 2017
murders <- dt3[primarytype == "HOMICIDE" & year == 2017]

# Count the murders at each distinct location.
# Grouping with `.N` only visits coordinate pairs that actually occur, instead
# of cross-tabulating every longitude against every latitude and then
# discarding the empty cells. Rows with a missing coordinate are dropped
# (table() ignored them as well), and the coordinates keep their original
# type, so no renaming or character-to-numeric conversion is needed.
locs <- murders[
  !is.na(longitude) & !is.na(latitude),
  .N,
  by = .(longitude, latitude)
]

# Get a map of Chicago
# NOTE(review): recent ggmap releases appear to require a registered Google
# API key (register_google()) before this call succeeds - confirm.
chi <- get_map(location = "Chicago", zoom = 11)

# Plot points: one point per location, coloured and sized by murder count
ggmap(chi) +
  geom_point(
    data = locs,
    aes(x = longitude, y = latitude, color = N, size = N)
  )