Introduction

This is more of an introduction to d3heatmap than it is on any analysis of the Montco. 911 dataset. Since the 911 data set contains dates (months and days can be tricky to sort), it’s a good set to use.

Below are a few of the steps that this report will attempt to cover.

  1. Create functions to extract Month and Day-of-week
  2. Select the records we want
  3. Pivot the tables
  4. Month columns need to be ordered, hence ‘Jan’ should always come before ‘Feb’ etc.
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)
library(data.table)
library(DT)
library(d3heatmap)

# Read our input
d <- read.csv("../input/911.csv", sep=",")

# We only want Traffic
t = d[d$title == "Traffic: VEHICLE ACCIDENT -",]

# Show a few values
datatable(t[1:8,], class = 'compact')




TimeStamp Conversion – Month, Day-of-Week

You can use other time conversion, but we want something that is going to work with d3heatmap

# Create a few funtions that will work with d3heatmap
wd <- function(x) as.POSIXct(strptime(x, '%Y-%m-%d %H:%M:%S'))
monthName <- function(x) strftime(x, '%b') # Jan, Feb etc.
weekDayName <- function(x) strftime(x,'%a')
hourName <- function(x) strftime(x,'%H')


# 
t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day  = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)

# Display a few values
#   ... also want to remove search on datatable
datatable(t[1:5,c('twp','title','month','day')], class = 'compact', 
          options = list(sDom  = '<"top">lrt<"bottom">'))




Townships with Most Traffic Accidents

Group by Township counting the number of accidents. We only want the top N number.

# Get the top N
N <- 15  # Say 15, since we have to choose something.

counts <- summarise(group_by(t, twp), Counts=length(title))
counts <- counts[order(-counts$Counts),]
topN = counts$twp[0:N]

# Here we select the top N in t
t = t[t$twp %in% topN,]


# Here you can see the counts
datatable(counts, class = 'compact', 
          options = list(sDom  = '<"top">lrt<"bottom">'))




Counts by Month, Township and Pivot

Get the total by month and Township.

# Now we need to get the count by month,twp
# 
counts <- summarise(group_by(t,month, twp), Counts=length(title))
counts <- counts[order(-counts$Counts),]
# counts

# 
datatable(counts[,c("month","Counts","twp")], 
          class = 'compact', options = list(sDom  = '<"top">lrt<"bottom">'))




Pivot and Columns

The months ‘Jan’, ‘Feb’ … need to be column names. These months also have to be ordered, hence, Jan should come before Feb etc. You can’t use alphabetical ording, since ‘Apr’ would always come first in the order.

# Create a Pivot table with month at col
p = dcast(counts, twp ~ month)

# Convert NA to 0
p[is.na(p)] <- 0

# Make twp the row names
row.names(p) <- p$twp
# Remove first
p = p[,-1]

# Are colume names are months 'Jan,Feb' etc., buth
# they need to be sorted.  Hence, Jan needs to come
# before Feb.

columnNames  = factor(names(p), month.abb, ordered=TRUE)
p = p[,order(columnNames)]

datatable(p, class = 'compact', options = list(sDom  = '<"top">lrt<"bottom">'))




Create HeatMap

Create the heatmap, but since we went to the trouble of ordering the month columns, we want to make sure that ordering is preserved.

dmp = data.matrix(p)
# Because you want to preserve row and col names
#  Rowv = FALSE, Colv = FALSE
d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
          colors = "Reds")




HeatMap Options

Changing color and font size for yaxis (Township names) and xaxis (months).

d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
          colors = heat.colors(256),
          yaxis_font_size = 10,
          xaxis_font_size = 12)

Cheltenham Hourly Traffic Accidents

Below are the step for creating hourly traffic accidents for Cheltenham by day-of-week and hour. The code will start from the selection of data.

# Just Cheltenham
t = d[d$title == "Traffic: VEHICLE ACCIDENT -" & d$twp == "CHELTENHAM",]

t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day  = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)

counts <- summarise(group_by(t,day, hour), Counts=length(title))
counts <- counts[order(-counts$Counts),]

counts = counts[,c("hour","day","Counts")]

# pivot
p = dcast(counts, hour ~ day)

# Convert NA to 0
p[is.na(p)] <- 0

# Make day the row names
row.names(p) <- p$day

# Remove first
p = p[,-1]

columnNames = factor(names(p), levels= c("Sat","Sun", "Mon", 
    "Tue", "Wed", "Thu", "Fri"))

p = p[,order(columnNames)]

dmp = data.matrix(p)
# Because you want to preserve row and col names
#  Rowv = FALSE, Colv = FALSE
heatmapDay <- d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
          color = scales::col_quantile("Greens", NULL, 9),
          yaxis_font_size = 12,
          xaxis_font_size = 12)

Cheltenham Hourly Traffic Accidents

By hour and day of week.

datatable(p, class = 'compact', options = list(sDom  = '<"top">lrt<"bottom">'))

All Montco

Below is the code for traffic accidents by day-of-week and hour.

# All Montco
t = d[d$title == "Traffic: VEHICLE ACCIDENT -",]

t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day  = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)

counts <- summarise(group_by(t,day, hour), Counts=length(title))
counts <- counts[order(-counts$Counts),]

counts = counts[,c("hour","day","Counts")]

# pivot
p = dcast(counts, hour ~ day)

# Convert NA to 0
p[is.na(p)] <- 0

# Make day the row names
row.names(p) <- p$day

# Remove first
p = p[,-1]

columnNames = factor(names(p), levels= c("Sat","Sun", "Mon", 
    "Tue", "Wed", "Thu", "Fri"))

p = p[,order(columnNames)]

dmp = data.matrix(p)
# Because you want to preserve row and col names
#  Rowv = FALSE, Colv = FALSE
heatmapDay <- d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
          color = scales::col_quantile("Greens", NULL, 25),
          yaxis_font_size = 12,
          xaxis_font_size = 12)

Traffic Accidents: Montco (by day-of-week and hour)