This is more of an introduction to d3heatmap than it is on any analysis of the Montco. 911 dataset. Since the 911 data set contains dates (months and days can be tricky to sort), it’s a good set to use.
Below are a few of the steps that this report will attempt to cover.
library(dplyr)
library(ggplot2)
library(tidyr)
library(reshape2)
library(data.table)
library(DT)
library(d3heatmap)
# Read our input
d <- read.csv("../input/911.csv", sep=",")
# We only want Traffic
t = d[d$title == "Traffic: VEHICLE ACCIDENT -",]
# Show a few values
datatable(t[1:8,], class = 'compact')
You can use other time conversion, but we want something that is going to work with d3heatmap
# Create a few funtions that will work with d3heatmap
wd <- function(x) as.POSIXct(strptime(x, '%Y-%m-%d %H:%M:%S'))
monthName <- function(x) strftime(x, '%b') # Jan, Feb etc.
weekDayName <- function(x) strftime(x,'%a')
hourName <- function(x) strftime(x,'%H')
#
t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)
# Display a few values
# ... also want to remove search on datatable
datatable(t[1:5,c('twp','title','month','day')], class = 'compact',
options = list(sDom = '<"top">lrt<"bottom">'))
Group by Township counting the number of accidents. We only want the top N number.
# Get the top N
N <- 15 # Say 15, since we have to choose something.
counts <- summarise(group_by(t, twp), Counts=length(title))
counts <- counts[order(-counts$Counts),]
topN = counts$twp[0:N]
# Here we select the top N in t
t = t[t$twp %in% topN,]
# Here you can see the counts
datatable(counts, class = 'compact',
options = list(sDom = '<"top">lrt<"bottom">'))
Get the total by month and Township.
# Now we need to get the count by month,twp
#
counts <- summarise(group_by(t,month, twp), Counts=length(title))
counts <- counts[order(-counts$Counts),]
# counts
#
datatable(counts[,c("month","Counts","twp")],
class = 'compact', options = list(sDom = '<"top">lrt<"bottom">'))
The months ‘Jan’, ‘Feb’ … need to be column names. These months also have to be ordered, hence, Jan should come before Feb etc. You can’t use alphabetical ording, since ‘Apr’ would always come first in the order.
# Create a Pivot table with month at col
p = dcast(counts, twp ~ month)
# Convert NA to 0
p[is.na(p)] <- 0
# Make twp the row names
row.names(p) <- p$twp
# Remove first
p = p[,-1]
# Are colume names are months 'Jan,Feb' etc., buth
# they need to be sorted. Hence, Jan needs to come
# before Feb.
columnNames = factor(names(p), month.abb, ordered=TRUE)
p = p[,order(columnNames)]
datatable(p, class = 'compact', options = list(sDom = '<"top">lrt<"bottom">'))
Create the heatmap, but since we went to the trouble of ordering the month columns, we want to make sure that ordering is preserved.
dmp = data.matrix(p)
# Because you want to preserve row and col names
# Rowv = FALSE, Colv = FALSE
d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
colors = "Reds")
Changing color and font size for yaxis (Township names) and xaxis (months).
d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
colors = heat.colors(256),
yaxis_font_size = 10,
xaxis_font_size = 12)
Below are the step for creating hourly traffic accidents for Cheltenham by day-of-week and hour. The code will start from the selection of data.
# Just Cheltenham
t = d[d$title == "Traffic: VEHICLE ACCIDENT -" & d$twp == "CHELTENHAM",]
t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)
counts <- summarise(group_by(t,day, hour), Counts=length(title))
counts <- counts[order(-counts$Counts),]
counts = counts[,c("hour","day","Counts")]
# pivot
p = dcast(counts, hour ~ day)
# Convert NA to 0
p[is.na(p)] <- 0
# Make day the row names
row.names(p) <- p$day
# Remove first
p = p[,-1]
columnNames = factor(names(p), levels= c("Sat","Sun", "Mon",
"Tue", "Wed", "Thu", "Fri"))
p = p[,order(columnNames)]
dmp = data.matrix(p)
# Because you want to preserve row and col names
# Rowv = FALSE, Colv = FALSE
heatmapDay <- d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
color = scales::col_quantile("Greens", NULL, 9),
yaxis_font_size = 12,
xaxis_font_size = 12)
By hour and day of week.
datatable(p, class = 'compact', options = list(sDom = '<"top">lrt<"bottom">'))
Below is the code for traffic accidents by day-of-week and hour.
# All Montco
t = d[d$title == "Traffic: VEHICLE ACCIDENT -",]
t$timeStamp = wd(t$timeStamp)
t$month = monthName(t$timeStamp)
t$day = weekDayName(t$timeStamp)
t$hour = hourName(t$timeStamp)
counts <- summarise(group_by(t,day, hour), Counts=length(title))
counts <- counts[order(-counts$Counts),]
counts = counts[,c("hour","day","Counts")]
# pivot
p = dcast(counts, hour ~ day)
# Convert NA to 0
p[is.na(p)] <- 0
# Make day the row names
row.names(p) <- p$day
# Remove first
p = p[,-1]
columnNames = factor(names(p), levels= c("Sat","Sun", "Mon",
"Tue", "Wed", "Thu", "Fri"))
p = p[,order(columnNames)]
dmp = data.matrix(p)
# Because you want to preserve row and col names
# Rowv = FALSE, Colv = FALSE
heatmapDay <- d3heatmap(dmp, scale = "column",Rowv = FALSE, Colv = FALSE,
color = scales::col_quantile("Greens", NULL, 25),
yaxis_font_size = 12,
xaxis_font_size = 12)