Visualizing Melbourne Pedestrian Counts

(you can find the whole project for this on github)

First we need some libraries

library(rvest)     # devtools::install_github("hadley/rvest")
library(httr)
library(dplyr)
library(xml2)      # devtools::install_github("hadley/xml2")
library(pbapply)
library(data.table)
library(lubridate)
library(tidyr)
library(ggplot2)
library(scales)
library(grid)

Next we get the list of data files from the site by scraping the download page for the URLs and storing them locally. The use of write_disk makes for a free “caching” function as well.

data_dir <- ifelse(basename(getwd())=="R", "../data", "./data")
base_url <- "http://www.pedestrian.melbourne.vic.gov.au/"

pg <- read_html(modify_url(base_url, path="datadownload.html"))

pg %>%
  html_nodes("a[href^='datadownload']") %>%
  html_attr("href") %>%
  pbsapply(function(x) {
    dfile <- basename(x)
    invisible(try(GET(modify_url(base_url, path=x),
                      write_disk(file.path(data_dir, dfile))),
                  silent=TRUE))
    dfile
  }) -> data_files

We’re not using data.table operations for speed (it’s not very large data). fread just worked better than readr::read_csv. It does have the added benefit of being fast.

rbindlist(pblapply(data_files, function(x) {
  data.table::fread(file.path(data_dir, x), verbose=FALSE)
}), fill=TRUE) -> walking

walking$Date <- parse_date_time(walking$Date, orders=c("mdY", "dby", "dmY"))

Now we get the counts by day of week and hour of day. There are many ways to visualize this data. This is just one of them.

walking <- gather(walking, location, count, -Date, -Hour)
walking$count <- as.numeric(walking$count)
walking <- filter(walking, !is.na(count))
walking$weekday <- wday(walking$Date, label=TRUE)

walking %>% count(weekday, Hour, wt=count) -> wday_totals

tbl_df(wday_totals) %>%
  mutate(Hour=factor(Hour),
         weekday=factor(weekday)) %>%
  rename(`Total Walkers\n(log scale)`=n) -> wday_totals

And, finally, we plot it heatmap style.

parula <- c('#352a87', '#0363e1', '#1485d4', '#06a7c6', '#38b99e',
            '#92bf73', '#d9ba56', '#fcce2e', '#f9fb0e')

palette <- parula

ggplot(wday_totals, aes(x=Hour, y=weekday)) +
  geom_tile(aes(fill=`Total Walkers\n(log scale)`), color="white", size=0.5) +
  scale_x_discrete(expand=c(0, 0)) +
  scale_y_discrete(expand=c(0, 0)) +
  scale_fill_gradientn(label=comma, colours=palette) +
  coord_equal() +
  labs(x=NULL, y=NULL, title="Melbourne Walkers (Time of Day/Day of Week)\n") +
  theme(plot.title=element_text(face="bold", hjust=0, size=16)) +
  theme(legend.key.width=unit(2, "cm")) +
  theme(legend.position="bottom")

And, we can also make this interactive:

library(d3heatmap)

wday_totals %>%
  select(weekday=1, hour=2, total=3) %>% 
  spread(hour, total) %>% 
  data.frame -> wday_df

rownames(wday_df) <- wday_df$weekday

select(wday_df, -weekday) %>% 
  as.matrix() %>% 
  d3heatmap(width=600, height=300,dendrogram="none", colors=parula)

19309791241837924916702288390866300917432655959918184032824558223898538529940067882317446514751017573804636931528606978744871753522534278099922733801735754109981756395429571719165714898114513931116611245963352105732424449221114562354560022090100819532412750795173831748316564103249366519413388026829642762360370169130710251234261812057371258721104621174973302731315134384190381203855188048450457654465199028246957065073484827238522845262710894854710256042380703200487257388018466431124771548534280438170187133792126978320974128033037082587855828515585846181855678839926795197613697553828749809985537381074546071930294506027332867426996512154302135011060685430279418387313849813265232453512775193706772783303652448294616142559620891721169772684768118675505928615081109431207553447482265336090252923452227541814705087195913793482405561830391656713437271281992364270477917935277621485293560162629697378102755828238873826988894963581169593188521026552352500657642991783946757288326517403519623406129664341422760382838494507769501621814371266534440140035499044705708778343597835277796604477165957121468593862049100384062458371135237925512920990X0X1X2X3X4X5X6X7X8X9X10X11X12X13X14X15X16X17X18X19X20X21X22X23SunMonTuesWedThursFriSat