Choropleth mapping

Example of flow data: loading, manipulating, visualising

The purpose of this document is to illustrate techniques for manipulating flow data, resulting in a choropleth map of the number of commuters flowing to each of 1744 small areas in Sheffield.

The first stage is to load the data, which originates as a Nomis .csv file, but which has been pre-processed for use in R:

load("flows1.RData")  # Load data
shef.flow[c(1, 2, 3, 50, 1000), ]  # Data format
##               from         to n
## 1434894 00CGFA0001 00CEFW0011 3
## 1434895 00CGFA0001 00CFFF0013 3
## 1434896 00CGFA0001 00CFFF0032 3
## 1434943 00CGFA0002 00BYFQ0007 3
## 1435893 00CGFA0028 00CGFX0032 3
summary(shef.flow)
##          from                to              n        
##  00CGFD0006:   81   00CGFX0017: 1604   Min.   : 3.00  
##  00CGFN0007:   68   00CGFE0005: 1426   1st Qu.: 3.00  
##  00CGFD0004:   67   00CGFD0038: 1209   Median : 3.00  
##  00CGGE0006:   66   00CGFX0016: 1095   Mean   : 4.01  
##  00CGFG0011:   65   00CGGB0002:  975   3rd Qu.: 3.00  
##  00CGFK0023:   64   00CGFL0032:  955   Max.   :96.00  
##  (Other)   :54139   (Other)   :47286


library(maptools)
## Loading required package: foreign
## Loading required package: sp
## Loading required package: lattice
## Checking rgeos availability: TRUE
oas <- readShapePoly("allmodes.shp")  # Load the shapes
plot(oas)

plot of chunk unnamed-chunk-1


sum(shef.flow$n)  # Number of commuters in flow data
## [1] 218841
sum(oas$KS0150001)  # Number of commuters in Casweb data
## [1] 218483

Only include those working in Sheffield, simplify

shef.flow <- shef.flow[which(shef.flow$to %in% oas$ZONE_CODE), ]
length(levels(shef.flow$from))  # 165 thousand entries is too many
## [1] 175428
shef.flow$from <- factor(shef.flow$from)  # Remove superfluous data
shef.flow$to <- factor(shef.flow$to)  # Remove superfluous data
shef.flow$to <- as.character(shef.flow$to)  # Make to variable character

Test that we can calculate flows out of places correctly

it <- which(shef.flow$from == oas$ZONE_CODE[1])
sum(shef.flow$n[it])
## [1] 121
oas$KS0150001[1]
## [1] 146
it <- which(shef.flow$from == oas$ZONE_CODE[5])
sum(shef.flow$n[it])
## [1] 98
oas$KS0150001[5]
## [1] 155

Calculate total flows to and from areas

for (i in 1:nrow(oas)) {
    it <- which(shef.flow$from == oas$ZONE_CODE[i])
    to <- which(shef.flow$to == oas$ZONE_CODE[i])
    shef.oa$from.flow[i] <- sum(shef.flow$n[it])
    shef.oa$to.flow[i] <- sum(shef.flow$n[to])
}

Compare Casweb data with from data calculated

plot(shef.oa$KS0150001, shef.oa$from.flow)  # Looks like a good correlation, above step worked

plot of chunk unnamed-chunk-5

Plot and save the results of incoming flows

class(shef.oa$to.flow)
## [1] "integer"
oas$to <- as.numeric(shef.oa$to.flow)
library(GISTools)
## Loading required package: RColorBrewer
## Loading required package: MASS
## Loading required package: rgeos
## Loading required package: stringr
## Loading required package: plyr
## rgeos: (SVN revision 348) GEOS runtime version: 3.3.3-CAPI-1.7.4 Polygon
## checking: TRUE

shades <- shading(breaks = c(50, 100, 200, 500), cols = brewer.pal(5, "Blues"))
choropleth(oas, v = oas$to, shades)
bbox(oas)  # Bounding box of map
##      min    max
## x 413273 445071
## y 378721 400756
legend.loc <- c(bbox(oas)[1, 1] + 100, bbox(oas)[2, 1] + 22000)
choro.legend(legend.loc[1], legend.loc[2], shades)  # Looks clunky

plot of chunk unnamed-chunk-7

The function writeSpatialShape(oas, “oas”) was used to write the results to file.

Mapping with ggplot

# From https://github.com/hadley/ggplot2/wiki/plotting-polygon-shapefiles
class(oas)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

require("rgdal")  # requires sp, will use proj.4 if installed
## Loading required package: rgdal
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.1, released 2012/05/15 Path to GDAL shared
## files: /usr/share/gdal/1.9 Loaded PROJ.4 runtime: Rel. 4.7.1, 23 September
## 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files: (autodetected)
require("ggplot2")
## Loading required package: ggplot2
gpclibPermit()  # required for fortify method
## [1] TRUE

shef.gg <- fortify(oas)  # Region names?
## Regions defined for each Polygons
head(shef.gg)
##     long    lat order  hole piece group id
## 1 434252 389328     1 FALSE     1   0.1  0
## 2 434210 389419     2 FALSE     1   0.1  0
## 3 434135 389421     3 FALSE     1   0.1  0
## 4 434028 389498     4 FALSE     1   0.1  0
## 5 434012 389543     5 FALSE     1   0.1  0
## 6 434039 389602     6 FALSE     1   0.1  0
class(shef.gg$id)
## [1] "character"
# shef.gg$id <- factor(shef.gg) # Not working
length(unique(shef.gg$id))  # Shows there is the correct number of zones
## [1] 1744
shef.oa$id <- 0:1743
shef.df <- join(shef.gg, shef.oa, by = "id")
head(shef.df)
##     long    lat order  hole piece group id  ZONE_CODE KS0150001 KS0150002
## 1 434252 389328     1 FALSE     1   0.1  0 00CGFZ0009       146        10
## 2 434210 389419     2 FALSE     1   0.1  0 00CGFZ0009       146        10
## 3 434135 389421     3 FALSE     1   0.1  0 00CGFZ0009       146        10
## 4 434028 389498     4 FALSE     1   0.1  0 00CGFZ0009       146        10
## 5 434012 389543     5 FALSE     1   0.1  0 00CGFZ0009       146        10
## 6 434039 389602     6 FALSE     1   0.1  0 00CGFZ0009       146        10
##   from.flow to.flow
## 1       121    2132
## 2       121    2132
## 3       121    2132
## 4       121    2132
## 5       121    2132
## 6       121    2132
ggplot(shef.df) + aes(long, lat, fill = to.flow) + geom_polygon() + geom_path(color = "white") + 
    coord_equal() + scale_fill_continuous()

plot of chunk unnamed-chunk-9

Plotted on qgis instead

This was plotted using QGIS, loading the file “oas.shp” saved above:

QGIS plot