The purpose of this document is to illustrate techniques for manipulating flow data, resulting in a choropleth map of the number of commuters flowing to each of 1744 small areas in Sheffield.
The first stage is to load the data, which originates as a Nomis .csv file, but which has been pre-processed for use in R:
load("flows1.RData") # Load data
shef.flow[c(1, 2, 3, 50, 1000), ] # Data format
## from to n
## 1434894 00CGFA0001 00CEFW0011 3
## 1434895 00CGFA0001 00CFFF0013 3
## 1434896 00CGFA0001 00CFFF0032 3
## 1434943 00CGFA0002 00BYFQ0007 3
## 1435893 00CGFA0028 00CGFX0032 3
summary(shef.flow)
## from to n
## 00CGFD0006: 81 00CGFX0017: 1604 Min. : 3.00
## 00CGFN0007: 68 00CGFE0005: 1426 1st Qu.: 3.00
## 00CGFD0004: 67 00CGFD0038: 1209 Median : 3.00
## 00CGGE0006: 66 00CGFX0016: 1095 Mean : 4.01
## 00CGFG0011: 65 00CGGB0002: 975 3rd Qu.: 3.00
## 00CGFK0023: 64 00CGFL0032: 955 Max. :96.00
## (Other) :54139 (Other) :47286
library(maptools)
## Loading required package: foreign
## Loading required package: sp
## Loading required package: lattice
## Checking rgeos availability: TRUE
oas <- readShapePoly("allmodes.shp") # Load the shapes
plot(oas)
sum(shef.flow$n) # Number of commuters in flow data
## [1] 218841
sum(oas$KS0150001) # Number of commuters in Casweb data
## [1] 218483
shef.flow <- shef.flow[which(shef.flow$to %in% oas$ZONE_CODE), ]
length(levels(shef.flow$from)) # 165 thousand entries is too many
## [1] 175428
shef.flow$from <- factor(shef.flow$from) # Remove superfluous data
shef.flow$to <- factor(shef.flow$to) # Remove superfluous data
shef.flow$to <- as.character(shef.flow$to) # Make to variable character
it <- which(shef.flow$from == oas$ZONE_CODE[1])
sum(shef.flow$n[it])
## [1] 121
oas$KS0150001[1]
## [1] 146
it <- which(shef.flow$from == oas$ZONE_CODE[5])
sum(shef.flow$n[it])
## [1] 98
oas$KS0150001[5]
## [1] 155
for (i in 1:nrow(oas)) {
it <- which(shef.flow$from == oas$ZONE_CODE[i])
to <- which(shef.flow$to == oas$ZONE_CODE[i])
shef.oa$from.flow[i] <- sum(shef.flow$n[it])
shef.oa$to.flow[i] <- sum(shef.flow$n[to])
}
plot(shef.oa$KS0150001, shef.oa$from.flow) # Looks like a good correlation, above step worked
class(shef.oa$to.flow)
## [1] "integer"
oas$to <- as.numeric(shef.oa$to.flow)
library(GISTools)
## Loading required package: RColorBrewer
## Loading required package: MASS
## Loading required package: rgeos
## Loading required package: stringr
## Loading required package: plyr
## rgeos: (SVN revision 348) GEOS runtime version: 3.3.3-CAPI-1.7.4 Polygon
## checking: TRUE
shades <- shading(breaks = c(50, 100, 200, 500), cols = brewer.pal(5, "Blues"))
choropleth(oas, v = oas$to, shades)
bbox(oas) # Bounding box of map
## min max
## x 413273 445071
## y 378721 400756
legend.loc <- c(bbox(oas)[1, 1] + 100, bbox(oas)[2, 1] + 22000)
choro.legend(legend.loc[1], legend.loc[2], shades) # Looks clunky
The function writeSpatialShape(oas, “oas”) was used to write the results to file.
# From https://github.com/hadley/ggplot2/wiki/plotting-polygon-shapefiles
class(oas)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
require("rgdal") # requires sp, will use proj.4 if installed
## Loading required package: rgdal
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 1.9.1, released 2012/05/15 Path to GDAL shared
## files: /usr/share/gdal/1.9 Loaded PROJ.4 runtime: Rel. 4.7.1, 23 September
## 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files: (autodetected)
require("ggplot2")
## Loading required package: ggplot2
gpclibPermit() # required for fortify method
## [1] TRUE
shef.gg <- fortify(oas) # Region names?
## Regions defined for each Polygons
head(shef.gg)
## long lat order hole piece group id
## 1 434252 389328 1 FALSE 1 0.1 0
## 2 434210 389419 2 FALSE 1 0.1 0
## 3 434135 389421 3 FALSE 1 0.1 0
## 4 434028 389498 4 FALSE 1 0.1 0
## 5 434012 389543 5 FALSE 1 0.1 0
## 6 434039 389602 6 FALSE 1 0.1 0
class(shef.gg$id)
## [1] "character"
# shef.gg$id <- factor(shef.gg) # Not working
length(unique(shef.gg$id)) # Shows there is the correct number of zones
## [1] 1744
shef.oa$id <- 0:1743
shef.df <- join(shef.gg, shef.oa, by = "id")
head(shef.df)
## long lat order hole piece group id ZONE_CODE KS0150001 KS0150002
## 1 434252 389328 1 FALSE 1 0.1 0 00CGFZ0009 146 10
## 2 434210 389419 2 FALSE 1 0.1 0 00CGFZ0009 146 10
## 3 434135 389421 3 FALSE 1 0.1 0 00CGFZ0009 146 10
## 4 434028 389498 4 FALSE 1 0.1 0 00CGFZ0009 146 10
## 5 434012 389543 5 FALSE 1 0.1 0 00CGFZ0009 146 10
## 6 434039 389602 6 FALSE 1 0.1 0 00CGFZ0009 146 10
## from.flow to.flow
## 1 121 2132
## 2 121 2132
## 3 121 2132
## 4 121 2132
## 5 121 2132
## 6 121 2132
ggplot(shef.df) + aes(long, lat, fill = to.flow) + geom_polygon() + geom_path(color = "white") +
coord_equal() + scale_fill_continuous()
This was plotted using QGIS, loading the file “oas.shp” saved above: