We start by using province level data for only 3 provinces, Noord-Brabant Utrecht and Zuid-Holland to test the process. This data has already been loaded, and is described here. An rbitrary employment rate (M) and number of workers (W) are set, such that they add-up.
x = c("ggplot2", "sp", "rgeos", "mapproj", "rgdal", "maptools")
lapply(x, require, character.only = T) # the R packages we'll be using
## Loading required package: sp
## Loading required package: rgeos
## rgeos version: 0.2-17, (SVN revision 392) GEOS runtime version:
## 3.3.8-CAPI-1.7.8 Polygon checking: TRUE
## Loading required package: mapproj
## Loading required package: maps
## Loading required package: rgdal
## rgdal: version: 0.8-9, (SVN revision 470) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.10.0, released 2013/04/24 but rgdal build and GDAL runtime not in sync:
## ... consider re-installing rgdal!! Path to GDAL shared files:
## /usr/share/gdal/1.10 Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012,
## [PJ_VERSION: 480] Path to PROJ.4 shared files: (autodetected)
## Loading required package: maptools
## Loading required package: grid
## Loading required package: lattice
## Warning: the specification for class "im" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class
## Warning: the specification for class "owin" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class
## Warning: the specification for class "ppp" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class
## Warning: the specification for class "psp" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class
## Checking rgeos availability: TRUE
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
load(".RData")
r3@data[1:5] # This is the province-level data
## c2 Shape_Leng name code_hasc Vervoerwijzen
## 1 NB 5.374 Noord-Brabant NL.NB Noord-Brabant
## 2 UT 2.896 Utrecht NL.UT Utrecht
## 3 ZH 4.157 Zuid-Holland NL.ZH Zuid-Holland
## 4 NH 4.488 Noord-Holland NL.NH Noord-Holland
## 5 DR 2.964 Drenthe NL.DR Drenthe
## 6 FR 5.914 Friesland NL.FR Friesland
## 7 GE 5.820 Gelderland NL.GE Gelderland
## 8 GR 3.325 Groningen NL.GR Groningen
## 9 LI 4.515 Limburg NL.LI Limburg
## 10 OV 4.323 Overijssel NL.OV Overijssel
## 11 FL 2.675 Flevoland NL.FL Flevoland
## 12 ZE 5.779 Zeeland NL.ZE Zeeland
names(r3)
## [1] "c2" "Shape_Leng"
## [3] "name" "code_hasc"
## [5] "Vervoerwijzen" "Totaal.vervoerwijzen"
## [7] "Car.d" "Car.p"
## [9] "Train" "Bus.tram.metro"
## [11] "Moto" "Bicycle"
## [13] "Walk" "Other"
## [15] "Totaal.vervoerwijzen.1" "Car.d.1"
## [17] "Car.p.1" "Train.1"
## [19] "Bus.tram.metro.1" "Moto.1"
## [21] "Bicycle.1" "Walk.1"
## [23] "Other.1" "Totaal.vervoerwijzen.2"
## [25] "Car.d.2" "Car.p.2"
## [27] "Train.2" "Bus.tram.metro.2"
## [29] "Moto.2" "Bicycle.2"
## [31] "Walk.2" "Other.2"
## [33] "etot" "id"
## [35] "pops" "area"
## [37] "popdens" "EAV"
munis <- r3[1:3, ]
munis@data <- munis@data[, c("c2", "name", "area", "pops", "popdens", "EAV")]
munis$M <- munis$pops * c(0.3, 0.4, 0.5) # estimate of employment in each municipality
munis$M
## [1] 724.5 472.0 1765.0
munis$W <- sum(munis$M) * (munis$pops/sum(munis$pops)) # set n. workers proportional to population
sum(munis$M) == sum(munis$W) # check they add-up
## [1] TRUE
The next stage is to calculate the distance matrix from zone j to destination zone k:
coordinates(munis)
## [,1] [,2]
## 1 898241 209099
## 2 890973 269256
## 3 847023 246749
library(maptools)
d <- matrix(nrow = nrow(munis), ncol = (nrow(munis))) # 3 by 3 distance matrix
for (i in 1:nrow(munis)) {
d[i, ] <- spDists(coordinates(munis), coordinates(munis[i, ]))
}
d <- d/1000 # distance matrix
d
## [,1] [,2] [,3]
## [1,] 0.00 60.59 63.57
## [2,] 60.59 0.00 49.38
## [3,] 63.57 49.38 0.00
Now we implement the 'naive', simple measure, a proxy for the expected average distance of commute.
for (j in 1:nrow(munis)) {
munis$amean[j] <- sum(munis$M[-j]/sum(munis$M[-j]) * d[j, -j])
}
munis@data # zuid-holland has lowest average employment-weighted work dis.
## c2 name area pops popdens EAV M W amean
## 1 NB Noord-Brabant 5229 2415 461.8 40.48 724.5 1003.8 62.94
## 2 UT Utrecht 1403 1180 841.3 37.84 472.0 490.5 52.64
## 3 ZH Zuid-Holland 3208 3530 1100.3 32.21 1765.0 1467.2 57.97
sum(munis$W/sum(munis$W) * munis$amean) # employment-weighted arithmetic mean
## [1] 58.77
mean(munis$amean)
## [1] 57.85
for (j in 1:nrow(munis)) {
munis$hmean[j] <- 1/sum(munis$M[-j]/sum(munis$M[-j])/d[j, -j])
}
munis@data
## c2 name area pops popdens EAV M W amean hmean
## 1 NB Noord-Brabant 5229 2415 461.8 40.48 724.5 1003.8 62.94 62.92
## 2 UT Utrecht 1403 1180 841.3 37.84 472.0 490.5 52.64 52.19
## 3 ZH Zuid-Holland 3208 3530 1100.3 32.21 1765.0 1467.2 57.97 57.09
sum(munis$W/sum(munis$W) * munis$hmean) # employment-weighted arithmetic mean
## [1] 58.26
plot(munis)
for (k in 1:nrow(munis)) {
lines(c(coordinates(munis[1, ])[1], coordinates(munis[k, ])[1]), c(coordinates(munis[1,
])[2], coordinates(munis[k, ])[2]))
}
These results seem plausible, although the difference between harmonic and arithmetic mean seems small. However, testing over all 12 Dutch provinces shows that the harmonic mean is significantly lower than the standard mean and that the two measures are very closely correlated (r = 0.99).