Initial test of harmonic mean estimate from P. Rietveld

Setting-up the data

We start by using province level data for only 3 provinces, Noord-Brabant Utrecht and Zuid-Holland to test the process. This data has already been loaded, and is described here. An rbitrary employment rate (M) and number of workers (W) are set, such that they add-up.

x = c("ggplot2", "sp", "rgeos", "mapproj", "rgdal", "maptools")
lapply(x, require, character.only = T)  # the R packages we'll be using

## Loading required package: sp

## Loading required package: rgeos

## rgeos version: 0.2-17, (SVN revision 392) GEOS runtime version:
## 3.3.8-CAPI-1.7.8 Polygon checking: TRUE

## Loading required package: mapproj

## Loading required package: maps

## Loading required package: rgdal

## rgdal: version: 0.8-9, (SVN revision 470) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.10.0, released 2013/04/24 but rgdal build and GDAL runtime not in sync:
## ... consider re-installing rgdal!! Path to GDAL shared files:
## /usr/share/gdal/1.10 Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012,
## [PJ_VERSION: 480] Path to PROJ.4 shared files: (autodetected)

## Loading required package: maptools

## Loading required package: grid

## Loading required package: lattice

## Warning: the specification for class "im" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class

## Warning: the specification for class "owin" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class

## Warning: the specification for class "ppp" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class

## Warning: the specification for class "psp" in package 'maptools' seems
## equivalent to one from package 'sp' and is not turning on duplicate class
## definitions for this class

## Checking rgeos availability: TRUE

## [[1]]
## [1] TRUE
## 
## [[2]]
## [1] TRUE
## 
## [[3]]
## [1] TRUE
## 
## [[4]]
## [1] TRUE
## 
## [[5]]
## [1] TRUE
## 
## [[6]]
## [1] TRUE


load(".RData")
r3@data[1:5]  # This is the province-level data

##    c2 Shape_Leng          name code_hasc Vervoerwijzen
## 1  NB      5.374 Noord-Brabant     NL.NB Noord-Brabant
## 2  UT      2.896       Utrecht     NL.UT       Utrecht
## 3  ZH      4.157  Zuid-Holland     NL.ZH  Zuid-Holland
## 4  NH      4.488 Noord-Holland     NL.NH Noord-Holland
## 5  DR      2.964       Drenthe     NL.DR       Drenthe
## 6  FR      5.914     Friesland     NL.FR     Friesland
## 7  GE      5.820    Gelderland     NL.GE    Gelderland
## 8  GR      3.325     Groningen     NL.GR     Groningen
## 9  LI      4.515       Limburg     NL.LI       Limburg
## 10 OV      4.323    Overijssel     NL.OV    Overijssel
## 11 FL      2.675     Flevoland     NL.FL     Flevoland
## 12 ZE      5.779       Zeeland     NL.ZE       Zeeland

names(r3)

##  [1] "c2"                     "Shape_Leng"            
##  [3] "name"                   "code_hasc"             
##  [5] "Vervoerwijzen"          "Totaal.vervoerwijzen"  
##  [7] "Car.d"                  "Car.p"                 
##  [9] "Train"                  "Bus.tram.metro"        
## [11] "Moto"                   "Bicycle"               
## [13] "Walk"                   "Other"                 
## [15] "Totaal.vervoerwijzen.1" "Car.d.1"               
## [17] "Car.p.1"                "Train.1"               
## [19] "Bus.tram.metro.1"       "Moto.1"                
## [21] "Bicycle.1"              "Walk.1"                
## [23] "Other.1"                "Totaal.vervoerwijzen.2"
## [25] "Car.d.2"                "Car.p.2"               
## [27] "Train.2"                "Bus.tram.metro.2"      
## [29] "Moto.2"                 "Bicycle.2"             
## [31] "Walk.2"                 "Other.2"               
## [33] "etot"                   "id"                    
## [35] "pops"                   "area"                  
## [37] "popdens"                "EAV"

munis <- r3[1:3, ]
munis@data <- munis@data[, c("c2", "name", "area", "pops", "popdens", "EAV")]

munis$M <- munis$pops * c(0.3, 0.4, 0.5)  # estimate of employment in each municipality
munis$M

## [1]  724.5  472.0 1765.0


munis$W <- sum(munis$M) * (munis$pops/sum(munis$pops))  # set n. workers proportional to population
sum(munis$M) == sum(munis$W)  # check they add-up

## [1] TRUE

Calculate distance matrix

The next stage is to calculate the distance matrix from zone j to destination zone k:

coordinates(munis)

##     [,1]   [,2]
## 1 898241 209099
## 2 890973 269256
## 3 847023 246749

library(maptools)
d <- matrix(nrow = nrow(munis), ncol = (nrow(munis)))  # 3 by 3 distance matrix
for (i in 1:nrow(munis)) {
    d[i, ] <- spDists(coordinates(munis), coordinates(munis[i, ]))
}
d <- d/1000  # distance matrix
d

##       [,1]  [,2]  [,3]
## [1,]  0.00 60.59 63.57
## [2,] 60.59  0.00 49.38
## [3,] 63.57 49.38  0.00

The Naive measure

Now we implement the 'naive', simple measure, a proxy for the expected average distance of commute.

for (j in 1:nrow(munis)) {
    munis$amean[j] <- sum(munis$M[-j]/sum(munis$M[-j]) * d[j, -j])
}
munis@data  # zuid-holland has lowest average employment-weighted work dis.

##   c2          name area pops popdens   EAV      M      W amean
## 1 NB Noord-Brabant 5229 2415   461.8 40.48  724.5 1003.8 62.94
## 2 UT       Utrecht 1403 1180   841.3 37.84  472.0  490.5 52.64
## 3 ZH  Zuid-Holland 3208 3530  1100.3 32.21 1765.0 1467.2 57.97

sum(munis$W/sum(munis$W) * munis$amean)  # employment-weighted arithmetic mean

## [1] 58.77

mean(munis$amean)

## [1] 57.85

Sophisticated measure

for (j in 1:nrow(munis)) {
    munis$hmean[j] <- 1/sum(munis$M[-j]/sum(munis$M[-j])/d[j, -j])
}
munis@data

##   c2          name area pops popdens   EAV      M      W amean hmean
## 1 NB Noord-Brabant 5229 2415   461.8 40.48  724.5 1003.8 62.94 62.92
## 2 UT       Utrecht 1403 1180   841.3 37.84  472.0  490.5 52.64 52.19
## 3 ZH  Zuid-Holland 3208 3530  1100.3 32.21 1765.0 1467.2 57.97 57.09

sum(munis$W/sum(munis$W) * munis$hmean)  # employment-weighted arithmetic mean

## [1] 58.26

plot(munis)
for (k in 1:nrow(munis)) {
    lines(c(coordinates(munis[1, ])[1], coordinates(munis[k, ])[1]), c(coordinates(munis[1, 
        ])[2], coordinates(munis[k, ])[2]))
}

plot of chunk unnamed-chunk-4

These results seem plausible, although the difference between harmonic and arithmetic mean seems small. However, testing over all 12 Dutch provinces shows that the harmonic mean is significantly lower than the standard mean and that the two measures are very closely correlated (r = 0.99).