モラン統計量(I)やギアリー統計量(C)などの空間統計量 spatial statistic をRで計算する.データはHarrison and Rubinfeld (1978, JEEM)で用いられたMA州ボストンの住宅価格データ(cross-section, N = 506).

See

Boston housing data

library(spdep)
## Warning: package 'spdep' was built under R version 3.3.3
## Loading required package: sp
## Loading required package: Matrix
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(maptools)
## Warning: package 'maptools' was built under R version 3.3.3
## Checking rgeos availability: FALSE
##      Note: when rgeos is not available, polygon geometry     computations in maptools depend on gpclib,
##      which has a restricted licence. It is disabled by default;
##      to enable gpclib, type gpclibPermit()
data(boston, package = "spdep")  # load
## Warning in data(boston, package = "spdep"): data set 'boston' not found
head(boston.c, 2)  # corrected data
##         TOWN TOWNNO TRACT     LON     LAT MEDV CMEDV    CRIM ZN INDUS CHAS
## 1     Nahant      0  2011 -70.955 42.2550 24.0  24.0 0.00632 18  2.31    0
## 2 Swampscott      1  2021 -70.950 42.2875 21.6  21.6 0.02731  0  7.07    0
##     NOX    RM  AGE    DIS RAD TAX PTRATIO     B LSTAT
## 1 0.538 6.575 65.2 4.0900   1 296    15.3 396.9  4.98
## 2 0.469 6.421 78.9 4.9671   2 242    17.8 396.9  9.14
head(boston.utm, 2)  # coordinate (UTM zone 19), class = matrix
##        x       y
## 1 338.73 4679.73
## 2 339.23 4683.33
bh_price_class <- cut(x = boston.c$CMEDV, breaks = 10)  # price (num) -> interval (factor)
# breaks部分は seq(0, 50, length = 11) と同じ
plot(boston.utm, col = rev(heat.colors(10))[bh_price_class], main = "Spatial distribution: median price", cex = .8, pch = 20)
legend(x = "topleft", col = rev(heat.colors(10)), legend = levels(bh_price_class), pch = 20, cex = .7)

# text(x = boston.utm[, 1], y = boston.utm[, 2], labels = boston.c$TOWN, cex = .5)

Census tractのポリゴンファイルもある. (追記:いつの間にかパッケージから削除されたみたい)

# bh_poly <- maptools::readShapePoly(fn = system.file("etc/shapes/boston_tracts.shp", package="spdep")[1], IDvar = "poltract")
# データの並び順が違うので,色をつけるために並び替える
# bh_price_ordered_class <- cut(x = boston.c$CMEDV[order(boston.c$TRACT)], breaks = 10)  # price (num) -> interval (factor)
# plot(bh_poly, col = rev(heat.colors(10))[bh_price_ordered_class], main = "Choropleth map: median price")
# legend(x = "topleft", col = rev(heat.colors(10)), legend = levels(bh_price_ordered_class), pch = 15, cex = .7)

Spatial weight matrix

隣接関係-based SWM (contiguity/connectivity)

bh_swm_nb_w <- nb2listw(neighbours = boston.soi, style = "W")  # neighbours list -> SWM, row standardised
bh_swm_nb_b <- nb2listw(neighbours = boston.soi, style = "B")  # neighbours list -> SWM, binary

kNN (k-nearest neighbours)-based SWM

bh_swm_knn <- nb2listw(neighbours = knn2nb(knearneigh(boston.utm, k = 5)))

距離-based SWM

bh_dist_cutoff <- bh_dist <- as.matrix(dist(boston.utm))  # 距離行列
# without cutoff
bh_w <- 1 / bh_dist^2  # 重力モデルのアナロジーより alpha = 2
diag(bh_w) <- 0  # 対角要素は 0
bh_w_std <- bh_w / apply(X = bh_w, MARGIN = 1, FUN = sum)  # 行基準化
bh_listw_std <- mat2listw(x = bh_w_std)  # matrix -> list
# with cutoff
bh_dist_cutoff[bh_dist_cutoff > 10] <- Inf  # cutt-off
bh_w_cutoff <- 1 / bh_dist_cutoff^2
diag(bh_w_cutoff) <- 0
bh_w_cutoff_std <- bh_w_cutoff / apply(X = bh_w_cutoff, MARGIN = 1, FUN = sum)
bh_listw_cutoff_std <- mat2listw(x = bh_w_cutoff_std)

Global stat, GISA

Grobal Moran’s I

\[ I = \frac{N}{\sum_i \sum_j w_{ij}} \frac{\sum_i \sum_j w_{ij} z_i z_j}{\sum_i z_i}, \quad z_i := x_i - \bar{x} \]

-1 / (nrow(boston.c) - 1)  # expectation
## [1] -0.001980198
moran.test(x = boston.c$CMEDV, listw = bh_swm_nb_w)  # contiguity-based, row-standardized
## 
##  Moran I test under randomisation
## 
## data:  boston.c$CMEDV  
## weights: bh_swm_nb_w    
## 
## Moran I statistic standard deviate = 21.786, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.690285059      -0.001980198       0.001009685
moran.test(x = boston.c$CMEDV, listw = bh_swm_nb_b)  # contiguity-based, binary
## 
##  Moran I test under randomisation
## 
## data:  boston.c$CMEDV  
## weights: bh_swm_nb_b    
## 
## Moran I statistic standard deviate = 22.279, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.672743590      -0.001980198       0.000917222
moran.test(x = boston.c$CMEDV, listw = bh_swm_knn)  # knn-based
## 
##  Moran I test under randomisation
## 
## data:  boston.c$CMEDV  
## weights: bh_swm_knn    
## 
## Moran I statistic standard deviate = 23.604, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6165796285     -0.0019801980      0.0006867517
moran.test(x = boston.c$CMEDV, listw = bh_listw_std)  # distance-based, row-standardized
## 
##  Moran I test under randomisation
## 
## data:  boston.c$CMEDV  
## weights: bh_listw_std    
## 
## Moran I statistic standard deviate = 24.467, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.3568127598     -0.0019801980      0.0002150391
moran.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)  # distance-based, row-standardized, w/ cutoff
## 
##  Moran I test under randomisation
## 
## data:  boston.c$CMEDV  
## weights: bh_listw_cutoff_std    
## 
## Moran I statistic standard deviate = 24.231, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.394345588      -0.001980198       0.000267528

Geary’s C

\[ C = \frac{N - 1}{2 (\sum_i \sum_j w_{ij})} \frac{\sum_i \sum_j w_{ij} (x_i - x_j)^2}{\sum_i (x_i - \bar{x})^2} \]

geary.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
## 
##  Geary C test under randomisation
## 
## data:  boston.c$CMEDV 
## weights: bh_listw_cutoff_std 
## 
## Geary C statistic standard deviate = 19.591, p-value < 2.2e-16
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic       Expectation          Variance 
##      0.5975587655      1.0000000000      0.0004219684

Global G

globalG.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
## Warning in globalG.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std):
## Binary weights recommended (sepecially for distance bands)
## 
##  Getis-Ord global G statistic
## 
## data:  boston.c$CMEDV 
## weights: bh_listw_cutoff_std 
## 
## standard deviate = 3.4088, p-value = 0.0003263
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       2.021121e-03       1.980198e-03       1.441230e-10
globalG.test(x = boston.c$CMEDV, listw = bh_swm_nb_b)
## 
##  Getis-Ord global G statistic
## 
## data:  boston.c$CMEDV 
## weights: bh_swm_nb_b 
## 
## standard deviate = 8.9113, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Global G statistic        Expectation           Variance 
##       9.362141e-03       8.421712e-03       1.113695e-08

Join(t) Counts

joincount.test(fx = cut(x = boston.c$CMEDV, breaks = 2), listw = bh_listw_cutoff_std)
## 
##  Join count test under nonfree sampling
## 
## data:  cut(x = boston.c$CMEDV, breaks = 2) 
## weights: bh_listw_cutoff_std 
## 
## Std. deviate for (4.96,27.5] = 13.623, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            175.520939            158.019802              1.650292 
## 
## 
##  Join count test under nonfree sampling
## 
## data:  cut(x = boston.c$CMEDV, breaks = 2) 
## weights: bh_listw_cutoff_std 
## 
## Std. deviate for (27.5,50] = 11.654, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic           Expectation              Variance 
##            19.6835358            11.0198020             0.5526433

Local stat, LISA

Local Moran’s I

\[ I_i = \frac{z_i}{\frac{1}{n} \sum_i z_i^2} \sum_j w_{ij} z_i, \quad z_i := x_i - \bar{x} \]

bh_localm <- localmoran(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
summary(bh_localm[, 1])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.73760  0.01044  0.11810  0.39430  0.62360  3.18000
bh_localm_class <- cut(x = bh_localm[, 1], breaks = 10)  # local I (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localm_class], main = "Local Moran's I", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)

Moran scatterplot

bh_price_std <- boston.c$CMEDV - mean(boston.c$CMEDV)  # standardized x
bh_w_price <- bh_w_cutoff_std %*% bh_price_std  # spatial lag of standardized x
plot(x = bh_price_std, y = bh_w_price, main = "Moran scatterplot")
abline(h = 0, v = 0, lty = 2)

# 有意なI_iかどうかを o/x で区別する場合
plot(x = bh_price_std, y = bh_w_price, main = "Moran scatterplot (x = not signif.)", pch = ifelse(test = bh_localm[, 5] < .05, yes = 16, no = 4))
abline(h = 0, v = 0, lty = 2)

moran.plot(x = boston.c$CMEDV, listw = bh_listw_cutoff_std, quiet = T)  # potentially influential observationsにはラベルがつく

空間分布図で hot spot (high-high, color = pink), cold spot (low-low, color = cyan) を示す.(少なくとも今回のケースでは)spatial outlier (high-low or low-high) は有意にならない.

bh_localm_col2 <- cm.colors(10)[bh_localm_class]
bh_localm_col2[bh_localm[, 5] > .05] <- "gray"
plot(boston.utm, col = bh_localm_col2, main = "Local Moran's I (x = not signif.)", cex = ifelse(test = bh_localm[, 5] < .05, yes = 1, no = .5), pch = ifelse(test = bh_localm[, 5] < .05, yes = 16, no = 4))
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)

Local G and G*

\[ G_i = \frac{\sum_{j \ne i} w_{ij} x_j}{\sum_j x_j} \]

bh_localg <- localG(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
attr(bh_localg, "gstari")
## [1] FALSE
bh_localg_class <- cut(x = bh_localg, breaks = 10)  # local G (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localm_class], main = "Local G", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)

\[ G_{i}^{*} = \frac{\sum_j w_{ij} x_j}{\sum_j x_j} \]

bh_localgs <- localG(x = boston.c$CMEDV, listw = nb2listw(neighbours = include.self(boston.soi), style = "B"))
attr(bh_localgs, "gstari")
## [1] TRUE
bh_localgs_class <- cut(x = bh_localgs, breaks = 10)  # local G* (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localgs_class], main = "Local G*", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localgs_class), pch = 20, cex = .7)

Local H (LOSH)

See Ord and Getis (2012, ARS).

\[ \bar{x}_i = \frac{\sum_j w_{ij} x_j}{\sum_j w_{ij}}, \quad e_j = x_j - \bar{x_i}, j \in N(i) \] \[ H_i = \frac{\sum_j w_{ij} |e_j|^{\alpha}}{\sum_j w_{ij}} \]

\(N(i)\)はi自身を含むiの近傍で,\(\alpha = 1\)はabsolute deviation measure,\(\alpha = 2\)はvariance measureらしい.

localH <- function(x, W, alpha = 2) {
  xi <- ej <- hi <- numeric(length = length(x))
  for (i in 1:length(x)) {
    xi[i] <- sum(W[i,] * x) / sum(W[i,])
  }
  # for (j in 1:length(x)) {
  #   ej[j] <- x[j] - xi[j]
  # }
  ej <- x - xi
  for (i in 1:length(x)) {
    hi[i] <- sum(W[i,] * abs(ej)^alpha) / sum(W[i,])
  }
  return(hi)
}
bh_localh <- localH(x = boston.c$CMEDV, W = bh_w_cutoff_std)

bh_localh_class <- cut(x = bh_localh, breaks = 10)  # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_class], main = "Local H (user-defined function 'localH' w/ distance-based W)", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_class), pch = 20, cex = .7)

パッケージ spdep に関数が追加されたので試す(情報提供者:高野さん).

bh_localh_spdep <- spdep::LOSH(x = boston.c$CMEDV, listw = nb2listw(boston.soi))
head(bh_localh_spdep)
##             Hi E.Hi    Var.Hi      Z.Hi  x_bar_i         ei
## [1,] 1.3130740    1 1.4509626 1.8099349 20.92500   9.455625
## [2,] 0.9564493    1 0.8241666 2.3210095 21.25714   0.117551
## [3,] 0.5478950    1 1.1584578 0.9459041 24.58000 102.414400
## [4,] 2.6597341    1 1.9384707 2.7441571 31.26667   4.551111
## [5,] 1.3370733    1 1.9384707 1.3795136 28.33333  61.884444
## [6,] 2.3725774    1 5.8385349 0.8127304 36.20000  56.250000
summary(bh_localh_spdep[, "Hi"])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.1496  0.3532  1.0490  1.2220 16.9100
bh_localh_spdep_class <- cut(x = bh_localh_spdep[, "Hi"], breaks = 10)  # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_spdep_class], main = "Local H (spdep function 'LOSH')", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_spdep_class), pch = 20, cex = .7)

上記の自作関数 localH の計算結果と比較するために,Wの定義を揃えて再計算する(スケールがズレていますが…).

bh_localh_nb_w <- localH(x = boston.c$CMEDV, W = nb2mat(neighbours = boston.soi, style = "W"))

bh_localh_nb_w_class <- cut(x = bh_localh_nb_w, breaks = 10)  # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_nb_w_class], main = "Local H (user-defined function 'localH')", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_nb_w_class), pch = 20, cex = .7)

なお,spdep::LOSH.cs 関数でp-valueまで計算できる.

bh_localh_spdep_cs <- spdep::LOSH.cs(x = boston.c$CMEDV, listw = nb2listw(boston.soi))
head(bh_localh_spdep_cs)
##             Hi E.Hi    Var.Hi      Z.Hi  x_bar_i         ei      Pr()
## 2011 1.3130740    1 1.4509626 1.8099349 20.92500   9.455625 0.2625797
## 2021 0.9564493    1 0.8241666 2.3210095 21.25714   0.117551 0.3980194
## 2022 0.5478950    1 1.1584578 0.9459041 24.58000 102.414400 0.5525571
## 2031 2.6597341    1 1.9384707 2.7441571 31.26667   4.551111 0.1018230
## 2032 1.3370733    1 1.9384707 1.3795136 28.33333  61.884444 0.2487123
## 2033 2.3725774    1 5.8385349 0.8127304 36.20000  56.250000 0.1242234

(Last modified on Jan 27, 2019)

_