モラン統計量(I)やギアリー統計量(C)などの空間統計量 spatial statistic をRで計算する.データはHarrison and Rubinfeld (1978, JEEM)で用いられたMA州ボストンの住宅価格データ(cross-section, N = 506).
See
library(spdep)
## Warning: package 'spdep' was built under R version 3.3.3
## Loading required package: sp
## Loading required package: Matrix
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(maptools)
## Warning: package 'maptools' was built under R version 3.3.3
## Checking rgeos availability: FALSE
## Note: when rgeos is not available, polygon geometry computations in maptools depend on gpclib,
## which has a restricted licence. It is disabled by default;
## to enable gpclib, type gpclibPermit()
data(boston, package = "spdep") # load
## Warning in data(boston, package = "spdep"): data set 'boston' not found
head(boston.c, 2) # corrected data
## TOWN TOWNNO TRACT LON LAT MEDV CMEDV CRIM ZN INDUS CHAS
## 1 Nahant 0 2011 -70.955 42.2550 24.0 24.0 0.00632 18 2.31 0
## 2 Swampscott 1 2021 -70.950 42.2875 21.6 21.6 0.02731 0 7.07 0
## NOX RM AGE DIS RAD TAX PTRATIO B LSTAT
## 1 0.538 6.575 65.2 4.0900 1 296 15.3 396.9 4.98
## 2 0.469 6.421 78.9 4.9671 2 242 17.8 396.9 9.14
head(boston.utm, 2) # coordinate (UTM zone 19), class = matrix
## x y
## 1 338.73 4679.73
## 2 339.23 4683.33
bh_price_class <- cut(x = boston.c$CMEDV, breaks = 10) # price (num) -> interval (factor)
# breaks部分は seq(0, 50, length = 11) と同じ
plot(boston.utm, col = rev(heat.colors(10))[bh_price_class], main = "Spatial distribution: median price", cex = .8, pch = 20)
legend(x = "topleft", col = rev(heat.colors(10)), legend = levels(bh_price_class), pch = 20, cex = .7)
# text(x = boston.utm[, 1], y = boston.utm[, 2], labels = boston.c$TOWN, cex = .5)
Census tractのポリゴンファイルもある. (追記:いつの間にかパッケージから削除されたみたい)
# bh_poly <- maptools::readShapePoly(fn = system.file("etc/shapes/boston_tracts.shp", package="spdep")[1], IDvar = "poltract")
# データの並び順が違うので,色をつけるために並び替える
# bh_price_ordered_class <- cut(x = boston.c$CMEDV[order(boston.c$TRACT)], breaks = 10) # price (num) -> interval (factor)
# plot(bh_poly, col = rev(heat.colors(10))[bh_price_ordered_class], main = "Choropleth map: median price")
# legend(x = "topleft", col = rev(heat.colors(10)), legend = levels(bh_price_ordered_class), pch = 15, cex = .7)
隣接関係-based SWM (contiguity/connectivity)
bh_swm_nb_w <- nb2listw(neighbours = boston.soi, style = "W") # neighbours list -> SWM, row standardised
bh_swm_nb_b <- nb2listw(neighbours = boston.soi, style = "B") # neighbours list -> SWM, binary
kNN (k-nearest neighbours)-based SWM
bh_swm_knn <- nb2listw(neighbours = knn2nb(knearneigh(boston.utm, k = 5)))
距離-based SWM
bh_dist_cutoff <- bh_dist <- as.matrix(dist(boston.utm)) # 距離行列
# without cutoff
bh_w <- 1 / bh_dist^2 # 重力モデルのアナロジーより alpha = 2
diag(bh_w) <- 0 # 対角要素は 0
bh_w_std <- bh_w / apply(X = bh_w, MARGIN = 1, FUN = sum) # 行基準化
bh_listw_std <- mat2listw(x = bh_w_std) # matrix -> list
# with cutoff
bh_dist_cutoff[bh_dist_cutoff > 10] <- Inf # cutt-off
bh_w_cutoff <- 1 / bh_dist_cutoff^2
diag(bh_w_cutoff) <- 0
bh_w_cutoff_std <- bh_w_cutoff / apply(X = bh_w_cutoff, MARGIN = 1, FUN = sum)
bh_listw_cutoff_std <- mat2listw(x = bh_w_cutoff_std)
\[ I = \frac{N}{\sum_i \sum_j w_{ij}} \frac{\sum_i \sum_j w_{ij} z_i z_j}{\sum_i z_i}, \quad z_i := x_i - \bar{x} \]
-1 / (nrow(boston.c) - 1) # expectation
## [1] -0.001980198
moran.test(x = boston.c$CMEDV, listw = bh_swm_nb_w) # contiguity-based, row-standardized
##
## Moran I test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_swm_nb_w
##
## Moran I statistic standard deviate = 21.786, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.690285059 -0.001980198 0.001009685
moran.test(x = boston.c$CMEDV, listw = bh_swm_nb_b) # contiguity-based, binary
##
## Moran I test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_swm_nb_b
##
## Moran I statistic standard deviate = 22.279, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.672743590 -0.001980198 0.000917222
moran.test(x = boston.c$CMEDV, listw = bh_swm_knn) # knn-based
##
## Moran I test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_swm_knn
##
## Moran I statistic standard deviate = 23.604, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6165796285 -0.0019801980 0.0006867517
moran.test(x = boston.c$CMEDV, listw = bh_listw_std) # distance-based, row-standardized
##
## Moran I test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_listw_std
##
## Moran I statistic standard deviate = 24.467, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.3568127598 -0.0019801980 0.0002150391
moran.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std) # distance-based, row-standardized, w/ cutoff
##
## Moran I test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_listw_cutoff_std
##
## Moran I statistic standard deviate = 24.231, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.394345588 -0.001980198 0.000267528
\[ C = \frac{N - 1}{2 (\sum_i \sum_j w_{ij})} \frac{\sum_i \sum_j w_{ij} (x_i - x_j)^2}{\sum_i (x_i - \bar{x})^2} \]
geary.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
##
## Geary C test under randomisation
##
## data: boston.c$CMEDV
## weights: bh_listw_cutoff_std
##
## Geary C statistic standard deviate = 19.591, p-value < 2.2e-16
## alternative hypothesis: Expectation greater than statistic
## sample estimates:
## Geary C statistic Expectation Variance
## 0.5975587655 1.0000000000 0.0004219684
globalG.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
## Warning in globalG.test(x = boston.c$CMEDV, listw = bh_listw_cutoff_std):
## Binary weights recommended (sepecially for distance bands)
##
## Getis-Ord global G statistic
##
## data: boston.c$CMEDV
## weights: bh_listw_cutoff_std
##
## standard deviate = 3.4088, p-value = 0.0003263
## alternative hypothesis: greater
## sample estimates:
## Global G statistic Expectation Variance
## 2.021121e-03 1.980198e-03 1.441230e-10
globalG.test(x = boston.c$CMEDV, listw = bh_swm_nb_b)
##
## Getis-Ord global G statistic
##
## data: boston.c$CMEDV
## weights: bh_swm_nb_b
##
## standard deviate = 8.9113, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Global G statistic Expectation Variance
## 9.362141e-03 8.421712e-03 1.113695e-08
joincount.test(fx = cut(x = boston.c$CMEDV, breaks = 2), listw = bh_listw_cutoff_std)
##
## Join count test under nonfree sampling
##
## data: cut(x = boston.c$CMEDV, breaks = 2)
## weights: bh_listw_cutoff_std
##
## Std. deviate for (4.96,27.5] = 13.623, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic Expectation Variance
## 175.520939 158.019802 1.650292
##
##
## Join count test under nonfree sampling
##
## data: cut(x = boston.c$CMEDV, breaks = 2)
## weights: bh_listw_cutoff_std
##
## Std. deviate for (27.5,50] = 11.654, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Same colour statistic Expectation Variance
## 19.6835358 11.0198020 0.5526433
\[ I_i = \frac{z_i}{\frac{1}{n} \sum_i z_i^2} \sum_j w_{ij} z_i, \quad z_i := x_i - \bar{x} \]
bh_localm <- localmoran(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
summary(bh_localm[, 1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.73760 0.01044 0.11810 0.39430 0.62360 3.18000
bh_localm_class <- cut(x = bh_localm[, 1], breaks = 10) # local I (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localm_class], main = "Local Moran's I", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)
Moran scatterplot
bh_price_std <- boston.c$CMEDV - mean(boston.c$CMEDV) # standardized x
bh_w_price <- bh_w_cutoff_std %*% bh_price_std # spatial lag of standardized x
plot(x = bh_price_std, y = bh_w_price, main = "Moran scatterplot")
abline(h = 0, v = 0, lty = 2)
# 有意なI_iかどうかを o/x で区別する場合
plot(x = bh_price_std, y = bh_w_price, main = "Moran scatterplot (x = not signif.)", pch = ifelse(test = bh_localm[, 5] < .05, yes = 16, no = 4))
abline(h = 0, v = 0, lty = 2)
moran.plot(x = boston.c$CMEDV, listw = bh_listw_cutoff_std, quiet = T) # potentially influential observationsにはラベルがつく
空間分布図で hot spot (high-high, color = pink), cold spot (low-low, color = cyan) を示す.(少なくとも今回のケースでは)spatial outlier (high-low or low-high) は有意にならない.
bh_localm_col2 <- cm.colors(10)[bh_localm_class]
bh_localm_col2[bh_localm[, 5] > .05] <- "gray"
plot(boston.utm, col = bh_localm_col2, main = "Local Moran's I (x = not signif.)", cex = ifelse(test = bh_localm[, 5] < .05, yes = 1, no = .5), pch = ifelse(test = bh_localm[, 5] < .05, yes = 16, no = 4))
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)
\[ G_i = \frac{\sum_{j \ne i} w_{ij} x_j}{\sum_j x_j} \]
bh_localg <- localG(x = boston.c$CMEDV, listw = bh_listw_cutoff_std)
attr(bh_localg, "gstari")
## [1] FALSE
bh_localg_class <- cut(x = bh_localg, breaks = 10) # local G (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localm_class], main = "Local G", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localm_class), pch = 20, cex = .7)
\[ G_{i}^{*} = \frac{\sum_j w_{ij} x_j}{\sum_j x_j} \]
bh_localgs <- localG(x = boston.c$CMEDV, listw = nb2listw(neighbours = include.self(boston.soi), style = "B"))
attr(bh_localgs, "gstari")
## [1] TRUE
bh_localgs_class <- cut(x = bh_localgs, breaks = 10) # local G* (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localgs_class], main = "Local G*", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localgs_class), pch = 20, cex = .7)
See Ord and Getis (2012, ARS).
\[ \bar{x}_i = \frac{\sum_j w_{ij} x_j}{\sum_j w_{ij}}, \quad e_j = x_j - \bar{x_i}, j \in N(i) \] \[ H_i = \frac{\sum_j w_{ij} |e_j|^{\alpha}}{\sum_j w_{ij}} \]
\(N(i)\)はi自身を含むiの近傍で,\(\alpha = 1\)はabsolute deviation measure,\(\alpha = 2\)はvariance measureらしい.
localH <- function(x, W, alpha = 2) {
xi <- ej <- hi <- numeric(length = length(x))
for (i in 1:length(x)) {
xi[i] <- sum(W[i,] * x) / sum(W[i,])
}
# for (j in 1:length(x)) {
# ej[j] <- x[j] - xi[j]
# }
ej <- x - xi
for (i in 1:length(x)) {
hi[i] <- sum(W[i,] * abs(ej)^alpha) / sum(W[i,])
}
return(hi)
}
bh_localh <- localH(x = boston.c$CMEDV, W = bh_w_cutoff_std)
bh_localh_class <- cut(x = bh_localh, breaks = 10) # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_class], main = "Local H (user-defined function 'localH' w/ distance-based W)", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_class), pch = 20, cex = .7)
パッケージ spdep に関数が追加されたので試す(情報提供者:高野さん).
bh_localh_spdep <- spdep::LOSH(x = boston.c$CMEDV, listw = nb2listw(boston.soi))
head(bh_localh_spdep)
## Hi E.Hi Var.Hi Z.Hi x_bar_i ei
## [1,] 1.3130740 1 1.4509626 1.8099349 20.92500 9.455625
## [2,] 0.9564493 1 0.8241666 2.3210095 21.25714 0.117551
## [3,] 0.5478950 1 1.1584578 0.9459041 24.58000 102.414400
## [4,] 2.6597341 1 1.9384707 2.7441571 31.26667 4.551111
## [5,] 1.3370733 1 1.9384707 1.3795136 28.33333 61.884444
## [6,] 2.3725774 1 5.8385349 0.8127304 36.20000 56.250000
summary(bh_localh_spdep[, "Hi"])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1496 0.3532 1.0490 1.2220 16.9100
bh_localh_spdep_class <- cut(x = bh_localh_spdep[, "Hi"], breaks = 10) # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_spdep_class], main = "Local H (spdep function 'LOSH')", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_spdep_class), pch = 20, cex = .7)
上記の自作関数 localH の計算結果と比較するために,Wの定義を揃えて再計算する(スケールがズレていますが…).
bh_localh_nb_w <- localH(x = boston.c$CMEDV, W = nb2mat(neighbours = boston.soi, style = "W"))
bh_localh_nb_w_class <- cut(x = bh_localh_nb_w, breaks = 10) # local H (num) -> interval (factor)
plot(boston.utm, col = cm.colors(10)[bh_localh_nb_w_class], main = "Local H (user-defined function 'localH')", pch = 20)
legend(x = "topleft", col = cm.colors(10), legend = levels(bh_localh_nb_w_class), pch = 20, cex = .7)
なお,spdep::LOSH.cs 関数でp-valueまで計算できる.
bh_localh_spdep_cs <- spdep::LOSH.cs(x = boston.c$CMEDV, listw = nb2listw(boston.soi))
head(bh_localh_spdep_cs)
## Hi E.Hi Var.Hi Z.Hi x_bar_i ei Pr()
## 2011 1.3130740 1 1.4509626 1.8099349 20.92500 9.455625 0.2625797
## 2021 0.9564493 1 0.8241666 2.3210095 21.25714 0.117551 0.3980194
## 2022 0.5478950 1 1.1584578 0.9459041 24.58000 102.414400 0.5525571
## 2031 2.6597341 1 1.9384707 2.7441571 31.26667 4.551111 0.1018230
## 2032 1.3370733 1 1.9384707 1.3795136 28.33333 61.884444 0.2487123
## 2033 2.3725774 1 5.8385349 0.8127304 36.20000 56.250000 0.1242234
(Last modified on Jan 27, 2019)
_