This is a combination of Seth and Petra's lectures from Monday 3/11/13. It all relates to the introduction of spatial autocorrelation.
# Load tools and read in data.
library(maptools)
## Loading required package: foreign
## Loading required package: sp
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(RANN)
library(spdep)
## Loading required package: boot
## Attaching package: 'boot'
## The following object(s) are masked from 'package:lattice':
##
## melanoma
## Loading required package: Matrix
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(rgdal)
## rgdal: version: 0.8-5, (SVN revision 449) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.0, released 2011/12/29 Path to GDAL shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] Path to
## PROJ.4 shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/proj
poverty <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/poverty/poverty.shp")
class(poverty) # Double-check that we have the right class.
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
Contiguity
# Contiguity finds other analysis units that share a common boundary with
# the focal unit.
poverty_nbq <- poly2nb(poverty) # Queen's case is the default (shares a boundary or a point)
poverty_nbr <- poly2nb(poverty, queen = F) # Rook's case is the default (shares a boundary only)
summary(poverty_nbq)
## Neighbour list object:
## Number of regions: 3107
## Number of nonzero links: 18218
## Percentage nonzero weights: 0.1887
## Average number of links: 5.864
## 4 regions with no links:
## 35 689 709 880
## Link number distribution:
##
## 0 1 2 3 4 5 6 7 8 9 10 11 13 14
## 4 32 41 91 283 616 1044 703 227 51 11 2 1 1
## 32 least connected regions:
## 45 49 587 645 843 852 1205 1285 1390 1415 1455 1477 1484 1537 1545 1546 1548 1558 1612 1621 1663 1672 1675 1761 1795 1796 1797 1806 2924 2925 2926 2953 with 1 link
## 1 most connected region:
## 1384 with 14 links
summary(poverty_nbr)
## Neighbour list object:
## Number of regions: 3107
## Number of nonzero links: 17236
## Percentage nonzero weights: 0.1785
## Average number of links: 5.547
## 4 regions with no links:
## 35 689 709 880
## Link number distribution:
##
## 0 1 2 3 4 5 6 7 8 9 10 11 13
## 4 33 48 105 378 860 1014 483 135 39 6 1 1
## 33 least connected regions:
## 45 49 587 645 843 852 1205 1285 1390 1415 1455 1477 1484 1537 1545 1546 1548 1558 1612 1621 1663 1672 1675 1702 1761 1795 1796 1797 1806 2924 2925 2926 2953 with 1 link
## 1 most connected region:
## 608 with 13 links
# The Queen's case on average finds a few more neighbor, which makes sense
# since it is more inclusive.
Distance (distance band, k-nearest neighbor)
## k-nearest neighbor First create a coordinate object and an ID object.
coords <- coordinates(poverty) # coordinates function extracts coordinates
summary(coords) # They are a double matrix of lat and long
## V1 V2
## Min. :-124.2 Min. :25.5
## 1st Qu.: -98.0 1st Qu.:34.7
## Median : -90.2 Median :38.3
## Mean : -91.7 Mean :38.3
## 3rd Qu.: -83.4 3rd Qu.:41.7
## Max. : -67.6 Max. :48.8
FIPS <- row.names(as(poverty, "data.frame")) # Define the fips code so that we can use K nearest neighbors
# Then create the nearest neighbor objects using knn2nb function
# knearniegh(COORD,k=#) to define the coordinates being used (an object
# derived using the coordinates function) row.names=ROWID
poverty_kn1 <- knn2nb(knearneigh(coords, k = 1), row.names = FIPS)
poverty_kn2 <- knn2nb(knearneigh(coords, k = 2), row.names = FIPS)
poverty_kn4 <- knn2nb(knearneigh(coords, k = 4), row.names = FIPS)
## Distance Bands Now that you have the nearest neighbor for each unit,
## find the distance between every pair of nearest neighbors.
dist <- unlist(nbdists(poverty_kn1, coords)) # Define distance
summary(dist)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0083 0.2800 0.3450 0.3720 0.4210 1.4700
# Now select the maximum distance between a pair.
max_k1 <- max(dist) # Create max distance
# Thirdly, set a circle around which to search for neighbors. Any unit
# that has a centroid within the search radius will be selected as a
# neighbor. (Using the dnearneigh function.) Select coordinates d1=0
# d2=## (in this case, while use ##*max_k1) row.names=ROWID
poverty_kd1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_k1, row.names = FIPS) # Some units will now have 0 neighbors.
poverty_kd2 <- dnearneigh(coords, d1 = 0, d2 = 1 * max_k1, row.names = FIPS) # Every unit will have at least 1 neighbor.
poverty_kd3 <- dnearneigh(coords, d1 = 0, d2 = 1.5 * max_k1, row.names = FIPS) # The total number of neighbors will increase.
Weights Matrices
# nb2listw() creates a weight list from a set of nearest neighbors.
# zero.policy=T ignores the units that have no neighbors (e.g. Martha's
# Vineyard) Row weighted is by default. This means that a focal unit with
# three neighbors will have its neighbors weighted by 1/3 and a focal unit
# with two neighbors will have its neighbors weighted by 1/2
poverty_nbq_w <- nb2listw(poverty_nbq, zero.policy = T)
# Exploring the weights file:
names(poverty_nbq_w)
## [1] "style" "neighbours" "weights"
poverty_nbq_w$neighbours[1:3]
## [[1]]
## [1] 23 31 42
##
## [[2]]
## [1] 3 4 71
##
## [[3]]
## [1] 2 5 64 71
poverty_nbq_w$weights[1:3]
## [[1]]
## [1] 0.3333 0.3333 0.3333
##
## [[2]]
## [1] 0.3333 0.3333 0.3333
##
## [[3]]
## [1] 0.25 0.25 0.25 0.25
## style='B' uses binary style instead.
poverty_nbq_wb <- nb2listw(poverty_nbq, style = "B", zero.policy = T)
Moran's I
# Find the Moran's I value using moran.test(VARIABLE, listw=WEIGHTSLIST,
# zero.policy=T) variable is the variable you wish to test for
# autocorrelation listw= is the weight list object zero.policy=T is used
# to protect from islands (units without a nearest neighbor)
moran.test(poverty$PROPOV, listw = poverty_nbq_w, zero.policy = T)
##
## Moran's I test under randomisation
##
## data: poverty$PROPOV
## weights: poverty_nbq_w
##
## Moran I statistic standard deviate = 57.52, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6194394 -0.0003224 0.0001161
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.
# Load tools and read in data.
library(maptools)
library(spdep)
library(rgdal)
sids <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/sids2/sids2.shp")
class(sids) # Checkign to see file type
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# Assignment of a projection using a CRS code:
# http://www.spatialreference.org/ref/
proj4string(sids) <- CRS("+proj=longlat +ellps=WGS84")
# Assigning projections using CRS code:
sids_Albers <- spTransform(sids, CRS("+init=epsg:2163")) # Albers Equal Area
sids_SP <- spTransform(sids, CRS("+init=ESRI:102719")) # NC State Plane
par(mfrow = c(3, 1))
plot(sids, axes = T)
title("WGS84")
plot(sids_SP, axes = T)
title("NC State Plane (feet)")
plot(sids_Albers, axes = T)
title("Albers Equal Area Conic USA\n(seems wrong)")
# Find neighbors using Queen's or Rook's case.
library(spdep)
sids_nbq <- poly2nb(sids) # Queen's Case
sids_nbr <- poly2nb(sids, queen = FALSE) # Rook's Case
# Extract the coordinates for the locations:
coords <- coordinates(sids)
dev.off() #clears the screen and the panels
## null device
## 1
par(mfrow = c(2, 1))
# Plot the links for neighbors:
plot(sids_nbq, coords) # Queen's case -- selects all locations that share an edge or a point with the focus location.
plot(sids, add = T) #Add the location outlines
plot(sids_nbr, coords) # Rook's case -- selects all locations that share an edge with the focus location.
plot(sids, add = T) #Add the location outlines
# Set coords using state plane projection:
coords <- coordinates(sids_SP)
# use knn2nb(knearneigh(COORDS, k=##)) to identify nearest neighbors based
# on the coordinates. k= determines the number of neighbors to select for
# each location. k-nearest neighbor identifies the k nearest neighbors
# (by centroid), irrespective of the actual distance. Only the relative
# distance matters.
sids_kn1 <- knn2nb(knearneigh(coords, k = 1))
sids_kn2 <- knn2nb(knearneigh(coords, k = 2))
sids_kn4 <- knn2nb(knearneigh(coords, k = 4))
# Plotting:
dev.off() #clears the screen and the panels
## null device
## 1
plot(sids_SP)
plot(sids_kn2, coords, add = T) # Adds 2 links for each location.
# Distance-based will select all centroids within a certain distance of
# the focus location. This can result in 0 negihbors. Use the k1-nearest
# neighbor as a starting point if you want each location to have at least
# 1 neighbor. Use unlist(nbdists(K-NEAREST, COORDS))
dist <- unlist(nbdists(sids_kn1, coords)) # Using the State Plane version so that the distances are easier to interpret
summary(dist) # The summary gives a good sense of what might be an appropriate threshold. The maximum distance of a k1 neighbor is the highest you can make the minimum threshold without having a location with 0 neighbors.
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 40100 89800 97600 96300 107000 135000
max_k1 <- max(dist) # Store the max dist as an object.
# Now you can set various levels. using dnearneigh(COORDS, d1=#,
# d2=#*MAX_K1) d1 is usually 0 d2 if you multiply the k1 max by a number
# under 1, you will end up with some locations that have 0 neighbors.
sids_kd1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_k1)
sids_kd2 <- dnearneigh(coords, d1 = 0, d2 = 1 * max_k1)
sids_kd3 <- dnearneigh(coords, d1 = 0, d2 = 1.5 * max_k1)
# This can also be done by raw distance:
sids_ran1 <- dnearneigh(coords, d1 = 0, d2 = 134600)
# Not all neighbors are equal. Some should be weighted more than others.
# For instance, if location 1 has 3 neighbors and location 2 has 5
# neighbors, the neighbors for location 1 need to be weighted differently
# than those for location 2. This is resolved by row standardization,
# which assigns the appropriate weight depending on how many neighbors a
# location has.
# The function nb2listw(NEIGHBORS_LIST) performs row standardization.
sids_nbq_w <- nb2listw(sids_nbq) # Example with Queen's case neighbors
sids_nbq_w
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 100
## Number of nonzero links: 490
## Percentage nonzero weights: 4.9
## Average number of links: 4.9
##
## Weights style: W
## Weights constants summary:
## n nn S0 S1 S2
## W 100 10000 100 44.65 410.5
sids_nbq_w$neighbours[1:3] # The neighbours section of the weights object gives you the id #s of all a location's neighbors.
## [[1]]
## [1] 2 18 19
##
## [[2]]
## [1] 1 3 18
##
## [[3]]
## [1] 2 10 18 23 25
sids_nbq_w$weights[1:3] # The weights section of the weights object gives you the decimal weight for each neighbor (the weight is always 1/x when x is the number of neighbors).
## [[1]]
## [1] 0.3333 0.3333 0.3333
##
## [[2]]
## [1] 0.3333 0.3333 0.3333
##
## [[3]]
## [1] 0.2 0.2 0.2 0.2 0.2
# Since row standardization gives more weight to neighbors of lcoations
# with few neighbors, you may prefer binary weights if looking for global
# characteristizations. Binary weights bumps up the many-neighbored
# neighbors compared to row standardization.
sids_nbq_wb <- nb2listw(sids_nbq, style = "B")
sids_nbq_wb
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 100
## Number of nonzero links: 490
## Percentage nonzero weights: 4.9
## Average number of links: 4.9
##
## Weights style: B
## Weights constants summary:
## n nn S0 S1 S2
## B 100 10000 490 980 10696
sids_nbq_wb$weights[1:3] # Notice how all the neighbors here are given equal weight!
## [[1]]
## [1] 1 1 1
##
## [[2]]
## [1] 1 1 1
##
## [[3]]
## [1] 1 1 1 1 1
# Islands do not have neighbors in a queen's or rook's case. Use k nearest
# neighbor to rectify, or use 'zero.policy=T' as an argument when
# assigning weights:
sids_nbq_w <- nb2listw(sids_nbq, zero.policy = T)
# Inverse distance weighting bases weights on the distance between
# centroids.
dists <- nbdists(sids_nbq, coordinates(sids_SP))
idw <- lapply(dists, function(x) 1/(x/1000)) # The function can be whatever you want; so you could make more distant neighbors worth more! Although that is usually ridiculous...
sids_nbq_idwb <- nb2listw(sids_nbq, glist = idw, style = "B")
summary(unlist(sids_nbq_idwb$weights))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00412 0.00627 0.00764 0.00804 0.00927 0.02490
# To test spatial dependence, you need: A variable of interest A lagged
# version of that variable (based on the weights determined in step 2)
sids$NWBIR79 #births to non-white mothers
## [1] 19 12 260 145 1197 1237 139 371 844 176 597
## [12] 1369 1058 650 1492 2980 933 222 33 310 491 5
## [23] 76 950 5031 7089 1397 1161 1086 4948 2274 4 2696
## [34] 360 10 1033 6221 3 1305 177 150 941 407 651
## [45] 141 128 483 687 2330 1504 3059 914 1206 1349 62
## [56] 73 1163 406 664 905 576 3073 1453 1729 350 307
## [67] 1151 11631 1203 588 528 264 45 2047 104 2194 79
## [78] 22 1524 277 42 10614 305 1348 1161 1172 169 1227
## [89] 1218 5 2342 1436 3568 6899 487 1023 763 1832 2100
## [100] 841
sids$NWBIR79_lag <- lag(sids_nbq_w, sids$NWBIR79) #lag using row standardized weights
lm1 <- lm(sids$NWBIR79_lag ~ sids$NWBIR79)
# The regression coefficient IS the Moran's I statistic.
summary(lm1)
##
## Call:
## lm(formula = sids$NWBIR79_lag ~ sids$NWBIR79)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1667 -803 -266 644 3036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.21e+03 1.26e+02 9.59 9.2e-16 ***
## sids$NWBIR79 1.50e-01 5.27e-02 2.84 0.0055 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1040 on 98 degrees of freedom
## Multiple R-squared: 0.0761, Adjusted R-squared: 0.0667
## F-statistic: 8.08 on 1 and 98 DF, p-value: 0.00546
plot(y = sids$NWBIR79_lag, x = sids$NWBIR79, ylab = "Births to non-white mothers lag",
xlab = "Births to non-white mothers")
title("The relationship between a variable \n and the lag of itself is Moran’s I")
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <e2>
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <80>
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <99>
## Alternate way of getting Moran's I: Find the Moran's I value using
## moran.test(VARIABLE, listw=WEIGHTSLIST, zero.policy=T) variable is the
## variable you wish to test for autocorrelation listw= is the weight list
## object zero.policy=T is used to protect from islands (units without a
## nearest neighbor)
moran.test(sids$NWBIR79, listw = sids_nbq_w, zero.policy = T)
##
## Moran's I test under randomisation
##
## data: sids$NWBIR79
## weights: sids_nbq_w
##
## Moran I statistic standard deviate = 2.613, p-value = 0.004487
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.149686 -0.010101 0.003739
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.