Exercise 8a

This is a combination of Seth and Petra's lectures from Monday 3/11/13. It all relates to the introduction of spatial autocorrelation.

Petra's Material

# Load tools and read in data.
library(maptools)
## Loading required package: foreign
## Loading required package: sp
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(RANN)
library(spdep)
## Loading required package: boot
## Attaching package: 'boot'
## The following object(s) are masked from 'package:lattice':
## 
## melanoma
## Loading required package: Matrix
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(rgdal)
## rgdal: version: 0.8-5, (SVN revision 449) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.0, released 2011/12/29 Path to GDAL shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] Path to
## PROJ.4 shared files:
## /Library/Frameworks/R.framework/Versions/2.15/Resources/library/rgdal/proj
poverty <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/poverty/poverty.shp")
class(poverty)  # Double-check that we have the right class.
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Creating Spatial Weight Matrices

Contiguity

# Contiguity finds other analysis units that share a common boundary with
# the focal unit.
poverty_nbq <- poly2nb(poverty)  # Queen's case is the default (shares a boundary or a point)
poverty_nbr <- poly2nb(poverty, queen = F)  # Rook's case is the default (shares a boundary only)

summary(poverty_nbq)
## Neighbour list object:
## Number of regions: 3107 
## Number of nonzero links: 18218 
## Percentage nonzero weights: 0.1887 
## Average number of links: 5.864 
## 4 regions with no links:
## 35 689 709 880
## Link number distribution:
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   13   14 
##    4   32   41   91  283  616 1044  703  227   51   11    2    1    1 
## 32 least connected regions:
## 45 49 587 645 843 852 1205 1285 1390 1415 1455 1477 1484 1537 1545 1546 1548 1558 1612 1621 1663 1672 1675 1761 1795 1796 1797 1806 2924 2925 2926 2953 with 1 link
## 1 most connected region:
## 1384 with 14 links
summary(poverty_nbr)
## Neighbour list object:
## Number of regions: 3107 
## Number of nonzero links: 17236 
## Percentage nonzero weights: 0.1785 
## Average number of links: 5.547 
## 4 regions with no links:
## 35 689 709 880
## Link number distribution:
## 
##    0    1    2    3    4    5    6    7    8    9   10   11   13 
##    4   33   48  105  378  860 1014  483  135   39    6    1    1 
## 33 least connected regions:
## 45 49 587 645 843 852 1205 1285 1390 1415 1455 1477 1484 1537 1545 1546 1548 1558 1612 1621 1663 1672 1675 1702 1761 1795 1796 1797 1806 2924 2925 2926 2953 with 1 link
## 1 most connected region:
## 608 with 13 links
# The Queen's case on average finds a few more neighbor, which makes sense
# since it is more inclusive.

Distance (distance band, k-nearest neighbor)

## k-nearest neighbor First create a coordinate object and an ID object.
coords <- coordinates(poverty)  # coordinates function extracts coordinates
summary(coords)  # They are a double matrix of lat and long
##        V1               V2      
##  Min.   :-124.2   Min.   :25.5  
##  1st Qu.: -98.0   1st Qu.:34.7  
##  Median : -90.2   Median :38.3  
##  Mean   : -91.7   Mean   :38.3  
##  3rd Qu.: -83.4   3rd Qu.:41.7  
##  Max.   : -67.6   Max.   :48.8
FIPS <- row.names(as(poverty, "data.frame"))  # Define the fips code so that we can use K nearest neighbors

# Then create the nearest neighbor objects using knn2nb function
# knearniegh(COORD,k=#) to define the coordinates being used (an object
# derived using the coordinates function) row.names=ROWID
poverty_kn1 <- knn2nb(knearneigh(coords, k = 1), row.names = FIPS)
poverty_kn2 <- knn2nb(knearneigh(coords, k = 2), row.names = FIPS)
poverty_kn4 <- knn2nb(knearneigh(coords, k = 4), row.names = FIPS)

## Distance Bands Now that you have the nearest neighbor for each unit,
## find the distance between every pair of nearest neighbors.
dist <- unlist(nbdists(poverty_kn1, coords))  # Define distance
summary(dist)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0083  0.2800  0.3450  0.3720  0.4210  1.4700
# Now select the maximum distance between a pair.
max_k1 <- max(dist)  # Create max distance
# Thirdly, set a circle around which to search for neighbors. Any unit
# that has a centroid within the search radius will be selected as a
# neighbor. (Using the dnearneigh function.)  Select coordinates d1=0
# d2=## (in this case, while use ##*max_k1) row.names=ROWID
poverty_kd1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_k1, row.names = FIPS)  # Some units will now have 0 neighbors.
poverty_kd2 <- dnearneigh(coords, d1 = 0, d2 = 1 * max_k1, row.names = FIPS)  # Every unit will have at least 1 neighbor.
poverty_kd3 <- dnearneigh(coords, d1 = 0, d2 = 1.5 * max_k1, row.names = FIPS)  # The total number of neighbors will increase.

Weights Matrices

# nb2listw() creates a weight list from a set of nearest neighbors.
# zero.policy=T ignores the units that have no neighbors (e.g. Martha's
# Vineyard) Row weighted is by default.  This means that a focal unit with
# three neighbors will have its neighbors weighted by 1/3 and a focal unit
# with two neighbors will have its neighbors weighted by 1/2
poverty_nbq_w <- nb2listw(poverty_nbq, zero.policy = T)

# Exploring the weights file:
names(poverty_nbq_w)
## [1] "style"      "neighbours" "weights"
poverty_nbq_w$neighbours[1:3]
## [[1]]
## [1] 23 31 42
## 
## [[2]]
## [1]  3  4 71
## 
## [[3]]
## [1]  2  5 64 71
poverty_nbq_w$weights[1:3]
## [[1]]
## [1] 0.3333 0.3333 0.3333
## 
## [[2]]
## [1] 0.3333 0.3333 0.3333
## 
## [[3]]
## [1] 0.25 0.25 0.25 0.25

## style='B' uses binary style instead.
poverty_nbq_wb <- nb2listw(poverty_nbq, style = "B", zero.policy = T)

Moran's I

# Find the Moran's I value using moran.test(VARIABLE, listw=WEIGHTSLIST,
# zero.policy=T) variable is the variable you wish to test for
# autocorrelation listw= is the weight list object zero.policy=T is used
# to protect from islands (units without a nearest neighbor)
moran.test(poverty$PROPOV, listw = poverty_nbq_w, zero.policy = T)
## 
##  Moran's I test under randomisation
## 
## data:  poverty$PROPOV  
## weights: poverty_nbq_w  
##  
## Moran I statistic standard deviate = 57.52, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##         0.6194394        -0.0003224         0.0001161
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.

Seth's Material

Load Tools and Data

# Load tools and read in data.
library(maptools)
library(spdep)
library(rgdal)

sids <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/sids2/sids2.shp")
class(sids)  # Checkign to see file type
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"

Assigning and Changing Projection Data

# Assignment of a projection using a CRS code:
# http://www.spatialreference.org/ref/
proj4string(sids) <- CRS("+proj=longlat +ellps=WGS84")

# Assigning projections using CRS code:
sids_Albers <- spTransform(sids, CRS("+init=epsg:2163"))  # Albers Equal Area
sids_SP <- spTransform(sids, CRS("+init=ESRI:102719"))  # NC State Plane

par(mfrow = c(3, 1))
plot(sids, axes = T)
title("WGS84")
plot(sids_SP, axes = T)
title("NC State Plane (feet)")
plot(sids_Albers, axes = T)
title("Albers Equal Area Conic USA\n(seems wrong)")

plot of chunk unnamed-chunk-7

1) Identify Neighbors

Contiguity based neighbors

# Find neighbors using Queen's or Rook's case.
library(spdep)
sids_nbq <- poly2nb(sids)  # Queen's Case
sids_nbr <- poly2nb(sids, queen = FALSE)  # Rook's Case

# Extract the coordinates for the locations:
coords <- coordinates(sids)
dev.off()  #clears the screen and the panels
## null device 
##           1
par(mfrow = c(2, 1))
# Plot the links for neighbors:
plot(sids_nbq, coords)  # Queen's case -- selects all locations that share an edge or a point with the focus location.
plot(sids, add = T)  #Add the location outlines
plot(sids_nbr, coords)  # Rook's case -- selects all locations that share an edge with the focus location.
plot(sids, add = T)  #Add the location outlines

plot of chunk unnamed-chunk-9

Distance-based neighbors (k nearest neighbors)

# Set coords using state plane projection:
coords <- coordinates(sids_SP)
# use knn2nb(knearneigh(COORDS, k=##)) to identify nearest neighbors based
# on the coordinates. k= determines the number of neighbors to select for
# each location.  k-nearest neighbor identifies the k nearest neighbors
# (by centroid), irrespective of the actual distance.  Only the relative
# distance matters.
sids_kn1 <- knn2nb(knearneigh(coords, k = 1))
sids_kn2 <- knn2nb(knearneigh(coords, k = 2))
sids_kn4 <- knn2nb(knearneigh(coords, k = 4))

# Plotting:
dev.off()  #clears the screen and the panels
## null device 
##           1
plot(sids_SP)
plot(sids_kn2, coords, add = T)  # Adds 2 links for each location.

Distance-based neighbors (specified distance)

# Distance-based will select all centroids within a certain distance of
# the focus location.  This can result in 0 negihbors.  Use the k1-nearest
# neighbor as a starting point if you want each location to have at least
# 1 neighbor.  Use unlist(nbdists(K-NEAREST, COORDS))
dist <- unlist(nbdists(sids_kn1, coords))  # Using the State Plane version so that the distances are easier to interpret
summary(dist)  # The summary gives a good sense of what might be an appropriate threshold. The maximum distance of a k1 neighbor is the highest you can make the minimum threshold without having a location with 0 neighbors.
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   40100   89800   97600   96300  107000  135000
max_k1 <- max(dist)  # Store the max dist as an object.

# Now you can set various levels. using dnearneigh(COORDS, d1=#,
# d2=#*MAX_K1) d1 is usually 0 d2 if you multiply the k1 max by a number
# under 1, you will end up with some locations that have 0 neighbors.
sids_kd1 <- dnearneigh(coords, d1 = 0, d2 = 0.75 * max_k1)
sids_kd2 <- dnearneigh(coords, d1 = 0, d2 = 1 * max_k1)
sids_kd3 <- dnearneigh(coords, d1 = 0, d2 = 1.5 * max_k1)
# This can also be done by raw distance:
sids_ran1 <- dnearneigh(coords, d1 = 0, d2 = 134600)

2) Assigning Weights to the Chosen Neighbors

Row Standardization

# Not all neighbors are equal. Some should be weighted more than others.

# For instance, if location 1 has 3 neighbors and location 2 has 5
# neighbors, the neighbors for location 1 need to be weighted differently
# than those for location 2.  This is resolved by row standardization,
# which assigns the appropriate weight depending on how many neighbors a
# location has.

# The function nb2listw(NEIGHBORS_LIST) performs row standardization.
sids_nbq_w <- nb2listw(sids_nbq)  # Example with Queen's case neighbors
sids_nbq_w
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 100 
## Number of nonzero links: 490 
## Percentage nonzero weights: 4.9 
## Average number of links: 4.9 
## 
## Weights style: W 
## Weights constants summary:
##     n    nn  S0    S1    S2
## W 100 10000 100 44.65 410.5
sids_nbq_w$neighbours[1:3]  # The neighbours section of the weights object gives you the id #s of all a location's neighbors.
## [[1]]
## [1]  2 18 19
## 
## [[2]]
## [1]  1  3 18
## 
## [[3]]
## [1]  2 10 18 23 25
sids_nbq_w$weights[1:3]  # The weights section of the weights object gives you the decimal weight for each neighbor (the weight is always 1/x when x is the number of neighbors).
## [[1]]
## [1] 0.3333 0.3333 0.3333
## 
## [[2]]
## [1] 0.3333 0.3333 0.3333
## 
## [[3]]
## [1] 0.2 0.2 0.2 0.2 0.2

Binary Weights

# Since row standardization gives more weight to neighbors of lcoations
# with few neighbors, you may prefer binary weights if looking for global
# characteristizations. Binary weights bumps up the many-neighbored
# neighbors compared to row standardization.
sids_nbq_wb <- nb2listw(sids_nbq, style = "B")
sids_nbq_wb
## Characteristics of weights list object:
## Neighbour list object:
## Number of regions: 100 
## Number of nonzero links: 490 
## Percentage nonzero weights: 4.9 
## Average number of links: 4.9 
## 
## Weights style: B 
## Weights constants summary:
##     n    nn  S0  S1    S2
## B 100 10000 490 980 10696
sids_nbq_wb$weights[1:3]  # Notice how all the neighbors here are given equal weight!
## [[1]]
## [1] 1 1 1
## 
## [[2]]
## [1] 1 1 1
## 
## [[3]]
## [1] 1 1 1 1 1

Regions with No Negihbors

# Islands do not have neighbors in a queen's or rook's case. Use k nearest
# neighbor to rectify, or use 'zero.policy=T' as an argument when
# assigning weights:
sids_nbq_w <- nb2listw(sids_nbq, zero.policy = T)

IDW

# Inverse distance weighting bases weights on the distance between
# centroids.
dists <- nbdists(sids_nbq, coordinates(sids_SP))
idw <- lapply(dists, function(x) 1/(x/1000))  # The function can be whatever you want; so you could make more distant neighbors worth more! Although that is usually ridiculous...
sids_nbq_idwb <- nb2listw(sids_nbq, glist = idw, style = "B")
summary(unlist(sids_nbq_idwb$weights))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00412 0.00627 0.00764 0.00804 0.00927 0.02490

3) Examine Spatial Autocorrelation

Moran's I

# To test spatial dependence, you need: A variable of interest A lagged
# version of that variable (based on the weights determined in step 2)
sids$NWBIR79  #births to non-white mothers
##   [1]    19    12   260   145  1197  1237   139   371   844   176   597
##  [12]  1369  1058   650  1492  2980   933   222    33   310   491     5
##  [23]    76   950  5031  7089  1397  1161  1086  4948  2274     4  2696
##  [34]   360    10  1033  6221     3  1305   177   150   941   407   651
##  [45]   141   128   483   687  2330  1504  3059   914  1206  1349    62
##  [56]    73  1163   406   664   905   576  3073  1453  1729   350   307
##  [67]  1151 11631  1203   588   528   264    45  2047   104  2194    79
##  [78]    22  1524   277    42 10614   305  1348  1161  1172   169  1227
##  [89]  1218     5  2342  1436  3568  6899   487  1023   763  1832  2100
## [100]   841
sids$NWBIR79_lag <- lag(sids_nbq_w, sids$NWBIR79)  #lag using row standardized weights
lm1 <- lm(sids$NWBIR79_lag ~ sids$NWBIR79)
# The regression coefficient IS the Moran's I statistic.
summary(lm1)
## 
## Call:
## lm(formula = sids$NWBIR79_lag ~ sids$NWBIR79)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1667   -803   -266    644   3036 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.21e+03   1.26e+02    9.59  9.2e-16 ***
## sids$NWBIR79 1.50e-01   5.27e-02    2.84   0.0055 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 1040 on 98 degrees of freedom
## Multiple R-squared: 0.0761,  Adjusted R-squared: 0.0667 
## F-statistic: 8.08 on 1 and 98 DF,  p-value: 0.00546
plot(y = sids$NWBIR79_lag, x = sids$NWBIR79, ylab = "Births to non-white mothers lag", 
    xlab = "Births to non-white mothers")
title("The relationship between a variable \n and the lag of itself is Moran’s I")
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <e2>
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <80>
## Warning: conversion failure on ' and the lag of itself is Moran’s I' in
## 'mbcsToSbcs': dot substituted for <99>

plot of chunk unnamed-chunk-16


## Alternate way of getting Moran's I: Find the Moran's I value using
## moran.test(VARIABLE, listw=WEIGHTSLIST, zero.policy=T) variable is the
## variable you wish to test for autocorrelation listw= is the weight list
## object zero.policy=T is used to protect from islands (units without a
## nearest neighbor)
moran.test(sids$NWBIR79, listw = sids_nbq_w, zero.policy = T)
## 
##  Moran's I test under randomisation
## 
## data:  sids$NWBIR79  
## weights: sids_nbq_w  
##  
## Moran I statistic standard deviate = 2.613, p-value = 0.004487
## alternative hypothesis: greater 
## sample estimates:
## Moran I statistic       Expectation          Variance 
##          0.149686         -0.010101          0.003739
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.