Spatial Analysis and Regression 1

load("C:\\Users\\QINGHUAN\\Desktop\\soco.rda")

library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.3
## rgdal: version: 0.8-8, (SVN revision 463) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/gdal GDAL does not
## use iconv for recoding strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23
## September 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/proj
library(spdep)
## Warning: package 'spdep' was built under R version 2.15.3
## Loading required package: boot
## Warning: package 'boot' was built under R version 2.15.3
## Attaching package: 'boot'
## The following object(s) are masked from 'package:lattice':
## 
## melanoma
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 2.15.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 2.15.3
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 2.15.3
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Warning: package 'coda' was built under R version 2.15.3
## Loading required package: splines
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Warning: package 'class' was built under R version 2.15.3
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)
soco_nbq <- poly2nb(soco)  #Queen's neighborhood
soco_nbq_w <- nb2listw(soco_nbq)  #row standardization
moran.mc(soco$PERPOV, listw = soco_nbq_w, nsim = 999)
## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  soco$PERPOV 
## weights: soco_nbq_w  
## number of simulations + 1: 1000 
##  
## statistic = 0.4347, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# assessing moran's I
MyMoran <- moran.mc(soco$PPOV, listw = soco_nbq_w, nsim = 999)
hist(MyMoran$res, breaks = 50)

plot of chunk unnamed-chunk-3

MyMoran  #reject null,accept alt that moran's I is not random
## 
##  Monte-Carlo simulation of Moran's I
## 
## data:  soco$PPOV 
## weights: soco_nbq_w  
## number of simulations + 1: 1000 
##  
## statistic = 0.5893, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

# moran plot in R
mp <- moran.plot(soco$PPOV, soco_nbq_w, labels = as.character(soco$CNTY_ST), 
    xlab = "Percent Poverty", ylab = "Lag of Percent Poverty")

plot of chunk unnamed-chunk-3


# draw a LISA map
locm <- localmoran(soco$PPOV, soco_nbq_w)  #calculate local moran's I
summary(locm)
##        Ii              E.Ii               Var.Ii            Z.Ii       
##  Min.   :-1.778   Min.   :-0.000722   Min.   :0.0901   Min.   :-3.983  
##  1st Qu.: 0.012   1st Qu.:-0.000722   1st Qu.:0.1420   1st Qu.: 0.029  
##  Median : 0.219   Median :-0.000722   Median :0.1658   Median : 0.517  
##  Mean   : 0.589   Mean   :-0.000722   Mean   :0.1861   Mean   : 1.408  
##  3rd Qu.: 0.746   3rd Qu.:-0.000722   3rd Qu.:0.1990   3rd Qu.: 1.761  
##  Max.   : 9.335   Max.   :-0.000722   Max.   :0.9981   Max.   :18.977  
##    Pr(z > 0)     
##  Min.   :0.0000  
##  1st Qu.:0.0391  
##  Median :0.3025  
##  Mean   :0.2900  
##  3rd Qu.:0.4886  
##  Max.   :1.0000
# manually make a moran plot standardize variables and save to a new
# column
soco$sPPOV <- scale(soco$PPOV)
# create a lagged variable
soco$lag_sPPOV <- lag.listw(soco_nbq_w, soco$sPPOV)
summary(soco$sPPOV)
##        V1        
##  Min.   :-2.151  
##  1st Qu.:-0.700  
##  Median :-0.123  
##  Mean   : 0.000  
##  3rd Qu.: 0.591  
##  Max.   : 4.062
summary(soco$lag_sPPOV)
##        V1         
##  Min.   :-1.9925  
##  1st Qu.:-0.5298  
##  Median :-0.0762  
##  Mean   : 0.0085  
##  3rd Qu.: 0.4514  
##  Max.   : 2.6478
plot(x = soco$sPPOV, y = soco$lag_sPPOV, main = "Moran Scatterplot PPOV")
abline(h = 0, v = 0)
abline(lm(soco$lag_sPPOV ~ soco$sPPOV), lty = 3, lwd = 4, col = "red")

# check out the outliers click on one or two and then hit escape
identify(soco$sPPOV, soco$lag_sPPOV, soco$CNTY_ST, cex = 0.8)

plot of chunk unnamed-chunk-3

## integer(0)

# identify the Moran plot quadrant for each observation this is some
# serious slicing and illustrate the power of the bracket
soco$quad_sig <- NA
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV >= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 1
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 2
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 3
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 4
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV >= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 5
# assign a 5 to a non-significant observations

# map the results set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")
# findInterval-necessary for making a map
np <- findInterval(soco$quad_sig, breaks)
# assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
plot(soco, col = colors[np])  #colors[np] manually sets the color for each county
mtext("Local Moran's I", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")

plot of chunk unnamed-chunk-3


# correlogram of spatial lags
cor10 <- sp.correlogram(soco_nbq, soco$PPOV, order = 10, method = "I")
cor10
## Spatial correlogram for soco$PPOV 
## method: Moran's I
##            estimate expectation  variance standard deviate Pr(I) two sided
## 1 (1387)   5.89e-01   -7.22e-04  2.61e-04            36.54         < 2e-16
## 2 (1387)   4.58e-01   -7.22e-04  1.30e-04            40.20         < 2e-16
## 3 (1387)   3.34e-01   -7.22e-04  8.85e-05            35.60         < 2e-16
## 4 (1387)   2.40e-01   -7.22e-04  7.03e-05            28.69         < 2e-16
## 5 (1387)   1.48e-01   -7.22e-04  5.90e-05            19.39         < 2e-16
## 6 (1387)   8.35e-02   -7.22e-04  5.03e-05            11.88         < 2e-16
## 7 (1387)   3.09e-02   -7.22e-04  4.33e-05             4.81         1.5e-06
## 8 (1387)  -1.46e-02   -7.22e-04  3.83e-05            -2.24           0.025
## 9 (1387)  -5.41e-02   -7.22e-04  3.47e-05            -9.06         < 2e-16
## 10 (1387) -7.38e-02   -7.22e-04  3.23e-05           -12.85         < 2e-16
##              
## 1 (1387)  ***
## 2 (1387)  ***
## 3 (1387)  ***
## 4 (1387)  ***
## 5 (1387)  ***
## 6 (1387)  ***
## 7 (1387)  ***
## 8 (1387)  *  
## 9 (1387)  ***
## 10 (1387) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cor10)  #error bars are based on the SE from permutation

plot of chunk unnamed-chunk-3

# examine correlation as a function of distance
sp.correlogram(soco_nbq, soco$PPOV, order = 10, method = "corr")
## Spatial correlogram for soco$PPOV 
## method: Spatial autocorrelation
##        1        2        3        4        5        6        7        8 
##  0.75938  0.69002  0.58062  0.47335  0.33112  0.20307  0.08188 -0.04172 
##        9       10 
## -0.16401 -0.22568

# variogram
library(gstat)
## Warning: package 'gstat' was built under R version 2.15.3
names(soco)
##  [1] "CNTY_ST"   "STUSAB"    "FIPS"      "YCOORD"    "XCOORD"   
##  [6] "SQYCORD"   "SQXCORD"   "XYCOORD"   "PPOV"      "PHSP"     
## [11] "PFHH"      "PWKCO"     "PHSLS"     "PUNEM"     "PUDEM"    
## [16] "PEXTR"     "PPSRV"     "PMSRV"     "PNDMFG"    "PNHSPW"   
## [21] "PMNRTY"    "LO_POV"    "PFRN"      "PNAT"      "PBLK"     
## [26] "P65UP"     "PDSABL"    "METRO"     "PERPOV"    "OTMIG"    
## [31] "SQOTMIG"   "BINMIG"    "INCARC"    "BINCARC"   "SQRTPPOV" 
## [36] "SQRTUNEM"  "SQRTPFHH"  "LOGHSPLS"  "PHSPLUS"   "sPPOV"    
## [41] "lag_sPPOV" "quad_sig"
plot(variogram(soco$PPOV ~ 1, locations = coordinates(soco), data = soco, cloud = F), 
    type = "b")

plot of chunk unnamed-chunk-3

plot(variogram(soco$PFHH ~ 1, locations = coordinates(soco), data = soco, cloud = F), 
    type = "b")

plot of chunk unnamed-chunk-3

plot(variogram(soco$PWKCO ~ 1, locations = coordinates(soco), data = soco, cloud = F), 
    type = "b")

plot of chunk unnamed-chunk-3

plot(variogram(soco$PERPOV ~ 1, locations = coordinates(soco), data = soco, 
    cloud = F), type = "b")

plot of chunk unnamed-chunk-3


# Using measures of spatial autocorrelation in regression 1.for a problem
# with spatial dimension,estimate parameters using OLS 2.evalue the
# model-are assumptions of OLS met? 3.fit a spatial autoregressive model
# (e.g.lag model)
soco_OLS <- lm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
summary(soco_OLS)
## 
## Call:
## lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26570 -0.03711 -0.00785  0.03294  0.28733 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.05544    0.00891   -6.22  6.6e-10 ***
## PFHH         0.45748    0.04941    9.26  < 2e-16 ***
## PUNEM        2.18109    0.07334   29.74  < 2e-16 ***
## PBLK        -0.05919    0.01878   -3.15   0.0017 ** 
## P65UP        0.38023    0.04276    8.89  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0576 on 1382 degrees of freedom
## Multiple R-squared: 0.603,   Adjusted R-squared: 0.601 
## F-statistic:  524 on 4 and 1382 DF,  p-value: <2e-16
plot(soco_OLS)  #non-normal residuals,right skewed

plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3 plot of chunk unnamed-chunk-3


# create a map of PPOV
breaks <- classIntervals(soco$PPOV, n = 3, style = "quantile")
labels1 <- c("Low Poverty", "Medium Poverty", "High Poverty")
colors1 <- brewer.pal(3, "Reds")
plot(soco, col = colors1)
mtext("Percent Poverty", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")

plot of chunk unnamed-chunk-3


soco$resid <- resid(soco_OLS)  #save residuals
# calculate moran's I of residuals
lm.morantest(soco_OLS, soco_nbq_w)
## 
##  Global Moran's I for regression residuals
## 
## data:  
## model: lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data =
## soco)
## weights: soco_nbq_w
##  
## Moran I statistic standard deviate = 22.55, p-value < 2.2e-16
## alternative hypothesis: greater 
## sample estimates:
## Observed Moran's I        Expectation           Variance 
##           0.360876          -0.002058           0.000259

# map the residuals
breaks <- c(min(soco$resid), 0, max(soco$resid))
labels <- c("negative residuals", "positive residuals")
np <- findInterval(soco$resid, breaks)
colors <- c("red", "blue")
dev.off()
## null device 
##           1
plot(soco, col = colors[np])
mtext("OLS Residuals", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")

# poverty might 'spillover',spatial lag model will capture this spillover
# effect
soco_LAG <- lagsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, soco_nbq_w)
summary(soco_LAG)
## 
## Call:lagsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, 
##     listw = soco_nbq_w)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.2457653 -0.0284360 -0.0028762  0.0262169  0.2374894 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##              Estimate Std. Error  z value  Pr(>|z|)
## (Intercept) -0.100260   0.007375 -13.5946 < 2.2e-16
## PFHH         0.429404   0.040246  10.6695 < 2.2e-16
## PUNEM        1.354637   0.065959  20.5374 < 2.2e-16
## PBLK        -0.069046   0.015335  -4.5025 6.716e-06
## P65UP        0.291192   0.035210   8.2701 2.220e-16
## 
## Rho: 0.5172, LR test value: 491.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.02118
##     z-value: 24.42, p-value: < 2.22e-16
## Wald statistic: 596.3, p-value: < 2.22e-16
## 
## Log likelihood: 2239 for lag model
## ML residual variance (sigma squared): 0.002192, (sigma: 0.04682)
## Number of observations: 1387 
## Number of parameters estimated: 7 
## AIC: -4464, (AIC for lm: -3974)
## LM test for residual autocorrelation
## test value: 4.85, p-value: 0.027651
anova(soco_LAG, soco_OLS)
##          Model df   AIC logLik Test L.Ratio p-value
## soco_LAG     1  7 -4464   2239    1                
## soco_OLS     2  6 -3974   1993    2     491       0
# lag model has lower AIC, so it's better