load("C:\\Users\\QINGHUAN\\Desktop\\soco.rda")
library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(rgdal)
## Warning: package 'rgdal' was built under R version 2.15.3
## rgdal: version: 0.8-8, (SVN revision 463) Geospatial Data Abstraction
## Library extensions to R successfully loaded Loaded GDAL runtime: GDAL
## 1.9.2, released 2012/10/08 Path to GDAL shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/gdal GDAL does not
## use iconv for recoding strings. Loaded PROJ.4 runtime: Rel. 4.7.1, 23
## September 2009, [PJ_VERSION: 470] Path to PROJ.4 shared files:
## C:/Users/QINGHUAN/Documents/R/win-library/2.15/rgdal/proj
library(spdep)
## Warning: package 'spdep' was built under R version 2.15.3
## Loading required package: boot
## Warning: package 'boot' was built under R version 2.15.3
## Attaching package: 'boot'
## The following object(s) are masked from 'package:lattice':
##
## melanoma
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 2.15.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 2.15.3
## Loading required package: nlme
## Warning: package 'nlme' was built under R version 2.15.3
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Warning: package 'coda' was built under R version 2.15.3
## Loading required package: splines
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Warning: package 'class' was built under R version 2.15.3
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)
soco_nbq <- poly2nb(soco) #Queen's neighborhood
soco_nbq_w <- nb2listw(soco_nbq) #row standardization
moran.mc(soco$PERPOV, listw = soco_nbq_w, nsim = 999)
##
## Monte-Carlo simulation of Moran's I
##
## data: soco$PERPOV
## weights: soco_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.4347, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# assessing moran's I
MyMoran <- moran.mc(soco$PPOV, listw = soco_nbq_w, nsim = 999)
hist(MyMoran$res, breaks = 50)
MyMoran #reject null,accept alt that moran's I is not random
##
## Monte-Carlo simulation of Moran's I
##
## data: soco$PPOV
## weights: soco_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.5893, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
# moran plot in R
mp <- moran.plot(soco$PPOV, soco_nbq_w, labels = as.character(soco$CNTY_ST),
xlab = "Percent Poverty", ylab = "Lag of Percent Poverty")
# draw a LISA map
locm <- localmoran(soco$PPOV, soco_nbq_w) #calculate local moran's I
summary(locm)
## Ii E.Ii Var.Ii Z.Ii
## Min. :-1.778 Min. :-0.000722 Min. :0.0901 Min. :-3.983
## 1st Qu.: 0.012 1st Qu.:-0.000722 1st Qu.:0.1420 1st Qu.: 0.029
## Median : 0.219 Median :-0.000722 Median :0.1658 Median : 0.517
## Mean : 0.589 Mean :-0.000722 Mean :0.1861 Mean : 1.408
## 3rd Qu.: 0.746 3rd Qu.:-0.000722 3rd Qu.:0.1990 3rd Qu.: 1.761
## Max. : 9.335 Max. :-0.000722 Max. :0.9981 Max. :18.977
## Pr(z > 0)
## Min. :0.0000
## 1st Qu.:0.0391
## Median :0.3025
## Mean :0.2900
## 3rd Qu.:0.4886
## Max. :1.0000
# manually make a moran plot standardize variables and save to a new
# column
soco$sPPOV <- scale(soco$PPOV)
# create a lagged variable
soco$lag_sPPOV <- lag.listw(soco_nbq_w, soco$sPPOV)
summary(soco$sPPOV)
## V1
## Min. :-2.151
## 1st Qu.:-0.700
## Median :-0.123
## Mean : 0.000
## 3rd Qu.: 0.591
## Max. : 4.062
summary(soco$lag_sPPOV)
## V1
## Min. :-1.9925
## 1st Qu.:-0.5298
## Median :-0.0762
## Mean : 0.0085
## 3rd Qu.: 0.4514
## Max. : 2.6478
plot(x = soco$sPPOV, y = soco$lag_sPPOV, main = "Moran Scatterplot PPOV")
abline(h = 0, v = 0)
abline(lm(soco$lag_sPPOV ~ soco$sPPOV), lty = 3, lwd = 4, col = "red")
# check out the outliers click on one or two and then hit escape
identify(soco$sPPOV, soco$lag_sPPOV, soco$CNTY_ST, cex = 0.8)
## integer(0)
# identify the Moran plot quadrant for each observation this is some
# serious slicing and illustrate the power of the bracket
soco$quad_sig <- NA
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV >= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 1
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 2
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 3
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV <= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 4
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV >= 0) & (locm[, 5] <= 0.05), "quad_sig"] <- 5
# assign a 5 to a non-significant observations
# map the results set the breaks for the thematic map classes
breaks <- seq(1, 5, 1)
# set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")
# findInterval-necessary for making a map
np <- findInterval(soco$quad_sig, breaks)
# assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
plot(soco, col = colors[np]) #colors[np] manually sets the color for each county
mtext("Local Moran's I", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")
# correlogram of spatial lags
cor10 <- sp.correlogram(soco_nbq, soco$PPOV, order = 10, method = "I")
cor10
## Spatial correlogram for soco$PPOV
## method: Moran's I
## estimate expectation variance standard deviate Pr(I) two sided
## 1 (1387) 5.89e-01 -7.22e-04 2.61e-04 36.54 < 2e-16
## 2 (1387) 4.58e-01 -7.22e-04 1.30e-04 40.20 < 2e-16
## 3 (1387) 3.34e-01 -7.22e-04 8.85e-05 35.60 < 2e-16
## 4 (1387) 2.40e-01 -7.22e-04 7.03e-05 28.69 < 2e-16
## 5 (1387) 1.48e-01 -7.22e-04 5.90e-05 19.39 < 2e-16
## 6 (1387) 8.35e-02 -7.22e-04 5.03e-05 11.88 < 2e-16
## 7 (1387) 3.09e-02 -7.22e-04 4.33e-05 4.81 1.5e-06
## 8 (1387) -1.46e-02 -7.22e-04 3.83e-05 -2.24 0.025
## 9 (1387) -5.41e-02 -7.22e-04 3.47e-05 -9.06 < 2e-16
## 10 (1387) -7.38e-02 -7.22e-04 3.23e-05 -12.85 < 2e-16
##
## 1 (1387) ***
## 2 (1387) ***
## 3 (1387) ***
## 4 (1387) ***
## 5 (1387) ***
## 6 (1387) ***
## 7 (1387) ***
## 8 (1387) *
## 9 (1387) ***
## 10 (1387) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cor10) #error bars are based on the SE from permutation
# examine correlation as a function of distance
sp.correlogram(soco_nbq, soco$PPOV, order = 10, method = "corr")
## Spatial correlogram for soco$PPOV
## method: Spatial autocorrelation
## 1 2 3 4 5 6 7 8
## 0.75938 0.69002 0.58062 0.47335 0.33112 0.20307 0.08188 -0.04172
## 9 10
## -0.16401 -0.22568
# variogram
library(gstat)
## Warning: package 'gstat' was built under R version 2.15.3
names(soco)
## [1] "CNTY_ST" "STUSAB" "FIPS" "YCOORD" "XCOORD"
## [6] "SQYCORD" "SQXCORD" "XYCOORD" "PPOV" "PHSP"
## [11] "PFHH" "PWKCO" "PHSLS" "PUNEM" "PUDEM"
## [16] "PEXTR" "PPSRV" "PMSRV" "PNDMFG" "PNHSPW"
## [21] "PMNRTY" "LO_POV" "PFRN" "PNAT" "PBLK"
## [26] "P65UP" "PDSABL" "METRO" "PERPOV" "OTMIG"
## [31] "SQOTMIG" "BINMIG" "INCARC" "BINCARC" "SQRTPPOV"
## [36] "SQRTUNEM" "SQRTPFHH" "LOGHSPLS" "PHSPLUS" "sPPOV"
## [41] "lag_sPPOV" "quad_sig"
plot(variogram(soco$PPOV ~ 1, locations = coordinates(soco), data = soco, cloud = F),
type = "b")
plot(variogram(soco$PFHH ~ 1, locations = coordinates(soco), data = soco, cloud = F),
type = "b")
plot(variogram(soco$PWKCO ~ 1, locations = coordinates(soco), data = soco, cloud = F),
type = "b")
plot(variogram(soco$PERPOV ~ 1, locations = coordinates(soco), data = soco,
cloud = F), type = "b")
# Using measures of spatial autocorrelation in regression 1.for a problem
# with spatial dimension,estimate parameters using OLS 2.evalue the
# model-are assumptions of OLS met? 3.fit a spatial autoregressive model
# (e.g.lag model)
soco_OLS <- lm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
summary(soco_OLS)
##
## Call:
## lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26570 -0.03711 -0.00785 0.03294 0.28733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05544 0.00891 -6.22 6.6e-10 ***
## PFHH 0.45748 0.04941 9.26 < 2e-16 ***
## PUNEM 2.18109 0.07334 29.74 < 2e-16 ***
## PBLK -0.05919 0.01878 -3.15 0.0017 **
## P65UP 0.38023 0.04276 8.89 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0576 on 1382 degrees of freedom
## Multiple R-squared: 0.603, Adjusted R-squared: 0.601
## F-statistic: 524 on 4 and 1382 DF, p-value: <2e-16
plot(soco_OLS) #non-normal residuals,right skewed
# create a map of PPOV
breaks <- classIntervals(soco$PPOV, n = 3, style = "quantile")
labels1 <- c("Low Poverty", "Medium Poverty", "High Poverty")
colors1 <- brewer.pal(3, "Reds")
plot(soco, col = colors1)
mtext("Percent Poverty", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels1, fill = colors1, bty = "n")
soco$resid <- resid(soco_OLS) #save residuals
# calculate moran's I of residuals
lm.morantest(soco_OLS, soco_nbq_w)
##
## Global Moran's I for regression residuals
##
## data:
## model: lm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data =
## soco)
## weights: soco_nbq_w
##
## Moran I statistic standard deviate = 22.55, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran's I Expectation Variance
## 0.360876 -0.002058 0.000259
# map the residuals
breaks <- c(min(soco$resid), 0, max(soco$resid))
labels <- c("negative residuals", "positive residuals")
np <- findInterval(soco$resid, breaks)
colors <- c("red", "blue")
dev.off()
## null device
## 1
plot(soco, col = colors[np])
mtext("OLS Residuals", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")
# poverty might 'spillover',spatial lag model will capture this spillover
# effect
soco_LAG <- lagsarlm(PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco, soco_nbq_w)
summary(soco_LAG)
##
## Call:lagsarlm(formula = PPOV ~ PFHH + PUNEM + PBLK + P65UP, data = soco,
## listw = soco_nbq_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2457653 -0.0284360 -0.0028762 0.0262169 0.2374894
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.100260 0.007375 -13.5946 < 2.2e-16
## PFHH 0.429404 0.040246 10.6695 < 2.2e-16
## PUNEM 1.354637 0.065959 20.5374 < 2.2e-16
## PBLK -0.069046 0.015335 -4.5025 6.716e-06
## P65UP 0.291192 0.035210 8.2701 2.220e-16
##
## Rho: 0.5172, LR test value: 491.5, p-value: < 2.22e-16
## Asymptotic standard error: 0.02118
## z-value: 24.42, p-value: < 2.22e-16
## Wald statistic: 596.3, p-value: < 2.22e-16
##
## Log likelihood: 2239 for lag model
## ML residual variance (sigma squared): 0.002192, (sigma: 0.04682)
## Number of observations: 1387
## Number of parameters estimated: 7
## AIC: -4464, (AIC for lm: -3974)
## LM test for residual autocorrelation
## test value: 4.85, p-value: 0.027651
anova(soco_LAG, soco_OLS)
## Model df AIC logLik Test L.Ratio p-value
## soco_LAG 1 7 -4464 2239 1
## soco_OLS 2 6 -3974 1993 2 491 0
# lag model has lower AIC, so it's better