# Load libraries
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
##
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(gstat)
# Load soco dataset with load() function Automatically creates an object
# called soco
load("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/soco.rda")
# Find neighbors for each location and save them to an object.
soco_nbq <- poly2nb(soco) # Queen’s neighborhood method
# Create the weight matrix for the neighbors
soco_nbq_w <- nb2listw(soco_nbq)
# Moran's I Test
moran.test(soco$PERPOV, listw = soco_nbq_w, zero.policy = T) # Value of 0.435
##
## Moran's I test under randomisation
##
## data: soco$PERPOV
## weights: soco_nbq_w
##
## Moran I statistic standard deviate = 26.96, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.4346902 -0.0007215 0.0002609
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.
# Simulations of Moran's I statistic using moran.mc listw= is for the
# weight matrix object nsim= is for the # of simulations to include.
# Always make it 99, 999, 9999, etc. because we're adding in the real
# world Moran's I to the list as well. We want a round number in the end
# to make the p-value intuitive.
socoMoranSim <- moran.mc(soco$PERPOV, listw = soco_nbq_w, nsim = 999)
socoMoranSim
##
## Monte-Carlo simulation of Moran's I
##
## data: soco$PERPOV
## weights: soco_nbq_w
## number of simulations + 1: 1000
##
## statistic = 0.4347, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
names(socoMoranSim) # socoMoranSim$res is the Moran's I result
## [1] "statistic" "parameter" "p.value" "alternative" "method"
## [6] "data.name" "res"
# Plot the Moran's I Simulation Results
hist(socoMoranSim$res, breaks = 50, main = "Histogram of Moran's I Simulations",
xlab = "Moran's I Stat", ylab = "Frequency")
# So we can confidently say that this Moran's I statistic is the real
# deal. There is spatial autocorrelation!
# Use moran.plot(x variable, lag x variable, labels=as.character(label))
# This plot will automatically create labels, so you might as well assign
# something intuitive.
socoMoranPlot <- moran.plot(soco$PPOV, soco_nbq_w, labels = as.character(soco$CNTY_ST),
xlab = "Percent Poverty", ylab = "Lag of Percent Poverty")
# Calculate the local Moran's I statistic using localmoran(DATA, WEIGHTS)
# where WEIGHTS is the list of weighted neighbors.
soco_locm <- localmoran(soco$PPOV, soco_nbq_w)
summary(soco_locm)
## Ii E.Ii Var.Ii Z.Ii
## Min. :-1.778 Min. :-0.000722 Min. :0.0901 Min. :-3.983
## 1st Qu.: 0.012 1st Qu.:-0.000722 1st Qu.:0.1420 1st Qu.: 0.029
## Median : 0.219 Median :-0.000722 Median :0.1658 Median : 0.517
## Mean : 0.589 Mean :-0.000722 Mean :0.1861 Mean : 1.408
## 3rd Qu.: 0.746 3rd Qu.:-0.000722 3rd Qu.:0.1990 3rd Qu.: 1.761
## Max. : 9.335 Max. :-0.000722 Max. :0.9981 Max. :18.977
## Pr(z > 0)
## Min. :0.0000
## 1st Qu.:0.0391
## Median :0.3025
## Mean :0.2900
## 3rd Qu.:0.4886
## Max. :1.0000
## Manually make a moran plot Standardize variables using the scale()
## function; save to a new column
soco$sPPOV <- scale(soco$PPOV)
# Create a lagged variable using the lag.listw(WEIGHTS, SCALED_DATA)
# function. Note: 'lag' variable means the values of the neighbors of the
# variable under consideration.
soco$lag_sPPOV <- lag.listw(soco_nbq_w, soco$sPPOV)
summary(soco$sPPOV)
## V1
## Min. :-2.151
## 1st Qu.:-0.700
## Median :-0.123
## Mean : 0.000
## 3rd Qu.: 0.591
## Max. : 4.062
summary(soco$lag_sPPOV)
## V1
## Min. :-1.9925
## 1st Qu.:-0.5298
## Median :-0.0762
## Mean : 0.0085
## 3rd Qu.: 0.4514
## Max. : 2.6478
# Plot the lag v. the scaled variable.
plot(x = soco$sPPOV, y = soco$lag_sPPOV, main = "Moran Scatterplot PPOV", xlab = "Scaled % Poverty",
ylab = "Scaled Lag of % Poverty")
abline(h = 0, v = 0) # Divides the plot into quadrants
abline(lm(soco$lag_sPPOV ~ soco$sPPOV), lty = 3, lwd = 4, col = "red") # linear model used to find line of best fit.
# Investigate outliers by clicking on one or two and then hitting escape
# (or click finish)
identify(soco$sPPOV, soco$lag_sPPOV, soco$CNTY_ST, cex = 0.8)
## integer(0)
## Quadrant I: high poverty counties surrounded by high poverty counties.
## Quadrant II: low poverty counties surrounded by high poverty counties.
## Quadrant III: low poverty counties surrounded by low poverty counties.
## Quadrant IV: high poverty counties surrounded by low poverty counties.
# Identify the Moran plot quadrant for each observation
soco$quad_sig <- NA # Start by creating a new column in the original data frame.
# Assign the appropriate quadrant based on whether the scaled and lag
# values are positive or negative. If not significant, assign a 5.
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV >= 0) & (soco_locm[, 5] <= 0.05),
"quad_sig"] <- 1
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV <= 0) & (soco_locm[, 5] <= 0.05),
"quad_sig"] <- 2
soco@data[(soco$sPPOV >= 0 & soco$lag_sPPOV <= 0) & (soco_locm[, 5] <= 0.05),
"quad_sig"] <- 3
soco@data[(soco$sPPOV <= 0 & soco$lag_sPPOV >= 0) & (soco_locm[, 5] <= 0.05),
"quad_sig"] <- 4
soco@data[(soco_locm[, 5] > 0.05), "quad_sig"] <- 5
# Presenting the results (from Paul Voss) Set the breaks for the thematic
# map classes
breaks <- seq(1, 5, 1)
# Set the corresponding labels for the thematic map classes
labels <- c("High-High", "Low-Low", "High-Low", "Low-High", "Not Signif.")
# see ?findInterval - This is necessary for making a map
np <- findInterval(soco$quad_sig, breaks)
# Assign colors to each map class
colors <- c("red", "blue", "lightpink", "skyblue2", "white")
plot(soco, col = colors[np]) #colors[np] manually sets the color for each county
mtext("Local Moran's I\n% Poverty in Southern Counties", cex = 1.5, side = 3,
line = 1)
legend("topleft", legend = labels, fill = colors, bty = "n")
# Correlelogram
cor10 <- sp.correlogram(soco_nbq, soco$PPOV, order = 10, method = "I")
cor10 # Notice how the values for Moran's I go from significantly positive to significantly negative, crossing 0 around a lag of 8.
## Spatial correlogram for soco$PPOV
## method: Moran's I
## estimate expectation variance standard deviate Pr(I) two sided
## 1 (1387) 5.89e-01 -7.22e-04 2.61e-04 36.54 < 2e-16
## 2 (1387) 4.58e-01 -7.22e-04 1.30e-04 40.20 < 2e-16
## 3 (1387) 3.34e-01 -7.22e-04 8.85e-05 35.60 < 2e-16
## 4 (1387) 2.40e-01 -7.22e-04 7.03e-05 28.69 < 2e-16
## 5 (1387) 1.48e-01 -7.22e-04 5.90e-05 19.39 < 2e-16
## 6 (1387) 8.35e-02 -7.22e-04 5.03e-05 11.88 < 2e-16
## 7 (1387) 3.09e-02 -7.22e-04 4.33e-05 4.81 1.5e-06
## 8 (1387) -1.46e-02 -7.22e-04 3.83e-05 -2.24 0.025
## 9 (1387) -5.41e-02 -7.22e-04 3.47e-05 -9.06 < 2e-16
## 10 (1387) -7.38e-02 -7.22e-04 3.23e-05 -12.85 < 2e-16
##
## 1 (1387) ***
## 2 (1387) ***
## 3 (1387) ***
## 4 (1387) ***
## 5 (1387) ***
## 6 (1387) ***
## 7 (1387) ***
## 8 (1387) *
## 9 (1387) ***
## 10 (1387) ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cor10, main = "Correlelogram of % Poverty")
# Variogram shows how the variance changes with lag distance from location
# of focus. variogram(soco$PPOV~1, locations=coordinates(soco),
# data=soco, cloud=F,type='b”)
# Create a Palette
library(classInt)
library(RColorBrewer)
pal5 <- brewer.pal(5, "Reds") # Makes a 7-color spectral palette
display.brewer.pal(5, "Reds") # This displays the colors
# Classifying Data using classIntervals() and findColours()
cats5 <- classIntervals(soco$PPOV, n = 5, style = "quantile")
cats5 # Returns the bins for each classification.
## style: quantile
## [0.02846,0.1488) [0.1488,0.1926) [0.1926,0.2365) [0.2365,0.2936)
## 278 277 277 277
## [0.2936,0.5953]
## 278
# Link the color pallette to the categories using
# findColours(CATEGORIES,PALETTE)
FiveColors <- findColours(cats5, pal5)
# Draw a map using specificed data and colors
plot(soco, col = FiveColors) # The col= argument provides the coloring scheme.
mtext("Percent Poverty in Southern Counties", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = c("2.8% - 14.9%", "14.9% - 19.3%", "19.3% - 23.6%",
"23.6% - 29.4%", "29.4% - 59.5%"), fill = pal5, bty = "n", cex = 0.8)
names(soco)
## [1] "CNTY_ST" "STUSAB" "FIPS" "YCOORD" "XCOORD"
## [6] "SQYCORD" "SQXCORD" "XYCOORD" "PPOV" "PHSP"
## [11] "PFHH" "PWKCO" "PHSLS" "PUNEM" "PUDEM"
## [16] "PEXTR" "PPSRV" "PMSRV" "PNDMFG" "PNHSPW"
## [21] "PMNRTY" "LO_POV" "PFRN" "PNAT" "PBLK"
## [26] "P65UP" "PDSABL" "METRO" "PERPOV" "OTMIG"
## [31] "SQOTMIG" "BINMIG" "INCARC" "BINCARC" "SQRTPPOV"
## [36] "SQRTUNEM" "SQRTPFHH" "LOGHSPLS" "PHSPLUS" "sPPOV"
## [41] "lag_sPPOV" "quad_sig"
library(HH)
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Attaching package: 'survival'
## The following object(s) are masked from 'package:boot':
##
## aml
## Loading required package: leaps
## Loading required package: latticeExtra
## Attaching package: 'HH'
## The following object(s) are masked from 'package:coda':
##
## acfplot
## The following object(s) are masked from 'package:boot':
##
## logit
library(leaps)
subsetsmodel <- regsubsets(PPOV ~ PMNRTY + PFHH + PWKCO + PHSLS + PUNEM + PUDEM +
PEXTR + PPSRV + PMSRV + PNDMFG + PNHSPW + PFRN + PNAT + P65UP + PHSP + PBLK +
PDSABL + METRO + OTMIG, data = soco, nvmax = 10)
## Warning: 1 linear dependencies found
## Reordering variables and trying again:
subsetsum <- summary(subsetsmodel)
plot(subsetsmodel, main = "Regsubsets Results by BIC")
plot(1:11, y = subsetsum$adjr2, type = "o", xlab = "Number of Parameters", ylab = "Adj. r^2",
main = "Adj. r^2 by Model Size")
plot(1:11, y = subsetsum$bic, type = "o", xlab = "Number of Parameters", ylab = "BIC",
main = "BIC by Model Size")
# There seems to be an asymptote in improved explanatory power and BIC at
# 7 variables.
soco.lm7 <- lm(PPOV ~ PFHH + PHSLS + PUNEM + PUDEM + PEXTR + PBLK + METRO, data = soco)
breaks.lm <- c(min(soco.lm7$resid), 0, max(soco.lm7$resid))
labels.lm <- c("negative residuals", "positive residuals")
np <- findInterval(soco.lm7$resid, breaks.lm)
colors.lm <- c("red", "blue")
dev.off()
## null device
## 1
plot(soco, col = colors[np]) #colors[np] manually sets the color for each county
mtext("OLS Residuals", cex = 1.5, side = 3, line = 1)
legend("topleft", legend = labels.lm, fill = colors.lm, bty = "n")
soco.lm7resid <- residuals(soco.lm7)
# Find distance-based nearest neighbors:
coords <- coordinates(soco)
soco_kn1 <- knn2nb(knearneigh(coords, k = 1))
dist <- unlist(nbdists(soco_kn1, coords))
max_k1 <- max(dist) # Store the max dist as an object.
soco_kd <- dnearneigh(coords, d1 = 0, d2 = 1 * max_k1)
# Assign IDW Weights:
soco_idw <- lapply(dist, function(x) 1/(x/1000))
soco_kdw <- nb2listw(soco_kn1, glist = soco_idw, style = "B")
# Moran's I
moran.test(soco.lm7$resid, listw = soco_kdw, zero.policy = T) # 0.296
##
## Moran's I test under randomisation
##
## data: soco.lm7$resid
## weights: soco_kdw
##
## Moran I statistic standard deviate = 7.655, p-value = 9.638e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.2959646 -0.0007215 0.0015020
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.
# The lagsarlm model only differs from the OLS code in two ways. 1) The
# fucntion in lagsarlm() instead of lm() 2) Include the argument listw =
# NEIGHBOR.WEIGHTS Optional: Include the argument: zero.policy=T
soco.lm7lag <- lagsarlm(PPOV ~ PFHH + PHSLS + PUNEM + PUDEM + PEXTR + PBLK +
METRO, data = soco, listw = soco_kdw, zero.policy = T)
# Run Moran's I for the lag model to see how much spatial autocorrelation
# remains: Note: best to use the same weighted neighbors as before.
moran.test(soco.lm7lag$resid, listw = soco_kdw, zero.policy = T) # 0.196
##
## Moran's I test under randomisation
##
## data: soco.lm7lag$resid
## weights: soco_kdw
##
## Moran I statistic standard deviate = 5.08, p-value = 1.888e-07
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1961457 -0.0007215 0.0015019
# Null Hypothesis: There is no spatial autocorrelation for this variable.
# Alt Hypothesis: There is spatial autocorrelation for this variable.
# The spatial autocorrelation has been reduced from 0.296 to 0.196, which
# means its in that gray area between spatial dependence and noise. This
# indicates that the weights of the neighbors I selected have accounted
# for some of the spatial dependence.
# Comparison of results:
summary(soco.lm7)
##
## Call:
## lm(formula = PPOV ~ PFHH + PHSLS + PUNEM + PUDEM + PEXTR + PBLK +
## METRO, data = soco)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18763 -0.02516 -0.00118 0.02414 0.20958
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.23668 0.01195 -19.81 < 2e-16 ***
## PFHH 0.72210 0.03690 19.57 < 2e-16 ***
## PHSLS 0.26748 0.01326 20.17 < 2e-16 ***
## PUNEM 1.27693 0.06146 20.78 < 2e-16 ***
## PUDEM 0.23699 0.03097 7.65 3.7e-14 ***
## PEXTR 0.41578 0.02203 18.87 < 2e-16 ***
## PBLK -0.10990 0.01354 -8.12 1.0e-15 ***
## METRO -0.00307 0.00286 -1.07 0.28
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0421 on 1379 degrees of freedom
## Multiple R-squared: 0.788, Adjusted R-squared: 0.787
## F-statistic: 733 on 7 and 1379 DF, p-value: <2e-16
summary(soco.lm7lag)
##
## Call:lagsarlm(formula = PPOV ~ PFHH + PHSLS + PUNEM + PUDEM + PEXTR +
## PBLK + METRO, data = soco, listw = soco_kdw, zero.policy = T)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2038179 -0.0239862 -0.0008581 0.0228565 0.1894518
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.2376510 0.0116638 -20.3752 < 2.2e-16
## PFHH 0.7215938 0.0360197 20.0333 < 2.2e-16
## PHSLS 0.2528998 0.0131156 19.2824 < 2.2e-16
## PUNEM 1.2110850 0.0602474 20.1019 < 2.2e-16
## PUDEM 0.2304071 0.0302502 7.6167 2.598e-14
## PEXTR 0.4206466 0.0215299 19.5378 < 2.2e-16
## PBLK -0.1163250 0.0132391 -8.7865 < 2.2e-16
## METRO -0.0017139 0.0027949 -0.6132 0.5397
##
## Rho: 2.138, LR test value: 51.77, p-value: 6.2239e-13
## Asymptotic standard error: 0.2329
## z-value: 9.181, p-value: < 2.22e-16
## Wald statistic: 84.29, p-value: < 2.22e-16
##
## Log likelihood: 2456 for lag model
## ML residual variance (sigma squared): 0.001688, (sigma: 0.04109)
## Number of observations: 1387
## Number of parameters estimated: 10
## AIC: -4891, (AIC for lm: -4842)
## LM test for residual autocorrelation
## test value: 36.41, p-value: 1.6031e-09
anova(soco.lm7lag, soco.lm7)
## Model df AIC logLik Test L.Ratio p-value
## soco.lm7lag 1 10 -4891 2456 1
## soco.lm7 2 9 -4842 2430 2 51.8 6.22e-13
soco.lm7coefs <- coef(soco.lm7)
soco.lm7lagcoefs <- coef(soco.lm7lag)
soco.lm7lagcoefs <- soco.lm7lagcoefs[2:9] # Remove rho in order to compare coefs.
soco.lm7lagcoefs
## (Intercept) PFHH PHSLS PUNEM PUDEM PEXTR
## -0.237651 0.721594 0.252900 1.211085 0.230407 0.420647
## PBLK METRO
## -0.116325 -0.001714
abs(soco.lm7coefs) - abs(soco.lm7lagcoefs)
## (Intercept) PFHH PHSLS PUNEM PUDEM PEXTR
## -0.0009700 0.0005034 0.0145792 0.0658421 0.0065873 -0.0048638
## PBLK METRO
## -0.0064294 0.0013553
# Observing the above differences in the magnitude of the coefficients, in
# most cases, the coefficients decreased in magnitude. However, most
# differences were also less than a magntiude of 0.01. PUNEM decreased by
# 0.658, indicating that it is the most spatially dependent variable. All
# variables that were significant in the OLS are still significant in the
# spatial lag model. The Moran's I tests show that some spatial
# autocorrelation was present, but that much of it was accounted for by
# the weights list.