library(classInt)
## Loading required package: class
## Loading required package: e1071
library(spdep)
## Loading required package: sp
## Loading required package: boot
## Loading required package: Matrix
## Loading required package: lattice
## Attaching package: 'lattice'
## The following object(s) are masked from 'package:boot':
##
## melanoma
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: grid
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
## Loading required package: deldir
## deldir 0.0-21
## Loading required package: coda
## Loading required package: splines
library(RColorBrewer)
library(gstat)
poverty <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/poverty/poverty.shp")
# Variogram shows how the variance changes with lag distance from location
# of focus.
plot(variogram(poverty$PROPOV ~ 1, locations = coordinates(poverty), data = poverty,
cloud = F), type = "b", pch = 16, main = "Variogram of % Child Poverty")
coords <- coordinates(poverty) # coordinates function extracts coordinates
FIPS <- row.names(as(poverty, "data.frame"))
pov_kn6 <- knn2nb(knearneigh(coords, k = 6), row.names = FIPS)
pov_kn6_wb <- nb2listw(pov_kn6, style = "B")
pov_kn6_mor <- moran.test(poverty$PROPOV, listw = pov_kn6_wb, zero.policy = T)
pov_kn6_mor # 0.611 = Moran's I -- high spatial autocorrelation.
##
## Moran's I test under randomisation
##
## data: poverty$PROPOV
## weights: pov_kn6_wb
##
## Moran I statistic standard deviate = 60.92, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.6107716 -0.0003220 0.0001006
## Test of significance using Monte Carlo Simulation:
povMoranSim <- moran.mc(poverty$PROPOV, listw = pov_kn6_wb, nsim = 999)
povMoranSim # The real observation is significantly different from modeled Moran's I statistics.
##
## Monte-Carlo simulation of Moran's I
##
## data: poverty$PROPOV
## weights: pov_kn6_wb
## number of simulations + 1: 1000
##
## statistic = 0.6108, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater
names(povMoranSim) # povMoranSim$res is the Moran's I result
## [1] "statistic" "parameter" "p.value" "alternative" "method"
## [6] "data.name" "res"
# Plot the Moran's I Simulation Results
hist(povMoranSim$res, breaks = 50, main = "Histogram of Moran's I Simulations",
xlab = "Moran's I Stat", ylab = "Frequency")
# So we can confidently say that this Moran's I statistic is the real
# deal. There is spatial autocorrelation!
lm1 <- lm(PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + PROHISP, data = poverty)
summary(lm1)
##
## Call:
## lm(formula = PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK +
## PROHISP, data = poverty)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1937 -0.0429 -0.0072 0.0336 0.5298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.14124 0.00807 -17.50 <2e-16 ***
## PROAUNEM 1.24605 0.04646 26.82 <2e-16 ***
## PROFEMHH 0.35913 0.03066 11.71 <2e-16 ***
## PROHSLS 0.30492 0.01138 26.79 <2e-16 ***
## PROBLCK 0.11967 0.01226 9.76 <2e-16 ***
## PROHISP 0.22029 0.01044 21.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0624 on 3101 degrees of freedom
## Multiple R-squared: 0.643, Adjusted R-squared: 0.642
## F-statistic: 1.12e+03 on 5 and 3101 DF, p-value: <2e-16
laglm1 <- lagsarlm(PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK + PROHISP,
data = poverty, pov_kn6_wb)
summary(laglm1)
##
## Call:
## lagsarlm(formula = PROPOV ~ PROAUNEM + PROFEMHH + PROHSLS + PROBLCK +
## PROHISP, data = poverty, listw = pov_kn6_wb)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.1849654 -0.0328936 -0.0069813 0.0255106 0.5020163
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1628554 0.0065446 -24.8839 <2e-16
## PROAUNEM 0.7590961 0.0403112 18.8309 <2e-16
## PROFEMHH 0.4388358 0.0248662 17.6479 <2e-16
## PROHSLS 0.2196769 0.0095882 22.9112 <2e-16
## PROBLCK 0.0015337 0.0103833 0.1477 0.8826
## PROHISP 0.1241931 0.0088771 13.9904 <2e-16
##
## Rho: 0.08705, LR test value: 1147, p-value: < 2.22e-16
## Asymptotic standard error: 0.002274
## z-value: 38.27, p-value: < 2.22e-16
## Wald statistic: 1465, p-value: < 2.22e-16
##
## Log likelihood: 4788 for lag model
## ML residual variance (sigma squared): 0.002557, (sigma: 0.05057)
## Number of observations: 3107
## Number of parameters estimated: 8
## AIC: -9559, (AIC for lm: -8414)
## LM test for residual autocorrelation
## test value: 341.7, p-value: < 2.22e-16
poverty.new <- poverty #copy the data frame (so we don't mess up the original)
# Find a county name:
names(poverty.new)
## [1] "NAME" "FIPS" "MET" "PROPOV" "WGHT" "PROBLCK"
## [7] "PROHISP" "SOUTH" "PROHSLS" "PROWKCO" "PROFEMHH" "PROEXTR"
## [13] "PRONMAN" "PROMSERV" "PROPSERV" "PROAUNEM" "PROMUNDR" "XCOORD"
## [19] "YCOORD" "X2" "Y2"
head(poverty.new$NAME)
## [1] Lake of the Woods Ferry Stevens Okanogan
## [5] Pend Oreille Boundary
## 1801 Levels: Abbeville Acadia Accomack Ada Adair Adams Addison ... Ziebach
# Change the unemployment rate
poverty.new@data[poverty.new@data$NAME == "Addison", "PROAUNEM"] <- 0.75
# The original predicted values
orig.pred <- as.data.frame(predict(laglm1))
# The predicted values with the new unemployment rate in Alabama
new.pred <- as.data.frame(predict(laglm1, newdata = poverty.new, listw = pov_kn6_wb))
# the difference between the predicted values
jCoAL_effect <- new.pred$fit - orig.pred$fit
el <- data.frame(name = poverty$NAME, diff_in_pred_PROPOV = jCoAL_effect) # To simplify things, placing the differences in a new data frame.
poverty.new$jee <- el$diff_in_pred_PROPOV # And placing them in the origianl, as well
# sort counties by absolute value of the change in predicted PPOV
el <- el[rev(order(abs(el$diff_in_pred_PROPOV))), ]
el[1:10, ] #show the top 10 counties
## name diff_in_pred_PROPOV
## 352 Addison 0.5924
## 584 Thomas -0.1300
## 582 Hooker -0.1205
## 632 Logan -0.1160
## 496 Cherry -0.1145
## 708 Lincoln -0.1135
## 458 Bennett -0.1080
## 420 Tripp -0.1066
## 459 Todd -0.1048
## 580 Grant -0.1044
# Make breaks to show positive, negative, and netural effects:
povbreaks <- c(min(poverty.new$jee), -0.03, 0.03, max(poverty.new$jee))
# Create labels for those classes:
povlabels <- c("Negative effect (greater than .03)", "No Effect (effect -.03 to .03)",
"Positive effect (greater than .03)")
# Assign colors to the classes:
pov_np <- findInterval(poverty.new$jee, povbreaks)
povcolors <- c("red", "yellow", "blue")
# Draw Map
plot(poverty.new, col = povcolors[pov_np])
mtext("Effects of a change in Addison County, VT (set unemployment rate = 0.75)\n on predicted values in a spatial lag model",
side = 3, line = 1)
legend("topleft", legend = povlabels, fill = povcolors, bty = "n")
pal5 <- brewer.pal(6, "Spectral")
cats5 <- classIntervals(poverty.new$jee, n = 5, style = "jenks")
colors5 <- findColours(cats5, pal5)
plot(poverty.new, col = colors5)
legend("topleft", legend = round(cats5$brks, 2), fill = pal5, bty = "n")
mtext("Effects of a change in Addison County, VT (set unemployment rate = 0.75)\n on predicted values in a spatial lag model",
side = 3, line = 1)