date()
## [1] "Thu Nov 29 13:31:22 2012"
Due Date: November 29, 2012
Total Points: 50
a. Download the zip file, unzip it and use the readShapeSpatial() function from the maptools package to get the data into R. Hint: After unzipping the shape files are in the directory for Georgia. (10)
suppressMessages(require(maptools))
## Warning: package 'maptools' was built under R version 2.15.2
temp = download.file("http://myweb.fsu.edu/jelsner/Georgia.zip", "Georgia.zip",
mode = "wb")
unzip("Georgia.zip")
Bachelor = readShapeSpatial("Georgia/GeorgiaEduc")
b. Start with a multiple regression model using all six explanatory variables listed above. Create a final model by removing variables that are not significant in explaining percentage of bachelor degrees. (10)
Model = lm(PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack,
data = Bachelor)
summary(Model)
##
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctEld + PctFB +
## PctPov + PctBlack, data = Bachelor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.369 -2.052 -0.111 1.175 19.894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.50e+01 1.60e+00 9.37 < 2e-16 ***
## TotPop90 2.33e-05 4.55e-06 5.11 8.7e-07 ***
## PctRural -4.27e-02 1.30e-02 -3.28 0.0013 **
## PctEld -7.88e-02 1.15e-01 -0.69 0.4935
## PctFB 1.25e+00 2.98e-01 4.18 4.6e-05 ***
## PctPov -1.55e-01 6.60e-02 -2.35 0.0201 *
## PctBlack 2.08e-02 2.37e-02 0.88 0.3809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.35 on 167 degrees of freedom
## Multiple R-squared: 0.649, Adjusted R-squared: 0.637
## F-statistic: 51.6 on 6 and 167 DF, p-value: <2e-16
##
step(Model)
## Start: AIC=427.2
## PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack
##
## Df Sum of Sq RSS AIC
## - PctEld 1 5.3 1876 426
## - PctBlack 1 8.6 1879 426
## <none> 1870 427
## - PctPov 1 61.7 1932 431
## - PctRural 1 120.5 1991 436
## - PctFB 1 195.9 2066 443
## - TotPop90 1 292.4 2163 450
##
## Step: AIC=425.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov + PctBlack
##
## Df Sum of Sq RSS AIC
## - PctBlack 1 10.9 1886 425
## <none> 1876 426
## - PctPov 1 100.1 1976 433
## - PctRural 1 137.1 2013 436
## - PctFB 1 228.9 2104 444
## - TotPop90 1 287.2 2163 448
##
## Step: AIC=424.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov
##
## Df Sum of Sq RSS AIC
## <none> 1886 425
## - PctPov 1 137 2024 435
## - PctRural 1 152 2038 436
## - PctFB 1 228 2114 443
## - TotPop90 1 320 2206 450
##
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
## data = Bachelor)
##
## Coefficients:
## (Intercept) TotPop90 PctRural PctFB PctPov
## 1.44e+01 2.37e-05 -4.64e-02 1.30e+00 -1.31e-01
##
Model2 = lm(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor)
summary(Model2)
##
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
## data = Bachelor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.165 -2.066 -0.087 1.311 19.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.44e+01 1.42e+00 10.14 < 2e-16 ***
## TotPop90 2.37e-05 4.43e-06 5.35 2.8e-07 ***
## PctRural -4.64e-02 1.26e-02 -3.69 0.00030 ***
## PctFB 1.30e+00 2.87e-01 4.52 1.2e-05 ***
## PctPov -1.31e-01 3.73e-02 -3.51 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.34 on 169 degrees of freedom
## Multiple R-squared: 0.646, Adjusted R-squared: 0.638
## F-statistic: 77.2 on 4 and 169 DF, p-value: <2e-16
##
c. Use the significant explanatory variables and create a geographic regression model using a fixed bandwidth. Plot a choropleth map of the predictions from the model. (10)
suppressMessages(require(spgwr))
## Warning: package 'spgwr' was built under R version 2.15.2
BwBachelors = gwr.sel(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor)
## Bandwidth: 241605 CV score: 2012
## Bandwidth: 390534 CV score: 2052
## Bandwidth: 149561 CV score: 1995
## Bandwidth: 92675 CV score: 2100
## Bandwidth: 184719 CV score: 1993
## Bandwidth: 173020 CV score: 1991
## Bandwidth: 170165 CV score: 1991
## Bandwidth: 167827 CV score: 1991
## Bandwidth: 168455 CV score: 1991
## Bandwidth: 168480 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168476 CV score: 1991
## Bandwidth: 168475 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
GwrBachelors = gwr(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor,
coords = cbind(Bachelor$X_COORD, Bachelor$Y_COORD), bandwidth = BwBachelors)
## Warning: data is Spatial* object, ignoring coords argument
GwrBachelors
## Call:
## gwr(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
## data = Bachelor, coords = cbind(Bachelor$X_COORD, Bachelor$Y_COORD),
## bandwidth = BwBachelors)
## Kernel function: gwr.Gauss
## Fixed bandwidth: 168474
## Summary of GWR coefficient estimates:
## Min. 1st Qu. Median 3rd Qu. Max. Global
## X.Intercept. 1.12e+01 1.33e+01 1.45e+01 1.52e+01 1.58e+01 14.39
## TotPop90 1.35e-05 1.83e-05 2.20e-05 2.62e-05 3.37e-05 0.00
## PctRural -6.24e-02 -5.14e-02 -4.46e-02 -3.89e-02 -2.58e-02 -0.05
## PctFB 4.54e-01 9.40e-01 1.43e+00 2.00e+00 2.52e+00 1.30
## PctPov -1.46e-01 -1.39e-01 -1.34e-01 -1.21e-01 -9.26e-02 -0.13
Map = slot(GwrBachelors$SDF, "data")
names(Map)
## [1] "sum.w" "X.Intercept." "TotPop90" "PctRural"
## [5] "PctFB" "PctPov" "gwr.e" "pred"
## [9] "localR2"
brks = round(quantile(Map$pred, probs = seq(0, 1, 0.2)), digits = 2)
ints = findInterval(Map$pred, brks, all.inside = TRUE)
cls = rev(heat.colors(5))
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = leglabs(brks), cex = 0.9, fill = cls)
title(main = "Predicted Percentage of Population with Bachelors Degree in Georgia")
d. Plot a choropleth map of the percent poverty coefficient. (10)
brks = cut(Map$PctPov, 6)
ints = as.integer(brks)
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = levels(brks), fill = cls, cex = 0.9, bty = "n")
title(main = "Percent of Poverty Coefficient in Georgia at County Level")
e. Plot a choropleth map of the R squared value. (10).
brks = cut(Map$localR2, 6)
ints = as.integer(brks)
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = levels(brks), fill = cls, cex = 0.9, bty = "n",
horiz = FALSE)
title(main = "Local R Squared in Georgia at County Level")