date()
## [1] "Wed Nov 28 11:48:02 2012"
Due Date: November 29, 2012
Total Points: 50
The file Georgia.zip contains ESRI shape files in a folder called GeorgiaEduc. The dbf contains the percentage of Georgia county residents with a bachelors degree along with other countywide information. Let the dependent variable be PctBach (percent of population with a bachelors degree) and the explanatory variables be TotPop90, PctRural, PctEld, PctFB, PctPov, PctBlack.
a. Download the zip file, unzip it and use the readShapeSpatial() function from the maptools package to get the data into R. Hint: After unzipping the shape files are in the directory Georgia. (10)
require(maptools)
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: sp
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
tmp = download.file("http://myweb.fsu.edu/jelsner/Georgia.zip", "Georgia.zip",
mode = "wb")
unzip("Georgia.zip")
GAresidents = readShapeSpatial("Georgia/GeorgiaEduc")
slotNames(GAresidents)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
str(GAresidents, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 174 obs. of 25 variables:
## .. ..- attr(*, "data_types")= chr [1:25] "N" "F" "F" "N" ...
## ..@ polygons :List of 174
## .. .. [list output truncated]
## ..@ plotOrder : int [1:174] 162 18 33 89 25 17 54 21 164 135 ...
## ..@ bbox : num [1:2, 1:2] 627306 3368056 1082188 3879805
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slots
b. Start with a multiple regression model using all six explanatory variables listed above. Create a final model by removing variables that are not significant in explaining percentage of bachelor degrees. (10)
model1 = lm(PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack,
data = GAresidents)
summary(model1)
##
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctEld + PctFB +
## PctPov + PctBlack, data = GAresidents)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.369 -2.052 -0.111 1.175 19.894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.50e+01 1.60e+00 9.37 < 2e-16 ***
## TotPop90 2.33e-05 4.55e-06 5.11 8.7e-07 ***
## PctRural -4.27e-02 1.30e-02 -3.28 0.0013 **
## PctEld -7.88e-02 1.15e-01 -0.69 0.4935
## PctFB 1.25e+00 2.98e-01 4.18 4.6e-05 ***
## PctPov -1.55e-01 6.60e-02 -2.35 0.0201 *
## PctBlack 2.08e-02 2.37e-02 0.88 0.3809
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.35 on 167 degrees of freedom
## Multiple R-squared: 0.649, Adjusted R-squared: 0.637
## F-statistic: 51.6 on 6 and 167 DF, p-value: <2e-16
drop1(model1)
## Single term deletions
##
## Model:
## PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack
## Df Sum of Sq RSS AIC
## <none> 1870 427
## TotPop90 1 292.4 2163 450
## PctRural 1 120.5 1991 436
## PctEld 1 5.3 1876 426
## PctFB 1 195.9 2066 443
## PctPov 1 61.7 1932 431
## PctBlack 1 8.6 1879 426
model2 = lm(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = GAresidents)
summary(model2)
##
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
## data = GAresidents)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.165 -2.066 -0.087 1.311 19.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.44e+01 1.42e+00 10.14 < 2e-16 ***
## TotPop90 2.37e-05 4.43e-06 5.35 2.8e-07 ***
## PctRural -4.64e-02 1.26e-02 -3.69 0.00030 ***
## PctFB 1.30e+00 2.87e-01 4.52 1.2e-05 ***
## PctPov -1.31e-01 3.73e-02 -3.51 0.00058 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.34 on 169 degrees of freedom
## Multiple R-squared: 0.646, Adjusted R-squared: 0.638
## F-statistic: 77.2 on 4 and 169 DF, p-value: <2e-16
c. Use the significant explanatory variables and create a geographic regression model using a fixed bandwidth. Plot a choropleth map of the predictions from the model. (10)
suppressMessages(require(spgwr))
bw = gwr.sel(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = GAresidents)
## Bandwidth: 241605 CV score: 2012
## Bandwidth: 390534 CV score: 2052
## Bandwidth: 149561 CV score: 1995
## Bandwidth: 92675 CV score: 2100
## Bandwidth: 184719 CV score: 1993
## Bandwidth: 173020 CV score: 1991
## Bandwidth: 170165 CV score: 1991
## Bandwidth: 167827 CV score: 1991
## Bandwidth: 168455 CV score: 1991
## Bandwidth: 168480 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
## Bandwidth: 168474 CV score: 1991
modelgwr = gwr(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = GAresidents,
bandwidth = bw)
df = slot(modelgwr$SDF, "data")
brks = round(quantile(df$pred, probs = seq(0, 1, 0.2)), digits = 2)
ints = findInterval(df$pred, brks, all.inside = TRUE)
cls = rev(rainbow(5))
plot(GAresidents, col = cls[ints])
legend(x = "topleft", legend = leglabs(brks), fill = cls, bty = "n", title = "Predicted",
horiz = FALSE, cex = 0.8)
title(main = "Predicted (gwr) Percent of Population With a Bachelor's Degree")
d. Plot a choropleth map of the percent poverty coefficient. (10)
brks = cut(df$PctPov, 6)
ints = as.integer(brks)
cls = rev(rainbow(6))
plot(GAresidents, col = cls[ints])
legend(x = "topleft", legend = levels(brks), fill = cls, bty = "n", title = "Percent Poverty Coefficient",
horiz = FALSE, cex = 0.8)
e. Plot a choropleth map of the R squared value. (10).
brks = cut(df$localR2, 6)
ints = as.integer(brks)
cls = rainbow(6)
plot(GAresidents, col = cls[ints])
legend(x = "topleft", legend = levels(brks), fill = cls, bty = "n", title = "Local R Squared",
horiz = FALSE, cex = 0.8)