Problem Set # 5

William “Luke” Tabbert

# Print the current date and time so the report records when it was run.
date()
## [1] "Thu Nov 29 19:40:18 2012"

Due Date: November 29, 2012 Total Points: 50

a. Download the zip file, unzip it, and use the readShapeSpatial() function from the maptools package to get the data into R. Hint: After unzipping, the shapefiles are in the directory Georgia. (10)

# Attach maptools (provides readShapeSpatial() and leglabs()). library() stops
# with an error if the package is missing, whereas require() only warns and
# returns FALSE, which lets the script run on and fail later.
library(maptools)
## Loading required package: maptools
## Loading required package: foreign
## Loading required package: sp
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
# Attach spgwr (provides gwr.sel() and gwr()). library() stops with an error
# if the package is missing, whereas require() only warns and returns FALSE,
# which lets the script run on and fail later.
library(spgwr)
## Loading required package: spgwr
## NOTE: This package does not constitute approval of GWR as a method of
## spatial analysis; see example(gwr)
# Fetch the zipped Georgia shapefiles, extract them into the working
# directory, and read the GeorgiaEduc layer into `georgia`.
zip_url <- "http://myweb.fsu.edu/jelsner/Georgia.zip"
zip_file <- "Georgia.zip"

# download.file() returns 0 on success; some download methods report failure
# only through a non-zero status, so check it instead of discarding it.
download_status <- download.file(zip_url, zip_file, mode = "wb")
if (download_status != 0) {
  stop("Download of ", zip_url, " failed with status ", download_status,
       call. = FALSE)
}

# unzip() returns the paths it extracted; an empty result means the archive
# could not be read, which would otherwise surface later as a read error.
extracted <- unzip(zip_file)
if (length(extracted) == 0) {
  stop("No files were extracted from ", zip_file, call. = FALSE)
}

georgia <- readShapeSpatial("Georgia/GeorgiaEduc")

b. Start with a multiple regression model using all six explanatory variables (TotPop90, PctRural, PctEld, PctFB, PctPov, and PctBlack). Create a final model by removing variables that are not significant in explaining the percentage of residents with bachelor's degrees. (10)

# Full linear model: PctBach regressed on all six candidate predictors.
model <- lm(
  formula = PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack,
  data = georgia
)

# Stepwise selection by AIC starting from the full model; the trace of each
# step and the selected model are printed.
step(object = model)
## Start:  AIC=427.2
## PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack
## 
##            Df Sum of Sq  RSS AIC
## - PctEld    1       5.3 1876 426
## - PctBlack  1       8.6 1879 426
## <none>                  1870 427
## - PctPov    1      61.7 1932 431
## - PctRural  1     120.5 1991 436
## - PctFB     1     195.9 2066 443
## - TotPop90  1     292.4 2163 450
## 
## Step:  AIC=425.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov + PctBlack
## 
##            Df Sum of Sq  RSS AIC
## - PctBlack  1      10.9 1886 425
## <none>                  1876 426
## - PctPov    1     100.1 1976 433
## - PctRural  1     137.1 2013 436
## - PctFB     1     228.9 2104 444
## - TotPop90  1     287.2 2163 448
## 
## Step:  AIC=424.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov
## 
##            Df Sum of Sq  RSS AIC
## <none>                  1886 425
## - PctPov    1       137 2024 435
## - PctRural  1       152 2038 436
## - PctFB     1       228 2114 443
## - TotPop90  1       320 2206 450
## 
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov, 
##     data = georgia)
## 
## Coefficients:
## (Intercept)     TotPop90     PctRural        PctFB       PctPov  
##    1.44e+01     2.37e-05    -4.64e-02     1.30e+00    -1.31e-01

c. Use the significant explanatory variables to create a geographically weighted regression (GWR) model using a fixed bandwidth. Plot a choropleth map of the predictions from the model. (10)

# Search for the GWR bandwidth with the lowest cross-validation (CV) score,
# using the predictors kept by the stepwise selection. Each candidate
# bandwidth and its CV score is printed as the search proceeds.
georgia.bw <- gwr.sel(
  formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
  data = georgia
)
## Bandwidth: 241605 CV score: 2012 
## Bandwidth: 390534 CV score: 2052 
## Bandwidth: 149561 CV score: 1995 
## Bandwidth: 92675 CV score: 2100 
## Bandwidth: 184719 CV score: 1993 
## Bandwidth: 173020 CV score: 1991 
## Bandwidth: 170165 CV score: 1991 
## Bandwidth: 167827 CV score: 1991 
## Bandwidth: 168455 CV score: 1991 
## Bandwidth: 168480 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991
# Fit the geographically weighted regression with the selected bandwidth.
georgia.gwr <- gwr(
  formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov,
  data = georgia,
  bandwidth = georgia.bw
)

# Per-area results (predictions, local coefficients, local R2) are read from
# the data slot of the spatial data frame returned in `SDF`.
df <- georgia.gwr$SDF@data

# Quintile class breaks for the predictions, rounded to two decimals so the
# legend labels stay readable.
brks <- round(quantile(df[["pred"]], probs = seq(0, 1, by = 0.2)), digits = 2)

# all.inside = TRUE keeps values at or beyond the outer breaks in classes 1-5.
ints <- findInterval(df[["pred"]], brks, all.inside = TRUE)

# Reversed heat palette, one colour per class.
cls <- rev(heat.colors(5))

par(mfrow = c(1, 1))
plot(georgia, col = cls[ints])
legend(
  "topright",
  legend = leglabs(brks),
  fill = cls,
  bty = "n",
  horiz = FALSE,
  cex = 0.8
)
title(main = "Predicted (GWR) Percentage of Residents with Bachelor's Degrees")

plot of chunk questionc

d. Plot a choropleth map of the percent poverty coefficient. (10)

# Choropleth of the PctPov column of `df` (expected to be the GWR result
# table, where this column holds the local poverty coefficient), binned into
# six equal-width classes.
brks <- cut(df[["PctPov"]], breaks = 6)

# The integer codes of the factor index the colour for each area.
ints <- as.integer(brks)
cls <- rev(heat.colors(6))

plot(georgia, col = cls[ints])
legend(
  "topright",
  legend = levels(brks),
  fill = cls,
  bty = "n",
  horiz = FALSE,
  cex = 0.8
)
title(main = "Percent Poverty Coefficient")

plot of chunk questiond

e. Plot a choropleth map of the local R-squared values. (10)

# Choropleth of the localR2 column of `df` (expected to be the GWR result
# table), binned into six equal-width classes.
brks <- cut(df[["localR2"]], breaks = 6)

# The integer codes of the factor index the colour for each area.
ints <- as.integer(brks)
cls <- rev(heat.colors(6))

plot(georgia, col = cls[ints])
legend(
  "topright",
  legend = levels(brks),
  fill = cls,
  bty = "n",
  horiz = FALSE,
  cex = 0.8
)
title(main = "Local R Squared")

plot of chunk questione