Problem Set # 5

Brendan Mulholland

date()
## [1] "Thu Nov 29 13:31:22 2012"

Due Date: November 29, 2012
Total Points: 50

a. Download the zip file, unzip it and use the readShapeSpatial() function from the maptools package to get the data into R. Hint: After unzipping the shape files are in the directory for Georgia. (10)

suppressMessages(require(maptools))
## Warning: package 'maptools' was built under R version 2.15.2
temp = download.file("http://myweb.fsu.edu/jelsner/Georgia.zip", "Georgia.zip", 
    mode = "wb")
unzip("Georgia.zip")
Bachelor = readShapeSpatial("Georgia/GeorgiaEduc")

b. Start with a multiple regression model using all six explanatory variables listed above. Create a final model by removing variables that are not significant in explaining percentage of bachelor degrees. (10)

Model = lm(PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack, 
    data = Bachelor)
summary(Model)
## 
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctEld + PctFB + 
##     PctPov + PctBlack, data = Bachelor)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.369 -2.052 -0.111  1.175 19.894 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.50e+01   1.60e+00    9.37  < 2e-16 ***
## TotPop90     2.33e-05   4.55e-06    5.11  8.7e-07 ***
## PctRural    -4.27e-02   1.30e-02   -3.28   0.0013 ** 
## PctEld      -7.88e-02   1.15e-01   -0.69   0.4935    
## PctFB        1.25e+00   2.98e-01    4.18  4.6e-05 ***
## PctPov      -1.55e-01   6.60e-02   -2.35   0.0201 *  
## PctBlack     2.08e-02   2.37e-02    0.88   0.3809    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 3.35 on 167 degrees of freedom
## Multiple R-squared: 0.649,   Adjusted R-squared: 0.637 
## F-statistic: 51.6 on 6 and 167 DF,  p-value: <2e-16 
## 
step(Model)
## Start:  AIC=427.2
## PctBach ~ TotPop90 + PctRural + PctEld + PctFB + PctPov + PctBlack
## 
##            Df Sum of Sq  RSS AIC
## - PctEld    1       5.3 1876 426
## - PctBlack  1       8.6 1879 426
## <none>                  1870 427
## - PctPov    1      61.7 1932 431
## - PctRural  1     120.5 1991 436
## - PctFB     1     195.9 2066 443
## - TotPop90  1     292.4 2163 450
## 
## Step:  AIC=425.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov + PctBlack
## 
##            Df Sum of Sq  RSS AIC
## - PctBlack  1      10.9 1886 425
## <none>                  1876 426
## - PctPov    1     100.1 1976 433
## - PctRural  1     137.1 2013 436
## - PctFB     1     228.9 2104 444
## - TotPop90  1     287.2 2163 448
## 
## Step:  AIC=424.7
## PctBach ~ TotPop90 + PctRural + PctFB + PctPov
## 
##            Df Sum of Sq  RSS AIC
## <none>                  1886 425
## - PctPov    1       137 2024 435
## - PctRural  1       152 2038 436
## - PctFB     1       228 2114 443
## - TotPop90  1       320 2206 450
## 
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov, 
##     data = Bachelor)
## 
## Coefficients:
## (Intercept)     TotPop90     PctRural        PctFB       PctPov  
##    1.44e+01     2.37e-05    -4.64e-02     1.30e+00    -1.31e-01  
## 

Model2 = lm(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor)

summary(Model2)
## 
## Call:
## lm(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov, 
##     data = Bachelor)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.165 -2.066 -0.087  1.311 19.584 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.44e+01   1.42e+00   10.14  < 2e-16 ***
## TotPop90     2.37e-05   4.43e-06    5.35  2.8e-07 ***
## PctRural    -4.64e-02   1.26e-02   -3.69  0.00030 ***
## PctFB        1.30e+00   2.87e-01    4.52  1.2e-05 ***
## PctPov      -1.31e-01   3.73e-02   -3.51  0.00058 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 3.34 on 169 degrees of freedom
## Multiple R-squared: 0.646,   Adjusted R-squared: 0.638 
## F-statistic: 77.2 on 4 and 169 DF,  p-value: <2e-16 
## 

c. Use the significant explanatory variables and create a geographic regression model using a fixed bandwidth. Plot a choropleth map of the predictions from the model. (10)

suppressMessages(require(spgwr))
## Warning: package 'spgwr' was built under R version 2.15.2
BwBachelors = gwr.sel(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor)
## Bandwidth: 241605 CV score: 2012 
## Bandwidth: 390534 CV score: 2052 
## Bandwidth: 149561 CV score: 1995 
## Bandwidth: 92675 CV score: 2100 
## Bandwidth: 184719 CV score: 1993 
## Bandwidth: 173020 CV score: 1991 
## Bandwidth: 170165 CV score: 1991 
## Bandwidth: 167827 CV score: 1991 
## Bandwidth: 168455 CV score: 1991 
## Bandwidth: 168480 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168476 CV score: 1991 
## Bandwidth: 168475 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
## Bandwidth: 168474 CV score: 1991 
GwrBachelors = gwr(PctBach ~ TotPop90 + PctRural + PctFB + PctPov, data = Bachelor, 
    coords = cbind(Bachelor$X_COORD, Bachelor$Y_COORD), bandwidth = BwBachelors)
## Warning: data is Spatial* object, ignoring coords argument

GwrBachelors
## Call:
## gwr(formula = PctBach ~ TotPop90 + PctRural + PctFB + PctPov, 
##     data = Bachelor, coords = cbind(Bachelor$X_COORD, Bachelor$Y_COORD), 
##     bandwidth = BwBachelors)
## Kernel function: gwr.Gauss 
## Fixed bandwidth: 168474 
## Summary of GWR coefficient estimates:
##                   Min.   1st Qu.    Median   3rd Qu.      Max. Global
## X.Intercept.  1.12e+01  1.33e+01  1.45e+01  1.52e+01  1.58e+01  14.39
## TotPop90      1.35e-05  1.83e-05  2.20e-05  2.62e-05  3.37e-05   0.00
## PctRural     -6.24e-02 -5.14e-02 -4.46e-02 -3.89e-02 -2.58e-02  -0.05
## PctFB         4.54e-01  9.40e-01  1.43e+00  2.00e+00  2.52e+00   1.30
## PctPov       -1.46e-01 -1.39e-01 -1.34e-01 -1.21e-01 -9.26e-02  -0.13

Map = slot(GwrBachelors$SDF, "data")
names(Map)
## [1] "sum.w"        "X.Intercept." "TotPop90"     "PctRural"    
## [5] "PctFB"        "PctPov"       "gwr.e"        "pred"        
## [9] "localR2"     

brks = round(quantile(Map$pred, probs = seq(0, 1, 0.2)), digits = 2)
ints = findInterval(Map$pred, brks, all.inside = TRUE)
cls = rev(heat.colors(5))
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = leglabs(brks), cex = 0.9, fill = cls)
title(main = "Predicted Percentage of Population with Bachelors Degree in Georgia")

plot of chunk unnamed-chunk-4

d. Plot a choropleth map of the percent poverty coefficient. (10)

brks = cut(Map$PctPov, 6)
ints = as.integer(brks)
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = levels(brks), fill = cls, cex = 0.9, bty = "n")
title(main = "Percent of Poverty Coefficient in Georgia at County Level")

plot of chunk unnamed-chunk-5

e. Plot a choropleth map of the R squared value. (10).

brks = cut(Map$localR2, 6)
ints = as.integer(brks)
plot(Bachelor, col = cls[ints])
legend(x = "topright", legend = levels(brks), fill = cls, cex = 0.9, bty = "n", 
    horiz = FALSE)
title(main = "Local R Squared in Georgia at County Level")

plot of chunk unnamed-chunk-6