library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)
# READ DATA 'USA.shp'
USA <- readShapePoly(choose.files())
USA <- USA[USA$Total > 0, ]
lmBush <- lm(Bush_pct ~ pctfemhh + homevalu, data = USA)
summary(lmBush)
##
## Call:
## lm(formula = Bush_pct ~ pctfemhh + homevalu, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.82 -7.31 0.89 7.79 35.58
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.77e+01 5.60e-01 138.9 <2e-16 ***
## pctfemhh -1.01e+00 3.61e-02 -27.9 <2e-16 ***
## homevalu -7.60e-05 6.01e-06 -12.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.9 on 3105 degrees of freedom
## Multiple R-squared: 0.256, Adjusted R-squared: 0.255
## F-statistic: 533 on 2 and 3105 DF, p-value: <2e-16
lmBush.resid <- resid(lmBush)
plot(lmBush.resid ~ USA$Bush_pct)
## Try to look at spatial pattern of residuals. add a new column to USA
## to hold the residuals
USA$resid <- resid(lmBush)
pal3 <- brewer.pal(3, "Spectral")
cats3 <- classIntervals(USA$resid, n = 3, style = "quantile")
ThreeColors <- findColours(cats3, pal3)
plot(USA, col = ThreeColors)
# library for regsubsets()
library(leaps)
# return 1 best model with up to 14 variables the 14th column is Bush_pct;
# 17th :30th columns are the variables considered.
d <- regsubsets(Bush_pct ~ ., nbest = 1, nvmax = 14, data = USA[, c(14, 17:30)])
d.fit <- summary(d)
par(mfrow = c(2, 2))
plot(1:14, y = d.fit$adjr2, type = "o", xlab = "Number of Parameters", ylab = "Adj. r^2")
abline(v = "10", col = "blue")
plot(1:14, y = d.fit$bic, type = "o", xlab = "Number of Parameters", ylab = "BIC")
abline(v = "10", col = "blue")
# see what variables in each best model by plotting them
plot(d)
# specify a model, the one below is showing the best 10-variable model
# (the ones with stars)
d.fit$outmat[10, ]
## MDratio pcturban pctfemhh pcincome pctpoor pctcoled
## "*" " " "*" "*" "*" "*"
## unemploy homevalu popdens Obese Noins HISP_LAT
## "*" "*" " " "*" "*" " "
## MEDAGE2000 PEROVER65
## " " "*"
lmBush2 <- lm(Bush_pct ~ MDratio + pcincome + pctfemhh + pctpoor + pctcoled +
unemploy + homevalu + Obese + Noins + PEROVER65, USA)
summary(lmBush2)
##
## Call:
## lm(formula = Bush_pct ~ MDratio + pcincome + pctfemhh + pctpoor +
## pctcoled + unemploy + homevalu + Obese + Noins + PEROVER65,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.31 -6.82 0.64 7.33 33.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.77e+01 1.41e+00 47.92 < 2e-16 ***
## MDratio -8.82e-03 2.29e-03 -3.86 0.00012 ***
## pcincome 7.79e-04 7.55e-05 10.31 < 2e-16 ***
## pctfemhh -1.25e+00 5.10e-02 -24.60 < 2e-16 ***
## pctpoor 2.42e-01 3.86e-02 6.26 4.4e-10 ***
## pctcoled -4.04e-01 4.71e-02 -8.56 < 2e-16 ***
## unemploy -7.65e-01 7.32e-02 -10.45 < 2e-16 ***
## homevalu -8.79e-05 9.03e-06 -9.74 < 2e-16 ***
## Obese 1.76e+01 3.76e+00 4.68 3.0e-06 ***
## Noins 5.31e+01 4.32e+00 12.29 < 2e-16 ***
## PEROVER65 -4.19e-01 5.18e-02 -8.10 7.8e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.99 on 3097 degrees of freedom
## Multiple R-squared: 0.376, Adjusted R-squared: 0.374
## F-statistic: 186 on 10 and 3097 DF, p-value: <2e-16
library(car)
## Loading required package: MASS
## Loading required package: nnet
vif(lmBush2)
## MDratio pcincome pctfemhh pctpoor pctcoled unemploy homevalu
## 1.795 3.921 2.425 3.079 2.947 1.632 2.745
## Obese Noins PEROVER65
## 1.486 1.243 1.415
anova(lmBush2, lmBush, test = "F")
## Analysis of Variance Table
##
## Model 1: Bush_pct ~ MDratio + pcincome + pctfemhh + pctpoor + pctcoled +
## unemploy + homevalu + Obese + Noins + PEROVER65
## Model 2: Bush_pct ~ pctfemhh + homevalu
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3097 309162
## 2 3105 368761 -8 -59599 74.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(resid(lmBush2) ~ USA$Bush_pct)
pal3 <- brewer.pal(3, "Spectral")
cats3 <- classIntervals(resid(lmBush2), n = 3, style = "quantile")
ThreeColors <- findColours(cats3, pal3)
plot(USA, col = ThreeColors)
Created by: Li Xu; Created on: 04/11/2013; Updated on: 05/04/2013