Alan Zarychta's R-Journal

GEOG 5023: Quantitative Methods In Geography

Multiple Regression and Mapping Exercise

Options and packages:

# Heavily penalize scientific notation so that very small coefficients and
# p-values are printed in fixed notation throughout the journal.
options(scipen = 999)

library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Loading required package: foreign
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)

Loading and preparing the data:

Read in the shapefile

# Read the 2004 county election shapefile into `USA`.
# The data directory defaults to the original course folder but can be
# overridden with the GEOG5023_DATA environment variable, so the journal can be
# re-run on another machine without editing the code.
data_dir <- Sys.getenv(
    "GEOG5023_DATA",
    unset = "C:/CU BOULDER/Coursework/Y2S2/GEOG 5023 - Quant Methods Geo/Week 4 - Multiple Regression/Data"
)
shp_path <- file.path(data_dir, "2004_Election_Counties.shp")
# Fail early with a clear message instead of an opaque reader error.
if (!file.exists(shp_path)) {
    stop("Shapefile not found: ", shp_path, call. = FALSE)
}
USA <- readShapePoly(shp_path)

Visualize the data

# Set all four figure margins to one line each, then draw the county polygons.
par(mar = rep(1, 4))
plot(x = USA)

plot of chunk unnamed-chunk-3

Clean and Summarize the data

# Drop counties whose total vote count is zero, then inspect the object.
USA <- USA[USA$Total > 0, ]  # keep only counties with at least one vote
slotNames(USA)  # list the S4 slots of the spatial object
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
summary(USA@data)  # per-column summary of the attribute table
##          NAME         STATE_NAME     STATE_FIPS     CNTY_FIPS   
##  Washington:  32   Texas   : 254   48     : 254   001    :  48  
##  Jefferson :  26   Georgia : 159   13     : 159   003    :  48  
##  Franklin  :  25   Virginia: 134   51     : 134   005    :  48  
##  Jackson   :  24   Kentucky: 120   21     : 120   009    :  47  
##  Lincoln   :  24   Missouri: 115   29     : 115   007    :  46  
##  Madison   :  20   Kansas  : 105   20     : 105   011    :  46  
##  (Other)   :2957   (Other) :2221   (Other):2221   (Other):2825  
##       FIPS           AREA          FIPS_num          Bush       
##  01001  :   1   Min.   :    2   Min.   : 1001   Min.   :    65  
##  01003  :   1   1st Qu.:  435   1st Qu.:19046   1st Qu.:  2941  
##  01005  :   1   Median :  622   Median :29214   Median :  6364  
##  01007  :   1   Mean   :  966   Mean   :30686   Mean   : 19073  
##  01009  :   1   3rd Qu.:  931   3rd Qu.:46010   3rd Qu.: 15924  
##  01011  :   1   Max.   :20175   Max.   :56045   Max.   :954764  
##  (Other):3102                                                   
##      Kerry            County_F         Nader           Total        
##  Min.   :     12   Min.   : 1001   Min.   :    0   Min.   :     77  
##  1st Qu.:   1782   1st Qu.:19046   1st Qu.:    0   1st Qu.:   4831  
##  Median :   4041   Median :29214   Median :   14   Median :  10416  
##  Mean   :  17957   Mean   :30686   Mean   :  145   Mean   :  37176  
##  3rd Qu.:  10434   3rd Qu.:46010   3rd Qu.:   67   3rd Qu.:  26599  
##  Max.   :1670341   Max.   :56045   Max.   :13251   Max.   :2625105  
##                                                                     
##     Bush_pct       Kerry_pct       Nader_pct        MDratio      
##  Min.   : 9.31   Min.   : 7.17   Min.   :0.000   Min.   :   0.0  
##  1st Qu.:52.73   1st Qu.:30.23   1st Qu.:0.000   1st Qu.:  37.3  
##  Median :61.17   Median :38.49   Median :0.303   Median :  65.6  
##  Mean   :60.66   Mean   :38.94   Mean   :0.401   Mean   :  93.1  
##  3rd Qu.:69.37   3rd Qu.:46.79   3rd Qu.:0.633   3rd Qu.: 117.6  
##  Max.   :92.83   Max.   :90.05   Max.   :4.467   Max.   :2189.5  
##                                                                  
##       hosp          pcthisp         pcturban        urbrural   
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.0   Min.   :0.00  
##  1st Qu.: 1.32   1st Qu.:  4.0   1st Qu.:  0.0   1st Qu.:3.00  
##  Median : 3.29   Median :  8.0   Median : 33.5   Median :6.00  
##  Mean   : 5.68   Mean   : 44.5   Mean   : 35.3   Mean   :5.54  
##  3rd Qu.: 6.75   3rd Qu.: 24.0   3rd Qu.: 56.5   3rd Qu.:7.00  
##  Max.   :84.07   Max.   :972.0   Max.   :100.0   Max.   :9.00  
##                                                                
##     pctfemhh       pcincome        pctpoor        pctlt9ed   
##  Min.   : 0.0   Min.   :    0   Min.   : 0.0   Min.   : 0.0  
##  1st Qu.: 9.6   1st Qu.:15474   1st Qu.:11.1   1st Qu.: 8.9  
##  Median :12.2   Median :17450   Median :15.1   Median :13.2  
##  Mean   :13.0   Mean   :17805   Mean   :16.5   Mean   :14.3  
##  3rd Qu.:15.4   3rd Qu.:19818   3rd Qu.:20.4   3rd Qu.:18.7  
##  Max.   :41.1   Max.   :58096   Max.   :63.1   Max.   :56.3  
##                                                              
##     pcthsed        pctcoled       unemploy        pctwhtcl   
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.0  
##  1st Qu.:61.1   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.:38.5  
##  Median :71.2   Median :11.6   Median : 5.30   Median :43.5  
##  Mean   :68.4   Mean   :13.1   Mean   : 5.88   Mean   :44.6  
##  3rd Qu.:77.1   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.:50.7  
##  Max.   :95.5   Max.   :53.4   Max.   :37.90   Max.   :81.4  
##                                                              
##     homevalu           rent        popdens         crowded     
##  Min.   :     0   Min.   :  0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 35900   1st Qu.:255   1st Qu.:   15   1st Qu.: 1.80  
##  Median : 44400   Median :297   Median :   39   Median : 2.60  
##  Mean   : 52066   Mean   :314   Mean   :  194   Mean   : 3.61  
##  3rd Qu.: 58600   3rd Qu.:352   3rd Qu.:   93   3rd Qu.: 4.50  
##  Max.   :500001   Max.   :926   Max.   :53801   Max.   :44.40  
##                                                                
##     ginirev        SmokecurM        SmokevrM       SmokecurF    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.390   1st Qu.:0.220   1st Qu.:0.490   1st Qu.:0.190  
##  Median :0.420   Median :0.240   Median :0.520   Median :0.210  
##  Mean   :0.414   Mean   :0.242   Mean   :0.505   Mean   :0.209  
##  3rd Qu.:0.440   3rd Qu.:0.270   3rd Qu.:0.540   3rd Qu.:0.240  
##  Max.   :0.580   Max.   :0.580   Max.   :0.780   Max.   :0.420  
##                                                                 
##     SmokevrF         Obese           Noins         XYLENES__M   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.390   1st Qu.:0.320   1st Qu.:0.100   1st Qu.:   27  
##  Median :0.420   Median :0.340   Median :0.120   Median :   58  
##  Mean   :0.412   Mean   :0.335   Mean   :0.129   Mean   :  222  
##  3rd Qu.:0.460   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.:  171  
##  Max.   :0.630   Max.   :0.630   Max.   :0.410   Max.   :16661  
##                                                                 
##     TOLUENE        TETRACHLOR        STYRENE         NICKEL_COM   
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   : 0.00  
##  1st Qu.:   44   1st Qu.:   0.7   1st Qu.:   0.8   1st Qu.: 0.00  
##  Median :   91   Median :   1.9   Median :   1.9   Median : 0.01  
##  Mean   :  336   Mean   :  13.7   Mean   :  15.4   Mean   : 0.37  
##  3rd Qu.:  256   3rd Qu.:   6.6   3rd Qu.:   8.1   3rd Qu.: 0.11  
##  Max.   :28305   Max.   :1966.6   Max.   :1413.0   Max.   :69.01  
##                                                                   
##    METHYLENE_       MERCURY_CO      LEAD_COMPO       BENZENE__I  
##  Min.   :   0.0   Min.   :0.000   Min.   :  0.00   Min.   :   0  
##  1st Qu.:   1.6   1st Qu.:0.002   1st Qu.:  0.01   1st Qu.:  23  
##  Median :   3.9   Median :0.004   Median :  0.02   Median :  42  
##  Mean   :  26.4   Mean   :0.057   Mean   :  0.82   Mean   : 106  
##  3rd Qu.:  12.5   3rd Qu.:0.020   3rd Qu.:  0.23   3rd Qu.:  97  
##  Max.   :2764.2   Max.   :3.220   Max.   :290.63   Max.   :4612  
##                                                                  
##    ARSENIC_CO       POP2000          POP00SQMIL       MALE2000      
##  Min.   : 0.00   Min.   :      0   Min.   :    0   Min.   :      0  
##  1st Qu.: 0.00   1st Qu.:  11368   1st Qu.:   18   1st Qu.:   5600  
##  Median : 0.00   Median :  24770   Median :   43   Median :  12280  
##  Mean   : 0.11   Mean   :  89230   Mean   :  244   Mean   :  43768  
##  3rd Qu.: 0.02   3rd Qu.:  62028   3rd Qu.:  105   3rd Qu.:  30396  
##  Max.   :32.47   Max.   :9519338   Max.   :66934   Max.   :4704105  
##                                                                     
##    FEMALE2000         MAL2FEM         UNDER18          AIAN      
##  Min.   :      0   Min.   :  0.0   Min.   : 0.0   Min.   : 0.00  
##  1st Qu.:   5608   1st Qu.: 94.0   1st Qu.:23.7   1st Qu.: 0.20  
##  Median :  12548   Median : 97.0   Median :25.3   Median : 0.30  
##  Mean   :  45462   Mean   : 98.4   Mean   :25.5   Mean   : 1.61  
##  3rd Qu.:  31554   3rd Qu.:100.0   3rd Qu.:27.1   3rd Qu.: 0.80  
##  Max.   :4815233   Max.   :205.0   Max.   :45.3   Max.   :94.20  
##                                                                  
##       ASIA            BLACK            NHPI            WHITE     
##  Min.   : 0.000   Min.   : 0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.: 0.200   1st Qu.: 0.30   1st Qu.:0.0000   1st Qu.:77.2  
##  Median : 0.300   Median : 1.70   Median :0.0000   Median :91.3  
##  Mean   : 0.771   Mean   : 8.84   Mean   :0.0361   Mean   :84.8  
##  3rd Qu.: 0.700   3rd Qu.:10.10   3rd Qu.:0.1000   3rd Qu.:96.7  
##  Max.   :30.800   Max.   :86.50   Max.   :1.5000   Max.   :99.7  
##                                                                  
##    AIAN_MORE       ASIA_MORE        BLK_MORE       NHPI_MORE     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 0.50   1st Qu.: 0.30   1st Qu.: 0.40   1st Qu.:0.0000  
##  Median : 0.80   Median : 0.50   Median : 2.10   Median :0.1000  
##  Mean   : 2.22   Mean   : 0.98   Mean   : 9.13   Mean   :0.0996  
##  3rd Qu.: 1.40   3rd Qu.: 0.90   3rd Qu.:10.70   3rd Qu.:0.1000  
##  Max.   :95.10   Max.   :32.60   Max.   :86.70   Max.   :2.6000  
##                                                                  
##     WHT_MORE       HISP_LAT       CH19902000      MEDAGE2000  
##  Min.   : 0.0   Min.   : 0.00   Min.   :-37.4   Min.   : 0.0  
##  1st Qu.:79.1   1st Qu.: 0.90   1st Qu.:  1.0   1st Qu.:35.2  
##  Median :92.7   Median : 1.80   Median :  8.4   Median :37.4  
##  Mean   :86.0   Mean   : 6.19   Mean   : 11.1   Mean   :37.4  
##  3rd Qu.:97.6   3rd Qu.: 5.10   3rd Qu.: 17.4   3rd Qu.:39.8  
##  Max.   :99.9   Max.   :97.50   Max.   :191.0   Max.   :54.3  
##                                                               
##    PEROVER65   
##  Min.   : 0.0  
##  1st Qu.:12.1  
##  Median :14.4  
##  Mean   :14.8  
##  3rd Qu.:17.1  
##  Max.   :34.7  
## 

Plot the attribute table as a scatterplot matrix

# Scatterplot matrix of the variables used in the models below.
# Calling plot() on the whole attribute table requests a 69 x 69 grid of
# panels, which cannot fit on the device ("figure margins too large"), so only
# a readable subset of numeric columns is plotted.
par(mar = c(1, 1, 1, 1))
pair_vars <- c("Bush_pct", "pcturban", "pctfemhh", "pctpoor", "homevalu",
    "HISP_LAT", "MEDAGE2000")
plot(USA@data[, pair_vars])

Making Maps in R:

Display all available ColorBrewer palettes

# Show every RColorBrewer palette so one can be chosen for the maps.
display.brewer.all()

plot of chunk unnamed-chunk-6

Make a 7-color spectral palette

# Build the 7-class "Spectral" palette and preview its colours.
pal7 <- brewer.pal(n = 7, name = "Spectral")
display.brewer.pal(n = 7, name = "Spectral")  # view the colors

plot of chunk unnamed-chunk-7

Associate the colors with the map units

# Bush's share of all votes cast, as a proportion (0-1). The existing Bush_pct
# column holds the same quantity on a 0-100 scale.
USA$BushPct <- USA$Bush/USA$Total  # proportion of votes for Bush
cats7 <- classIntervals(USA$BushPct, n = 7, style = "quantile")  # 7 equal-count classes
cats7
## style: quantile
## [0.09308,0.4746)  [0.4746,0.5415)  [0.5415,0.5907)  [0.5907,0.6336) 
##              444              444              444              444 
##  [0.6336,0.6801)  [0.6801,0.7421)  [0.7421,0.9283] 
##              444              444              444
SevenColors <- findColours(cats7, pal7)  # one palette colour per county, chosen by its class

Plot the map with its colors

# Draw the counties, each filled with the colour of its vote-share class.
par(mar = rep(1, 4))
plot(x = USA, col = SevenColors)

plot of chunk unnamed-chunk-9

Standardize Bush's vote share as z-scores (deviations from the mean in standard-deviation units) for use with the diverging Spectral palette

# Standardize Bush's vote share as z-scores so that classes can be centred on
# the mean, matching the diverging "Spectral" palette.
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
    sd(USA$BushPct, na.rm = TRUE)
pal7 <- brewer.pal(7, "Spectral")
# Quantile classes are invariant to this linear rescaling, so they would give
# exactly the same map as the unstandardized shares. Use fixed breaks in
# standard-deviation units, symmetric about zero, so the palette's middle
# colour marks counties within half an SD of the mean.
z_max <- max(abs(USA$BushPctZ), 3, na.rm = TRUE)  # outer limit covering all data
z_breaks <- c(-z_max, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, z_max)
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "fixed",
    fixedBreaks = z_breaks)
SevenColors <- findColours(cats7, pal7)  # connect new categories to colors

Plot map with new colors

# Redraw the county map using the colours of the standardized classes.
par(mar = rep(1, 4))
plot(x = USA, col = SevenColors)

plot of chunk unnamed-chunk-11

Multiple Regression:

Modeling percent of votes for Bush

# Ordinary least squares on the county attribute table: Bush's vote share
# regressed on five predictors.
# NOTE(review): predictor meanings (urban share, female-headed households,
# poverty, Hispanic/Latino share, median age) are inferred from the column
# names - confirm against the data dictionary.
lm1 <- lm(BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000, USA)
summary(lm1)
## 
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + 
##     MEDAGE2000, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5687 -0.0733  0.0102  0.0808  0.2708 
## 
## Coefficients:
##               Estimate Std. Error t value            Pr(>|t|)    
## (Intercept)  0.7818017  0.0242582   32.23 <0.0000000000000002 ***
## pcturban    -0.0000132  0.0000888   -0.15               0.882    
## pctfemhh    -0.0134822  0.0005353  -25.19 <0.0000000000000002 ***
## pctpoor      0.0029674  0.0003551    8.36 <0.0000000000000002 ***
## HISP_LAT    -0.0002907  0.0001866   -1.56               0.119    
## MEDAGE2000  -0.0012489  0.0005649   -2.21               0.027 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.11 on 3102 degrees of freedom
## Multiple R-squared: 0.242,   Adjusted R-squared: 0.241 
## F-statistic:  198 on 5 and 3102 DF,  p-value: <0.0000000000000002
lm1.resid <- resid(lm1)  #residuals
# NOTE(review): OLS residuals are positively correlated with the observed
# response by construction, so a trend in this plot does not by itself show a
# poor model; residuals vs. fitted values is the conventional check.
plot(lm1.resid ~ USA$BushPct)  #residuals look very bad

plot of chunk unnamed-chunk-12

Trying a different model

# Smaller model: Bush's vote share on pctfemhh and homevalu only.
lmBush <- lm(BushPct ~ pctfemhh + homevalu, data = USA)
summary(lmBush)
## 
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6082 -0.0731  0.0089  0.0779  0.3558 
## 
## Coefficients:
##                  Estimate    Std. Error t value            Pr(>|t|)    
## (Intercept)  0.7771534694  0.0055962539   138.9 <0.0000000000000002 ***
## pctfemhh    -0.0100754006  0.0003608127   -27.9 <0.0000000000000002 ***
## homevalu    -0.0000007603  0.0000000601   -12.7 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256,   Adjusted R-squared: 0.255 
## F-statistic:  533 on 2 and 3105 DF,  p-value: <0.0000000000000002
# Residual diagnostics for lmBush: residuals against the observed response,
# each predictor, and the fitted values.
lmBush.resid <- resid(lmBush)
lmBush.pred <- predict(lmBush)  # no newdata supplied, so these are the fitted values
# NOTE(review): residuals vs. the observed response trend upward by
# construction; the residuals-vs-fitted plot at the end is the standard check.
plot(lmBush.resid ~ USA$BushPct)  #residuals still look bad

plot of chunk unnamed-chunk-13

plot(lmBush.resid ~ USA$pctfemhh)

plot of chunk unnamed-chunk-13

plot(lmBush.resid ~ USA$homevalu)

plot of chunk unnamed-chunk-13

plot(lmBush.resid ~ lmBush.pred)

plot of chunk unnamed-chunk-13

Create color palette to be able to map the residuals

# Attach the lmBush residuals to the attribute table, split them into three
# equal-count classes and assign one "Spectral" colour per class.
USA$resid <- residuals(lmBush)
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(var = USA$resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)

Map the residuals

# Map the residual classes to look for spatial structure.
par(mar = rep(1, 4))
plot(x = USA, col = ThreeColors)  #map shows clear pattern in the residuals

plot of chunk unnamed-chunk-15

Creating a Better Fitting Model:

Potential variables

# List the attribute columns available as candidate predictors.
names(USA)
##  [1] "NAME"       "STATE_NAME" "STATE_FIPS" "CNTY_FIPS"  "FIPS"      
##  [6] "AREA"       "FIPS_num"   "Bush"       "Kerry"      "County_F"  
## [11] "Nader"      "Total"      "Bush_pct"   "Kerry_pct"  "Nader_pct" 
## [16] "MDratio"    "hosp"       "pcthisp"    "pcturban"   "urbrural"  
## [21] "pctfemhh"   "pcincome"   "pctpoor"    "pctlt9ed"   "pcthsed"   
## [26] "pctcoled"   "unemploy"   "pctwhtcl"   "homevalu"   "rent"      
## [31] "popdens"    "crowded"    "ginirev"    "SmokecurM"  "SmokevrM"  
## [36] "SmokecurF"  "SmokevrF"   "Obese"      "Noins"      "XYLENES__M"
## [41] "TOLUENE"    "TETRACHLOR" "STYRENE"    "NICKEL_COM" "METHYLENE_"
## [46] "MERCURY_CO" "LEAD_COMPO" "BENZENE__I" "ARSENIC_CO" "POP2000"   
## [51] "POP00SQMIL" "MALE2000"   "FEMALE2000" "MAL2FEM"    "UNDER18"   
## [56] "AIAN"       "ASIA"       "BLACK"      "NHPI"       "WHITE"     
## [61] "AIAN_MORE"  "ASIA_MORE"  "BLK_MORE"   "NHPI_MORE"  "WHT_MORE"  
## [66] "HISP_LAT"   "CH19902000" "MEDAGE2000" "PEROVER65"  "BushPct"   
## [71] "BushPctZ"   "resid"

Try a state fixed-effects model

# State fixed effects: one dummy variable per level of STATE_NAME except the
# reference level, whose effect is absorbed into the intercept.
lmFixed <- lm(BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu, data = USA)  #fixed effects, including a dummy variable for all but one state
summary(lmFixed)
## 
## Call:
## lm(formula = BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu, 
##     data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5928 -0.0467  0.0071  0.0556  0.2867 
## 
## Coefficients:
##                                             Estimate    Std. Error t value
## (Intercept)                             0.8688855429  0.0127841233   67.97
## factor(STATE_NAME)Arizona              -0.0990669145  0.0248571545   -3.99
## factor(STATE_NAME)Arkansas             -0.1306708800  0.0146640734   -8.91
## factor(STATE_NAME)California           -0.1189931905  0.0169820232   -7.01
## factor(STATE_NAME)Colorado             -0.1171683114  0.0155897238   -7.52
## factor(STATE_NAME)Connecticut          -0.1916002862  0.0335642249   -5.71
## factor(STATE_NAME)Delaware             -0.1143393743  0.0513223215   -2.23
## factor(STATE_NAME)District of Columbia -0.2190191307  0.0879025445   -2.49
## factor(STATE_NAME)Florida              -0.0617952171  0.0151199634   -4.09
## factor(STATE_NAME)Georgia               0.0264870225  0.0126495893    2.09
## factor(STATE_NAME)Idaho                 0.0034561525  0.0172061938    0.20
## factor(STATE_NAME)Illinois             -0.1191754347  0.0138609755   -8.60
## factor(STATE_NAME)Indiana              -0.0496437196  0.0141713605   -3.50
## factor(STATE_NAME)Iowa                 -0.1958287619  0.0141304825  -13.86
## factor(STATE_NAME)Kansas               -0.0185120240  0.0140101184   -1.32
## factor(STATE_NAME)Kentucky             -0.0526115662  0.0133508476   -3.94
## factor(STATE_NAME)Louisiana             0.0039311322  0.0151805091    0.26
## factor(STATE_NAME)Maine                -0.2184542974  0.0243497633   -8.97
## factor(STATE_NAME)Maryland             -0.1071135487  0.0210597078   -5.09
## factor(STATE_NAME)Massachusetts        -0.2538809930  0.0266921807   -9.51
## factor(STATE_NAME)Michigan             -0.1405570347  0.0143978007   -9.76
## factor(STATE_NAME)Minnesota            -0.2040343886  0.0145122559  -14.06
## factor(STATE_NAME)Mississippi           0.0008641367  0.0143500089    0.06
## factor(STATE_NAME)Missouri             -0.0927936740  0.0136331709   -6.81
## factor(STATE_NAME)Montana              -0.0603453723  0.0160115951   -3.77
## factor(STATE_NAME)Nebraska             -0.0006254248  0.0144456256   -0.04
## factor(STATE_NAME)Nevada               -0.0494043289  0.0238640908   -2.07
## factor(STATE_NAME)New Hampshire        -0.2042405192  0.0298945562   -6.83
## factor(STATE_NAME)New Jersey           -0.1358943106  0.0228416946   -5.95
## factor(STATE_NAME)New Mexico           -0.1067993898  0.0185092156   -5.77
## factor(STATE_NAME)New York             -0.1380033744  0.0157607627   -8.76
## factor(STATE_NAME)North Carolina       -0.0518595692  0.0137451576   -3.77
## factor(STATE_NAME)North Dakota         -0.1080430835  0.0163708422   -6.60
## factor(STATE_NAME)Ohio                 -0.0977370126  0.0142369969   -6.87
## factor(STATE_NAME)Oklahoma             -0.0206343505  0.0146672980   -1.41
## factor(STATE_NAME)Oregon               -0.1144779034  0.0181602256   -6.30
## factor(STATE_NAME)Pennsylvania         -0.0982098153  0.0151958327   -6.46
## factor(STATE_NAME)Rhode Island         -0.2360312665  0.0408165212   -5.78
## factor(STATE_NAME)South Carolina       -0.0397792387  0.0166538425   -2.39
## factor(STATE_NAME)South Dakota         -0.1159343029  0.0153431273   -7.56
## factor(STATE_NAME)Tennessee            -0.0874536138  0.0139399958   -6.27
## factor(STATE_NAME)Texas                 0.0053005053  0.0121337166    0.44
## factor(STATE_NAME)Utah                  0.0465632112  0.0196286324    2.37
## factor(STATE_NAME)Vermont              -0.2644860105  0.0257734286  -10.26
## factor(STATE_NAME)Virginia             -0.1613607561  0.0133871672  -12.05
## factor(STATE_NAME)Washington           -0.1378044769  0.0177341247   -7.77
## factor(STATE_NAME)West Virginia        -0.1000959981  0.0158880566   -6.30
## factor(STATE_NAME)Wisconsin            -0.2025707788  0.0149989116  -13.51
## factor(STATE_NAME)Wyoming               0.0136112878  0.0212206132    0.64
## pctfemhh                               -0.0132700307  0.0003675416  -36.10
## homevalu                               -0.0000002353  0.0000000658   -3.58
##                                                    Pr(>|t|)    
## (Intercept)                            < 0.0000000000000002 ***
## factor(STATE_NAME)Arizona                 0.000068926600940 ***
## factor(STATE_NAME)Arkansas             < 0.0000000000000002 ***
## factor(STATE_NAME)California              0.000000000002982 ***
## factor(STATE_NAME)Colorado                0.000000000000074 ***
## factor(STATE_NAME)Connecticut             0.000000012491643 ***
## factor(STATE_NAME)Delaware                          0.02596 *  
## factor(STATE_NAME)District of Columbia              0.01277 *  
## factor(STATE_NAME)Florida                 0.000044824002734 ***
## factor(STATE_NAME)Georgia                           0.03635 *  
## factor(STATE_NAME)Idaho                             0.84082    
## factor(STATE_NAME)Illinois             < 0.0000000000000002 ***
## factor(STATE_NAME)Indiana                           0.00047 ***
## factor(STATE_NAME)Iowa                 < 0.0000000000000002 ***
## factor(STATE_NAME)Kansas                            0.18649    
## factor(STATE_NAME)Kentucky                0.000083065651533 ***
## factor(STATE_NAME)Louisiana                         0.79568    
## factor(STATE_NAME)Maine                < 0.0000000000000002 ***
## factor(STATE_NAME)Maryland                0.000000387377686 ***
## factor(STATE_NAME)Massachusetts        < 0.0000000000000002 ***
## factor(STATE_NAME)Michigan             < 0.0000000000000002 ***
## factor(STATE_NAME)Minnesota            < 0.0000000000000002 ***
## factor(STATE_NAME)Mississippi                       0.95199    
## factor(STATE_NAME)Missouri                0.000000000011990 ***
## factor(STATE_NAME)Montana                           0.00017 ***
## factor(STATE_NAME)Nebraska                          0.96547    
## factor(STATE_NAME)Nevada                            0.03851 *  
## factor(STATE_NAME)New Hampshire           0.000000000010062 ***
## factor(STATE_NAME)New Jersey              0.000000002996345 ***
## factor(STATE_NAME)New Mexico              0.000000008716794 ***
## factor(STATE_NAME)New York             < 0.0000000000000002 ***
## factor(STATE_NAME)North Carolina                    0.00016 ***
## factor(STATE_NAME)North Dakota            0.000000000048372 ***
## factor(STATE_NAME)Ohio                    0.000000000008019 ***
## factor(STATE_NAME)Oklahoma                          0.15958    
## factor(STATE_NAME)Oregon                  0.000000000332282 ***
## factor(STATE_NAME)Pennsylvania            0.000000000119078 ***
## factor(STATE_NAME)Rhode Island            0.000000008091255 ***
## factor(STATE_NAME)South Carolina                    0.01697 *  
## factor(STATE_NAME)South Dakota            0.000000000000055 ***
## factor(STATE_NAME)Tennessee               0.000000000402612 ***
## factor(STATE_NAME)Texas                             0.66226    
## factor(STATE_NAME)Utah                              0.01774 *  
## factor(STATE_NAME)Vermont              < 0.0000000000000002 ***
## factor(STATE_NAME)Virginia             < 0.0000000000000002 ***
## factor(STATE_NAME)Washington              0.000000000000011 ***
## factor(STATE_NAME)West Virginia           0.000000000340192 ***
## factor(STATE_NAME)Wisconsin            < 0.0000000000000002 ***
## factor(STATE_NAME)Wyoming                           0.52130    
## pctfemhh                               < 0.0000000000000002 ***
## homevalu                                            0.00035 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0868 on 3057 degrees of freedom
## Multiple R-squared: 0.535,   Adjusted R-squared: 0.527 
## F-statistic: 70.4 on 50 and 3057 DF,  p-value: <0.0000000000000002

Model diagnostics

# Residual diagnostics for the fixed-effects model: residuals against the
# observed response, each continuous predictor, and the fitted values,
# followed by a normality test.
lmFixed.resid <- resid(lmFixed)
lmFixed.pred <- predict(lmFixed)  # no newdata supplied, so these are the fitted values
# NOTE(review): residuals vs. the observed response trend upward by
# construction; rely on the residuals-vs-fitted plot below.
plot(lmFixed.resid ~ USA$BushPct)  #residuals still don't look very good, but are a little better than before

plot of chunk unnamed-chunk-18

plot(lmFixed.resid ~ USA$pctfemhh)

plot of chunk unnamed-chunk-18

plot(lmFixed.resid ~ USA$homevalu)

plot of chunk unnamed-chunk-18

plot(lmFixed.resid ~ lmFixed.pred)

plot of chunk unnamed-chunk-18

# NOTE(review): with roughly 3,100 counties the Shapiro-Wilk test will flag
# even small departures from normality; a Q-Q plot would show how large the
# departure actually is.
shapiro.test(lmFixed.resid)  #but not normally distributed
## 
##  Shapiro-Wilk normality test
## 
## data:  lmFixed.resid 
## W = 0.9619, p-value < 0.00000000000000022

Create color palette to be able to map the residuals from the fixed effects model

# Attach the fixed-effects residuals to the attribute table, split them into
# three equal-count classes and assign one "Spectral" colour per class.
USA$fixed.resid <- residuals(lmFixed)
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(var = USA$fixed.resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)

Map the residuals from the fixed effects model

# Map the fixed-effects residual classes to check for remaining spatial pattern.
par(mar = rep(1, 4))
plot(x = USA, col = ThreeColors)  #still see some pattern to residuals, but better than before

plot of chunk unnamed-chunk-20

Comparing the two models

# F-test comparing the two nested models: do the state dummies jointly reduce
# the residual sum of squares beyond pctfemhh and homevalu alone?
anova(lmBush, lmFixed)  #fixed effects model performs better
## Analysis of Variance Table
## 
## Model 1: BushPct ~ pctfemhh + homevalu
## Model 2: BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu
##   Res.Df  RSS Df Sum of Sq    F              Pr(>F)    
## 1   3105 36.9                                          
## 2   3057 23.0 48      13.8 38.3 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1