Exercise: Multiple Regression and Mapping

Load and Prep Data

library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Loading required package: foreign
## Warning: package 'foreign' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)

# Read the 2004 county-level election shapefile (the path is specific to the
# original author's machine) and draw the county outlines
USA <- readShapePoly(fn = "/Users/xiwang/Dropbox/GEOG 5023 - offline/Data/2004_Election_Counties.shp")
plot(USA)

plot of chunk unnamed-chunk-1

# Remove counties with no votes. which() keeps only rows where the comparison
# is TRUE, so a county whose Total is NA is dropped as well instead of
# putting an NA into the row index of the spatial object.
USA <- USA[which(USA$Total > 0), ]
# List the slots (pieces) that make up the spatial object
slotNames(USA)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
# Summarize the whole object: bounding box, projection and attribute table
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##       min    max
## x -124.73 -66.97
## y   24.96  49.37
## Is projected: NA 
## proj4string : [NA]
## Data attributes:
##          NAME         STATE_NAME     STATE_FIPS     CNTY_FIPS   
##  Washington:  32   Texas   : 254   48     : 254   001    :  48  
##  Jefferson :  26   Georgia : 159   13     : 159   003    :  48  
##  Franklin  :  25   Virginia: 134   51     : 134   005    :  48  
##  Jackson   :  24   Kentucky: 120   21     : 120   009    :  47  
##  Lincoln   :  24   Missouri: 115   29     : 115   007    :  46  
##  Madison   :  20   Kansas  : 105   20     : 105   011    :  46  
##  (Other)   :2957   (Other) :2221   (Other):2221   (Other):2825  
##       FIPS           AREA          FIPS_num          Bush       
##  01001  :   1   Min.   :    2   Min.   : 1001   Min.   :    65  
##  01003  :   1   1st Qu.:  435   1st Qu.:19046   1st Qu.:  2941  
##  01005  :   1   Median :  622   Median :29214   Median :  6364  
##  01007  :   1   Mean   :  966   Mean   :30686   Mean   : 19073  
##  01009  :   1   3rd Qu.:  931   3rd Qu.:46010   3rd Qu.: 15924  
##  01011  :   1   Max.   :20175   Max.   :56045   Max.   :954764  
##  (Other):3102                                                   
##      Kerry            County_F         Nader           Total        
##  Min.   :     12   Min.   : 1001   Min.   :    0   Min.   :     77  
##  1st Qu.:   1782   1st Qu.:19046   1st Qu.:    0   1st Qu.:   4831  
##  Median :   4041   Median :29214   Median :   14   Median :  10416  
##  Mean   :  17957   Mean   :30686   Mean   :  145   Mean   :  37176  
##  3rd Qu.:  10434   3rd Qu.:46010   3rd Qu.:   67   3rd Qu.:  26599  
##  Max.   :1670341   Max.   :56045   Max.   :13251   Max.   :2625105  
##                                                                     
##     Bush_pct       Kerry_pct       Nader_pct        MDratio      
##  Min.   : 9.31   Min.   : 7.17   Min.   :0.000   Min.   :   0.0  
##  1st Qu.:52.73   1st Qu.:30.23   1st Qu.:0.000   1st Qu.:  37.3  
##  Median :61.17   Median :38.49   Median :0.303   Median :  65.6  
##  Mean   :60.66   Mean   :38.94   Mean   :0.401   Mean   :  93.1  
##  3rd Qu.:69.37   3rd Qu.:46.79   3rd Qu.:0.633   3rd Qu.: 117.6  
##  Max.   :92.83   Max.   :90.05   Max.   :4.467   Max.   :2189.5  
##                                                                  
##       hosp          pcthisp         pcturban        urbrural   
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.0   Min.   :0.00  
##  1st Qu.: 1.32   1st Qu.:  4.0   1st Qu.:  0.0   1st Qu.:3.00  
##  Median : 3.29   Median :  8.0   Median : 33.5   Median :6.00  
##  Mean   : 5.68   Mean   : 44.5   Mean   : 35.3   Mean   :5.54  
##  3rd Qu.: 6.75   3rd Qu.: 24.0   3rd Qu.: 56.5   3rd Qu.:7.00  
##  Max.   :84.07   Max.   :972.0   Max.   :100.0   Max.   :9.00  
##                                                                
##     pctfemhh       pcincome        pctpoor        pctlt9ed   
##  Min.   : 0.0   Min.   :    0   Min.   : 0.0   Min.   : 0.0  
##  1st Qu.: 9.6   1st Qu.:15474   1st Qu.:11.1   1st Qu.: 8.9  
##  Median :12.2   Median :17450   Median :15.1   Median :13.2  
##  Mean   :13.0   Mean   :17805   Mean   :16.5   Mean   :14.3  
##  3rd Qu.:15.4   3rd Qu.:19818   3rd Qu.:20.4   3rd Qu.:18.7  
##  Max.   :41.1   Max.   :58096   Max.   :63.1   Max.   :56.3  
##                                                              
##     pcthsed        pctcoled       unemploy        pctwhtcl   
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.0  
##  1st Qu.:61.1   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.:38.5  
##  Median :71.2   Median :11.7   Median : 5.30   Median :43.5  
##  Mean   :68.4   Mean   :13.1   Mean   : 5.88   Mean   :44.6  
##  3rd Qu.:77.1   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.:50.7  
##  Max.   :95.5   Max.   :53.4   Max.   :37.90   Max.   :81.4  
##                                                              
##     homevalu           rent        popdens         crowded     
##  Min.   :     0   Min.   :  0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 35900   1st Qu.:255   1st Qu.:   15   1st Qu.: 1.80  
##  Median : 44400   Median :297   Median :   39   Median : 2.60  
##  Mean   : 52066   Mean   :314   Mean   :  194   Mean   : 3.61  
##  3rd Qu.: 58600   3rd Qu.:352   3rd Qu.:   93   3rd Qu.: 4.50  
##  Max.   :500001   Max.   :926   Max.   :53801   Max.   :44.40  
##                                                                
##     ginirev        SmokecurM        SmokevrM       SmokecurF    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.390   1st Qu.:0.220   1st Qu.:0.490   1st Qu.:0.190  
##  Median :0.420   Median :0.240   Median :0.520   Median :0.210  
##  Mean   :0.414   Mean   :0.242   Mean   :0.505   Mean   :0.209  
##  3rd Qu.:0.440   3rd Qu.:0.270   3rd Qu.:0.540   3rd Qu.:0.240  
##  Max.   :0.580   Max.   :0.580   Max.   :0.780   Max.   :0.420  
##                                                                 
##     SmokevrF         Obese           Noins         XYLENES__M   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.390   1st Qu.:0.320   1st Qu.:0.100   1st Qu.:   27  
##  Median :0.420   Median :0.340   Median :0.120   Median :   58  
##  Mean   :0.412   Mean   :0.335   Mean   :0.129   Mean   :  222  
##  3rd Qu.:0.460   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.:  171  
##  Max.   :0.630   Max.   :0.630   Max.   :0.410   Max.   :16661  
##                                                                 
##     TOLUENE        TETRACHLOR        STYRENE         NICKEL_COM   
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   : 0.00  
##  1st Qu.:   44   1st Qu.:   0.7   1st Qu.:   0.8   1st Qu.: 0.00  
##  Median :   91   Median :   1.9   Median :   1.9   Median : 0.01  
##  Mean   :  336   Mean   :  13.7   Mean   :  15.4   Mean   : 0.37  
##  3rd Qu.:  256   3rd Qu.:   6.6   3rd Qu.:   8.1   3rd Qu.: 0.11  
##  Max.   :28305   Max.   :1966.6   Max.   :1413.0   Max.   :69.01  
##                                                                   
##    METHYLENE_       MERCURY_CO      LEAD_COMPO       BENZENE__I  
##  Min.   :   0.0   Min.   :0.000   Min.   :  0.00   Min.   :   0  
##  1st Qu.:   1.6   1st Qu.:0.002   1st Qu.:  0.01   1st Qu.:  23  
##  Median :   3.9   Median :0.004   Median :  0.02   Median :  42  
##  Mean   :  26.4   Mean   :0.057   Mean   :  0.82   Mean   : 106  
##  3rd Qu.:  12.5   3rd Qu.:0.020   3rd Qu.:  0.23   3rd Qu.:  97  
##  Max.   :2764.2   Max.   :3.220   Max.   :290.63   Max.   :4612  
##                                                                  
##    ARSENIC_CO       POP2000          POP00SQMIL       MALE2000      
##  Min.   : 0.00   Min.   :      0   Min.   :    0   Min.   :      0  
##  1st Qu.: 0.00   1st Qu.:  11368   1st Qu.:   18   1st Qu.:   5600  
##  Median : 0.00   Median :  24770   Median :   43   Median :  12280  
##  Mean   : 0.11   Mean   :  89230   Mean   :  244   Mean   :  43768  
##  3rd Qu.: 0.02   3rd Qu.:  62028   3rd Qu.:  105   3rd Qu.:  30396  
##  Max.   :32.47   Max.   :9519338   Max.   :66934   Max.   :4704105  
##                                                                     
##    FEMALE2000         MAL2FEM         UNDER18          AIAN      
##  Min.   :      0   Min.   :  0.0   Min.   : 0.0   Min.   : 0.00  
##  1st Qu.:   5608   1st Qu.: 94.0   1st Qu.:23.7   1st Qu.: 0.20  
##  Median :  12548   Median : 97.0   Median :25.3   Median : 0.30  
##  Mean   :  45462   Mean   : 98.4   Mean   :25.5   Mean   : 1.61  
##  3rd Qu.:  31554   3rd Qu.:100.0   3rd Qu.:27.1   3rd Qu.: 0.80  
##  Max.   :4815233   Max.   :205.0   Max.   :45.3   Max.   :94.20  
##                                                                  
##       ASIA            BLACK            NHPI            WHITE     
##  Min.   : 0.000   Min.   : 0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.: 0.200   1st Qu.: 0.30   1st Qu.:0.0000   1st Qu.:77.2  
##  Median : 0.300   Median : 1.70   Median :0.0000   Median :91.3  
##  Mean   : 0.771   Mean   : 8.84   Mean   :0.0361   Mean   :84.8  
##  3rd Qu.: 0.700   3rd Qu.:10.10   3rd Qu.:0.1000   3rd Qu.:96.7  
##  Max.   :30.800   Max.   :86.50   Max.   :1.5000   Max.   :99.7  
##                                                                  
##    AIAN_MORE       ASIA_MORE        BLK_MORE       NHPI_MORE     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 0.50   1st Qu.: 0.30   1st Qu.: 0.40   1st Qu.:0.0000  
##  Median : 0.80   Median : 0.50   Median : 2.10   Median :0.1000  
##  Mean   : 2.22   Mean   : 0.98   Mean   : 9.13   Mean   :0.0996  
##  3rd Qu.: 1.40   3rd Qu.: 0.90   3rd Qu.:10.70   3rd Qu.:0.1000  
##  Max.   :95.10   Max.   :32.60   Max.   :86.70   Max.   :2.6000  
##                                                                  
##     WHT_MORE       HISP_LAT       CH19902000      MEDAGE2000  
##  Min.   : 0.0   Min.   : 0.00   Min.   :-37.4   Min.   : 0.0  
##  1st Qu.:79.1   1st Qu.: 0.90   1st Qu.:  1.0   1st Qu.:35.2  
##  Median :92.7   Median : 1.80   Median :  8.4   Median :37.4  
##  Mean   :86.0   Mean   : 6.19   Mean   : 11.1   Mean   :37.4  
##  3rd Qu.:97.6   3rd Qu.: 5.10   3rd Qu.: 17.4   3rd Qu.:39.8  
##  Max.   :99.9   Max.   :97.50   Max.   :191.0   Max.   :54.3  
##                                                               
##    PEROVER65   
##  Min.   : 0.0  
##  1st Qu.:12.1  
##  Median :14.4  
##  Mean   :14.8  
##  3rd Qu.:17.1  
##  Max.   :34.7  
## 
# Summarize only the attribute table by extracting the data slot with '@'
summary(USA@data)
##          NAME         STATE_NAME     STATE_FIPS     CNTY_FIPS   
##  Washington:  32   Texas   : 254   48     : 254   001    :  48  
##  Jefferson :  26   Georgia : 159   13     : 159   003    :  48  
##  Franklin  :  25   Virginia: 134   51     : 134   005    :  48  
##  Jackson   :  24   Kentucky: 120   21     : 120   009    :  47  
##  Lincoln   :  24   Missouri: 115   29     : 115   007    :  46  
##  Madison   :  20   Kansas  : 105   20     : 105   011    :  46  
##  (Other)   :2957   (Other) :2221   (Other):2221   (Other):2825  
##       FIPS           AREA          FIPS_num          Bush       
##  01001  :   1   Min.   :    2   Min.   : 1001   Min.   :    65  
##  01003  :   1   1st Qu.:  435   1st Qu.:19046   1st Qu.:  2941  
##  01005  :   1   Median :  622   Median :29214   Median :  6364  
##  01007  :   1   Mean   :  966   Mean   :30686   Mean   : 19073  
##  01009  :   1   3rd Qu.:  931   3rd Qu.:46010   3rd Qu.: 15924  
##  01011  :   1   Max.   :20175   Max.   :56045   Max.   :954764  
##  (Other):3102                                                   
##      Kerry            County_F         Nader           Total        
##  Min.   :     12   Min.   : 1001   Min.   :    0   Min.   :     77  
##  1st Qu.:   1782   1st Qu.:19046   1st Qu.:    0   1st Qu.:   4831  
##  Median :   4041   Median :29214   Median :   14   Median :  10416  
##  Mean   :  17957   Mean   :30686   Mean   :  145   Mean   :  37176  
##  3rd Qu.:  10434   3rd Qu.:46010   3rd Qu.:   67   3rd Qu.:  26599  
##  Max.   :1670341   Max.   :56045   Max.   :13251   Max.   :2625105  
##                                                                     
##     Bush_pct       Kerry_pct       Nader_pct        MDratio      
##  Min.   : 9.31   Min.   : 7.17   Min.   :0.000   Min.   :   0.0  
##  1st Qu.:52.73   1st Qu.:30.23   1st Qu.:0.000   1st Qu.:  37.3  
##  Median :61.17   Median :38.49   Median :0.303   Median :  65.6  
##  Mean   :60.66   Mean   :38.94   Mean   :0.401   Mean   :  93.1  
##  3rd Qu.:69.37   3rd Qu.:46.79   3rd Qu.:0.633   3rd Qu.: 117.6  
##  Max.   :92.83   Max.   :90.05   Max.   :4.467   Max.   :2189.5  
##                                                                  
##       hosp          pcthisp         pcturban        urbrural   
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.0   Min.   :0.00  
##  1st Qu.: 1.32   1st Qu.:  4.0   1st Qu.:  0.0   1st Qu.:3.00  
##  Median : 3.29   Median :  8.0   Median : 33.5   Median :6.00  
##  Mean   : 5.68   Mean   : 44.5   Mean   : 35.3   Mean   :5.54  
##  3rd Qu.: 6.75   3rd Qu.: 24.0   3rd Qu.: 56.5   3rd Qu.:7.00  
##  Max.   :84.07   Max.   :972.0   Max.   :100.0   Max.   :9.00  
##                                                                
##     pctfemhh       pcincome        pctpoor        pctlt9ed   
##  Min.   : 0.0   Min.   :    0   Min.   : 0.0   Min.   : 0.0  
##  1st Qu.: 9.6   1st Qu.:15474   1st Qu.:11.1   1st Qu.: 8.9  
##  Median :12.2   Median :17450   Median :15.1   Median :13.2  
##  Mean   :13.0   Mean   :17805   Mean   :16.5   Mean   :14.3  
##  3rd Qu.:15.4   3rd Qu.:19818   3rd Qu.:20.4   3rd Qu.:18.7  
##  Max.   :41.1   Max.   :58096   Max.   :63.1   Max.   :56.3  
##                                                              
##     pcthsed        pctcoled       unemploy        pctwhtcl   
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.0  
##  1st Qu.:61.1   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.:38.5  
##  Median :71.2   Median :11.7   Median : 5.30   Median :43.5  
##  Mean   :68.4   Mean   :13.1   Mean   : 5.88   Mean   :44.6  
##  3rd Qu.:77.1   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.:50.7  
##  Max.   :95.5   Max.   :53.4   Max.   :37.90   Max.   :81.4  
##                                                              
##     homevalu           rent        popdens         crowded     
##  Min.   :     0   Min.   :  0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 35900   1st Qu.:255   1st Qu.:   15   1st Qu.: 1.80  
##  Median : 44400   Median :297   Median :   39   Median : 2.60  
##  Mean   : 52066   Mean   :314   Mean   :  194   Mean   : 3.61  
##  3rd Qu.: 58600   3rd Qu.:352   3rd Qu.:   93   3rd Qu.: 4.50  
##  Max.   :500001   Max.   :926   Max.   :53801   Max.   :44.40  
##                                                                
##     ginirev        SmokecurM        SmokevrM       SmokecurF    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.390   1st Qu.:0.220   1st Qu.:0.490   1st Qu.:0.190  
##  Median :0.420   Median :0.240   Median :0.520   Median :0.210  
##  Mean   :0.414   Mean   :0.242   Mean   :0.505   Mean   :0.209  
##  3rd Qu.:0.440   3rd Qu.:0.270   3rd Qu.:0.540   3rd Qu.:0.240  
##  Max.   :0.580   Max.   :0.580   Max.   :0.780   Max.   :0.420  
##                                                                 
##     SmokevrF         Obese           Noins         XYLENES__M   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.390   1st Qu.:0.320   1st Qu.:0.100   1st Qu.:   27  
##  Median :0.420   Median :0.340   Median :0.120   Median :   58  
##  Mean   :0.412   Mean   :0.335   Mean   :0.129   Mean   :  222  
##  3rd Qu.:0.460   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.:  171  
##  Max.   :0.630   Max.   :0.630   Max.   :0.410   Max.   :16661  
##                                                                 
##     TOLUENE        TETRACHLOR        STYRENE         NICKEL_COM   
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   : 0.00  
##  1st Qu.:   44   1st Qu.:   0.7   1st Qu.:   0.8   1st Qu.: 0.00  
##  Median :   91   Median :   1.9   Median :   1.9   Median : 0.01  
##  Mean   :  336   Mean   :  13.7   Mean   :  15.4   Mean   : 0.37  
##  3rd Qu.:  256   3rd Qu.:   6.6   3rd Qu.:   8.1   3rd Qu.: 0.11  
##  Max.   :28305   Max.   :1966.6   Max.   :1413.0   Max.   :69.01  
##                                                                   
##    METHYLENE_       MERCURY_CO      LEAD_COMPO       BENZENE__I  
##  Min.   :   0.0   Min.   :0.000   Min.   :  0.00   Min.   :   0  
##  1st Qu.:   1.6   1st Qu.:0.002   1st Qu.:  0.01   1st Qu.:  23  
##  Median :   3.9   Median :0.004   Median :  0.02   Median :  42  
##  Mean   :  26.4   Mean   :0.057   Mean   :  0.82   Mean   : 106  
##  3rd Qu.:  12.5   3rd Qu.:0.020   3rd Qu.:  0.23   3rd Qu.:  97  
##  Max.   :2764.2   Max.   :3.220   Max.   :290.63   Max.   :4612  
##                                                                  
##    ARSENIC_CO       POP2000          POP00SQMIL       MALE2000      
##  Min.   : 0.00   Min.   :      0   Min.   :    0   Min.   :      0  
##  1st Qu.: 0.00   1st Qu.:  11368   1st Qu.:   18   1st Qu.:   5600  
##  Median : 0.00   Median :  24770   Median :   43   Median :  12280  
##  Mean   : 0.11   Mean   :  89230   Mean   :  244   Mean   :  43768  
##  3rd Qu.: 0.02   3rd Qu.:  62028   3rd Qu.:  105   3rd Qu.:  30396  
##  Max.   :32.47   Max.   :9519338   Max.   :66934   Max.   :4704105  
##                                                                     
##    FEMALE2000         MAL2FEM         UNDER18          AIAN      
##  Min.   :      0   Min.   :  0.0   Min.   : 0.0   Min.   : 0.00  
##  1st Qu.:   5608   1st Qu.: 94.0   1st Qu.:23.7   1st Qu.: 0.20  
##  Median :  12548   Median : 97.0   Median :25.3   Median : 0.30  
##  Mean   :  45462   Mean   : 98.4   Mean   :25.5   Mean   : 1.61  
##  3rd Qu.:  31554   3rd Qu.:100.0   3rd Qu.:27.1   3rd Qu.: 0.80  
##  Max.   :4815233   Max.   :205.0   Max.   :45.3   Max.   :94.20  
##                                                                  
##       ASIA            BLACK            NHPI            WHITE     
##  Min.   : 0.000   Min.   : 0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.: 0.200   1st Qu.: 0.30   1st Qu.:0.0000   1st Qu.:77.2  
##  Median : 0.300   Median : 1.70   Median :0.0000   Median :91.3  
##  Mean   : 0.771   Mean   : 8.84   Mean   :0.0361   Mean   :84.8  
##  3rd Qu.: 0.700   3rd Qu.:10.10   3rd Qu.:0.1000   3rd Qu.:96.7  
##  Max.   :30.800   Max.   :86.50   Max.   :1.5000   Max.   :99.7  
##                                                                  
##    AIAN_MORE       ASIA_MORE        BLK_MORE       NHPI_MORE     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 0.50   1st Qu.: 0.30   1st Qu.: 0.40   1st Qu.:0.0000  
##  Median : 0.80   Median : 0.50   Median : 2.10   Median :0.1000  
##  Mean   : 2.22   Mean   : 0.98   Mean   : 9.13   Mean   :0.0996  
##  3rd Qu.: 1.40   3rd Qu.: 0.90   3rd Qu.:10.70   3rd Qu.:0.1000  
##  Max.   :95.10   Max.   :32.60   Max.   :86.70   Max.   :2.6000  
##                                                                  
##     WHT_MORE       HISP_LAT       CH19902000      MEDAGE2000  
##  Min.   : 0.0   Min.   : 0.00   Min.   :-37.4   Min.   : 0.0  
##  1st Qu.:79.1   1st Qu.: 0.90   1st Qu.:  1.0   1st Qu.:35.2  
##  Median :92.7   Median : 1.80   Median :  8.4   Median :37.4  
##  Mean   :86.0   Mean   : 6.19   Mean   : 11.1   Mean   :37.4  
##  3rd Qu.:97.6   3rd Qu.: 5.10   3rd Qu.: 17.4   3rd Qu.:39.8  
##  Max.   :99.9   Max.   :97.50   Max.   :191.0   Max.   :54.3  
##                                                               
##    PEROVER65   
##  Min.   : 0.0  
##  1st Qu.:12.1  
##  Median :14.4  
##  Mean   :14.8  
##  3rd Qu.:17.1  
##  Max.   :34.7  
## 

Make Maps in R

# Show every palette that RColorBrewer provides, to help choose one
display.brewer.all()

plot of chunk unnamed-chunk-2

# Build a 7-class "Spectral" palette and preview its colours
pal7 <- brewer.pal(n = 7, name = "Spectral")
display.brewer.pal(n = 7, name = "Spectral")

plot of chunk unnamed-chunk-2

# Add a column holding G.W. Bush's share of all votes cast in 2004
# (a proportion between 0 and 1, not a 0-100 percentage)
USA$BushPct <- with(USA@data, Bush / Total)
# Split the share into seven quantile classes
cats7 <- classIntervals(var = USA$BushPct, n = 7, style = "quantile")
# The output shows the range of BushPct within each class; every class should
# hold about 440 counties
print(cats7)
## style: quantile
## [0.09308,0.4746)  [0.4746,0.5415)  [0.5415,0.5907)  [0.5907,0.6336) 
##              444              444              444              444 
##  [0.6336,0.6801)  [0.6801,0.7421)  [0.7421,0.9283] 
##              444              444              444
# Connect the classes to the palette with findColours()
SevenColors <- findColours(clI = cats7, pal = pal7)
# Draw the map using the specified data and colours
# (not a great cartographic map)
plot(USA, col = SevenColors)

plot of chunk unnamed-chunk-2

# Convert BushPct into standard units (z-scores) so that the mean is zero and
# the map shows deviations from the mean. The result is stored in a new column.
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
    sd(USA$BushPct, na.rm = TRUE)
# Quantile breaks are unaffected by a linear rescaling, so classing BushPctZ by
# quantiles would simply reproduce the previous map. Use fixed breaks that are
# symmetric about zero instead, so the middle class straddles the mean and the
# colours really encode "below" versus "above" average.
z_range <- range(USA$BushPctZ, na.rm = TRUE)
z_breaks <- c(min(z_range[1], -2), -1.5, -1, -0.5,
    0.5, 1, 1.5, max(z_range[2], 2))  # outer breaks always cover the data
pal7 <- brewer.pal(7, "Spectral")
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "fixed",
    fixedBreaks = z_breaks)
SevenColors <- findColours(cats7, pal7)
plot(USA, col = SevenColors)
# In this map, red areas have below-average BushPct, blue areas are above
# average, and the pale middle class lies within half a standard deviation of
# the mean

Multiple Regression

# Predict the share of votes for George Bush from the percent urban, the
# percent of households that are female headed, the percent poor, HISP_LAT
# (presumably percent Hispanic/Latino - confirm against the codebook), and
# the median age
lm1 <- lm(BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000,
    data = USA)
# Overall, the signs of the coefficients align with expectations; note that
# pcturban and HISP_LAT are not significant at the 0.05 level
summary(lm1)
## 
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + 
##     MEDAGE2000, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5687 -0.0733  0.0102  0.0808  0.2708 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.82e-01   2.43e-02   32.23   <2e-16 ***
## pcturban    -1.32e-05   8.88e-05   -0.15    0.882    
## pctfemhh    -1.35e-02   5.35e-04  -25.19   <2e-16 ***
## pctpoor      2.97e-03   3.55e-04    8.36   <2e-16 ***
## HISP_LAT    -2.91e-04   1.87e-04   -1.56    0.119    
## MEDAGE2000  -1.25e-03   5.65e-04   -2.21    0.027 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.11 on 3102 degrees of freedom
## Multiple R-squared: 0.242,   Adjusted R-squared: 0.241 
## F-statistic:  198 on 5 and 3102 DF,  p-value: <2e-16
# Keep the lm1 residuals and plot them against the FITTED values. Plotting
# against the observed BushPct is misleading: OLS residuals are always
# positively correlated with the observed response, so that plot shows a trend
# even when the model is well specified.
lm1.resid <- resid(lm1)
plot(lm1.resid ~ fitted(lm1))
abline(h = 0, lty = 2)  # reference line: residuals should scatter evenly around 0

plot of chunk unnamed-chunk-3

# Second attempt: female-headed households and home value only.
# Both coefficients are negative and significant.
lm2 <- lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
summary(lm2)
## 
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6082 -0.0731  0.0089  0.0779  0.3558 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.77e-01   5.60e-03   138.9   <2e-16 ***
## pctfemhh    -1.01e-02   3.61e-04   -27.9   <2e-16 ***
## homevalu    -7.60e-07   6.01e-08   -12.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256,   Adjusted R-squared: 0.255 
## F-statistic:  533 on 2 and 3105 DF,  p-value: <2e-16
# Keep the lm2 residuals and plot them against the FITTED values. Residuals are
# always positively correlated with the observed response, so plotting them
# against BushPct itself would show a trend regardless of model quality.
lm2.resid <- resid(lm2)
plot(lm2.resid ~ fitted(lm2))
abline(h = 0, lty = 2)  # reference line: residuals should scatter evenly around 0

plot of chunk unnamed-chunk-3

# Map the residuals to explore geographic patterns.
# First add a new column to USA that holds the lm2 residuals.
USA$resid <- residuals(lm2)
# Three quantile classes, coloured with a 3-class "Spectral" palette
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(var = USA$resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)
# There is a clear pattern to the residuals, which suggests a missing variable
plot(USA, col = ThreeColors)

plot of chunk unnamed-chunk-3


# Examine the variables available in the attribute table
colnames(USA@data)
##  [1] "NAME"       "STATE_NAME" "STATE_FIPS" "CNTY_FIPS"  "FIPS"      
##  [6] "AREA"       "FIPS_num"   "Bush"       "Kerry"      "County_F"  
## [11] "Nader"      "Total"      "Bush_pct"   "Kerry_pct"  "Nader_pct" 
## [16] "MDratio"    "hosp"       "pcthisp"    "pcturban"   "urbrural"  
## [21] "pctfemhh"   "pcincome"   "pctpoor"    "pctlt9ed"   "pcthsed"   
## [26] "pctcoled"   "unemploy"   "pctwhtcl"   "homevalu"   "rent"      
## [31] "popdens"    "crowded"    "ginirev"    "SmokecurM"  "SmokevrM"  
## [36] "SmokecurF"  "SmokevrF"   "Obese"      "Noins"      "XYLENES__M"
## [41] "TOLUENE"    "TETRACHLOR" "STYRENE"    "NICKEL_COM" "METHYLENE_"
## [46] "MERCURY_CO" "LEAD_COMPO" "BENZENE__I" "ARSENIC_CO" "POP2000"   
## [51] "POP00SQMIL" "MALE2000"   "FEMALE2000" "MAL2FEM"    "UNDER18"   
## [56] "AIAN"       "ASIA"       "BLACK"      "NHPI"       "WHITE"     
## [61] "AIAN_MORE"  "ASIA_MORE"  "BLK_MORE"   "NHPI_MORE"  "WHT_MORE"  
## [66] "HISP_LAT"   "CH19902000" "MEDAGE2000" "PEROVER65"  "BushPct"   
## [71] "BushPctZ"   "resid"

# Since pctfemhh, pctpoor, and homevalu were significant in previous models,
# try those, plus pcincome, pctwhtcl, and PEROVER65
lm3 <- lm(BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + pctwhtcl + PEROVER65,
    data = USA)
# All predictors are significant, except pctwhtcl.
# Adjusted R^2 = 0.2992 (the multiple R^2 is 0.301).
summary(lm3)
## 
## Call:
## lm(formula = BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + 
##     pctwhtcl + PEROVER65, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5041 -0.0720  0.0105  0.0779  0.3327 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.23e-01   1.41e-02   51.36  < 2e-16 ***
## pctfemhh    -1.36e-02   5.10e-04  -26.62  < 2e-16 ***
## pctpoor      3.10e-03   3.53e-04    8.81  < 2e-16 ***
## homevalu    -1.33e-06   8.78e-08  -15.19  < 2e-16 ***
## pcincome     8.38e-06   7.50e-07   11.18  < 2e-16 ***
## pctwhtcl    -3.34e-04   2.94e-04   -1.13     0.26    
## PEROVER65   -3.80e-03   5.41e-04   -7.04  2.4e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.106 on 3101 degrees of freedom
## Multiple R-squared: 0.301,   Adjusted R-squared: 0.299 
## F-statistic:  222 on 6 and 3101 DF,  p-value: <2e-16
# Draw the lm3 diagnostic plots on a single 2 x 2 page, then restore the
# previous graphics settings so later plots are not squeezed into the grid
old_par <- par(mfrow = c(2, 2))
plot(lm3, main = "lm3 Plots")
par(old_par)
# Residuals vs. fitted is not random and not centred on 0 across the fitted
# range; the Q-Q plot indicates the residuals are not normal

plot of chunk unnamed-chunk-3


# Since pctwhtcl is not significant, remove it and add urbrural and unemploy
lm4 <- lm(BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + urbrural + unemploy +
    PEROVER65, data = USA)
# All predictors are significant.
# Adjusted R^2 = 0.3199 (the multiple R^2 is 0.321).
summary(lm4)
## 
## Call:
## lm(formula = BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + 
##     urbrural + unemploy + PEROVER65, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4856 -0.0680  0.0100  0.0769  0.3246 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.17e-01   1.32e-02   54.45  < 2e-16 ***
## pctfemhh    -1.17e-02   5.37e-04  -21.84  < 2e-16 ***
## pctpoor      2.37e-03   4.11e-04    5.76  9.4e-09 ***
## homevalu    -1.11e-06   8.88e-08  -12.55  < 2e-16 ***
## pcincome     6.73e-06   6.16e-07   10.93  < 2e-16 ***
## urbrural     7.23e-03   9.34e-04    7.74  1.3e-14 ***
## unemploy    -4.92e-03   7.36e-04   -6.69  2.7e-11 ***
## PEROVER65   -4.72e-03   5.47e-04   -8.64  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.104 on 3100 degrees of freedom
## Multiple R-squared: 0.321,   Adjusted R-squared: 0.32 
## F-statistic:  210 on 7 and 3100 DF,  p-value: <2e-16
# Draw the lm4 diagnostic plots on a single 2 x 2 page, then restore the
# previous graphics settings so later plots are not squeezed into the grid
old_par <- par(mfrow = c(2, 2))
plot(lm4, main = "lm4 Plots")
par(old_par)
# Residuals vs. fitted is not random and not centred on 0 across the fitted
# range; the Q-Q plot indicates the residuals are not normal, but both are
# slightly better than for lm3

plot of chunk unnamed-chunk-3


# Try an interaction term: pctfemhh * pctpoor fits both main effects plus
# their product (reported as pctfemhh:pctpoor in the summary)
lm5 <- lm(BushPct ~ pctfemhh * pctpoor + homevalu + pcincome + urbrural + unemploy +
    PEROVER65, data = USA)
# All predictors are significant.
# Adjusted R^2 = 0.3456 (the multiple R^2 is 0.347).
summary(lm5)
## 
## Call:
## lm(formula = BushPct ~ pctfemhh * pctpoor + homevalu + pcincome + 
##     urbrural + unemploy + PEROVER65, data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4378 -0.0696  0.0098  0.0759  0.3319 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       6.71e-01   1.36e-02   49.49  < 2e-16 ***
## pctfemhh         -4.70e-03   8.24e-04   -5.71  1.2e-08 ***
## pctpoor           6.63e-03   5.57e-04   11.90  < 2e-16 ***
## homevalu         -1.09e-06   8.71e-08  -12.45  < 2e-16 ***
## pcincome          4.74e-06   6.30e-07    7.52  7.0e-14 ***
## urbrural          6.92e-03   9.17e-04    7.55  5.7e-14 ***
## unemploy         -5.56e-03   7.24e-04   -7.68  2.2e-14 ***
## PEROVER65        -4.87e-03   5.36e-04   -9.09  < 2e-16 ***
## pctfemhh:pctpoor -3.10e-04   2.80e-05  -11.08  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.102 on 3099 degrees of freedom
## Multiple R-squared: 0.347,   Adjusted R-squared: 0.346 
## F-statistic:  206 on 8 and 3099 DF,  p-value: <2e-16
# Draw the lm5 diagnostic plots on a single 2 x 2 page, then restore the
# previous graphics settings so later plots are not squeezed into the grid
old_par <- par(mfrow = c(2, 2))
plot(lm5, main = "lm5 Plots")
par(old_par)
# Residuals vs. fitted is still not random or centred on 0 across the fitted
# range, and the Q-Q plot indicates the residuals are not normal. Both look
# better than before and the adjusted R^2 is also higher, so this looks like
# a good direction to go in ... perhaps interact more terms

plot of chunk unnamed-chunk-3