Maps and multiple regression

library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Warning: package 'class' was built under R version 2.15.3
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)
library(MASS)
## Warning: package 'MASS' was built under R version 2.15.3
library(leaps)
## Warning: package 'leaps' was built under R version 2.15.3
# Load the county polygons and their attribute table for the 2004 election
# from the shapefile on disk
USA <- readShapePoly(fn = "C:\\Users\\QINGHUAN\\Desktop\\Data 2\\2004_Election_Counties.shp")

# Draw the polygons: the object plots as a map
plot(USA)

plot of chunk unnamed-chunk-1

# Keep only the counties in which at least one vote was recorded
USA <- USA[USA@data$Total > 0, ]
# Show the names of the slots that make up the spatial object
slotNames(USA)
## [1] "data"        "polygons"    "plotOrder"   "bbox"        "proj4string"
# Summarize the whole spatial object: its coordinate bounding box, its
# projection information and a per-column summary of the attribute table
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
##       min    max
## x -124.73 -66.97
## y   24.96  49.37
## Is projected: NA 
## proj4string : [NA]
## Data attributes:
##          NAME         STATE_NAME     STATE_FIPS     CNTY_FIPS   
##  Washington:  32   Texas   : 254   48     : 254   001    :  48  
##  Jefferson :  26   Georgia : 159   13     : 159   003    :  48  
##  Franklin  :  25   Virginia: 134   51     : 134   005    :  48  
##  Jackson   :  24   Kentucky: 120   21     : 120   009    :  47  
##  Lincoln   :  24   Missouri: 115   29     : 115   007    :  46  
##  Madison   :  20   Kansas  : 105   20     : 105   011    :  46  
##  (Other)   :2957   (Other) :2221   (Other):2221   (Other):2825  
##       FIPS           AREA          FIPS_num          Bush       
##  01001  :   1   Min.   :    2   Min.   : 1001   Min.   :    65  
##  01003  :   1   1st Qu.:  435   1st Qu.:19046   1st Qu.:  2941  
##  01005  :   1   Median :  622   Median :29214   Median :  6364  
##  01007  :   1   Mean   :  966   Mean   :30686   Mean   : 19073  
##  01009  :   1   3rd Qu.:  931   3rd Qu.:46010   3rd Qu.: 15924  
##  01011  :   1   Max.   :20175   Max.   :56045   Max.   :954764  
##  (Other):3102                                                   
##      Kerry            County_F         Nader           Total        
##  Min.   :     12   Min.   : 1001   Min.   :    0   Min.   :     77  
##  1st Qu.:   1782   1st Qu.:19046   1st Qu.:    0   1st Qu.:   4831  
##  Median :   4041   Median :29214   Median :   14   Median :  10416  
##  Mean   :  17957   Mean   :30686   Mean   :  145   Mean   :  37176  
##  3rd Qu.:  10434   3rd Qu.:46010   3rd Qu.:   67   3rd Qu.:  26599  
##  Max.   :1670341   Max.   :56045   Max.   :13251   Max.   :2625105  
##                                                                     
##     Bush_pct       Kerry_pct       Nader_pct        MDratio      
##  Min.   : 9.31   Min.   : 7.17   Min.   :0.000   Min.   :   0.0  
##  1st Qu.:52.73   1st Qu.:30.23   1st Qu.:0.000   1st Qu.:  37.3  
##  Median :61.17   Median :38.49   Median :0.303   Median :  65.6  
##  Mean   :60.66   Mean   :38.94   Mean   :0.401   Mean   :  93.1  
##  3rd Qu.:69.37   3rd Qu.:46.79   3rd Qu.:0.633   3rd Qu.: 117.6  
##  Max.   :92.83   Max.   :90.05   Max.   :4.467   Max.   :2189.5  
##                                                                  
##       hosp          pcthisp         pcturban        urbrural   
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.0   Min.   :0.00  
##  1st Qu.: 1.32   1st Qu.:  4.0   1st Qu.:  0.0   1st Qu.:3.00  
##  Median : 3.29   Median :  8.0   Median : 33.5   Median :6.00  
##  Mean   : 5.68   Mean   : 44.5   Mean   : 35.3   Mean   :5.54  
##  3rd Qu.: 6.75   3rd Qu.: 24.0   3rd Qu.: 56.5   3rd Qu.:7.00  
##  Max.   :84.07   Max.   :972.0   Max.   :100.0   Max.   :9.00  
##                                                                
##     pctfemhh       pcincome        pctpoor        pctlt9ed   
##  Min.   : 0.0   Min.   :    0   Min.   : 0.0   Min.   : 0.0  
##  1st Qu.: 9.6   1st Qu.:15474   1st Qu.:11.1   1st Qu.: 8.9  
##  Median :12.2   Median :17450   Median :15.1   Median :13.2  
##  Mean   :13.0   Mean   :17805   Mean   :16.5   Mean   :14.3  
##  3rd Qu.:15.4   3rd Qu.:19818   3rd Qu.:20.4   3rd Qu.:18.7  
##  Max.   :41.1   Max.   :58096   Max.   :63.1   Max.   :56.3  
##                                                              
##     pcthsed        pctcoled       unemploy        pctwhtcl   
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.0  
##  1st Qu.:61.1   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.:38.5  
##  Median :71.2   Median :11.6   Median : 5.30   Median :43.5  
##  Mean   :68.4   Mean   :13.1   Mean   : 5.88   Mean   :44.6  
##  3rd Qu.:77.1   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.:50.7  
##  Max.   :95.5   Max.   :53.4   Max.   :37.90   Max.   :81.4  
##                                                              
##     homevalu           rent        popdens         crowded     
##  Min.   :     0   Min.   :  0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 35900   1st Qu.:255   1st Qu.:   15   1st Qu.: 1.80  
##  Median : 44400   Median :297   Median :   39   Median : 2.60  
##  Mean   : 52066   Mean   :314   Mean   :  194   Mean   : 3.61  
##  3rd Qu.: 58600   3rd Qu.:352   3rd Qu.:   93   3rd Qu.: 4.50  
##  Max.   :500001   Max.   :926   Max.   :53801   Max.   :44.40  
##                                                                
##     ginirev        SmokecurM        SmokevrM       SmokecurF    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.390   1st Qu.:0.220   1st Qu.:0.490   1st Qu.:0.190  
##  Median :0.420   Median :0.240   Median :0.520   Median :0.210  
##  Mean   :0.414   Mean   :0.242   Mean   :0.505   Mean   :0.209  
##  3rd Qu.:0.440   3rd Qu.:0.270   3rd Qu.:0.540   3rd Qu.:0.240  
##  Max.   :0.580   Max.   :0.580   Max.   :0.780   Max.   :0.420  
##                                                                 
##     SmokevrF         Obese           Noins         XYLENES__M   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.390   1st Qu.:0.320   1st Qu.:0.100   1st Qu.:   27  
##  Median :0.420   Median :0.340   Median :0.120   Median :   58  
##  Mean   :0.412   Mean   :0.335   Mean   :0.129   Mean   :  222  
##  3rd Qu.:0.460   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.:  171  
##  Max.   :0.630   Max.   :0.630   Max.   :0.410   Max.   :16661  
##                                                                 
##     TOLUENE        TETRACHLOR        STYRENE         NICKEL_COM   
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   : 0.00  
##  1st Qu.:   44   1st Qu.:   0.7   1st Qu.:   0.8   1st Qu.: 0.00  
##  Median :   91   Median :   1.9   Median :   1.9   Median : 0.01  
##  Mean   :  336   Mean   :  13.7   Mean   :  15.4   Mean   : 0.37  
##  3rd Qu.:  256   3rd Qu.:   6.6   3rd Qu.:   8.1   3rd Qu.: 0.11  
##  Max.   :28305   Max.   :1966.6   Max.   :1413.0   Max.   :69.01  
##                                                                   
##    METHYLENE_       MERCURY_CO      LEAD_COMPO       BENZENE__I  
##  Min.   :   0.0   Min.   :0.000   Min.   :  0.00   Min.   :   0  
##  1st Qu.:   1.6   1st Qu.:0.002   1st Qu.:  0.01   1st Qu.:  23  
##  Median :   3.9   Median :0.004   Median :  0.02   Median :  42  
##  Mean   :  26.4   Mean   :0.057   Mean   :  0.82   Mean   : 106  
##  3rd Qu.:  12.5   3rd Qu.:0.020   3rd Qu.:  0.23   3rd Qu.:  97  
##  Max.   :2764.2   Max.   :3.220   Max.   :290.63   Max.   :4612  
##                                                                  
##    ARSENIC_CO       POP2000          POP00SQMIL       MALE2000      
##  Min.   : 0.00   Min.   :      0   Min.   :    0   Min.   :      0  
##  1st Qu.: 0.00   1st Qu.:  11368   1st Qu.:   18   1st Qu.:   5600  
##  Median : 0.00   Median :  24770   Median :   43   Median :  12280  
##  Mean   : 0.11   Mean   :  89230   Mean   :  244   Mean   :  43768  
##  3rd Qu.: 0.02   3rd Qu.:  62028   3rd Qu.:  105   3rd Qu.:  30396  
##  Max.   :32.47   Max.   :9519338   Max.   :66934   Max.   :4704105  
##                                                                     
##    FEMALE2000         MAL2FEM         UNDER18          AIAN      
##  Min.   :      0   Min.   :  0.0   Min.   : 0.0   Min.   : 0.00  
##  1st Qu.:   5608   1st Qu.: 94.0   1st Qu.:23.7   1st Qu.: 0.20  
##  Median :  12548   Median : 97.0   Median :25.3   Median : 0.30  
##  Mean   :  45462   Mean   : 98.4   Mean   :25.5   Mean   : 1.61  
##  3rd Qu.:  31554   3rd Qu.:100.0   3rd Qu.:27.1   3rd Qu.: 0.80  
##  Max.   :4815233   Max.   :205.0   Max.   :45.3   Max.   :94.20  
##                                                                  
##       ASIA            BLACK            NHPI            WHITE     
##  Min.   : 0.000   Min.   : 0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.: 0.200   1st Qu.: 0.30   1st Qu.:0.0000   1st Qu.:77.2  
##  Median : 0.300   Median : 1.70   Median :0.0000   Median :91.3  
##  Mean   : 0.771   Mean   : 8.84   Mean   :0.0361   Mean   :84.8  
##  3rd Qu.: 0.700   3rd Qu.:10.10   3rd Qu.:0.1000   3rd Qu.:96.7  
##  Max.   :30.800   Max.   :86.50   Max.   :1.5000   Max.   :99.7  
##                                                                  
##    AIAN_MORE       ASIA_MORE        BLK_MORE       NHPI_MORE     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 0.50   1st Qu.: 0.30   1st Qu.: 0.40   1st Qu.:0.0000  
##  Median : 0.80   Median : 0.50   Median : 2.10   Median :0.1000  
##  Mean   : 2.22   Mean   : 0.98   Mean   : 9.13   Mean   :0.0996  
##  3rd Qu.: 1.40   3rd Qu.: 0.90   3rd Qu.:10.70   3rd Qu.:0.1000  
##  Max.   :95.10   Max.   :32.60   Max.   :86.70   Max.   :2.6000  
##                                                                  
##     WHT_MORE       HISP_LAT       CH19902000      MEDAGE2000  
##  Min.   : 0.0   Min.   : 0.00   Min.   :-37.4   Min.   : 0.0  
##  1st Qu.:79.1   1st Qu.: 0.90   1st Qu.:  1.0   1st Qu.:35.2  
##  Median :92.7   Median : 1.80   Median :  8.4   Median :37.4  
##  Mean   :86.0   Mean   : 6.19   Mean   : 11.1   Mean   :37.4  
##  3rd Qu.:97.6   3rd Qu.: 5.10   3rd Qu.: 17.4   3rd Qu.:39.8  
##  Max.   :99.9   Max.   :97.50   Max.   :191.0   Max.   :54.3  
##                                                               
##    PEROVER65   
##  Min.   : 0.0  
##  1st Qu.:12.1  
##  Median :14.4  
##  Mean   :14.8  
##  3rd Qu.:17.1  
##  Max.   :34.7  
## 
# Summarize only the attribute table held in the object's "data" slot
summary(slot(USA, "data"))
##          NAME         STATE_NAME     STATE_FIPS     CNTY_FIPS   
##  Washington:  32   Texas   : 254   48     : 254   001    :  48  
##  Jefferson :  26   Georgia : 159   13     : 159   003    :  48  
##  Franklin  :  25   Virginia: 134   51     : 134   005    :  48  
##  Jackson   :  24   Kentucky: 120   21     : 120   009    :  47  
##  Lincoln   :  24   Missouri: 115   29     : 115   007    :  46  
##  Madison   :  20   Kansas  : 105   20     : 105   011    :  46  
##  (Other)   :2957   (Other) :2221   (Other):2221   (Other):2825  
##       FIPS           AREA          FIPS_num          Bush       
##  01001  :   1   Min.   :    2   Min.   : 1001   Min.   :    65  
##  01003  :   1   1st Qu.:  435   1st Qu.:19046   1st Qu.:  2941  
##  01005  :   1   Median :  622   Median :29214   Median :  6364  
##  01007  :   1   Mean   :  966   Mean   :30686   Mean   : 19073  
##  01009  :   1   3rd Qu.:  931   3rd Qu.:46010   3rd Qu.: 15924  
##  01011  :   1   Max.   :20175   Max.   :56045   Max.   :954764  
##  (Other):3102                                                   
##      Kerry            County_F         Nader           Total        
##  Min.   :     12   Min.   : 1001   Min.   :    0   Min.   :     77  
##  1st Qu.:   1782   1st Qu.:19046   1st Qu.:    0   1st Qu.:   4831  
##  Median :   4041   Median :29214   Median :   14   Median :  10416  
##  Mean   :  17957   Mean   :30686   Mean   :  145   Mean   :  37176  
##  3rd Qu.:  10434   3rd Qu.:46010   3rd Qu.:   67   3rd Qu.:  26599  
##  Max.   :1670341   Max.   :56045   Max.   :13251   Max.   :2625105  
##                                                                     
##     Bush_pct       Kerry_pct       Nader_pct        MDratio      
##  Min.   : 9.31   Min.   : 7.17   Min.   :0.000   Min.   :   0.0  
##  1st Qu.:52.73   1st Qu.:30.23   1st Qu.:0.000   1st Qu.:  37.3  
##  Median :61.17   Median :38.49   Median :0.303   Median :  65.6  
##  Mean   :60.66   Mean   :38.94   Mean   :0.401   Mean   :  93.1  
##  3rd Qu.:69.37   3rd Qu.:46.79   3rd Qu.:0.633   3rd Qu.: 117.6  
##  Max.   :92.83   Max.   :90.05   Max.   :4.467   Max.   :2189.5  
##                                                                  
##       hosp          pcthisp         pcturban        urbrural   
##  Min.   : 0.00   Min.   :  0.0   Min.   :  0.0   Min.   :0.00  
##  1st Qu.: 1.32   1st Qu.:  4.0   1st Qu.:  0.0   1st Qu.:3.00  
##  Median : 3.29   Median :  8.0   Median : 33.5   Median :6.00  
##  Mean   : 5.68   Mean   : 44.5   Mean   : 35.3   Mean   :5.54  
##  3rd Qu.: 6.75   3rd Qu.: 24.0   3rd Qu.: 56.5   3rd Qu.:7.00  
##  Max.   :84.07   Max.   :972.0   Max.   :100.0   Max.   :9.00  
##                                                                
##     pctfemhh       pcincome        pctpoor        pctlt9ed   
##  Min.   : 0.0   Min.   :    0   Min.   : 0.0   Min.   : 0.0  
##  1st Qu.: 9.6   1st Qu.:15474   1st Qu.:11.1   1st Qu.: 8.9  
##  Median :12.2   Median :17450   Median :15.1   Median :13.2  
##  Mean   :13.0   Mean   :17805   Mean   :16.5   Mean   :14.3  
##  3rd Qu.:15.4   3rd Qu.:19818   3rd Qu.:20.4   3rd Qu.:18.7  
##  Max.   :41.1   Max.   :58096   Max.   :63.1   Max.   :56.3  
##                                                              
##     pcthsed        pctcoled       unemploy        pctwhtcl   
##  Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.0  
##  1st Qu.:61.1   1st Qu.: 9.0   1st Qu.: 3.90   1st Qu.:38.5  
##  Median :71.2   Median :11.6   Median : 5.30   Median :43.5  
##  Mean   :68.4   Mean   :13.1   Mean   : 5.88   Mean   :44.6  
##  3rd Qu.:77.1   3rd Qu.:15.3   3rd Qu.: 7.20   3rd Qu.:50.7  
##  Max.   :95.5   Max.   :53.4   Max.   :37.90   Max.   :81.4  
##                                                              
##     homevalu           rent        popdens         crowded     
##  Min.   :     0   Min.   :  0   Min.   :    0   Min.   : 0.00  
##  1st Qu.: 35900   1st Qu.:255   1st Qu.:   15   1st Qu.: 1.80  
##  Median : 44400   Median :297   Median :   39   Median : 2.60  
##  Mean   : 52066   Mean   :314   Mean   :  194   Mean   : 3.61  
##  3rd Qu.: 58600   3rd Qu.:352   3rd Qu.:   93   3rd Qu.: 4.50  
##  Max.   :500001   Max.   :926   Max.   :53801   Max.   :44.40  
##                                                                
##     ginirev        SmokecurM        SmokevrM       SmokecurF    
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :0.000  
##  1st Qu.:0.390   1st Qu.:0.220   1st Qu.:0.490   1st Qu.:0.190  
##  Median :0.420   Median :0.240   Median :0.520   Median :0.210  
##  Mean   :0.414   Mean   :0.242   Mean   :0.505   Mean   :0.209  
##  3rd Qu.:0.440   3rd Qu.:0.270   3rd Qu.:0.540   3rd Qu.:0.240  
##  Max.   :0.580   Max.   :0.580   Max.   :0.780   Max.   :0.420  
##                                                                 
##     SmokevrF         Obese           Noins         XYLENES__M   
##  Min.   :0.000   Min.   :0.000   Min.   :0.000   Min.   :    0  
##  1st Qu.:0.390   1st Qu.:0.320   1st Qu.:0.100   1st Qu.:   27  
##  Median :0.420   Median :0.340   Median :0.120   Median :   58  
##  Mean   :0.412   Mean   :0.335   Mean   :0.129   Mean   :  222  
##  3rd Qu.:0.460   3rd Qu.:0.360   3rd Qu.:0.150   3rd Qu.:  171  
##  Max.   :0.630   Max.   :0.630   Max.   :0.410   Max.   :16661  
##                                                                 
##     TOLUENE        TETRACHLOR        STYRENE         NICKEL_COM   
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0   Min.   : 0.00  
##  1st Qu.:   44   1st Qu.:   0.7   1st Qu.:   0.8   1st Qu.: 0.00  
##  Median :   91   Median :   1.9   Median :   1.9   Median : 0.01  
##  Mean   :  336   Mean   :  13.7   Mean   :  15.4   Mean   : 0.37  
##  3rd Qu.:  256   3rd Qu.:   6.6   3rd Qu.:   8.1   3rd Qu.: 0.11  
##  Max.   :28305   Max.   :1966.6   Max.   :1413.0   Max.   :69.01  
##                                                                   
##    METHYLENE_       MERCURY_CO      LEAD_COMPO       BENZENE__I  
##  Min.   :   0.0   Min.   :0.000   Min.   :  0.00   Min.   :   0  
##  1st Qu.:   1.6   1st Qu.:0.002   1st Qu.:  0.01   1st Qu.:  23  
##  Median :   3.9   Median :0.004   Median :  0.02   Median :  42  
##  Mean   :  26.4   Mean   :0.057   Mean   :  0.82   Mean   : 106  
##  3rd Qu.:  12.5   3rd Qu.:0.020   3rd Qu.:  0.23   3rd Qu.:  97  
##  Max.   :2764.2   Max.   :3.220   Max.   :290.63   Max.   :4612  
##                                                                  
##    ARSENIC_CO       POP2000          POP00SQMIL       MALE2000      
##  Min.   : 0.00   Min.   :      0   Min.   :    0   Min.   :      0  
##  1st Qu.: 0.00   1st Qu.:  11368   1st Qu.:   18   1st Qu.:   5600  
##  Median : 0.00   Median :  24770   Median :   43   Median :  12280  
##  Mean   : 0.11   Mean   :  89230   Mean   :  244   Mean   :  43768  
##  3rd Qu.: 0.02   3rd Qu.:  62028   3rd Qu.:  105   3rd Qu.:  30396  
##  Max.   :32.47   Max.   :9519338   Max.   :66934   Max.   :4704105  
##                                                                     
##    FEMALE2000         MAL2FEM         UNDER18          AIAN      
##  Min.   :      0   Min.   :  0.0   Min.   : 0.0   Min.   : 0.00  
##  1st Qu.:   5608   1st Qu.: 94.0   1st Qu.:23.7   1st Qu.: 0.20  
##  Median :  12548   Median : 97.0   Median :25.3   Median : 0.30  
##  Mean   :  45462   Mean   : 98.4   Mean   :25.5   Mean   : 1.61  
##  3rd Qu.:  31554   3rd Qu.:100.0   3rd Qu.:27.1   3rd Qu.: 0.80  
##  Max.   :4815233   Max.   :205.0   Max.   :45.3   Max.   :94.20  
##                                                                  
##       ASIA            BLACK            NHPI            WHITE     
##  Min.   : 0.000   Min.   : 0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.: 0.200   1st Qu.: 0.30   1st Qu.:0.0000   1st Qu.:77.2  
##  Median : 0.300   Median : 1.70   Median :0.0000   Median :91.3  
##  Mean   : 0.771   Mean   : 8.84   Mean   :0.0361   Mean   :84.8  
##  3rd Qu.: 0.700   3rd Qu.:10.10   3rd Qu.:0.1000   3rd Qu.:96.7  
##  Max.   :30.800   Max.   :86.50   Max.   :1.5000   Max.   :99.7  
##                                                                  
##    AIAN_MORE       ASIA_MORE        BLK_MORE       NHPI_MORE     
##  Min.   : 0.00   Min.   : 0.00   Min.   : 0.00   Min.   :0.0000  
##  1st Qu.: 0.50   1st Qu.: 0.30   1st Qu.: 0.40   1st Qu.:0.0000  
##  Median : 0.80   Median : 0.50   Median : 2.10   Median :0.1000  
##  Mean   : 2.22   Mean   : 0.98   Mean   : 9.13   Mean   :0.0996  
##  3rd Qu.: 1.40   3rd Qu.: 0.90   3rd Qu.:10.70   3rd Qu.:0.1000  
##  Max.   :95.10   Max.   :32.60   Max.   :86.70   Max.   :2.6000  
##                                                                  
##     WHT_MORE       HISP_LAT       CH19902000      MEDAGE2000  
##  Min.   : 0.0   Min.   : 0.00   Min.   :-37.4   Min.   : 0.0  
##  1st Qu.:79.1   1st Qu.: 0.90   1st Qu.:  1.0   1st Qu.:35.2  
##  Median :92.7   Median : 1.80   Median :  8.4   Median :37.4  
##  Mean   :86.0   Mean   : 6.19   Mean   : 11.1   Mean   :37.4  
##  3rd Qu.:97.6   3rd Qu.: 5.10   3rd Qu.: 17.4   3rd Qu.:39.8  
##  Max.   :99.9   Max.   :97.50   Max.   :191.0   Max.   :54.3  
##                                                               
##    PEROVER65   
##  Min.   : 0.0  
##  1st Qu.:12.1  
##  Median :14.4  
##  Mean   :14.8  
##  3rd Qu.:17.1  
##  Max.   :34.7  
## 
# Plotting the data slot is like plotting a regular table: plot() on a data
# frame draws one scatterplot panel per pair of columns. Passing the whole
# attribute table fails with "figure margins too large" because there are
# far too many columns to fit on one device, so plot a small set of numeric
# columns instead.
plot(USA@data[, c("Bush_pct", "pcturban", "pctfemhh", "pctpoor",
                  "MEDAGE2000")])
# Making maps in R
# Start by displaying the colour palettes that RColorBrewer provides, so a
# palette can be chosen for the map
display.brewer.all()

plot of chunk unnamed-chunk-3

# Build a seven-colour palette from the "Spectral" scheme
pal7 <- brewer.pal(n = 7, name = "Spectral")
# Preview the seven colours that were just selected
display.brewer.pal(n = 7, name = "Spectral")

plot of chunk unnamed-chunk-3

# Add a column holding the share of all votes in each county that went to
# G. W. Bush in 2004 (a proportion: Bush votes divided by total votes)
USA$BushPct <- with(USA@data, Bush / Total)
# Split the counties into seven quantile classes of that share
cats7 <- classIntervals(var = USA$BushPct, n = 7, style = "quantile")
print(cats7)
## style: quantile
## [0.09308,0.4746)  [0.4746,0.5415)  [0.5415,0.5907)  [0.5907,0.6336) 
##              444              444              444              444 
##  [0.6336,0.6801)  [0.6801,0.7421)  [0.7421,0.9283] 
##              444              444              444
# The output lists the BushPct range covered by each class; every class
# holds 444 counties.
# findColours() connects the classes to the palette, giving each county
# the colour of the class it falls in.
SevenColors <- findColours(clI = cats7, pal = pal7)
# Draw the map, filling every county with its class colour
plot(USA, col = SevenColors)

plot of chunk unnamed-chunk-3

# The previous map is not good: it uses a diverging colour scheme to
# represent continuous data. Schemes like "Spectral" are meant to show
# deviations from a midpoint such as the mean. Convert the BushPct column
# into standard units so that the mean is zero, and map deviations from
# the mean.

# Create a new column holding the standardized share of the vote for Bush
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
  sd(USA$BushPct, na.rm = TRUE)

# Quantile classes do not change under a linear rescaling, so classifying
# BushPctZ by quantiles would put every county in the same class as before
# and reproduce the previous map. Use fixed breaks in standard-deviation
# units instead: seven classes, each 0.6 sd wide, with the middle class
# centred on the mean (-0.3 to 0.3). The two outer breaks are widened to the
# observed range so that every county falls inside a class.
z_range <- range(USA$BushPctZ, na.rm = TRUE)
z_breaks <- c(min(z_range[1], -2.1), -1.5, -0.9, -0.3, 0.3, 0.9, 1.5,
              max(z_range[2], 2.1))

# Create the new categories from the standardized column and map them
pal7 <- brewer.pal(7, "Spectral")
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "fixed",
                        fixedBreaks = z_breaks)
SevenColors <- findColours(cats7, pal7)
plot(USA, col = SevenColors)
# In the map, the areas that are red have a below-average BushPct and the
# blue areas are above average
# Use an automated model-selection procedure to find the best possible
# model, for example: 1. stepAIC() in MASS, or 2. regsubsets() in leaps
library(lmSupport)
## Warning: package 'lmSupport' was built under R version 2.15.3
## Loading required package: car
## Warning: package 'car' was built under R version 2.15.3
## Loading required package: nnet
## Warning: package 'nnet' was built under R version 2.15.3
## Loading required package: psych
## Attaching package: 'psych'
## The following object(s) are masked from 'package:car':
## 
## logit
## Loading required package: gplots
## Loading required package: gtools
## Attaching package: 'gtools'
## The following object(s) are masked from 'package:psych':
## 
## logit
## The following object(s) are masked from 'package:car':
## 
## logit
## The following object(s) are masked from 'package:e1071':
## 
## permutations
## Loading required package: gdata
## gdata: Unable to locate valid perl interpreter gdata: gdata: read.xls()
## will be unable to read Excel XLS and XLSX files gdata: unless the 'perl='
## argument is used to specify the location gdata: of a valid perl
## intrpreter. gdata: gdata: (To avoid display of this message in the future,
## please gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls() gdata: to support
## 'XLX' (Excel 97-2004) files.
## 
## gdata: Unable to load perl libaries needed by read.xls() gdata: to support
## 'XLSX' (Excel 2007+) files.
## 
## gdata: Run the function 'installXLSXsupport()' gdata: to automatically
## download and install the perl gdata: libaries needed to support Excel XLS
## and XLSX formats.
## Attaching package: 'gdata'
## The following object(s) are masked from 'package:stats':
## 
## nobs
## The following object(s) are masked from 'package:utils':
## 
## object.size
## Loading required package: caTools
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009
## Attaching package: 'gplots'
## The following object(s) are masked from 'package:stats':
## 
## lowess
## Loading required package: gvlma
# Fit a first linear model of BushPct on five county characteristics, then
# tabulate the sum of squares, effect sizes and F test for each term
lm1 <- lm(BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000, USA)
lm.sumSquares(lm1)
##                    SS dR-sqr pEta-sqr   df         F p-value
## (Intercept) 1.258e+01 0.2539   0.2508    1 1038.6644  0.0000
## pcturban    2.691e-04 0.0000   0.0000    1    0.0222  0.8815
## pctfemhh    7.680e+00 0.1551   0.1698    1  634.3157  0.0000
## pctpoor     8.457e-01 0.0171   0.0220    1   69.8485  0.0000
## HISP_LAT    2.941e-02 0.0006   0.0008    1    2.4289  0.1192
## MEDAGE2000  5.919e-02 0.0012   0.0016    1    4.8883  0.0271
## Error (SSE) 3.756e+01     NA       NA 3102        NA      NA
## Total (SST) 4.953e+01     NA       NA   NA        NA      NA
# The variance inflation factor (VIF) measures how much the variance of each
# regression coefficient is increased by collinearity among the predictors
vif(lm1)  # rule of thumb used here: VIF < 10 indicates no problem
##   pcturban   pctfemhh    pctpoor   HISP_LAT MEDAGE2000 
##      1.749      2.206      2.143      1.289      1.325

# Best-subset selection: for every model size up to 20 predictors, keep the
# single best model (nbest = 1) of BushPct on the candidate predictors.
# The candidate columns are selected by name rather than by position, so
# the search does not silently pick up the wrong variables if a column is
# added to or removed from the attribute table.
predictor_cols <- c(
  "MDratio", "hosp", "pcthisp", "pcturban", "urbrural", "pctfemhh",
  "pcincome", "pctpoor", "pctlt9ed", "pcthsed", "pctcoled", "unemploy",
  "pctwhtcl", "homevalu", "rent", "popdens", "crowded", "ginirev",
  "SmokecurM", "SmokevrM", "SmokecurF", "SmokevrF", "Obese"
)
subset_data <- USA@data[, c(predictor_cols, "BushPct")]
d <- regsubsets(BushPct ~ ., nbest = 1, nvmax = 20, data = subset_data)
d.fit <- summary(d)
# Adjusted R-squared of the best model of each size; the x axis is derived
# from the results so it always matches the number of models returned
plot(seq_along(d.fit$adjr2), y = d.fit$adjr2, type = "o",
     xlab = "Num.of parameters", ylab = "Adj.r2")

plot of chunk unnamed-chunk-4

# BIC of the best model of each size; the x axis is derived from the
# results so it always matches the number of models returned
plot(seq_along(d.fit$bic), y = d.fit$bic, type = "o",
     xlab = "Num.of parameters", ylab = "BIC")

plot of chunk unnamed-chunk-4

# `d` already holds this best-subset fit from the regsubsets() call earlier
# in the script; refitting the identical model would only repeat the search.
# Plot the fit to see which variables are selected in each model.
plot(d)

plot of chunk unnamed-chunk-4

# Summarize the subset search and show row 10 of the selection matrix: the
# predictors marked "*" are the ones in the best 10-variable model
d.fit <- summary(object = d)
d.fit[["outmat"]][10, ]
##   MDratio      hosp   pcthisp  pcturban  urbrural  pctfemhh  pcincome 
##       " "       " "       "*"       " "       " "       "*"       " " 
##   pctpoor  pctlt9ed   pcthsed  pctcoled  unemploy  pctwhtcl  homevalu 
##       "*"       " "       " "       "*"       "*"       "*"       "*" 
##      rent   popdens   crowded   ginirev SmokecurM  SmokevrM SmokecurF 
##       " "       " "       "*"       "*"       " "       " "       " " 
##  SmokevrF     Obese 
##       "*"       " "

# For the 'best' 10-variable model found by the subset search:
# 1. fit the model and inspect its summary, 2. compute the VIFs,
# 3. run the regression diagnostics
lm2 <- lm(BushPct ~ pcthisp + pctfemhh + pctpoor + homevalu + pctwhtcl + unemploy + 
    pctcoled + crowded + ginirev + SmokevrF, USA)
summary(lm2)
## 
## Call:
## lm(formula = BushPct ~ pcthisp + pctfemhh + pctpoor + homevalu + 
##     pctwhtcl + unemploy + pctcoled + crowded + ginirev + SmokevrF, 
##     data = USA)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4356 -0.0651  0.0061  0.0678  0.3459 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.65e-01   1.24e-02   45.41   <2e-16 ***
## pcthisp     -1.09e-04   2.09e-05   -5.21    2e-07 ***
## pctfemhh    -1.41e-02   4.90e-04  -28.80   <2e-16 ***
## pctpoor     -4.28e-03   4.72e-04   -9.06   <2e-16 ***
## homevalu    -6.95e-07   8.30e-08   -8.38   <2e-16 ***
## pctwhtcl     1.52e-03   3.55e-04    4.27    2e-05 ***
## unemploy    -7.93e-03   7.26e-04  -10.92   <2e-16 ***
## pctcoled    -5.42e-03   5.30e-04  -10.23   <2e-16 ***
## crowded      9.11e-03   9.41e-04    9.69   <2e-16 ***
## ginirev      1.14e+00   4.89e-02   23.28   <2e-16 ***
## SmokevrF    -2.84e-01   3.11e-02   -9.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 0.0962 on 3097 degrees of freedom
## Multiple R-squared: 0.421,   Adjusted R-squared: 0.419 
## F-statistic:  225 on 10 and 3097 DF,  p-value: <2e-16
# Check the ten predictors of lm2 for collinearity with their variance
# inflation factors
vif(lm2)
##  pcthisp pctfemhh  pctpoor homevalu pctwhtcl unemploy pctcoled  crowded 
##    1.809    2.413    4.945    2.499    4.941    1.730    4.016    3.024 
##  ginirev SmokevrF 
##    3.398    1.807
# Plot residuals against fitted values, together with the other standard
# lm diagnostic plots, on a 2 x 2 grid. The previous graphics settings are
# saved and restored afterwards so that later plots are not forced into
# the same grid.
old_par <- par(mfrow = c(2, 2))
plot(lm2)
par(old_par)

plot of chunk unnamed-chunk-4

# Shapiro-Wilk test of normality applied to the residuals of lm2
shapiro.test(lm2$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  lm2$residuals 
## W = 0.9929, p-value = 3.059e-11
# Null hypothesis: the residuals are normally distributed. The p-value is
# far below 0.05, so we reject the null and accept the alternative that the
# residuals are not normally distributed