library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Loading required package: foreign
## Warning: package 'foreign' was built under R version 2.15.3
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)
# Path to the 2004 county-level election shapefile. This is an absolute,
# machine-specific path; point it at your own copy of the data.
shp_path <- "/Users/xiwang/Dropbox/GEOG 5023 - offline/Data/2004_Election_Counties.shp"
# Fail early with a readable message instead of a low-level read error
if (!file.exists(shp_path)) {
  stop("Shapefile not found: ", shp_path, call. = FALSE)
}
# Read the county polygons together with their attribute table.
# NOTE(review): no proj4string is supplied, so the object carries no CRS
# (summary() below reports proj4string NA). The bounding box looks like
# longitude/latitude degrees -- confirm the datum before assigning one.
USA <- readShapePoly(shp_path)
plot(USA)
# Remove counties with no votes. The is.na() guard keeps a missing Total from
# turning into an NA row index, which would break the spatial subset.
USA <- USA[!is.na(USA$Total) & USA$Total > 0, ]
# List the pieces (S4 slots) of the object
slotNames(USA)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# Summarize the whole object: extent, projection info, and attributes
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -124.73 -66.97
## y 24.96 49.37
## Is projected: NA
## proj4string : [NA]
## Data attributes:
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.7 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Pull the attribute table out of the 'data' slot with '@', then summarize
# just that table
county_attributes <- USA@data
summary(county_attributes)
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.7 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Show every ColorBrewer palette so one can be picked by eye
display.brewer.all()
# Build the 7-color 'Spectral' palette and preview it
pal7 <- brewer.pal(n = 7, name = "Spectral")
display.brewer.pal(n = 7, name = "Spectral")
# Create a column that holds the share of all votes that went to G.W. Bush
# in 2004. Despite the name, this is a proportion (0-1), not a percentage.
USA$BushPct <- USA@data$Bush / USA@data$Total
# Split the shares into seven quantile classes
cats7 <- classIntervals(USA$BushPct, n = 7, style = "quantile")
# Printing shows the BushPct range covered by each class. Quantile classes
# hold roughly equal numbers of counties (about 444 each here).
cats7
## style: quantile
## [0.09308,0.4746) [0.4746,0.5415) [0.5415,0.5907) [0.5907,0.6336)
## 444 444 444 444
## [0.6336,0.6801) [0.6801,0.7421) [0.7421,0.9283]
## 444 444 444
# Look up the palette color for each county's class with findColours()
SevenColors <- findColours(clI = cats7, pal = pal7)
# Draw the map using the specified data and colors
# (not a great cartographic map)
plot(USA, col = SevenColors)
# Convert the BushPct column into standard units (z-scores) so that the mean
# is zero and the map shows deviations from the mean
bush_mean <- mean(USA$BushPct, na.rm = TRUE)
bush_sd <- sd(USA$BushPct, na.rm = TRUE)
# New column holding the standardized Bush share
USA$BushPctZ <- (USA$BushPct - bush_mean) / bush_sd
# Rebuild the palette, classes, and colors from the standardized column
pal7 <- brewer.pal(n = 7, name = "Spectral")
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "quantile")
SevenColors <- findColours(clI = cats7, pal = pal7)
# In this map, red areas have below-average BushPct and blue areas are
# above average
plot(USA, col = SevenColors)
# Predict the Bush vote share from the percent urban, the percent of
# households that are female headed, the percent poor, HISP_LAT (presumably
# percent Hispanic/Latino -- confirm against the data dictionary), and the
# median age
lm1 <- lm(
  BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000,
  data = USA
)
# Overall, the signs of the coefficients align with expectations
summary(lm1)
##
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT +
## MEDAGE2000, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5687 -0.0733 0.0102 0.0808 0.2708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.82e-01 2.43e-02 32.23 <2e-16 ***
## pcturban -1.32e-05 8.88e-05 -0.15 0.882
## pctfemhh -1.35e-02 5.35e-04 -25.19 <2e-16 ***
## pctpoor 2.97e-03 3.55e-04 8.36 <2e-16 ***
## HISP_LAT -2.91e-04 1.87e-04 -1.56 0.119
## MEDAGE2000 -1.25e-03 5.65e-04 -2.21 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3102 degrees of freedom
## Multiple R-squared: 0.242, Adjusted R-squared: 0.241
## F-statistic: 198 on 5 and 3102 DF, p-value: <2e-16
# Residuals of the first model
lm1.resid <- resid(lm1)
# Plot residuals against the FITTED values rather than the observed BushPct:
# OLS residuals are positively correlated with the observed response by
# construction, so a trend in a residual-vs-observed plot does not by itself
# indicate a poor fit.
plot(fitted(lm1), lm1.resid,
  xlab = "Fitted BushPct (lm1)", ylab = "lm1 residuals"
)
abline(h = 0, lty = 2) # reference line: residuals should scatter around 0
# NOTE(review): the original "residuals don't look great" reading was made on
# a plot against observed BushPct; re-check it on this plot.
# Try again with a smaller model
lm2 <- lm(BushPct ~ pctfemhh + homevalu, data = USA)
# Both coefficients are negative and significant
summary(lm2)
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6082 -0.0731 0.0089 0.0779 0.3558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.77e-01 5.60e-03 138.9 <2e-16 ***
## pctfemhh -1.01e-02 3.61e-04 -27.9 <2e-16 ***
## homevalu -7.60e-07 6.01e-08 -12.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256, Adjusted R-squared: 0.255
## F-statistic: 533 on 2 and 3105 DF, p-value: <2e-16
# Residuals of the second model
lm2.resid <- resid(lm2)
# Plot residuals against the fitted values, not the observed BushPct, because
# OLS residuals are correlated with the observed response by construction.
plot(fitted(lm2), lm2.resid,
  xlab = "Fitted BushPct (lm2)", ylab = "lm2 residuals"
)
abline(h = 0, lty = 2) # reference line: residuals should scatter around 0
# NOTE(review): the original "residuals still look bad" reading was made on a
# plot against observed BushPct; re-check it on this plot.
# Map the residuals to explore geographic patterns.
# First, add a new column to USA to hold the lm2 residuals
USA$resid <- residuals(lm2)
# Three quantile classes, colored with a 3-class 'Spectral' palette
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(USA$resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)
# There is a clear pattern to the residuals, which suggests some missing
# variable
plot(USA, col = ThreeColors)
# Examine the variables available in the attribute table
colnames(USA@data)
## [1] "NAME" "STATE_NAME" "STATE_FIPS" "CNTY_FIPS" "FIPS"
## [6] "AREA" "FIPS_num" "Bush" "Kerry" "County_F"
## [11] "Nader" "Total" "Bush_pct" "Kerry_pct" "Nader_pct"
## [16] "MDratio" "hosp" "pcthisp" "pcturban" "urbrural"
## [21] "pctfemhh" "pcincome" "pctpoor" "pctlt9ed" "pcthsed"
## [26] "pctcoled" "unemploy" "pctwhtcl" "homevalu" "rent"
## [31] "popdens" "crowded" "ginirev" "SmokecurM" "SmokevrM"
## [36] "SmokecurF" "SmokevrF" "Obese" "Noins" "XYLENES__M"
## [41] "TOLUENE" "TETRACHLOR" "STYRENE" "NICKEL_COM" "METHYLENE_"
## [46] "MERCURY_CO" "LEAD_COMPO" "BENZENE__I" "ARSENIC_CO" "POP2000"
## [51] "POP00SQMIL" "MALE2000" "FEMALE2000" "MAL2FEM" "UNDER18"
## [56] "AIAN" "ASIA" "BLACK" "NHPI" "WHITE"
## [61] "AIAN_MORE" "ASIA_MORE" "BLK_MORE" "NHPI_MORE" "WHT_MORE"
## [66] "HISP_LAT" "CH19902000" "MEDAGE2000" "PEROVER65" "BushPct"
## [71] "BushPctZ" "resid"
# pctfemhh, pctpoor, and homevalu were significant in the previous models,
# so keep those and add pcincome, pctwhtcl, and PEROVER65
lm3 <- lm(
  BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + pctwhtcl + PEROVER65,
  data = USA
)
# All predictors are significant except pctwhtcl; adjusted R^2 = 0.299
summary(lm3)
##
## Call:
## lm(formula = BushPct ~ pctfemhh + pctpoor + homevalu + pcincome +
## pctwhtcl + PEROVER65, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5041 -0.0720 0.0105 0.0779 0.3327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.23e-01 1.41e-02 51.36 < 2e-16 ***
## pctfemhh -1.36e-02 5.10e-04 -26.62 < 2e-16 ***
## pctpoor 3.10e-03 3.53e-04 8.81 < 2e-16 ***
## homevalu -1.33e-06 8.78e-08 -15.19 < 2e-16 ***
## pcincome 8.38e-06 7.50e-07 11.18 < 2e-16 ***
## pctwhtcl -3.34e-04 2.94e-04 -1.13 0.26
## PEROVER65 -3.80e-03 5.41e-04 -7.04 2.4e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.106 on 3101 degrees of freedom
## Multiple R-squared: 0.301, Adjusted R-squared: 0.299
## F-statistic: 222 on 6 and 3101 DF, p-value: <2e-16
# Draw the lm3 diagnostic plots on a single 2 x 2 page. par() returns the
# previous settings; they are restored afterwards so the multi-panel layout
# does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
# Residuals v. fitted is not random or centered on 0, and the Q-Q plot
# indicates the residuals aren't normal
plot(lm3, main = "lm3 Plots")
par(old_par)
# pctwhtcl was not significant in lm3, so drop it and add urbrural and
# unemploy instead
lm4 <- lm(
  BushPct ~ pctfemhh + pctpoor + homevalu + pcincome + urbrural + unemploy +
    PEROVER65,
  data = USA
)
# All predictors are significant; adjusted R^2 = 0.32
summary(lm4)
##
## Call:
## lm(formula = BushPct ~ pctfemhh + pctpoor + homevalu + pcincome +
## urbrural + unemploy + PEROVER65, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4856 -0.0680 0.0100 0.0769 0.3246
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.17e-01 1.32e-02 54.45 < 2e-16 ***
## pctfemhh -1.17e-02 5.37e-04 -21.84 < 2e-16 ***
## pctpoor 2.37e-03 4.11e-04 5.76 9.4e-09 ***
## homevalu -1.11e-06 8.88e-08 -12.55 < 2e-16 ***
## pcincome 6.73e-06 6.16e-07 10.93 < 2e-16 ***
## urbrural 7.23e-03 9.34e-04 7.74 1.3e-14 ***
## unemploy -4.92e-03 7.36e-04 -6.69 2.7e-11 ***
## PEROVER65 -4.72e-03 5.47e-04 -8.64 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.104 on 3100 degrees of freedom
## Multiple R-squared: 0.321, Adjusted R-squared: 0.32
## F-statistic: 210 on 7 and 3100 DF, p-value: <2e-16
# Draw the lm4 diagnostic plots on a single 2 x 2 page. par() returns the
# previous settings; they are restored afterwards so the multi-panel layout
# does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
# Residuals v. fitted is not random or centered on 0, and the Q-Q plot
# indicates the residuals aren't normal, but both are slightly better than
# for lm3
plot(lm4, main = "lm4 Plots")
par(old_par)
# Try an interaction term: pctfemhh * pctpoor expands to both main effects
# plus their product (pctfemhh:pctpoor)
lm5 <- lm(
  BushPct ~ pctfemhh * pctpoor + homevalu + pcincome + urbrural + unemploy +
    PEROVER65,
  data = USA
)
# All predictors are significant; adjusted R^2 = 0.346
summary(lm5)
##
## Call:
## lm(formula = BushPct ~ pctfemhh * pctpoor + homevalu + pcincome +
## urbrural + unemploy + PEROVER65, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4378 -0.0696 0.0098 0.0759 0.3319
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.71e-01 1.36e-02 49.49 < 2e-16 ***
## pctfemhh -4.70e-03 8.24e-04 -5.71 1.2e-08 ***
## pctpoor 6.63e-03 5.57e-04 11.90 < 2e-16 ***
## homevalu -1.09e-06 8.71e-08 -12.45 < 2e-16 ***
## pcincome 4.74e-06 6.30e-07 7.52 7.0e-14 ***
## urbrural 6.92e-03 9.17e-04 7.55 5.7e-14 ***
## unemploy -5.56e-03 7.24e-04 -7.68 2.2e-14 ***
## PEROVER65 -4.87e-03 5.36e-04 -9.09 < 2e-16 ***
## pctfemhh:pctpoor -3.10e-04 2.80e-05 -11.08 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.102 on 3099 degrees of freedom
## Multiple R-squared: 0.347, Adjusted R-squared: 0.346
## F-statistic: 206 on 8 and 3099 DF, p-value: <2e-16
# Draw the lm5 diagnostic plots on a single 2 x 2 page. par() returns the
# previous settings; they are restored afterwards so the multi-panel layout
# does not leak into later plots.
old_par <- par(mfrow = c(2, 2))
# Residuals v. fitted is not random or centered on 0, and the Q-Q plot
# indicates the residuals aren't normal, but both look better and the R^2 is
# also higher, so this looks like a good direction to go in ... perhaps
# interact more terms
plot(lm5, main = "lm5 Plots")
par(old_par)