library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)
# Load the 2004 county-level election shapefile as a polygon data frame
shp_path <- "C:/Users/Claudio Alvarez/Desktop/2004_Election_Counties.shp"
USA <- readShapePoly(shp_path)
# Quick look at the raw county outlines
plot(USA)
# Keep only the counties that reported at least one vote
has_votes <- USA$Total > 0
USA <- USA[has_votes, ]
# Show the names of the slots that make up the spatial object
slotNames(USA)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# Summarize the whole spatial object: the printed output below reports the
# bounding box, the projection (NA here), and per-column statistics of the
# attribute table.
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -124.73 -66.97
## y 24.96 49.37
## Is projected: NA
## proj4string : [NA]
## Data attributes:
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.6 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Summarize only the attribute table held in the object's "data" slot
summary(USA@data)
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.6 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Plotting the data slot is like plotting a regular data frame: it draws a
# scatterplot matrix. Doing that for every attribute column at once fails
# with "figure margins too large", so plot only a few numeric columns.
pairs_cols <- c("Bush_pct", "Kerry_pct", "pcturban", "pctpoor")
plot(USA@data[, pairs_cols])
## Making maps in R
# Browse every ColorBrewer palette, then preview the 7-class "Spectral" one
display.brewer.all()
display.brewer.pal(n = 7, name = "Spectral")
# Keep the 7 "Spectral" colours for the map
pal7 <- brewer.pal(n = 7, name = "Spectral")
# Share of all votes that went to G.W. Bush in 2004
# (a fraction between 0 and 1, not a 0-100 percentage)
USA$BushPct <- USA$Bush / USA$Total
# Split the counties into 7 quantile classes and print the class breaks
cats7 <- classIntervals(var = USA$BushPct, n = 7, style = "quantile")
print(cats7)
## style: quantile
## [0.09308,0.4746) [0.4746,0.5415) [0.5415,0.5907) [0.5907,0.6336)
## 444 444 444 444
## [0.6336,0.6801) [0.6801,0.7421) [0.7421,0.9283]
## 444 444 444
# The printed breaks give the BushPct range of each class; with quantile
# breaks every class holds the same number of counties (444 here).
SevenColors <- findColours(clI = cats7, pal = pal7)
# Single-panel layout, then draw the map with one colour per county
par(mfrow = c(1, 1))
plot(USA, col = SevenColors)
## Multiple regression
# The section heading must be a comment: as bare text it is a parse error
# that stops the script from being sourced.
# Model 1: Bush vote share against urbanisation, female-headed households,
# poverty, Hispanic/Latino share and median age.
lm1 <- lm(BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000, data = USA)
summary(lm1)
##
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT +
## MEDAGE2000, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5687 -0.0733 0.0102 0.0808 0.2708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.82e-01 2.43e-02 32.23 <2e-16 ***
## pcturban -1.32e-05 8.88e-05 -0.15 0.882
## pctfemhh -1.35e-02 5.35e-04 -25.19 <2e-16 ***
## pctpoor 2.97e-03 3.55e-04 8.36 <2e-16 ***
## HISP_LAT -2.91e-04 1.87e-04 -1.56 0.119
## MEDAGE2000 -1.25e-03 5.65e-04 -2.21 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3102 degrees of freedom
## Multiple R-squared: 0.242, Adjusted R-squared: 0.241
## F-statistic: 198 on 5 and 3102 DF, p-value: <2e-16
## Residuals of model 1, plotted against the observed Bush vote share
lm1.resid <- residuals(lm1)
plot(x = USA$BushPct, y = lm1.resid)
## Smaller model: female-headed households and home value only
lmBush <- lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
print(summary(lmBush))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6082 -0.0731 0.0089 0.0779 0.3558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.77e-01 5.60e-03 138.9 <2e-16 ***
## pctfemhh -1.01e-02 3.61e-04 -27.9 <2e-16 ***
## homevalu -7.60e-07 6.01e-08 -12.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256, Adjusted R-squared: 0.255
## F-statistic: 533 on 2 and 3105 DF, p-value: <2e-16
## Residuals of lmBush: scatter against the observed Bush vote share first
lmBush.resid <- residuals(lmBush)
plot(x = USA$BushPct, y = lmBush.resid)
## Store the residuals as a new column of USA so they can be mapped
USA$resid <- lmBush.resid
## Map of residuals: 3 quantile classes coloured with a "Spectral" palette
cats3 <- classIntervals(var = USA$resid, n = 3, style = "quantile")
pal3 <- brewer.pal(n = 3, name = "Spectral")
ThreeColors <- findColours(clI = cats3, pal = pal3)
plot(USA, col = ThreeColors)
## The models below are attempts to improve on the model above
## Model 3: add the obesity rate to the lmBush predictors
lm3 <- lm(formula = BushPct ~ pctfemhh + homevalu + Obese, data = USA)
print(summary(lm3))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu + Obese, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4962 -0.0770 0.0073 0.0790 0.3871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.65e-01 1.15e-02 57.8 <2e-16 ***
## pctfemhh -1.11e-02 3.65e-04 -30.3 <2e-16 ***
## homevalu -7.99e-07 5.90e-08 -13.5 <2e-16 ***
## Obese 3.79e-01 3.42e-02 11.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.107 on 3104 degrees of freedom
## Multiple R-squared: 0.284, Adjusted R-squared: 0.283
## F-statistic: 410 on 3 and 3104 DF, p-value: <2e-16
# Residuals-vs-fitted check for model 3
plot(
  x = lm3$fitted.values, y = lm3$residuals,
  main = "Reg mod_3 residuals",
  xlab = "Fitted Values", ylab = "Residuals"
)
## Model 4: swap home value for the Hispanic/Latino share
lm4 <- lm(formula = BushPct ~ pctfemhh + HISP_LAT + Obese, data = USA)
print(summary(lm4))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + HISP_LAT + Obese, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4674 -0.0772 0.0083 0.0819 0.2620
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.635623 0.011881 53.5 <2e-16 ***
## pctfemhh -0.011757 0.000373 -31.5 <2e-16 ***
## HISP_LAT 0.000398 0.000166 2.4 0.016 *
## Obese 0.362561 0.035380 10.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3104 degrees of freedom
## Multiple R-squared: 0.243, Adjusted R-squared: 0.242
## F-statistic: 332 on 3 and 3104 DF, p-value: <2e-16
## Model 5: single predictor, share of female-headed households
lm5 <- lm(formula = BushPct ~ pctfemhh, data = USA)
print(summary(lm5))
##
## Call:
## lm(formula = BushPct ~ pctfemhh, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5773 -0.0760 0.0086 0.0823 0.2702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.746222 0.005162 144.6 <2e-16 ***
## pctfemhh -0.010741 0.000366 -29.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.112 on 3106 degrees of freedom
## Multiple R-squared: 0.217, Adjusted R-squared: 0.217
## F-statistic: 861 on 1 and 3106 DF, p-value: <2e-16
# Scatter of the response against the single predictor of model 5
plot(
  x = USA$pctfemhh, y = USA$BushPct,
  main = "BushPct v. femhh",
  xlab = "% Households Led by Female",
  ylab = "% Voting for Bush"
)
# 2 x 2 panel layout for the lm diagnostic plots of model 5
par(mfrow = c(2, 2))
plot(lm5, main = "Mod_5 Plots")
## Model 6: add a quadratic term in pctfemhh to the model 3 predictors
lm6 <- lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu, data = USA)
print(summary(lm6))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4693 -0.0755 0.0071 0.0765 0.4674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.38e-01 1.20e-02 53.41 < 2e-16 ***
## pctfemhh -1.94e-03 1.26e-03 -1.54 0.12
## I(pctfemhh^2) -2.75e-04 3.66e-05 -7.52 7.4e-14 ***
## Obese 2.82e-01 3.62e-02 7.80 8.2e-15 ***
## homevalu -8.88e-07 5.97e-08 -14.88 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.106 on 3103 degrees of freedom
## Multiple R-squared: 0.297, Adjusted R-squared: 0.296
## F-statistic: 327 on 4 and 3103 DF, p-value: <2e-16
# Back to a single-panel layout after the 2 x 2 diagnostic grid
par(mfrow = c(1, 1))
# Residuals-vs-fitted plot for model 6. The formula form (y ~ x) puts the
# residuals on the y-axis and the fitted values on the x-axis, matching the
# axis labels; the positional form plot(residuals, fitted) had them swapped.
plot(lm6$residuals ~ lm6$fitted.values, main = "Mod_6 Residuals", xlab = "Fitted Values",
     ylab = "Residuals")
## Model 7: college-educated share as the only predictor
lm7 <- lm(formula = BushPct ~ pctcoled, data = USA)
par(mfrow = c(1, 1))
plot(
  x = lm7$fitted.values, y = lm7$residuals,
  main = "Mod_7 Residuals",
  xlab = "Fitted Values", ylab = "Residuals"
)
## Model 8: model 6 plus linear and quadratic terms in pctcoled
lm8 <- lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu + pctcoled +
  I(pctcoled^2), data = USA)
## Best model of the set: adjusted R-squared 0.3241
print(summary(lm8))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu +
## pctcoled + I(pctcoled^2), data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4425 -0.0730 0.0058 0.0736 0.5377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.11e-01 1.21e-02 50.34 < 2e-16 ***
## pctfemhh -3.55e-03 1.25e-03 -2.85 0.0045 **
## I(pctfemhh^2) -2.28e-04 3.61e-05 -6.32 3.0e-10 ***
## Obese 1.79e-01 3.69e-02 4.85 1.3e-06 ***
## homevalu -6.55e-07 7.79e-08 -8.41 < 2e-16 ***
## pctcoled 9.26e-03 9.91e-04 9.34 < 2e-16 ***
## I(pctcoled^2) -2.83e-04 2.52e-05 -11.21 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.104 on 3101 degrees of freedom
## Multiple R-squared: 0.325, Adjusted R-squared: 0.324
## F-statistic: 249 on 6 and 3101 DF, p-value: <2e-16
# Residuals-vs-fitted check for model 8
plot(
  x = lm8$fitted.values, y = lm8$residuals,
  main = "Model 8 Residuals",
  xlab = "Fitted Values", ylab = "Residuals"
)