library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Warning: package 'maptools' was built under R version 2.15.3
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: TRUE
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Warning: package 'class' was built under R version 2.15.3
## Loading required package: e1071
## Warning: package 'e1071' was built under R version 2.15.3
library(RColorBrewer)
library(MASS)
## Warning: package 'MASS' was built under R version 2.15.3
library(leaps)
## Warning: package 'leaps' was built under R version 2.15.3
# Read the 2004 county election polygons from the local shapefile
shp_path <- "C:\\Users\\QINGHUAN\\Desktop\\Data 2\\2004_Election_Counties.shp"
USA <- readShapePoly(shp_path)
# Draw the polygons (a map of the counties)
plot(USA)
# Keep only the counties in which at least one vote was recorded
has_votes <- USA$Total > 0
USA <- USA[has_votes, ]
# List the slots (pieces) that make up the spatial object
slotNames(USA)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# Summarise the whole object: extent, projection and attribute table
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -124.73 -66.97
## y 24.96 49.37
## Is projected: NA
## proj4string : [NA]
## Data attributes:
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.6 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Summarise only the attribute table held in the object's "data" slot
summary(slot(USA, "data"))
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.6 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
# Plotting the data slot is like plotting a regular table: plot() on a data
# frame draws a scatterplot matrix with one panel per pair of columns.
# The full attribute table has far too many columns for that (the call fails
# with "figure margins too large"), so plot only a few columns of interest.
plot(USA@data[, c("Bush_pct", "Kerry_pct", "pcturban", "pctpoor")])
# Making maps in R
# Preview every ColorBrewer palette
display.brewer.all()
# Build a 7-colour 'Spectral' palette ...
pal7 <- brewer.pal(n = 7, name = "Spectral")
# ... and display its colours
display.brewer.pal(n = 7, name = "Spectral")
# Create a column holding the share of all votes in each county that went
# to G. W. Bush in 2004 (a proportion between 0 and 1)
USA$BushPct <- with(USA@data, Bush / Total)
# Split the counties into seven quantile classes of BushPct
cats7 <- classIntervals(var = USA$BushPct, n = 7, style = "quantile")
cats7
## style: quantile
## [0.09308,0.4746) [0.4746,0.5415) [0.5415,0.5907) [0.5907,0.6336)
## 444 444 444 444
## [0.6336,0.6801) [0.6801,0.7421) [0.7421,0.9283]
## 444 444 444
# The output shows the BushPct range of each class; every class holds 444
# counties.
# Connect the classes to the palette with findColours()
SevenColors <- findColours(clI = cats7, pal = pal7)
# Draw the map, colouring each county by its class
plot(USA, col = SevenColors)
# The previous map is not good: it uses a diverging colour scheme to
# represent a continuous variable. Schemes like 'Spectral' are meant to show
# deviations from the mean. So convert the BushPct column into standard
# units, so that the mean is zero and the map shows deviations from the mean.
# Create a new column to hold the standardized Bush share
# (TRUE is spelled out because T is an ordinary variable that can be
# reassigned)
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
  sd(USA$BushPct, na.rm = TRUE)
# Create new classes from the standardized column and map them
pal7 <- brewer.pal(7, "Spectral")
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "quantile")
SevenColors <- findColours(cats7, pal7)
plot(USA, col = SevenColors)
# In the map the areas that are red have below average BushPct and blue
# areas are above average
# Use an automated model-selection procedure to find the best possible
# model. Two options: (1) stepAIC() in MASS, (2) regsubsets() in leaps.
library(lmSupport)
## Warning: package 'lmSupport' was built under R version 2.15.3
## Loading required package: car
## Warning: package 'car' was built under R version 2.15.3
## Loading required package: nnet
## Warning: package 'nnet' was built under R version 2.15.3
## Loading required package: psych
## Attaching package: 'psych'
## The following object(s) are masked from 'package:car':
##
## logit
## Loading required package: gplots
## Loading required package: gtools
## Attaching package: 'gtools'
## The following object(s) are masked from 'package:psych':
##
## logit
## The following object(s) are masked from 'package:car':
##
## logit
## The following object(s) are masked from 'package:e1071':
##
## permutations
## Loading required package: gdata
## gdata: Unable to locate valid perl interpreter gdata: gdata: read.xls()
## will be unable to read Excel XLS and XLSX files gdata: unless the 'perl='
## argument is used to specify the location gdata: of a valid perl
## intrpreter. gdata: gdata: (To avoid display of this message in the future,
## please gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls() gdata: to support
## 'XLX' (Excel 97-2004) files.
## ```
## gdata: Run the function 'installXLSXsupport()' gdata: to automatically
## download and install the perl gdata: libaries needed to support Excel XLS
## and XLSX formats.
## Attaching package: 'gdata'
## The following object(s) are masked from 'package:stats':
##
## nobs
## The following object(s) are masked from 'package:utils':
##
## object.size
## Loading required package: caTools
## Loading required package: KernSmooth
## KernSmooth 2.23 loaded Copyright M. P. Wand 1997-2009
## Attaching package: 'gplots'
## The following object(s) are masked from 'package:stats':
##
## lowess
## Loading required package: gvlma
# Fit a first linear model of the Bush vote share on five county attributes
lm1 <- lm(
  formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000,
  data = USA
)
# Sums of squares and effect sizes for each term of the model
lm.sumSquares(lm1)
## SS dR-sqr pEta-sqr df F p-value
## (Intercept) 1.258e+01 0.2539 0.2508 1 1038.6644 0.0000
## pcturban 2.691e-04 0.0000 0.0000 1 0.0222 0.8815
## pctfemhh 7.680e+00 0.1551 0.1698 1 634.3157 0.0000
## pctpoor 8.457e-01 0.0171 0.0220 1 69.8485 0.0000
## HISP_LAT 2.941e-02 0.0006 0.0008 1 2.4289 0.1192
## MEDAGE2000 5.919e-02 0.0012 0.0016 1 4.8883 0.0271
## Error (SSE) 3.756e+01 NA NA 3102 NA NA
## Total (SST) 4.953e+01 NA NA NA NA NA
# The variance inflation factor (VIF) measures how much the variance of each
# regression coefficient is inflated by collinearity among the predictors;
# VIF < 10 indicates no problem
vif(lm1)
## pcturban pctfemhh pctpoor HISP_LAT MEDAGE2000
## 1.749 2.206 2.143 1.289 1.325
# Best-subset search with leaps::regsubsets(): for every model size up to 20
# predictors keep the single best model (nbest = 1) of BushPct.
# Columns 16:38 of the attribute table are the candidate predictors (MDratio
# through Obese); the response is selected by name rather than by position
# so the selection does not depend on where BushPct was appended.
model_data <- USA@data[, c(names(USA@data)[16:38], "BushPct")]
d <- regsubsets(BushPct ~ ., nbest = 1, nvmax = 20, data = model_data)
d.fit <- summary(d)
# Index the x axis by the number of models actually returned instead of
# assuming there are exactly 20
n_params <- seq_along(d.fit$adjr2)
plot(n_params, y = d.fit$adjr2, type = "o", xlab = "Num.of parameters", ylab = "Adj.r2")
plot(n_params, y = d.fit$bic, type = "o", xlab = "Num.of parameters", ylab = "BIC")
# The search was previously run a second time here with identical arguments;
# the existing fit and summary are reused instead.
plot(d)
# Predictors included in the best 10-variable model
d.fit$outmat[10, ]
## MDratio hosp pcthisp pcturban urbrural pctfemhh pcincome
## " " " " "*" " " " " "*" " "
## pctpoor pctlt9ed pcthsed pctcoled unemploy pctwhtcl homevalu
## "*" " " " " "*" "*" "*" "*"
## rent popdens crowded ginirev SmokecurM SmokevrM SmokecurF
## " " " " "*" "*" " " " " " "
## SmokevrF Obese
## "*" " "
# For the 'best' model: (1) compute the VIFs, (2) fit the model and complete
# the diagnostics.
# The ten predictors are those starred in row 10 of the regsubsets selection
# matrix.
lm2 <- lm(
  formula = BushPct ~ pcthisp + pctfemhh + pctpoor + homevalu + pctwhtcl +
    unemploy + pctcoled + crowded + ginirev + SmokevrF,
  data = USA
)
summary(lm2)
##
## Call:
## lm(formula = BushPct ~ pcthisp + pctfemhh + pctpoor + homevalu +
## pctwhtcl + unemploy + pctcoled + crowded + ginirev + SmokevrF,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4356 -0.0651 0.0061 0.0678 0.3459
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.65e-01 1.24e-02 45.41 <2e-16 ***
## pcthisp -1.09e-04 2.09e-05 -5.21 2e-07 ***
## pctfemhh -1.41e-02 4.90e-04 -28.80 <2e-16 ***
## pctpoor -4.28e-03 4.72e-04 -9.06 <2e-16 ***
## homevalu -6.95e-07 8.30e-08 -8.38 <2e-16 ***
## pctwhtcl 1.52e-03 3.55e-04 4.27 2e-05 ***
## unemploy -7.93e-03 7.26e-04 -10.92 <2e-16 ***
## pctcoled -5.42e-03 5.30e-04 -10.23 <2e-16 ***
## crowded 9.11e-03 9.41e-04 9.69 <2e-16 ***
## ginirev 1.14e+00 4.89e-02 23.28 <2e-16 ***
## SmokevrF -2.84e-01 3.11e-02 -9.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0962 on 3097 degrees of freedom
## Multiple R-squared: 0.421, Adjusted R-squared: 0.419
## F-statistic: 225 on 10 and 3097 DF, p-value: <2e-16
# Variance inflation factors of the selected model
vif(lm2)
## pcthisp pctfemhh pctpoor homevalu pctwhtcl unemploy pctcoled crowded
## 1.809 2.413 4.945 2.499 4.941 1.730 4.016 3.024
## ginirev SmokevrF
## 3.398 1.807
# Draw the lm diagnostic plots (residuals against fitted values etc.) on one
# 2 x 2 page, then restore the previous graphics settings so the layout does
# not leak into later plots
old_par <- par(mfrow = c(2, 2))
plot(lm2)
par(old_par)
# Shapiro-Wilk test of the residuals for normality
shapiro.test(lm2$residuals)
##
## Shapiro-Wilk normality test
##
## data: lm2$residuals
## W = 0.9929, p-value = 3.059e-11
# Null hypothesis: the residuals are normally distributed. The p-value is
# far below 0.05, so reject the null and accept the alternative that the
# residuals are not normally distributed.