Options and packages:options(scipen = 999)
library(sp)
## Warning: package 'sp' was built under R version 2.15.3
library(maptools)
## Loading required package: foreign
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Warning: package 'classInt' was built under R version 2.15.3
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)
Loading and preparing the data:Read in the shapefile
# Load the 2004 county-level election polygons and their attribute table.
USA <- readShapePoly(
  fn = "C:/CU BOULDER/Coursework/Y2S2/GEOG 5023 - Quant Methods Geo/Week 4 - Multiple Regression/Data/2004_Election_Counties.shp"
)
Visualize the data
# Shrink all four figure margins to one line so the map fills the device,
# then draw the county outlines.
par(mar = rep(1, 4))
plot(x = USA)
Clean and Summarize the data
# Keep only the counties that recorded at least one vote.
USA <- USA[USA@data[["Total"]] > 0, ]
# List the slots that make up the spatial object.
slotNames(USA)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# Summarize every column of the attribute table.
summary(slot(USA, "data"))
## NAME STATE_NAME STATE_FIPS CNTY_FIPS
## Washington: 32 Texas : 254 48 : 254 001 : 48
## Jefferson : 26 Georgia : 159 13 : 159 003 : 48
## Franklin : 25 Virginia: 134 51 : 134 005 : 48
## Jackson : 24 Kentucky: 120 21 : 120 009 : 47
## Lincoln : 24 Missouri: 115 29 : 115 007 : 46
## Madison : 20 Kansas : 105 20 : 105 011 : 46
## (Other) :2957 (Other) :2221 (Other):2221 (Other):2825
## FIPS AREA FIPS_num Bush
## 01001 : 1 Min. : 2 Min. : 1001 Min. : 65
## 01003 : 1 1st Qu.: 435 1st Qu.:19046 1st Qu.: 2941
## 01005 : 1 Median : 622 Median :29214 Median : 6364
## 01007 : 1 Mean : 966 Mean :30686 Mean : 19073
## 01009 : 1 3rd Qu.: 931 3rd Qu.:46010 3rd Qu.: 15924
## 01011 : 1 Max. :20175 Max. :56045 Max. :954764
## (Other):3102
## Kerry County_F Nader Total
## Min. : 12 Min. : 1001 Min. : 0 Min. : 77
## 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0 1st Qu.: 4831
## Median : 4041 Median :29214 Median : 14 Median : 10416
## Mean : 17957 Mean :30686 Mean : 145 Mean : 37176
## 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67 3rd Qu.: 26599
## Max. :1670341 Max. :56045 Max. :13251 Max. :2625105
##
## Bush_pct Kerry_pct Nader_pct MDratio
## Min. : 9.31 Min. : 7.17 Min. :0.000 Min. : 0.0
## 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000 1st Qu.: 37.3
## Median :61.17 Median :38.49 Median :0.303 Median : 65.6
## Mean :60.66 Mean :38.94 Mean :0.401 Mean : 93.1
## 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633 3rd Qu.: 117.6
## Max. :92.83 Max. :90.05 Max. :4.467 Max. :2189.5
##
## hosp pcthisp pcturban urbrural
## Min. : 0.00 Min. : 0.0 Min. : 0.0 Min. :0.00
## 1st Qu.: 1.32 1st Qu.: 4.0 1st Qu.: 0.0 1st Qu.:3.00
## Median : 3.29 Median : 8.0 Median : 33.5 Median :6.00
## Mean : 5.68 Mean : 44.5 Mean : 35.3 Mean :5.54
## 3rd Qu.: 6.75 3rd Qu.: 24.0 3rd Qu.: 56.5 3rd Qu.:7.00
## Max. :84.07 Max. :972.0 Max. :100.0 Max. :9.00
##
## pctfemhh pcincome pctpoor pctlt9ed
## Min. : 0.0 Min. : 0 Min. : 0.0 Min. : 0.0
## 1st Qu.: 9.6 1st Qu.:15474 1st Qu.:11.1 1st Qu.: 8.9
## Median :12.2 Median :17450 Median :15.1 Median :13.2
## Mean :13.0 Mean :17805 Mean :16.5 Mean :14.3
## 3rd Qu.:15.4 3rd Qu.:19818 3rd Qu.:20.4 3rd Qu.:18.7
## Max. :41.1 Max. :58096 Max. :63.1 Max. :56.3
##
## pcthsed pctcoled unemploy pctwhtcl
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.:61.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.:38.5
## Median :71.2 Median :11.6 Median : 5.30 Median :43.5
## Mean :68.4 Mean :13.1 Mean : 5.88 Mean :44.6
## 3rd Qu.:77.1 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.:50.7
## Max. :95.5 Max. :53.4 Max. :37.90 Max. :81.4
##
## homevalu rent popdens crowded
## Min. : 0 Min. : 0 Min. : 0 Min. : 0.00
## 1st Qu.: 35900 1st Qu.:255 1st Qu.: 15 1st Qu.: 1.80
## Median : 44400 Median :297 Median : 39 Median : 2.60
## Mean : 52066 Mean :314 Mean : 194 Mean : 3.61
## 3rd Qu.: 58600 3rd Qu.:352 3rd Qu.: 93 3rd Qu.: 4.50
## Max. :500001 Max. :926 Max. :53801 Max. :44.40
##
## ginirev SmokecurM SmokevrM SmokecurF
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.390 1st Qu.:0.220 1st Qu.:0.490 1st Qu.:0.190
## Median :0.420 Median :0.240 Median :0.520 Median :0.210
## Mean :0.414 Mean :0.242 Mean :0.505 Mean :0.209
## 3rd Qu.:0.440 3rd Qu.:0.270 3rd Qu.:0.540 3rd Qu.:0.240
## Max. :0.580 Max. :0.580 Max. :0.780 Max. :0.420
##
## SmokevrF Obese Noins XYLENES__M
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0
## 1st Qu.:0.390 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 27
## Median :0.420 Median :0.340 Median :0.120 Median : 58
## Mean :0.412 Mean :0.335 Mean :0.129 Mean : 222
## 3rd Qu.:0.460 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 171
## Max. :0.630 Max. :0.630 Max. :0.410 Max. :16661
##
## TOLUENE TETRACHLOR STYRENE NICKEL_COM
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 44 1st Qu.: 0.7 1st Qu.: 0.8 1st Qu.: 0.00
## Median : 91 Median : 1.9 Median : 1.9 Median : 0.01
## Mean : 336 Mean : 13.7 Mean : 15.4 Mean : 0.37
## 3rd Qu.: 256 3rd Qu.: 6.6 3rd Qu.: 8.1 3rd Qu.: 0.11
## Max. :28305 Max. :1966.6 Max. :1413.0 Max. :69.01
##
## METHYLENE_ MERCURY_CO LEAD_COMPO BENZENE__I
## Min. : 0.0 Min. :0.000 Min. : 0.00 Min. : 0
## 1st Qu.: 1.6 1st Qu.:0.002 1st Qu.: 0.01 1st Qu.: 23
## Median : 3.9 Median :0.004 Median : 0.02 Median : 42
## Mean : 26.4 Mean :0.057 Mean : 0.82 Mean : 106
## 3rd Qu.: 12.5 3rd Qu.:0.020 3rd Qu.: 0.23 3rd Qu.: 97
## Max. :2764.2 Max. :3.220 Max. :290.63 Max. :4612
##
## ARSENIC_CO POP2000 POP00SQMIL MALE2000
## Min. : 0.00 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0.00 1st Qu.: 11368 1st Qu.: 18 1st Qu.: 5600
## Median : 0.00 Median : 24770 Median : 43 Median : 12280
## Mean : 0.11 Mean : 89230 Mean : 244 Mean : 43768
## 3rd Qu.: 0.02 3rd Qu.: 62028 3rd Qu.: 105 3rd Qu.: 30396
## Max. :32.47 Max. :9519338 Max. :66934 Max. :4704105
##
## FEMALE2000 MAL2FEM UNDER18 AIAN
## Min. : 0 Min. : 0.0 Min. : 0.0 Min. : 0.00
## 1st Qu.: 5608 1st Qu.: 94.0 1st Qu.:23.7 1st Qu.: 0.20
## Median : 12548 Median : 97.0 Median :25.3 Median : 0.30
## Mean : 45462 Mean : 98.4 Mean :25.5 Mean : 1.61
## 3rd Qu.: 31554 3rd Qu.:100.0 3rd Qu.:27.1 3rd Qu.: 0.80
## Max. :4815233 Max. :205.0 Max. :45.3 Max. :94.20
##
## ASIA BLACK NHPI WHITE
## Min. : 0.000 Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 0.200 1st Qu.: 0.30 1st Qu.:0.0000 1st Qu.:77.2
## Median : 0.300 Median : 1.70 Median :0.0000 Median :91.3
## Mean : 0.771 Mean : 8.84 Mean :0.0361 Mean :84.8
## 3rd Qu.: 0.700 3rd Qu.:10.10 3rd Qu.:0.1000 3rd Qu.:96.7
## Max. :30.800 Max. :86.50 Max. :1.5000 Max. :99.7
##
## AIAN_MORE ASIA_MORE BLK_MORE NHPI_MORE
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.50 1st Qu.: 0.30 1st Qu.: 0.40 1st Qu.:0.0000
## Median : 0.80 Median : 0.50 Median : 2.10 Median :0.1000
## Mean : 2.22 Mean : 0.98 Mean : 9.13 Mean :0.0996
## 3rd Qu.: 1.40 3rd Qu.: 0.90 3rd Qu.:10.70 3rd Qu.:0.1000
## Max. :95.10 Max. :32.60 Max. :86.70 Max. :2.6000
##
## WHT_MORE HISP_LAT CH19902000 MEDAGE2000
## Min. : 0.0 Min. : 0.00 Min. :-37.4 Min. : 0.0
## 1st Qu.:79.1 1st Qu.: 0.90 1st Qu.: 1.0 1st Qu.:35.2
## Median :92.7 Median : 1.80 Median : 8.4 Median :37.4
## Mean :86.0 Mean : 6.19 Mean : 11.1 Mean :37.4
## 3rd Qu.:97.6 3rd Qu.: 5.10 3rd Qu.: 17.4 3rd Qu.:39.8
## Max. :99.9 Max. :97.50 Max. :191.0 Max. :54.3
##
## PEROVER65
## Min. : 0.0
## 1st Qu.:12.1
## Median :14.4
## Mean :14.8
## 3rd Qu.:17.1
## Max. :34.7
##
Plot as a table
par(mar = c(1, 1, 1, 1))
# plot() on a data frame draws a scatterplot matrix with one panel per pair of
# columns. Passing the whole attribute table asks for far more panels than fit
# on one device and fails with "figure margins too large", so plot only a few
# numeric variables of interest.
pairs_vars <- c("Bush_pct", "pctfemhh", "pctpoor", "homevalu", "MEDAGE2000")
plot(USA@data[, pairs_vars])
Making Maps in R:All colors
# Show every ColorBrewer palette in a single figure.
RColorBrewer::display.brewer.all()
Make a 7-color spectral palette
# Seven colors from the "Spectral" palette, stored for later use.
pal7 <- brewer.pal(n = 7, name = "Spectral")
# Preview those seven colors.
display.brewer.pal(n = 7, name = "Spectral")
Associate the colors with the map units
# Bush's share of all votes cast, as a proportion (Bush / Total).
USA$BushPct <- with(USA@data, Bush / Total)
# Split the shares into seven quantile classes.
cats7 <- classIntervals(var = USA$BushPct, n = 7, style = "quantile")
print(cats7)
## style: quantile
## [0.09308,0.4746) [0.4746,0.5415) [0.5415,0.5907) [0.5907,0.6336)
## 444 444 444 444
## [0.6336,0.6801) [0.6801,0.7421) [0.7421,0.9283]
## 444 444 444
# Assign each county the palette color of the class it falls in.
SevenColors <- findColours(clI = cats7, pal = pal7)
Plot the map with its colors
# One-line margins on every side, then fill each county with its class color.
par(mar = rep(1, 4))
plot(x = USA, col = SevenColors)
Calculate deviations from mean to coincide with spectral color palette
# Standardize Bush's vote share: deviation from the mean across counties,
# in standard-deviation units.
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
  sd(USA$BushPct, na.rm = TRUE)
pal7 <- brewer.pal(7, "Spectral")
# Quantile classes are unchanged by a linear rescaling, so classifying the
# z-scores by quantile would reproduce the previous map exactly. Use fixed
# breaks that are symmetric about 0 instead, so the middle color of the
# diverging palette covers counties near the mean.
z_rng <- range(USA$BushPctZ, na.rm = TRUE)
# Push the outer breaks out to at least +/-2 so they always enclose the data
# and the break sequence stays increasing.
z_breaks <- c(min(z_rng[1], -2), -1.5, -1, -0.5, 0.5, 1, 1.5, max(z_rng[2], 2))
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "fixed",
                        fixedBreaks = z_breaks)
SevenColors <- findColours(cats7, pal7) # connect new categories to colors
Plot map with new colors
# Redraw the map with the colors derived from the standardized shares.
par(mar = rep(1, 4))
plot(x = USA, col = SevenColors)
Multiple Regression:Modeling percent of votes for Bush
# Ordinary least squares: Bush's vote share regressed on five county
# covariates.
lm1 <- lm(
  formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT + MEDAGE2000,
  data = USA
)
print(summary(lm1))
##
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + HISP_LAT +
## MEDAGE2000, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5687 -0.0733 0.0102 0.0808 0.2708
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7818017 0.0242582 32.23 <0.0000000000000002 ***
## pcturban -0.0000132 0.0000888 -0.15 0.882
## pctfemhh -0.0134822 0.0005353 -25.19 <0.0000000000000002 ***
## pctpoor 0.0029674 0.0003551 8.36 <0.0000000000000002 ***
## HISP_LAT -0.0002907 0.0001866 -1.56 0.119
## MEDAGE2000 -0.0012489 0.0005649 -2.21 0.027 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3102 degrees of freedom
## Multiple R-squared: 0.242, Adjusted R-squared: 0.241
## F-statistic: 198 on 5 and 3102 DF, p-value: <0.0000000000000002
lm1.resid <- resid(lm1) # residuals
# Plot residuals against the fitted values, not the observed response. In a
# least-squares fit with an intercept, residuals are correlated with the
# observed response by construction (cor = sqrt(1 - R^2)), so a
# residual-vs-observed plot shows a strong trend even for a well-specified
# model. Residuals vs. fitted values should scatter evenly around zero.
plot(lm1.resid ~ fitted(lm1))
abline(h = 0, lty = 2) # zero reference line
Trying a different model
# Smaller model: vote share explained by female-headed households and
# home value only.
lmBush <- lm(
  formula = BushPct ~ pctfemhh + homevalu,
  data = USA
)
print(summary(lmBush))
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6082 -0.0731 0.0089 0.0779 0.3558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7771534694 0.0055962539 138.9 <0.0000000000000002 ***
## pctfemhh -0.0100754006 0.0003608127 -27.9 <0.0000000000000002 ***
## homevalu -0.0000007603 0.0000000601 -12.7 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256, Adjusted R-squared: 0.255
## F-statistic: 533 on 2 and 3105 DF, p-value: <0.0000000000000002
lmBush.resid <- resid(lmBush)
lmBush.pred <- predict(lmBush)
# Main diagnostic: residuals against fitted values. The residual-vs-observed
# plot is omitted because residuals are correlated with the observed response
# by construction (cor = sqrt(1 - R^2)), so its trend says nothing about
# model fit.
plot(lmBush.resid ~ lmBush.pred)
# Residuals against each predictor, to look for leftover structure.
plot(lmBush.resid ~ USA$pctfemhh)
plot(lmBush.resid ~ USA$homevalu)
Create color palette to be able to map the residuals
# Store the lmBush residuals on the map object and split them into three
# quantile classes, each tied to one Spectral color.
USA$resid <- residuals(lmBush)
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(var = USA$resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)
Map the residuals
# Map the residual classes; the map shows a clear pattern in the residuals.
par(mar = rep(1, 4))
plot(x = USA, col = ThreeColors)
Creating a Better Fitting Model:Potential variables
# List the attribute columns available as candidate predictors.
print(names(USA))
## [1] "NAME" "STATE_NAME" "STATE_FIPS" "CNTY_FIPS" "FIPS"
## [6] "AREA" "FIPS_num" "Bush" "Kerry" "County_F"
## [11] "Nader" "Total" "Bush_pct" "Kerry_pct" "Nader_pct"
## [16] "MDratio" "hosp" "pcthisp" "pcturban" "urbrural"
## [21] "pctfemhh" "pcincome" "pctpoor" "pctlt9ed" "pcthsed"
## [26] "pctcoled" "unemploy" "pctwhtcl" "homevalu" "rent"
## [31] "popdens" "crowded" "ginirev" "SmokecurM" "SmokevrM"
## [36] "SmokecurF" "SmokevrF" "Obese" "Noins" "XYLENES__M"
## [41] "TOLUENE" "TETRACHLOR" "STYRENE" "NICKEL_COM" "METHYLENE_"
## [46] "MERCURY_CO" "LEAD_COMPO" "BENZENE__I" "ARSENIC_CO" "POP2000"
## [51] "POP00SQMIL" "MALE2000" "FEMALE2000" "MAL2FEM" "UNDER18"
## [56] "AIAN" "ASIA" "BLACK" "NHPI" "WHITE"
## [61] "AIAN_MORE" "ASIA_MORE" "BLK_MORE" "NHPI_MORE" "WHT_MORE"
## [66] "HISP_LAT" "CH19902000" "MEDAGE2000" "PEROVER65" "BushPct"
## [71] "BushPctZ" "resid"
Try a fixed effect model
# State fixed effects: factor(STATE_NAME) adds a dummy variable for every
# state except one reference state, alongside the two county covariates.
lmFixed <- lm(
  formula = BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu,
  data = USA
)
print(summary(lmFixed))
##
## Call:
## lm(formula = BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5928 -0.0467 0.0071 0.0556 0.2867
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.8688855429 0.0127841233 67.97
## factor(STATE_NAME)Arizona -0.0990669145 0.0248571545 -3.99
## factor(STATE_NAME)Arkansas -0.1306708800 0.0146640734 -8.91
## factor(STATE_NAME)California -0.1189931905 0.0169820232 -7.01
## factor(STATE_NAME)Colorado -0.1171683114 0.0155897238 -7.52
## factor(STATE_NAME)Connecticut -0.1916002862 0.0335642249 -5.71
## factor(STATE_NAME)Delaware -0.1143393743 0.0513223215 -2.23
## factor(STATE_NAME)District of Columbia -0.2190191307 0.0879025445 -2.49
## factor(STATE_NAME)Florida -0.0617952171 0.0151199634 -4.09
## factor(STATE_NAME)Georgia 0.0264870225 0.0126495893 2.09
## factor(STATE_NAME)Idaho 0.0034561525 0.0172061938 0.20
## factor(STATE_NAME)Illinois -0.1191754347 0.0138609755 -8.60
## factor(STATE_NAME)Indiana -0.0496437196 0.0141713605 -3.50
## factor(STATE_NAME)Iowa -0.1958287619 0.0141304825 -13.86
## factor(STATE_NAME)Kansas -0.0185120240 0.0140101184 -1.32
## factor(STATE_NAME)Kentucky -0.0526115662 0.0133508476 -3.94
## factor(STATE_NAME)Louisiana 0.0039311322 0.0151805091 0.26
## factor(STATE_NAME)Maine -0.2184542974 0.0243497633 -8.97
## factor(STATE_NAME)Maryland -0.1071135487 0.0210597078 -5.09
## factor(STATE_NAME)Massachusetts -0.2538809930 0.0266921807 -9.51
## factor(STATE_NAME)Michigan -0.1405570347 0.0143978007 -9.76
## factor(STATE_NAME)Minnesota -0.2040343886 0.0145122559 -14.06
## factor(STATE_NAME)Mississippi 0.0008641367 0.0143500089 0.06
## factor(STATE_NAME)Missouri -0.0927936740 0.0136331709 -6.81
## factor(STATE_NAME)Montana -0.0603453723 0.0160115951 -3.77
## factor(STATE_NAME)Nebraska -0.0006254248 0.0144456256 -0.04
## factor(STATE_NAME)Nevada -0.0494043289 0.0238640908 -2.07
## factor(STATE_NAME)New Hampshire -0.2042405192 0.0298945562 -6.83
## factor(STATE_NAME)New Jersey -0.1358943106 0.0228416946 -5.95
## factor(STATE_NAME)New Mexico -0.1067993898 0.0185092156 -5.77
## factor(STATE_NAME)New York -0.1380033744 0.0157607627 -8.76
## factor(STATE_NAME)North Carolina -0.0518595692 0.0137451576 -3.77
## factor(STATE_NAME)North Dakota -0.1080430835 0.0163708422 -6.60
## factor(STATE_NAME)Ohio -0.0977370126 0.0142369969 -6.87
## factor(STATE_NAME)Oklahoma -0.0206343505 0.0146672980 -1.41
## factor(STATE_NAME)Oregon -0.1144779034 0.0181602256 -6.30
## factor(STATE_NAME)Pennsylvania -0.0982098153 0.0151958327 -6.46
## factor(STATE_NAME)Rhode Island -0.2360312665 0.0408165212 -5.78
## factor(STATE_NAME)South Carolina -0.0397792387 0.0166538425 -2.39
## factor(STATE_NAME)South Dakota -0.1159343029 0.0153431273 -7.56
## factor(STATE_NAME)Tennessee -0.0874536138 0.0139399958 -6.27
## factor(STATE_NAME)Texas 0.0053005053 0.0121337166 0.44
## factor(STATE_NAME)Utah 0.0465632112 0.0196286324 2.37
## factor(STATE_NAME)Vermont -0.2644860105 0.0257734286 -10.26
## factor(STATE_NAME)Virginia -0.1613607561 0.0133871672 -12.05
## factor(STATE_NAME)Washington -0.1378044769 0.0177341247 -7.77
## factor(STATE_NAME)West Virginia -0.1000959981 0.0158880566 -6.30
## factor(STATE_NAME)Wisconsin -0.2025707788 0.0149989116 -13.51
## factor(STATE_NAME)Wyoming 0.0136112878 0.0212206132 0.64
## pctfemhh -0.0132700307 0.0003675416 -36.10
## homevalu -0.0000002353 0.0000000658 -3.58
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## factor(STATE_NAME)Arizona 0.000068926600940 ***
## factor(STATE_NAME)Arkansas < 0.0000000000000002 ***
## factor(STATE_NAME)California 0.000000000002982 ***
## factor(STATE_NAME)Colorado 0.000000000000074 ***
## factor(STATE_NAME)Connecticut 0.000000012491643 ***
## factor(STATE_NAME)Delaware 0.02596 *
## factor(STATE_NAME)District of Columbia 0.01277 *
## factor(STATE_NAME)Florida 0.000044824002734 ***
## factor(STATE_NAME)Georgia 0.03635 *
## factor(STATE_NAME)Idaho 0.84082
## factor(STATE_NAME)Illinois < 0.0000000000000002 ***
## factor(STATE_NAME)Indiana 0.00047 ***
## factor(STATE_NAME)Iowa < 0.0000000000000002 ***
## factor(STATE_NAME)Kansas 0.18649
## factor(STATE_NAME)Kentucky 0.000083065651533 ***
## factor(STATE_NAME)Louisiana 0.79568
## factor(STATE_NAME)Maine < 0.0000000000000002 ***
## factor(STATE_NAME)Maryland 0.000000387377686 ***
## factor(STATE_NAME)Massachusetts < 0.0000000000000002 ***
## factor(STATE_NAME)Michigan < 0.0000000000000002 ***
## factor(STATE_NAME)Minnesota < 0.0000000000000002 ***
## factor(STATE_NAME)Mississippi 0.95199
## factor(STATE_NAME)Missouri 0.000000000011990 ***
## factor(STATE_NAME)Montana 0.00017 ***
## factor(STATE_NAME)Nebraska 0.96547
## factor(STATE_NAME)Nevada 0.03851 *
## factor(STATE_NAME)New Hampshire 0.000000000010062 ***
## factor(STATE_NAME)New Jersey 0.000000002996345 ***
## factor(STATE_NAME)New Mexico 0.000000008716794 ***
## factor(STATE_NAME)New York < 0.0000000000000002 ***
## factor(STATE_NAME)North Carolina 0.00016 ***
## factor(STATE_NAME)North Dakota 0.000000000048372 ***
## factor(STATE_NAME)Ohio 0.000000000008019 ***
## factor(STATE_NAME)Oklahoma 0.15958
## factor(STATE_NAME)Oregon 0.000000000332282 ***
## factor(STATE_NAME)Pennsylvania 0.000000000119078 ***
## factor(STATE_NAME)Rhode Island 0.000000008091255 ***
## factor(STATE_NAME)South Carolina 0.01697 *
## factor(STATE_NAME)South Dakota 0.000000000000055 ***
## factor(STATE_NAME)Tennessee 0.000000000402612 ***
## factor(STATE_NAME)Texas 0.66226
## factor(STATE_NAME)Utah 0.01774 *
## factor(STATE_NAME)Vermont < 0.0000000000000002 ***
## factor(STATE_NAME)Virginia < 0.0000000000000002 ***
## factor(STATE_NAME)Washington 0.000000000000011 ***
## factor(STATE_NAME)West Virginia 0.000000000340192 ***
## factor(STATE_NAME)Wisconsin < 0.0000000000000002 ***
## factor(STATE_NAME)Wyoming 0.52130
## pctfemhh < 0.0000000000000002 ***
## homevalu 0.00035 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0868 on 3057 degrees of freedom
## Multiple R-squared: 0.535, Adjusted R-squared: 0.527
## F-statistic: 70.4 on 50 and 3057 DF, p-value: <0.0000000000000002
Model diagnostics
lmFixed.resid <- resid(lmFixed)
lmFixed.pred <- predict(lmFixed)
# Main diagnostic: residuals against fitted values. The residual-vs-observed
# plot is omitted because residuals are correlated with the observed response
# by construction (cor = sqrt(1 - R^2)), so its trend says nothing about
# model fit.
plot(lmFixed.resid ~ lmFixed.pred)
# Residuals against each continuous predictor, to look for leftover structure.
plot(lmFixed.resid ~ USA$pctfemhh)
plot(lmFixed.resid ~ USA$homevalu)
shapiro.test(lmFixed.resid) # residuals are not normally distributed
##
## Shapiro-Wilk normality test
##
## data: lmFixed.resid
## W = 0.9619, p-value < 0.00000000000000022
Create color palette to be able to map the residuals from the fixed effects model
# Store the fixed-effects residuals on the map object and split them into
# three quantile classes, each tied to one Spectral color.
USA$fixed.resid <- residuals(lmFixed)
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(var = USA$fixed.resid, n = 3, style = "quantile")
ThreeColors <- findColours(clI = cats3, pal = pal3)
Map the residuals from the fixed effects model
# Map the fixed-effects residual classes; some pattern remains, but less
# than in the earlier residual map.
par(mar = rep(1, 4))
plot(x = USA, col = ThreeColors)
Comparing the two models
# F-test comparing the nested models; the fixed-effects model performs better.
print(anova(lmBush, lmFixed))
## Analysis of Variance Table
##
## Model 1: BushPct ~ pctfemhh + homevalu
## Model 2: BushPct ~ factor(STATE_NAME) + pctfemhh + homevalu
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3105 36.9
## 2 3057 23.0 48 13.8 38.3 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1