First, install (if needed) and load the required packages
# install.packages('sp')
library(sp)
library(maptools)
## Loading required package: foreign
## Loading required package: grid
## Loading required package: lattice
## Checking rgeos availability: FALSE Note: when rgeos is not available,
## polygon geometry computations in maptools depend on gpclib, which has a
## restricted licence. It is disabled by default; to enable gpclib, type
## gpclibPermit()
library(classInt)
## Loading required package: class
## Loading required package: e1071
library(RColorBrewer)
Reading a Poly Shapefile
# Read the county shapefile into USA with readShapePoly() and draw it.
# The result is loaded as a spatial polygon data frame (polygons plus an
# attribute table), which means it can be displayed directly as a map.
# NOTE(review): the path is an absolute path on one machine; anyone else
# running this must point it at their own copy of USAcopy.shp.
# NOTE(review): readShapePoly() is believed to be deprecated in later
# maptools releases -- confirm before reusing this on a current R install.
USA <- readShapePoly("/Users/telekineticturtle/Desktop/Colorado 13/Quant Methods/Data/USAcopy.shp")
# Quick visual check that the polygons loaded.
plot(USA)
Remove counties with no votes using bracket notation.
# Keep only the rows (counties) whose Total vote count is greater than zero.
# The comparison builds a logical row index; the empty column index keeps
# every attribute column.
USA <- USA[USA$Total > 0, ]
List the pieces of the shapefile using slotNames()
# slotNames() lists the slots that make up the USA object. This is a useful
# first look at what the object contains, especially when the attribute
# table has a lot of fields.
slotNames(USA)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
Summarizing data
# Summarise the whole spatial object: the printout covers the coordinate
# extent, the projection information, and every attribute column.
summary(USA)
## Object of class SpatialPolygonsDataFrame
## Coordinates:
## min max
## x -124.73 -66.97
## y 24.96 49.37
## Is projected: NA
## proj4string : [NA]
## Data attributes:
## SP_ID NAME STATE_NAME STATE_FIPS
## 0 : 1 Washington: 32 Texas : 254 48 : 254
## 1 : 1 Jefferson : 26 Georgia : 159 13 : 159
## 10 : 1 Franklin : 25 Virginia: 134 51 : 134
## 100 : 1 Jackson : 24 Kentucky: 120 21 : 120
## 1000 : 1 Lincoln : 24 Missouri: 115 29 : 115
## 1001 : 1 Madison : 20 Kansas : 105 20 : 105
## (Other):3102 (Other) :2957 (Other) :2221 (Other):2221
## CNTY_FIPS FIPS AREA FIPS_num
## 001 : 48 01001 : 1 Min. : 2 Min. : 1001
## 003 : 48 01003 : 1 1st Qu.: 435 1st Qu.:19046
## 005 : 48 01005 : 1 Median : 622 Median :29214
## 009 : 47 01007 : 1 Mean : 966 Mean :30686
## 007 : 46 01009 : 1 3rd Qu.: 931 3rd Qu.:46010
## 011 : 46 01011 : 1 Max. :20175 Max. :56045
## (Other):2825 (Other):3102
## Bush Kerry County_F Nader
## Min. : 65 Min. : 12 Min. : 1001 Min. : 0
## 1st Qu.: 2941 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0
## Median : 6364 Median : 4041 Median :29214 Median : 14
## Mean : 19073 Mean : 17957 Mean :30686 Mean : 145
## 3rd Qu.: 15924 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67
## Max. :954764 Max. :1670341 Max. :56045 Max. :13251
##
## Total Bush_pct Kerry_pct Nader_pct
## Min. : 77 Min. : 9.31 Min. : 7.17 Min. :0.000
## 1st Qu.: 4831 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000
## Median : 10416 Median :61.17 Median :38.49 Median :0.303
## Mean : 37176 Mean :60.66 Mean :38.94 Mean :0.401
## 3rd Qu.: 26599 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633
## Max. :2625105 Max. :92.83 Max. :90.05 Max. :4.467
##
## MDratio pcturban pctfemhh pcincome
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 37.3 1st Qu.: 0.0 1st Qu.: 9.6 1st Qu.:15474
## Median : 65.6 Median : 33.5 Median :12.2 Median :17450
## Mean : 93.1 Mean : 35.3 Mean :13.0 Mean :17805
## 3rd Qu.: 117.6 3rd Qu.: 56.5 3rd Qu.:15.4 3rd Qu.:19818
## Max. :2189.5 Max. :100.0 Max. :41.1 Max. :58096
##
## pctpoor pctcoled unemploy homevalu
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.:11.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.: 35900
## Median :15.1 Median :11.7 Median : 5.30 Median : 44400
## Mean :16.5 Mean :13.1 Mean : 5.88 Mean : 52066
## 3rd Qu.:20.4 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.: 58600
## Max. :63.1 Max. :53.4 Max. :37.90 Max. :500001
##
## popdens Obese Noins HISP_LAT
## Min. : 0 Min. :0.000 Min. :0.000 Min. : 0.00
## 1st Qu.: 15 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 0.90
## Median : 39 Median :0.340 Median :0.120 Median : 1.80
## Mean : 194 Mean :0.335 Mean :0.129 Mean : 6.19
## 3rd Qu.: 93 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 5.10
## Max. :53801 Max. :0.630 Max. :0.410 Max. :97.50
##
## MEDAGE2000 PEROVER65
## Min. : 0.0 Min. : 0.0
## 1st Qu.:35.2 1st Qu.:12.1
## Median :37.4 Median :14.4
## Mean :37.4 Mean :14.8
## 3rd Qu.:39.8 3rd Qu.:17.1
## Max. :54.3 Max. :34.7
##
# In general, use the @data slot to work with just the attribute table.
# Summarising it reports only the attribute columns.
summary(USA@data)
## SP_ID NAME STATE_NAME STATE_FIPS
## 0 : 1 Washington: 32 Texas : 254 48 : 254
## 1 : 1 Jefferson : 26 Georgia : 159 13 : 159
## 10 : 1 Franklin : 25 Virginia: 134 51 : 134
## 100 : 1 Jackson : 24 Kentucky: 120 21 : 120
## 1000 : 1 Lincoln : 24 Missouri: 115 29 : 115
## 1001 : 1 Madison : 20 Kansas : 105 20 : 105
## (Other):3102 (Other) :2957 (Other) :2221 (Other):2221
## CNTY_FIPS FIPS AREA FIPS_num
## 001 : 48 01001 : 1 Min. : 2 Min. : 1001
## 003 : 48 01003 : 1 1st Qu.: 435 1st Qu.:19046
## 005 : 48 01005 : 1 Median : 622 Median :29214
## 009 : 47 01007 : 1 Mean : 966 Mean :30686
## 007 : 46 01009 : 1 3rd Qu.: 931 3rd Qu.:46010
## 011 : 46 01011 : 1 Max. :20175 Max. :56045
## (Other):2825 (Other):3102
## Bush Kerry County_F Nader
## Min. : 65 Min. : 12 Min. : 1001 Min. : 0
## 1st Qu.: 2941 1st Qu.: 1782 1st Qu.:19046 1st Qu.: 0
## Median : 6364 Median : 4041 Median :29214 Median : 14
## Mean : 19073 Mean : 17957 Mean :30686 Mean : 145
## 3rd Qu.: 15924 3rd Qu.: 10434 3rd Qu.:46010 3rd Qu.: 67
## Max. :954764 Max. :1670341 Max. :56045 Max. :13251
##
## Total Bush_pct Kerry_pct Nader_pct
## Min. : 77 Min. : 9.31 Min. : 7.17 Min. :0.000
## 1st Qu.: 4831 1st Qu.:52.73 1st Qu.:30.23 1st Qu.:0.000
## Median : 10416 Median :61.17 Median :38.49 Median :0.303
## Mean : 37176 Mean :60.66 Mean :38.94 Mean :0.401
## 3rd Qu.: 26599 3rd Qu.:69.37 3rd Qu.:46.79 3rd Qu.:0.633
## Max. :2625105 Max. :92.83 Max. :90.05 Max. :4.467
##
## MDratio pcturban pctfemhh pcincome
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 37.3 1st Qu.: 0.0 1st Qu.: 9.6 1st Qu.:15474
## Median : 65.6 Median : 33.5 Median :12.2 Median :17450
## Mean : 93.1 Mean : 35.3 Mean :13.0 Mean :17805
## 3rd Qu.: 117.6 3rd Qu.: 56.5 3rd Qu.:15.4 3rd Qu.:19818
## Max. :2189.5 Max. :100.0 Max. :41.1 Max. :58096
##
## pctpoor pctcoled unemploy homevalu
## Min. : 0.0 Min. : 0.0 Min. : 0.00 Min. : 0
## 1st Qu.:11.1 1st Qu.: 9.0 1st Qu.: 3.90 1st Qu.: 35900
## Median :15.1 Median :11.7 Median : 5.30 Median : 44400
## Mean :16.5 Mean :13.1 Mean : 5.88 Mean : 52066
## 3rd Qu.:20.4 3rd Qu.:15.3 3rd Qu.: 7.20 3rd Qu.: 58600
## Max. :63.1 Max. :53.4 Max. :37.90 Max. :500001
##
## popdens Obese Noins HISP_LAT
## Min. : 0 Min. :0.000 Min. :0.000 Min. : 0.00
## 1st Qu.: 15 1st Qu.:0.320 1st Qu.:0.100 1st Qu.: 0.90
## Median : 39 Median :0.340 Median :0.120 Median : 1.80
## Mean : 194 Mean :0.335 Mean :0.129 Mean : 6.19
## 3rd Qu.: 93 3rd Qu.:0.360 3rd Qu.:0.150 3rd Qu.: 5.10
## Max. :53801 Max. :0.630 Max. :0.410 Max. :97.50
##
## MEDAGE2000 PEROVER65
## Min. : 0.0 Min. : 0.0
## 1st Qu.:35.2 1st Qu.:12.1
## Median :37.4 Median :14.4
## Mean :37.4 Mean :14.8
## 3rd Qu.:39.8 3rd Qu.:17.1
## Max. :54.3 Max. :34.7
##
# The only difference between the two summaries is that summary(USA), on the
# full spatial object, adds the coordinates and projection; summary(USA@data)
# reports only the attribute columns.
Color Brewer Palettes
# RColorBrewer supplies the colour palettes used to shade the thematic maps.
# Show every palette that is available.
display.brewer.all()
# brewer.pal(n, name) builds a palette of n colours from the named scheme;
# here, a 7-colour "Spectral" palette.
pal7 <- brewer.pal(n = 7, name = "Spectral")
# Preview those seven colours as swatches.
display.brewer.pal(n = 7, name = "Spectral")
# Print the palette itself: a character vector of hex colour codes, which is
# much less readable than the swatches.
print(pal7)
## [1] "#D53E4F" "#FC8D59" "#FEE08B" "#FFFFBF" "#E6F598" "#99D594" "#3288BD"
Classifying Data using classIntervals() and findColours()
# SS: Add a column holding the share of all votes that went to G.W. Bush in
# 2004. It is computed as Bush / Total, so it is a proportion rather than a
# 0-100 percentage.
USA$BushPct <- with(USA@data, Bush / Total)
# Build the classification with classIntervals(DATA, n = #OFCLASSES,
# style = 'STYLE'). Styles include the basics such as 'quantile', 'equal',
# 'fixed', 'sd', 'jenks', and 'pretty'.
cats7 <- classIntervals(
  USA$BushPct,
  n = 7,
  style = "quantile"
)
# Print the bins of the classification.
print(cats7)
## style: quantile
## [0.09308,0.4746) [0.4746,0.5415) [0.5415,0.5907) [0.5907,0.6336)
## 444 444 444 444
## [0.6336,0.6801) [0.6801,0.7421) [0.7421,0.9283]
## 444 444 444
# Link the colour palette to the categories with
# findColours(CATEGORIES, PALETTE).
SevenColors <- findColours(cats7, pal7)
# Draw the map from the classified data; the col = argument supplies the
# colouring scheme.
plot(
  USA,
  col = SevenColors,
  main = "2004 Voting Percentage for Bush"
)
Mapping Deviations from the Mean
# Create a new column holding the standardised Bush vote share (Z scores):
# subtract the mean share and divide by the standard deviation, ignoring
# NAs in both.
USA$BushPctZ <- (USA$BushPct - mean(USA$BushPct, na.rm = TRUE)) /
  sd(USA$BushPct, na.rm = TRUE)
# Create a palette and class intervals for the standardised column, then map
# it.
# NOTE: quantile classes depend only on rank order, and a Z score is an
# order-preserving rescaling of BushPct, so counties fall into the same
# seven classes as with the raw share. Only the break values change.
pal7 <- brewer.pal(7, "Spectral")
cats7 <- classIntervals(USA$BushPctZ, n = 7, style = "quantile")
SevenColors <- findColours(cats7, pal7)
# The title names the standardised variable, so this map is not mistaken
# for a map of the raw vote share.
plot(USA, col = SevenColors, main = "2004 Bush Vote Share (Z Scores)")
Null Hypothesis: The model does not explain the variation in the percentage vote for George Bush better than the mean percentage.
Note that this is the null hypothesis for every regression model performed in this section. There is also an additional null hypothesis in the ANOVA for each independent variable. In that case, the null is always that the coefficient for that variable is 0, making RSS = MSS.
Trial 1
# Model 1 regresses the Bush vote share on four county characteristics: the
# percentage of the population that is urban, the percentage of households
# that are female-headed, the percentage that is 'poor', and the median age
# in 2000.
lm1 <- lm(
  BushPct ~ pcturban + pctfemhh + pctpoor + MEDAGE2000,
  data = USA
)
# The adj. R^2 is 0.2402
summary(lm1)
##
## Call:
## lm(formula = BushPct ~ pcturban + pctfemhh + pctpoor + MEDAGE2000,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5688 -0.0732 0.0108 0.0807 0.2666
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.76e-01 2.40e-02 32.33 <2e-16 ***
## pcturban -6.05e-05 8.35e-05 -0.72 0.469
## pctfemhh -1.32e-02 5.10e-04 -25.95 <2e-16 ***
## pctpoor 2.76e-03 3.29e-04 8.38 <2e-16 ***
## MEDAGE2000 -1.10e-03 5.57e-04 -1.98 0.048 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3103 degrees of freedom
## Multiple R-squared: 0.241, Adjusted R-squared: 0.24
## F-statistic: 247 on 4 and 3103 DF, p-value: <2e-16
Reaction: I expected poorer people to prefer the policies of Democrats, who are socially and fiscally liberal, and I expected female-headed households to represent socially progressive counties, since the Republican Party sells itself as a party of tradition. The coefficient for pctfemhh is indeed negative, indicating that a higher percentage of female-headed households is associated with fewer votes for George Bush. The coefficient for pctpoor, however, is positive (2.76e-03): holding the other variables constant, a higher poverty rate is associated with slightly more votes for Bush, which runs against that expectation. Median age has a negative coefficient, so counties with a higher median age are less likely to vote for George Bush, which honestly is the opposite of what I would expect based on the “tradition” argument. Percent urban is not significant. I know that in states like Pennsylvania and Ohio the cities tend to vote more Democratic, but then again, a lot of counties have one midsized city and a lot of countryside, so that may mask the urban-rural split.
Observing Residuals
# Residuals and predicted values can be pulled out of a model with the
# extractor functions (resid()/residuals() and predict()), or read straight
# off the object as MODEL$residuals and MODEL$fitted.values.
lm1.resid <- residuals(lm1)
lm1.predicted <- predict(lm1)
# Plot the residuals against the predicted y values to check whether they
# are homoskedastic. We also want to see them centred on zero with no trend.
# These residuals show that we have many problems; spatial dependence is
# perhaps the biggest variable omitted from the model.
plot(
  lm1.resid ~ lm1.predicted,
  main = "Model 1 Residuals",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
Trial 2
# Model 2 regresses the Bush vote share on two predictors: the percentage of
# households headed by females and home value.
lm2 <- lm(BushPct ~ pctfemhh + homevalu, data = USA)
summary(lm2) # Adjusted R^2 is 0.255, a little better
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6082 -0.0731 0.0089 0.0779 0.3558
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.77e-01 5.60e-03 138.9 <2e-16 ***
## pctfemhh -1.01e-02 3.61e-04 -27.9 <2e-16 ***
## homevalu -7.60e-07 6.01e-08 -12.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.109 on 3105 degrees of freedom
## Multiple R-squared: 0.256, Adjusted R-squared: 0.255
## F-statistic: 533 on 2 and 3105 DF, p-value: <2e-16
Observing Residuals
# Extract the residuals and the predicted values for model 2.
lm2.resid <- residuals(lm2)
lm2.predicted <- predict(lm2)
# Plot the residuals against the fitted y values.
plot(
  lm2.resid ~ lm2.predicted,
  main = "Model 2 Residuals",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
Plotting Residuals on a Map
# SS: Since the residuals have something strange going on, we may have an
# omitted variable.
# SS: Store the model 2 residuals in a new column of USA so that they can be
# mapped.
USA$resid <- residuals(lm2)
# Classify the residuals into three quantile classes and colour them with a
# 3-colour "Spectral" palette.
pal3 <- brewer.pal(n = 3, name = "Spectral")
cats3 <- classIntervals(
  USA$resid,
  n = 3,
  style = "quantile"
)
ThreeColors <- findColours(cats3, pal3)
# Reading the map: the upper Midwest, the Northwest coast, and New England
# are all over-predicted (negative residuals), while the southern Great
# Plains are under-predicted (positive residuals).
plot(USA, col = ThreeColors)
Hereafter, the goal of this exercise is to improve on the model above by explaining more of the variation in the county vote percentage for George Bush in 2004 and to improve the distribution of the residuals for the model.
Trial 3
# Model 3 adds obesity as a proxy for location. The percent obesity is a
# little higher in the South than in other regions, and I know the South is
# generally under-predicted in previous models.
lm3 <- lm(BushPct ~ pctfemhh + homevalu + Obese, data = USA)
summary(lm3) # The adjusted R^2 is 0.2831, the best so far.
##
## Call:
## lm(formula = BushPct ~ pctfemhh + homevalu + Obese, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4962 -0.0770 0.0073 0.0790 0.3871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.65e-01 1.15e-02 57.8 <2e-16 ***
## pctfemhh -1.11e-02 3.65e-04 -30.3 <2e-16 ***
## homevalu -7.99e-07 5.90e-08 -13.5 <2e-16 ***
## Obese 3.79e-01 3.42e-02 11.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.107 on 3104 degrees of freedom
## Multiple R-squared: 0.284, Adjusted R-squared: 0.283
## F-statistic: 410 on 3 and 3104 DF, p-value: <2e-16
# Residuals against fitted values for model 3: the residuals are still a
# blob.
plot(
  lm3$residuals ~ lm3$fitted.values,
  main = "Model 3 Residuals",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
Trial 4
# Model 4 swaps home value for the percentage of the population that is
# Latino/Hispanic. Theoretically, immigration issues would push them
# Democratic.
lm4 <- lm(BushPct ~ pctfemhh + HISP_LAT + Obese, data = USA)
summary(lm4) # Adjusted R^2 is 0.2423. No improvement here.
##
## Call:
## lm(formula = BushPct ~ pctfemhh + HISP_LAT + Obese, data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4674 -0.0772 0.0083 0.0819 0.2620
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.635623 0.011881 53.5 <2e-16 ***
## pctfemhh -0.011757 0.000373 -31.5 <2e-16 ***
## HISP_LAT 0.000398 0.000166 2.4 0.016 *
## Obese 0.362561 0.035380 10.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.11 on 3104 degrees of freedom
## Multiple R-squared: 0.243, Adjusted R-squared: 0.242
## F-statistic: 332 on 3 and 3104 DF, p-value: <2e-16
Trial 5
# Take a closer look at a single variable: pctfemhh. In the scatter plot it
# shows a better correlation with the Bush vote share than most variables.
plot(
  BushPct ~ pctfemhh,
  data = USA,
  main = "BushPct v. femhh",
  xlab = "% Households Led by Female",
  ylab = "% Voting for Bush"
)
# Fit the one-variable model so that its residuals can be checked.
lm5 <- lm(BushPct ~ pctfemhh, data = USA)
# par(mfrow = c(#rows, #columns)) lays the following plots out in a grid.
# The residuals show that the relation is not linear -- a quadratic
# relationship would be better.
par(mfrow = c(2, 2))
plot(lm5, main = "Model 5 Plots")
Trial 6
# With that in mind, I'll improve on lm3 by including a quadratic term for
# pctfemhh:
lm6 <- lm(BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu, data = USA)
summary(lm6) # Adjusted R^2 of 0.2957 is the best so far. Woohoo!
##
## Call:
## lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu,
## data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4693 -0.0755 0.0071 0.0765 0.4674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.38e-01 1.20e-02 53.41 < 2e-16 ***
## pctfemhh -1.94e-03 1.26e-03 -1.54 0.12
## I(pctfemhh^2) -2.75e-04 3.66e-05 -7.52 7.4e-14 ***
## Obese 2.82e-01 3.62e-02 7.80 8.2e-15 ***
## homevalu -8.88e-07 5.97e-08 -14.88 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.106 on 3103 degrees of freedom
## Multiple R-squared: 0.297, Adjusted R-squared: 0.296
## F-statistic: 327 on 4 and 3103 DF, p-value: <2e-16
# Residuals against fitted values for model 6, drawn on a single-panel
# layout.
# The formula form puts the residuals on the y-axis and the fitted values on
# the x-axis, which is what the axis labels say. Passing the two vectors
# positionally as (residuals, fitted.values) would make the residuals the
# x variable instead; the y-axis, although labelled "Residuals", would then
# show fitted values and appear centred far from zero.
par(mfrow = c(1, 1))
plot(lm6$residuals ~ lm6$fitted.values, main = "Model 6 Residuals", xlab = "Fitted Values",
ylab = "Residuals")
Trial 7
# Now look at college education. College-educated people are known for being
# less traditional, and so perhaps less likely to vote for the socially
# conservative party.
# Two scatter plots side by side.
par(mfrow = c(1, 2))
# Left: Bush vote share against percent college educated -- another negative
# correlation.
plot(
  BushPct ~ pctcoled,
  data = USA,
  main = "BushPct v. pctcoled",
  xlab = "% College Educated",
  ylab = "% Voting for Bush"
)
# Right: percent college educated against percent female-headed households
# -- the two do not look correlated with each other.
plot(
  pctcoled ~ pctfemhh,
  data = USA,
  main = "pctcoled v. pctfemhh",
  xlab = "% Households Led by Females",
  ylab = "% College Educated"
)
# One-variable model on pctcoled, fitted to inspect its residuals.
lm7 <- lm(BushPct ~ pctcoled, data = USA)
# Back to a single panel. This variable also seems to be better explained
# with a quadratic.
par(mfrow = c(1, 1))
plot(
  lm7$residuals ~ lm7$fitted.values,
  main = "Model 7 Residuals",
  xlab = "Fitted Values",
  ylab = "Residuals"
)
Trial 8
# Model 8 regresses the Bush vote share on pctfemhh and pctcoled (each with
# a quadratic term), plus Obese and homevalu.
lm8 <- lm(BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu + pctcoled +
I(pctcoled^2), data = USA)
summary(lm8) # I'm over 30% for adj. R^2, so I'm pretty happy for this data set.
##
## Call:
## lm(formula = BushPct ~ pctfemhh + I(pctfemhh^2) + Obese + homevalu +
## pctcoled + I(pctcoled^2), data = USA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4425 -0.0730 0.0058 0.0736 0.5377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.11e-01 1.21e-02 50.34 < 2e-16 ***
## pctfemhh -3.55e-03 1.25e-03 -2.85 0.0045 **
## I(pctfemhh^2) -2.28e-04 3.61e-05 -6.32 3.0e-10 ***
## Obese 1.79e-01 3.69e-02 4.85 1.3e-06 ***
## homevalu -6.55e-07 7.79e-08 -8.41 < 2e-16 ***
## pctcoled 9.26e-03 9.91e-04 9.34 < 2e-16 ***
## I(pctcoled^2) -2.83e-04 2.52e-05 -11.21 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.104 on 3101 degrees of freedom
## Multiple R-squared: 0.325, Adjusted R-squared: 0.324
## F-statistic: 249 on 6 and 3101 DF, p-value: <2e-16
# Residuals against fitted values for model 8. These are the best residuals
# I've seen, although still atrocious. At least they're back to a mean
# around 0! Final Count: 0.3241.
plot(
  lm8$residuals ~ lm8$fitted.values,
  main = "Model 8 Residuals",
  xlab = "Fitted Values",
  ylab = "Residuals"
)