## 'data.frame': 3112 obs. of 82 variables:
## $ X.1 : int 30 31 32 33 34 35 36 37 38 39 ...
## $ X : int 29 30 31 32 33 34 35 36 37 38 ...
## $ combined_fips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ votes_dem_2016 : int 5908 18409 4848 1874 2150 3530 3716 13197 5763 1524 ...
## $ votes_gop_2016 : int 18110 72780 5431 6733 22808 1139 4891 32803 7803 8809 ...
## $ total_votes_2016 : int 24661 94090 10390 8748 25384 4701 8685 47376 13778 10503 ...
## $ Clinton : num 0.2396 0.1957 0.4666 0.2142 0.0847 ...
## $ Trump : num 0.734 0.774 0.523 0.77 0.899 ...
## $ diff_2016 : int 12202 54371 583 4859 20658 2391 1175 19606 2040 7285 ...
## $ per_point_diff_2016: num -0.4948 -0.5779 -0.0561 -0.5554 -0.8138 ...
## $ state_abbr : chr "AL" "AL" "AL" "AL" ...
## $ county_name : chr "Autauga County" "Baldwin County" "Barbour County" "Bibb County" ...
## $ FIPS : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ total_votes_2012 : int 23909 84988 11459 8391 23980 5318 9483 46240 14562 9761 ...
## $ votes_dem_2012 : int 6354 18329 5873 2200 2961 4058 4367 15500 6853 2126 ...
## $ votes_gop_2012 : int 17366 65772 5539 6131 20741 1250 5081 30272 7596 7494 ...
## $ county_fips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ state_fips : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Obama : num 0.266 0.216 0.513 0.262 0.123 ...
## $ Romney : num 0.726 0.774 0.483 0.731 0.865 ...
## $ diff_2012 : int 11012 47443 334 3931 17780 2808 714 14772 743 5368 ...
## $ per_point_diff_2012: num -0.4606 -0.5582 0.0291 -0.4685 -0.7415 ...
## $ fips : int 1001 1003 1005 1007 1009 1011 1013 1015 1017 1019 ...
## $ area_name : chr "Autauga County" "Baldwin County" "Barbour County" "Bibb County" ...
## $ state_abbreviation : chr "AL" "AL" "AL" "AL" ...
## $ population2014 : int 55395 200111 26887 22506 57719 10764 20296 115916 34076 26037 ...
## $ population2010 : int 54571 182265 27457 22919 57322 10915 20946 118586 34170 25986 ...
## $ population_change : num 1.5 9.8 -2.1 -1.8 0.7 -1.4 -3.1 -2.3 -0.3 0.2 ...
## $ POP010210 : int 54571 182265 27457 22915 57322 10914 20947 118572 34215 25989 ...
## $ AGE135214 : num 6 5.6 5.7 5.3 6.1 6.3 6.1 5.7 5.9 4.8 ...
## $ AGE295214 : num 25.2 22.2 21.2 21 23.6 21.4 23.6 22.2 21.4 20.4 ...
## $ age65plus : num 13.8 18.7 16.5 14.8 17 14.9 18 16 18.3 20.9 ...
## $ SEX255214 : num 51.4 51.2 46.6 45.9 50.5 45.3 53.6 51.8 52.3 50.2 ...
## $ White : num 0.779 0.871 0.502 0.763 0.96 0.269 0.539 0.758 0.583 0.93 ...
## $ Black : num 0.187 0.096 0.476 0.221 0.018 0.701 0.44 0.211 0.395 0.046 ...
## $ RHI325214 : num 0.5 0.7 0.6 0.4 0.6 0.8 0.4 0.5 0.3 0.5 ...
## $ RHI425214 : num 1.1 0.9 0.5 0.2 0.3 0.3 0.9 0.9 0.8 0.3 ...
## $ RHI525214 : num 0.1 0.1 0.2 0.1 0.1 0.7 0 0.1 0.1 0 ...
## $ RHI625214 : num 1.8 1.6 0.9 0.9 1.2 1.1 0.8 1.7 1.1 1.6 ...
## $ Hispanic : num 0.027 0.046 0.045 0.021 0.087 0.075 0.012 0.035 0.02 0.015 ...
## $ RHI825214 : num 75.6 83 46.6 74.5 87.8 22.1 53.1 72.9 56.8 91.6 ...
## $ POP715213 : num 85 82.1 84.8 86.6 88.7 84.7 94.6 83.6 85.8 90.6 ...
## $ POP645213 : num 1.6 3.6 2.9 1.2 4.3 5.4 0.8 2.4 1.1 0.7 ...
## $ NonEnglish : num 3.5 5.5 5 2.1 7.3 5.2 1.7 4.5 1.3 1.1 ...
## $ Edu_highschool : num 85.6 89.1 73.7 77.5 77 67.8 76.3 78.6 75.1 78.3 ...
## $ Edu_batchelors : num 20.9 27.7 13.4 12.1 12.1 12.5 14 16.1 11.8 12.8 ...
## $ VET605213 : int 5922 19346 2120 1327 4540 636 1497 11385 2691 2174 ...
## $ LFE305213 : num 26.2 25.9 24.6 27.6 33.9 26.9 24 22.5 24.6 26.9 ...
## $ HSG010214 : int 22751 107374 11799 8978 23826 4461 9916 53289 16894 16241 ...
## $ HSG445213 : num 76.8 72.6 67.7 79 81 74.3 70.3 68.7 67.9 76.1 ...
## $ HSG096213 : num 8.3 24.4 10.6 7.3 4.5 8.7 13.3 13.8 11.1 4.6 ...
## $ HSG495213 : int 136200 168600 89200 90500 117100 70600 74700 100600 81200 99400 ...
## $ HSD410213 : int 20071 73283 9200 7091 21108 3741 8235 45196 13722 11656 ...
## $ HSD310213 : num 2.71 2.52 2.66 3.03 2.7 2.73 2.47 2.54 2.46 2.2 ...
## $ Income : int 24571 26766 16829 17427 20730 18628 17403 20828 19291 22030 ...
## $ INC110213 : int 53682 50221 32911 36447 44145 32033 29918 39962 32402 34907 ...
## $ Poverty : num 12.1 13.9 26.7 18.1 15.8 21.6 28.4 21.9 24.1 21.2 ...
## $ BZA010213 : int 817 4871 464 275 660 112 393 2311 515 379 ...
## $ BZA110213 : int 10120 54988 6611 3145 6798 0 5711 34871 6431 3864 ...
## $ BZA115213 : num 2.1 3.7 -5.6 7.5 3.4 0 2.7 0.6 -0.2 5.5 ...
## $ NES010213 : int 2947 16508 1546 1126 3563 470 1095 6352 2354 1560 ...
## $ SBO001207 : int 4067 19035 1667 1385 4458 417 1769 8713 1981 2180 ...
## $ SBO315207 : num 15.2 2.7 0 14.9 0 0 0 7.2 0 0 ...
## $ SBO115207 : num 0 0.4 0 0 0 0 0 0 0 0 ...
## $ SBO215207 : num 1.3 1 0 0 0 0 3.3 1.6 0 0 ...
## $ SBO515207 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SBO415207 : num 0.7 1.3 0 0 0 0 0 0.5 0 0 ...
## $ SBO015207 : num 31.7 27.3 27 0 23.2 38.8 0 24.7 29.3 14.5 ...
## $ MAN450207 : int 0 1410273 0 0 341544 0 399132 2679991 667283 307439 ...
## $ WTN220207 : int 0 0 0 0 0 0 56712 0 0 62293 ...
## $ RTN130207 : int 598175 2966489 188337 124707 319700 43810 229277 1542981 264650 186321 ...
## $ RTN131207 : int 12003 17166 6334 5804 5622 3995 11326 13678 7620 7613 ...
## $ AFN120207 : int 88157 436955 0 10757 20941 3670 28427 186533 23237 13948 ...
## $ BPS030214 : int 131 1384 8 19 3 1 2 114 8 2 ...
## $ LND110210 : num 594 1590 885 623 645 ...
## $ Density : num 91.8 114.6 31 36.8 88.9 ...
## $ Clinton_Obama : num -0.0262 -0.02 -0.0459 -0.048 -0.0388 ...
## $ Trump_Romney : num 0.008021 -0.000383 0.039339 0.038998 0.03359 ...
## $ Trump_Prediction : num 0.621 0.587 0.518 0.692 0.79 ...
## $ Clinton_Prediction : num 0.34 0.36 0.475 0.286 0.177 ...
## $ Trump_Deviation : num -0.1135 -0.18677 -0.00488 -0.07743 -0.10887 ...
## $ Clinton_Deviation : num 0.10092 0.16385 0.00809 0.07181 0.09279 ...
## 'data.frame': 3112 obs. of 18 variables:
## $ DemPercentRepPercent: num -0.4948 -0.5779 -0.0561 -0.5554 -0.8138 ...
## $ State : chr "AL" "AL" "AL" "AL" ...
## $ County : chr "Autauga County" "Baldwin County" "Barbour County" "Bibb County" ...
## $ PopulationChange : num 1.5 9.8 -2.1 -1.8 0.7 -1.4 -3.1 -2.3 -0.3 0.2 ...
## $ PercentAgeUnder5 : num 6 5.6 5.7 5.3 6.1 6.3 6.1 5.7 5.9 4.8 ...
## $ PercentAgeUnder18 : num 25.2 22.2 21.2 21 23.6 21.4 23.6 22.2 21.4 20.4 ...
## $ PercentAgeOver65 : num 13.8 18.7 16.5 14.8 17 14.9 18 16 18.3 20.9 ...
## $ MedianHouseIncome : int 53682 50221 32911 36447 44145 32033 29918 39962 32402 34907 ...
## $ NumberFirms : int 4067 19035 1667 1385 4458 417 1769 8713 1981 2180 ...
## $ PercentBlackFirms : num 15.2 2.7 0 14.9 0 0 0 7.2 0 0 ...
## $ PercentNAFirms : num 0 0.4 0 0 0 0 0 0 0 0 ...
## $ PercentAsianFirms : num 1.3 1 0 0 0 0 3.3 1.6 0 0 ...
## $ PercentHawaiianFirms: num 0 0 0 0 0 0 0 0 0 0 ...
## $ PercentHispanicFirms: num 0.7 1.3 0 0 0 0 0 0.5 0 0 ...
## $ PercentWomenFirms : num 31.7 27.3 27 0 23.2 38.8 0 24.7 29.3 14.5 ...
## $ Density : num 91.8 114.6 31 36.8 88.9 ...
## $ PercentHSGrad : num 85.6 89.1 73.7 77.5 77 67.8 76.3 78.6 75.1 78.3 ...
## $ PercentCollegeGrad : num 20.9 27.7 13.4 12.1 12.1 12.5 14 16.1 11.8 12.8 ...
Null Hypothesis (H0) = None of our independent variables contributes significantly to our model (all slope coefficients are equal to zero)
Alternative Hypothesis (H1) = At least one of our independent variables contributes significantly to our model (at least one slope coefficient is not zero)
##
## Call:
## lm(formula = DemPercentRepPercent ~ PopulationChange + PercentAgeUnder5 +
## PercentAgeUnder18 + PercentAgeOver65 + MedianHouseIncome +
## PercentBlackFirms + PercentNAFirms + PercentAsianFirms +
## PercentHispanicFirms + PercentWomenFirms + Density + PercentHSGrad +
## PercentCollegeGrad, data = relevant_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76022 -0.13809 -0.01807 0.12032 1.11070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.878e-01 7.229e-02 3.980 7.04e-05 ***
## PopulationChange -1.229e-02 1.092e-03 -11.258 < 2e-16 ***
## PercentAgeUnder5 3.294e-02 6.662e-03 4.945 8.03e-07 ***
## PercentAgeUnder18 -2.616e-02 2.420e-03 -10.808 < 2e-16 ***
## PercentAgeOver65 -1.372e-02 1.233e-03 -11.133 < 2e-16 ***
## MedianHouseIncome -2.826e-06 5.269e-07 -5.363 8.80e-08 ***
## PercentBlackFirms 1.354e-02 5.754e-04 23.527 < 2e-16 ***
## PercentNAFirms 1.518e-02 1.328e-03 11.433 < 2e-16 ***
## PercentAsianFirms 1.411e-02 1.737e-03 8.124 6.43e-16 ***
## PercentHispanicFirms 1.086e-02 6.517e-04 16.662 < 2e-16 ***
## PercentWomenFirms 2.284e-03 3.156e-04 7.236 5.80e-13 ***
## Density 3.426e-06 2.307e-06 1.485 0.138
## PercentHSGrad -3.215e-03 7.970e-04 -4.034 5.62e-05 ***
## PercentCollegeGrad 1.652e-02 7.242e-04 22.807 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.204 on 3098 degrees of freedom
## Multiple R-squared: 0.5654, Adjusted R-squared: 0.5636
## F-statistic: 310 on 13 and 3098 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = DemPercentRepPercent ~ PercentAgeUnder18 + PercentAgeOver65 +
## PercentBlackFirms + PercentNAFirms + PercentHispanicFirms +
## PercentCollegeGrad, data = relevant_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67772 -0.14518 -0.02404 0.12196 1.21274
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2102684 0.0543131 3.871 0.00011 ***
## PercentAgeUnder18 -0.0241880 0.0015322 -15.787 < 2e-16 ***
## PercentAgeOver65 -0.0162217 0.0011926 -13.602 < 2e-16 ***
## PercentBlackFirms 0.0168902 0.0005766 29.291 < 2e-16 ***
## PercentNAFirms 0.0170474 0.0013857 12.302 < 2e-16 ***
## PercentHispanicFirms 0.0130834 0.0006403 20.432 < 2e-16 ***
## PercentCollegeGrad 0.0117063 0.0004757 24.611 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2163 on 3105 degrees of freedom
## Multiple R-squared: 0.5104, Adjusted R-squared: 0.5095
## F-statistic: 539.5 on 6 and 3105 DF, p-value: < 2.2e-16
# Fit the 13-predictor linear regression (the same formula as the earlier
# fit on relevant_1) using only the rows in trainSet.
# NOTE(review): trainSet is presumably the training split created earlier in
# the document -- confirm against the chunk that defines it.
model2 <- lm(
  DemPercentRepPercent ~
    PopulationChange +
    PercentAgeUnder5 + PercentAgeUnder18 + PercentAgeOver65 +
    MedianHouseIncome +
    PercentBlackFirms + PercentNAFirms + PercentAsianFirms +
    PercentHispanicFirms + PercentWomenFirms +
    Density +
    PercentHSGrad + PercentCollegeGrad,
  data = trainSet
)

# Show the coefficient table, residual summary and overall F-test.
summary(model2)
##
## Call:
## lm(formula = DemPercentRepPercent ~ PopulationChange + PercentAgeUnder5 +
## PercentAgeUnder18 + PercentAgeOver65 + MedianHouseIncome +
## PercentBlackFirms + PercentNAFirms + PercentAsianFirms +
## PercentHispanicFirms + PercentWomenFirms + Density + PercentHSGrad +
## PercentCollegeGrad, data = trainSet)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76595 -0.13878 -0.01657 0.12156 1.06828
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.861e-01 8.654e-02 3.306 0.000962 ***
## PopulationChange -1.200e-02 1.281e-03 -9.367 < 2e-16 ***
## PercentAgeUnder5 3.349e-02 7.848e-03 4.267 2.06e-05 ***
## PercentAgeUnder18 -2.514e-02 2.877e-03 -8.736 < 2e-16 ***
## PercentAgeOver65 -1.341e-02 1.491e-03 -8.995 < 2e-16 ***
## MedianHouseIncome -2.941e-06 6.327e-07 -4.648 3.56e-06 ***
## PercentBlackFirms 1.323e-02 6.983e-04 18.953 < 2e-16 ***
## PercentNAFirms 1.365e-02 1.650e-03 8.274 2.24e-16 ***
## PercentAsianFirms 1.236e-02 2.099e-03 5.891 4.45e-09 ***
## PercentHispanicFirms 1.078e-02 7.811e-04 13.802 < 2e-16 ***
## PercentWomenFirms 2.398e-03 3.759e-04 6.379 2.17e-10 ***
## Density 9.196e-06 3.790e-06 2.426 0.015338 *
## PercentHSGrad -3.612e-03 9.556e-04 -3.780 0.000161 ***
## PercentCollegeGrad 1.700e-02 8.561e-04 19.858 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2035 on 2164 degrees of freedom
## Multiple R-squared: 0.5682, Adjusted R-squared: 0.5656
## F-statistic: 219 on 13 and 2164 DF, p-value: < 2.2e-16
## [1] 0.2028
## [1] 0.2063212
# Hypothetical county 1: score a single made-up row with model2.
# The response (DemPercentRepPercent) matches per_point_diff_2016 in the str()
# output, which appears to be Clinton share minus Trump share, so a positive
# prediction indicates a Democratic lead.
predict(
  model2,
  newdata = data.frame(
    # population trend and age structure
    PopulationChange = -8,
    PercentAgeUnder5 = 8, PercentAgeUnder18 = 15, PercentAgeOver65 = 10,
    # income
    MedianHouseIncome = 35000,
    # firm ownership shares
    PercentBlackFirms = 15, PercentNAFirms = 3, PercentAsianFirms = 10,
    PercentHispanicFirms = 15, PercentWomenFirms = 40,
    # population density
    Density = 4000,
    # educational attainment
    PercentHSGrad = 80, PercentCollegeGrad = 35
  )
)
## 1
## 0.9994977
# Hypothetical county 2: score a single made-up row with model2, using values
# chosen at the opposite end from the first scenario (growing, older, high
# income, no minority- or women-owned firms, very low density).
# The response (DemPercentRepPercent) matches per_point_diff_2016 in the str()
# output, which appears to be Clinton share minus Trump share, so a negative
# prediction indicates a Republican lead.
predict(
  model2,
  newdata = data.frame(
    # population trend and age structure
    PopulationChange = 8,
    PercentAgeUnder5 = 2, PercentAgeUnder18 = 25, PercentAgeOver65 = 30,
    # income
    MedianHouseIncome = 100000,
    # firm ownership shares
    PercentBlackFirms = 0, PercentNAFirms = 0, PercentAsianFirms = 0,
    PercentHispanicFirms = 0, PercentWomenFirms = 0,
    # population density
    Density = 1,
    # educational attainment
    PercentHSGrad = 50, PercentCollegeGrad = 15
  )
)
## 1
## -0.9932248