Update model
- For this project we wanted a quadratic term; the CR plot above shows us some good candidates for quadratic terms.
- We also want an interaction effect between a dichotomous variable and a factor column, so let's add that.
- Check in with our model summary and plots again.
# Extend the baseline model in a single formula update:
#   + age_strata * income_levels   both main effects plus their interaction
#   + I(O^3)                       cubic term in O
#   + I(percent_white^2)           quadratic term in percent_white
#   - median_rent - median_age     predictors removed from the model
fit <- update(
  my_fit,
  . ~ . + age_strata * income_levels + I(O^3) + I(percent_white^2) -
    median_rent - median_age
)

# layout() only arranges the device as a 2 x 2 grid; plot(fit) is what
# actually draws the diagnostic plots for the refitted model.
layout(matrix(c(1, 2, 3, 4), 2, 2))
plot(fit)
summary(fit)
##
## Call:
## lm(formula = Dem.pct ~ O + percent_white + percent_black + percent_asian +
## percent_hispanic + per_capita_income + voter_turnout + county_age +
## age_strata + income_levels + I(O^3) + I(percent_white^2) +
## age_strata:income_levels, data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.51629 -0.05804 -0.00562 0.05352 0.37018
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 6.904e-01 3.017e-02 22.882
## O 4.351e-06 5.276e-07 8.246
## percent_white -1.494e-02 6.027e-04 -24.792
## percent_black 2.360e-03 2.672e-04 8.831
## percent_asian 8.392e-03 9.943e-04 8.440
## percent_hispanic -1.678e-03 2.763e-04 -6.072
## per_capita_income 6.284e-06 9.725e-07 6.461
## voter_turnout 2.613e-01 3.068e-02 8.518
## county_age 2.101e-10 3.586e-10 0.586
## age_strataover_40 1.378e-03 1.706e-02 0.081
## income_levels5-25% -1.556e-02 1.171e-02 -1.328
## income_levels25-50% -2.151e-02 1.312e-02 -1.640
## income_levels50-75% -3.720e-03 1.491e-02 -0.249
## income_levels75-95% -1.549e-02 1.762e-02 -0.879
## income_levelstop 5% -4.275e-02 2.639e-02 -1.620
## I(O^3) -1.543e-16 1.967e-17 -7.844
## I(percent_white^2) 8.038e-05 4.122e-06 19.500
## age_strataover_40:income_levels5-25% -1.219e-02 1.841e-02 -0.662
## age_strataover_40:income_levels25-50% 6.274e-04 1.824e-02 0.034
## age_strataover_40:income_levels50-75% -2.120e-02 1.849e-02 -1.147
## age_strataover_40:income_levels75-95% -1.905e-02 1.874e-02 -1.017
## age_strataover_40:income_levelstop 5% 7.965e-03 2.246e-02 0.355
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## O 2.40e-16 ***
## percent_white < 2e-16 ***
## percent_black < 2e-16 ***
## percent_asian < 2e-16 ***
## percent_hispanic 1.42e-09 ***
## per_capita_income 1.20e-10 ***
## voter_turnout < 2e-16 ***
## county_age 0.558
## age_strataover_40 0.936
## income_levels5-25% 0.184
## income_levels25-50% 0.101
## income_levels50-75% 0.803
## income_levels75-95% 0.379
## income_levelstop 5% 0.105
## I(O^3) 5.98e-15 ***
## I(percent_white^2) < 2e-16 ***
## age_strataover_40:income_levels5-25% 0.508
## age_strataover_40:income_levels25-50% 0.973
## age_strataover_40:income_levels50-75% 0.252
## age_strataover_40:income_levels75-95% 0.309
## age_strataover_40:income_levelstop 5% 0.723
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09315 on 3070 degrees of freedom
## Multiple R-squared: 0.6271, Adjusted R-squared: 0.6245
## F-statistic: 245.8 on 21 and 3070 DF, p-value: < 2.2e-16

- The model doesn't take well to the dichotomous × categorical interaction: none of the interaction terms is significant.
- I am going to drop this interaction, together with its two main effects.
- The transformation of column O didn't go as planned; I'm going to try a log transform below instead of the cubic term.
# Simplify the model in a single formula update:
#   - age_strata * income_levels   removes both main effects and the interaction
#   - I(O^3) - O                   removes the cubic and the linear O terms
#   + log(O + .0001)               log of O; the small offset presumably guards
#                                  against log(0) -- confirm O can be zero
# Disabled alternative, kept for reference:
#   fit <- update(fit, . ~ . + log(percent_asian + .0001))
fit <- update(
  fit,
  . ~ . - age_strata * income_levels - I(O^3) - O + log(O + .0001)
)
summary(fit)
##
## Call:
## lm(formula = Dem.pct ~ percent_white + percent_black + percent_asian +
## percent_hispanic + per_capita_income + voter_turnout + county_age +
## I(percent_white^2) + log(O + 1e-04), data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41048 -0.05110 -0.00713 0.04615 0.40520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.430e-01 2.378e-02 22.833 < 2e-16 ***
## percent_white -1.653e-02 4.746e-04 -34.841 < 2e-16 ***
## percent_black 2.666e-03 2.269e-04 11.748 < 2e-16 ***
## percent_asian 5.260e-03 8.307e-04 6.333 2.76e-10 ***
## percent_hispanic -1.408e-03 2.338e-04 -6.023 1.91e-09 ***
## per_capita_income 2.594e-06 4.050e-07 6.405 1.73e-10 ***
## voter_turnout 3.141e-01 2.515e-02 12.488 < 2e-16 ***
## county_age -7.093e-10 1.608e-10 -4.410 1.07e-05 ***
## I(percent_white^2) 9.125e-05 3.332e-06 27.386 < 2e-16 ***
## log(O + 1e-04) 4.028e-02 1.153e-03 34.929 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08049 on 3082 degrees of freedom
## Multiple R-squared: 0.7205, Adjusted R-squared: 0.7196
## F-statistic: 882.6 on 9 and 3082 DF, p-value: < 2.2e-16
# Split the plotting device into a 2 x 2 grid (panels numbered down the
# columns) and draw the diagnostic plots for the current model.
layout(matrix(1:4, nrow = 2, ncol = 2))
plot(fit)
