library(readr)
library(tidyverse)
# Read the analysis data from OR_Data.csv (path is relative to the working
# directory).
OR_Data <- readr::read_csv(file = "OR_Data.csv")
library(leaps)
response: TractSNAP - number of housing units receiving SNAP benefits
explanatory: TractLOWI - total low income population
categorical: as.factor(HUNVFlag) - whether or not the census tract has low vehicle access
# Simple linear regression of TractSNAP on TractLOWI; print the coefficient
# table and fit statistics.
mod1 <- lm(formula = TractSNAP ~ TractLOWI, data = OR_Data)
summary(mod1)
##
## Call:
## lm(formula = TractSNAP ~ TractLOWI, data = OR_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -725.51 -58.50 -10.91 49.46 764.93
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.074361 7.734538 -0.656 0.512
## TractLOWI 0.208255 0.004197 49.624 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 115.1 on 824 degrees of freedom
## Multiple R-squared: 0.7493, Adjusted R-squared: 0.749
## F-statistic: 2463 on 1 and 824 DF, p-value: < 2.2e-16
# Scatter plot of TractSNAP against TractLOWI, coloured by the HUNVFlag factor,
# with the fitted simple-regression line overlaid in blue. The line is drawn
# from the coefficients stored in mod1 rather than hand-copied rounded numbers,
# so it cannot drift out of sync with the model if the data or fit change.
ggplot(OR_Data, aes(TractLOWI, TractSNAP, color = as.factor(HUNVFlag))) +
  geom_point() +
  geom_abline(
    intercept = coef(mod1)[["(Intercept)"]],
    slope = coef(mod1)[["TractLOWI"]],
    color = "blue"
  )
The relationship between TractSNAP and TractLOWI is statistically significant: the p-value for the TractLOWI slope is less than 2e-16.
# Show the dummy coding R uses for HUNVFlag once it is treated as a factor.
with(OR_Data, contrasts(as.factor(HUNVFlag)))
## 1
## 0 0
## 1 1
x2 = {0 if not a low vehicle access tract, 1 otherwise}
# Model with the HUNVFlag factor as the only predictor of TractSNAP, followed
# by its ANOVA table.
mod2 <- lm(formula = TractSNAP ~ as.factor(HUNVFlag), data = OR_Data)
anova(mod2)
## Analysis of Variance Table
##
## Response: TractSNAP
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(HUNVFlag) 1 5542228 5542228 120.08 < 2.2e-16 ***
## Residuals 824 38031816 46155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Side-by-side box plots of TractSNAP, one box per HUNVFlag level, filled by
# that same level.
ggplot(
  data = OR_Data,
  mapping = aes(
    x = as.factor(HUNVFlag),
    y = TractSNAP,
    fill = as.factor(HUNVFlag)
  )
) +
  geom_boxplot()
The mean of TractSNAP differs significantly between the two levels of HUNVFlag (F = 120.08, p < 2.2e-16).
# Parallel-lines model: a common TractLOWI slope plus an intercept shift for
# the HUNVFlag factor.
mod3 <- lm(
  formula = TractSNAP ~ TractLOWI + as.factor(HUNVFlag),
  data = OR_Data
)
summary(mod3)
##
## Call:
## lm(formula = TractSNAP ~ TractLOWI + as.factor(HUNVFlag), data = OR_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -682.86 -60.14 -8.70 51.09 782.79
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.776483 7.654497 -0.493 0.622
## TractLOWI 0.201866 0.004394 45.937 < 2e-16 ***
## as.factor(HUNVFlag)1 48.005295 10.854084 4.423 1.11e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 113.9 on 823 degrees of freedom
## Multiple R-squared: 0.7551, Adjusted R-squared: 0.7545
## F-statistic: 1269 on 2 and 823 DF, p-value: < 2.2e-16
# Scatter plot with the two parallel fitted lines from mod3. Blue is the
# baseline level (HUNVFlag = 0); red is HUNVFlag = 1, whose intercept is the
# baseline intercept plus the factor coefficient. Both share one slope.
ggplot(
  data = OR_Data,
  mapping = aes(x = TractLOWI, y = TractSNAP, color = as.factor(HUNVFlag))
) +
  geom_point() +
  geom_abline(
    intercept = coef(mod3)[["(Intercept)"]],
    slope = coef(mod3)[["TractLOWI"]],
    color = "blue",
    lwd = 1
  ) +
  geom_abline(
    intercept = coef(mod3)[["(Intercept)"]] +
      coef(mod3)[["as.factor(HUNVFlag)1"]],
    slope = coef(mod3)[["TractLOWI"]],
    color = "red",
    lwd = 1
  )
models:
HUNVFlag = 0: y = -3.776483 + 0.201866x
HUNVFlag = 1: y = 44.22881 + 0.201866x
# Separate-lines model: intercept and TractLOWI slope both vary by HUNVFlag.
# In an R formula `a * b` already expands to `a + b + a:b`, so the explicit
# main-effect terms are redundant; they are kept so the printed Call is
# unchanged.
mod4 <- lm(
  formula = TractSNAP ~ TractLOWI + as.factor(HUNVFlag) +
    TractLOWI * as.factor(HUNVFlag),
  data = OR_Data
)
summary(mod4)
##
## Call:
## lm(formula = TractSNAP ~ TractLOWI + as.factor(HUNVFlag) + TractLOWI *
## as.factor(HUNVFlag), data = OR_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -693.58 -60.00 -7.82 50.65 779.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.586162 8.503727 -0.775 0.43886
## TractLOWI 0.203833 0.005102 39.948 < 2e-16 ***
## as.factor(HUNVFlag)1 63.498511 23.112703 2.747 0.00614 **
## TractLOWI:as.factor(HUNVFlag)1 -0.007629 0.010048 -0.759 0.44788
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 113.9 on 822 degrees of freedom
## Multiple R-squared: 0.7553, Adjusted R-squared: 0.7544
## F-statistic: 845.6 on 3 and 822 DF, p-value: < 2.2e-16
# Scatter plot with the two fitted lines from mod4. Blue is the baseline level
# (HUNVFlag = 0); red is HUNVFlag = 1, whose intercept and slope are the
# baseline values plus the factor and interaction coefficients respectively.
ggplot(
  data = OR_Data,
  mapping = aes(x = TractLOWI, y = TractSNAP, color = as.factor(HUNVFlag))
) +
  geom_point() +
  geom_abline(
    intercept = coef(mod4)[["(Intercept)"]],
    slope = coef(mod4)[["TractLOWI"]],
    color = "blue",
    lwd = 1
  ) +
  geom_abline(
    intercept = coef(mod4)[["(Intercept)"]] +
      coef(mod4)[["as.factor(HUNVFlag)1"]],
    slope = coef(mod4)[["TractLOWI"]] +
      coef(mod4)[["TractLOWI:as.factor(HUNVFlag)1"]],
    color = "red",
    lwd = 1
  )
models:
HUNVFlag = 0: y = -6.586162 + 0.203833x
HUNVFlag = 1: y = 56.91235 + 0.196204x
# ANOVA table for the simple linear regression (mod1).
anova(object = mod1)
## Analysis of Variance Table
##
## Response: TractSNAP
## Df Sum Sq Mean Sq F value Pr(>F)
## TractLOWI 1 32649335 32649335 2462.6 < 2.2e-16 ***
## Residuals 824 10924709 13258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the HUNVFlag-only model (mod2).
anova(object = mod2)
## Analysis of Variance Table
##
## Response: TractSNAP
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(HUNVFlag) 1 5542228 5542228 120.08 < 2.2e-16 ***
## Residuals 824 38031816 46155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the parallel-lines model (mod3).
anova(object = mod3)
## Analysis of Variance Table
##
## Response: TractSNAP
## Df Sum Sq Mean Sq F value Pr(>F)
## TractLOWI 1 32649335 32649335 2518.059 < 2.2e-16 ***
## as.factor(HUNVFlag) 1 253630 253630 19.561 1.105e-05 ***
## Residuals 823 10671080 12966
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the separate-lines model with interaction (mod4).
anova(object = mod4)
## Analysis of Variance Table
##
## Response: TractSNAP
## Df Sum Sq Mean Sq F value Pr(>F)
## TractLOWI 1 32649335 32649335 2516.7630 < 2.2e-16 ***
## as.factor(HUNVFlag) 1 253630 253630 19.5510 1.111e-05 ***
## TractLOWI:as.factor(HUNVFlag) 1 7480 7480 0.5766 0.4479
## Residuals 822 10663600 12973
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
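The MSE for the model in part E (parallel lines) was the lowest: 12966, compared with 12973 for the different-slopes model, 13258 for the simple linear regression, and 46155 for the model using HUNVFlag alone.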
Models B, E, and F increased in complexity, going from a simple linear regression to parallel lines to unrelated lines.
Vehicle access does have an effect on the number of housing units receiving SNAP benefits after accounting for low income population. The effect was significant as a shift in intercept (parallel lines, p = 1.11e-05), but the interaction allowing different slopes was not significant (p = 0.448). We also attempted to use whether or not a tract was urban as a categorical predictor, but that had an insignificant effect on the intercept of the regression line.