Loading Data:
library(readxl)
library(ggplot2)
regression_data <- read_excel("C:/Users/brian/OneDrive/Documents/School/Spring 2020/Thesis/Regression_Data.xlsx")
Initial Test Moved by Income Alone
Logit_income <- glm(Moved ~ Org_AGI, family = "binomial", data = regression_data)
summary(Logit_income)
Call:
glm(formula = Moved ~ Org_AGI, family = "binomial", data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7467 -0.6660 -0.6439 -0.6078 2.1029
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.342e+00 1.432e-02 -93.73 <2e-16 ***
Org_AGI -6.001e-06 6.155e-07 -9.75 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 65126 on 66857 degrees of freedom
Residual deviance: 65028 on 66856 degrees of freedom
(20 observations deleted due to missingness)
AIC: 65032
Number of Fisher Scoring iterations: 4
reg_Income_Logit <- cbind(regression_data, predict(Logit_income, newdata = regression_data, type = "link", se = TRUE))
reg_Income_Logit <- within(reg_Income_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_Income_Logit, aes(x = Org_AGI, y = PredictedProb)) +
geom_ribbon(aes(ymin = LL,
ymax = UL), alpha = 0.2) +
geom_line(size = 1)
Moved by Age Alone
Logit_age <- glm(Moved ~ Org_Age, family = "binomial", data = regression_data)
summary(Logit_age)
Call:
glm(formula = Moved ~ Org_Age, family = "binomial", data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.9394 -0.7078 -0.5977 -0.4973 2.2740
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.5893599 0.0268729 -21.93 <2e-16 ***
Org_Age -0.0208474 0.0006336 -32.91 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 65126 on 66857 degrees of freedom
Residual deviance: 63987 on 66856 degrees of freedom
(20 observations deleted due to missingness)
AIC: 63991
Number of Fisher Scoring iterations: 4
reg_age_Logit <- cbind(regression_data, predict(Logit_age, newdata = regression_data, type = "link", se = TRUE))
reg_age_Logit <- within(reg_age_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_age_Logit, aes(x = Org_Age, y = PredictedProb)) +
geom_ribbon(aes(ymin = LL,
ymax = UL), alpha = 0.2) +
geom_line(size = 1)
Moved by Number of Dependents Alone
Logit_Dep <- glm(Moved ~ Org_Number.of.Dependents, family = "binomial", data = regression_data)
summary(Logit_Dep)
Call:
glm(formula = Moved ~ Org_Number.of.Dependents, family = "binomial",
data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7029 -0.6535 -0.6466 -0.6466 1.8263
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.45869 0.01149 -126.899 <2e-16 ***
Org_Number.of.Dependents 0.02331 0.01106 2.107 0.0351 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 65126 on 66857 degrees of freedom
Residual deviance: 65121 on 66856 degrees of freedom
(20 observations deleted due to missingness)
AIC: 65125
Number of Fisher Scoring iterations: 4
reg_dep_Logit <- cbind(regression_data, predict(Logit_Dep, newdata = regression_data, type = "link", se = TRUE))
reg_dep_Logit <- within(reg_dep_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_dep_Logit, aes(x = Org_Number.of.Dependents, y = PredictedProb)) +
geom_ribbon(aes(ymin = LL,
ymax = UL), alpha = 0.2) +
geom_line(size = 1)
Moved by Race Alone
Logit_Race <- glm(Moved ~ Org_Race, family = "binomial", data = regression_data)
summary(Logit_Race)
Call:
glm(formula = Moved ~ Org_Race, family = "binomial", data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.7981 -0.6729 -0.6614 -0.5837 1.9255
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.48042 0.05387 -27.481 < 2e-16 ***
Org_RaceBlack 0.11028 0.05683 1.940 0.05233 .
Org_RaceHispanic 0.07176 0.06356 1.129 0.25890
Org_RaceMulti -0.13579 0.07936 -1.711 0.08705 .
Org_RaceNative American -0.12544 0.15602 -0.804 0.42141
Org_RaceOther 0.49959 0.09451 5.286 1.25e-07 ***
Org_RaceWhite -0.20289 0.06723 -3.018 0.00255 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 34076 on 34695 degrees of freedom
Residual deviance: 33979 on 34689 degrees of freedom
(32182 observations deleted due to missingness)
AIC: 33993
Number of Fisher Scoring iterations: 4
reg_race_Logit <- cbind(regression_data, predict(Logit_Race, newdata = regression_data, type = "link", se = TRUE))
reg_race_Logit <- within(reg_race_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_race_Logit, aes(x = Org_Race, y = PredictedProb)) +
geom_errorbar(aes(ymin = LL,
ymax = UL)) +
geom_point(size = 2)
Moved by Disability Status Alone
Logit_Disabilty <- glm(Moved ~ Org_Disability.Status, family = "binomial", data = regression_data)
summary(Logit_Disabilty)
Call:
glm(formula = Moved ~ Org_Disability.Status, family = "binomial",
data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.6563 -0.6563 -0.6563 -0.5976 1.9030
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.42592 0.01373 -103.860 < 2e-16 ***
Org_Disability.StatusYes -0.20627 0.03750 -5.501 3.77e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 38774 on 39968 degrees of freedom
Residual deviance: 38743 on 39967 degrees of freedom
(26909 observations deleted due to missingness)
AIC: 38747
Number of Fisher Scoring iterations: 4
reg_dis_Logit <- cbind(regression_data, predict(Logit_Disabilty, newdata = regression_data, type = "link", se = TRUE))
reg_dis_Logit <- within(reg_dis_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_dis_Logit, aes(x = Org_Disability.Status, y = PredictedProb)) +
geom_errorbar(aes(ymin = LL,
ymax = UL)) +
geom_point(size = 2)
Moved by Educational Attainment Alone
Logit_Education <- glm(Moved ~ Org_Educational.Attainment, family = "binomial", data = regression_data)
summary(Logit_Education)
Call:
glm(formula = Moved ~ Org_Educational.Attainment, family = "binomial",
data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-0.8811 -0.6632 -0.6520 -0.6520 1.8777
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.41534 0.07367 -19.212 < 2e-16 ***
Org_Educational.AttainmentCollege 0.05377 0.08392 0.641 0.522
Org_Educational.AttainmentGED 0.66932 0.10999 6.085 1.16e-09 ***
Org_Educational.AttainmentHigh School -0.02500 0.07827 -0.319 0.749
Org_Educational.AttainmentNo High School -0.15923 0.10118 -1.574 0.116
Org_Educational.AttainmentSome College 0.01273 0.07832 0.163 0.871
Org_Educational.AttainmentSome High School 0.13669 0.08436 1.620 0.105
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 28837 on 28780 degrees of freedom
Residual deviance: 28761 on 28774 degrees of freedom
(38097 observations deleted due to missingness)
AIC: 28775
Number of Fisher Scoring iterations: 4
reg_edu_Logit <- cbind(regression_data, predict(Logit_Education, newdata = regression_data, type = "link", se = TRUE))
reg_edu_Logit <- within(reg_edu_Logit, {
PredictedProb <- plogis(fit)
LL <- plogis(fit - (1.96 * se.fit))
UL <- plogis(fit + (1.96 * se.fit))
})
ggplot(data = reg_edu_Logit, aes(x = Org_Educational.Attainment, y = PredictedProb)) +
geom_errorbar(aes(ymin = LL,
ymax = UL)) +
geom_point(size = 2)
Full BTHC Regression
Logit_Education <- glm(Moved ~ Org_AGI + Org_Age + Org_Number.of.Dependents + Org_Race + Org_Disability.Status + Org_Educational.Attainment,
family = "binomial", data = regression_data)
summary(Logit_Education)
Call:
glm(formula = Moved ~ Org_AGI + Org_Age + Org_Number.of.Dependents +
Org_Race + Org_Disability.Status + Org_Educational.Attainment,
family = "binomial", data = regression_data)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.1745 -0.7156 -0.5984 -0.4809 2.3291
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.127e-01 1.071e-01 -2.920 0.00350 **
Org_AGI -3.248e-06 1.061e-06 -3.061 0.00220 **
Org_Age -2.276e-02 1.089e-03 -20.904 < 2e-16 ***
Org_Number.of.Dependents -4.023e-03 1.814e-02 -0.222 0.82447
Org_RaceBlack 1.283e-01 6.628e-02 1.935 0.05297 .
Org_RaceHispanic -4.556e-02 7.328e-02 -0.622 0.53411
Org_RaceMulti -1.748e-01 9.284e-02 -1.883 0.05976 .
Org_RaceNative American -1.585e-01 2.021e-01 -0.784 0.43277
Org_RaceOther 3.200e-01 1.088e-01 2.941 0.00327 **
Org_RaceWhite -6.549e-02 7.757e-02 -0.844 0.39853
Org_Disability.StatusYes -9.397e-02 4.581e-02 -2.051 0.04026 *
Org_Educational.AttainmentCollege -1.370e-01 8.789e-02 -1.558 0.11912
Org_Educational.AttainmentGED 4.868e-01 1.164e-01 4.180 2.91e-05 ***
Org_Educational.AttainmentHigh School -1.545e-01 8.211e-02 -1.881 0.05994 .
Org_Educational.AttainmentNo High School -6.450e-02 1.078e-01 -0.598 0.54968
Org_Educational.AttainmentSome College -2.068e-01 8.241e-02 -2.509 0.01210 *
Org_Educational.AttainmentSome High School -5.272e-02 8.940e-02 -0.590 0.55540
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 26265 on 26323 degrees of freedom
Residual deviance: 25584 on 26307 degrees of freedom
(40554 observations deleted due to missingness)
AIC: 25618
Number of Fisher Scoring iterations: 4