#PNPL Standardized Split
MEDIAN <- round(median(df$PNPL),4)
LABEL0 <- paste("PNPL Score < Median:", round(MEDIAN,2))
LABEL1 <- paste("PNPL Score >= Median:", round(MEDIAN,2))
df <- df %>%
mutate(PNPL_SPLIT = as.numeric(PNPL >= MEDIAN),
PNPL_SPLIT = factor(PNPL_SPLIT,
levels = 0:1,
labels = c(LABEL0, LABEL1)))
tapply(df$PNPL, df$PNPL_SPLIT, range)
## $`PNPL Score < Median: 0.21`
## [1] 0.03104137 0.21264430
##
## $`PNPL Score >= Median: 0.21`
## [1] 0.2128068 5.1370115
tbl_dem <-
df %>%
dplyr::select(VULEOPCT, MINORPCT, LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT, PNPL_SPLIT) %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(VULEOPCT ~ "continuous",
MINORPCT ~ "continuous",
LOWINCPCT ~ "continuous",
UNEMPPCT ~ "continuous",
LESSHSPCT ~ "continuous",
LINGISOPCT ~ "continuous",
UNDER5PCT ~ "continuous",
OVER64PCT ~ "continuous"),
label = list(VULEOPCT ~ "Vulerability Index %",
MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
UNEMPPCT ~ "Unemployed %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Lingustic Isolation %",
UNDER5PCT ~ "Under Age 5 %",
OVER64PCT ~ "Over Age 64 %")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Demographic Characteristics") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
tbl_dem
| Variable | All census block groups N = 21901 |
PNPL Score < Median: 0.21 N = 1095 (50.0%)1 |
PNPL Score >= Median: 0.21 N = 1095 (50.0%)1 |
p-value2 |
|---|---|---|---|---|
| Vulerability Index % | 0.25 (0.17) | 0.22 (0.16) | 0.27 (0.18) | <0.001 |
| Racial Minority % | 0.34 (0.28) | 0.29 (0.25) | 0.40 (0.29) | <0.001 |
| Low Income % | 0.15 (0.13) | 0.15 (0.13) | 0.15 (0.14) | 0.872 |
| Less than HS Ed. % | 0.08 (0.09) | 0.08 (0.08) | 0.09 (0.10) | <0.001 |
| Lingustic Isolation % | 0.04 (0.07) | 0.03 (0.06) | 0.05 (0.07) | <0.001 |
| Unemployed % | 0.04 (0.05) | 0.04 (0.05) | 0.04 (0.05) | 0.201 |
| Under Age 5 % | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.105 |
| Over Age 64 % | 0.19 (0.11) | 0.19 (0.13) | 0.18 (0.09) | 0.001 |
| 1 Mean (SD) | ||||
| 2 Welch Two Sample t-test | ||||
tbl_dem_strat <-
df %>%
dplyr::select(VULEOPCT, MINORPCT, LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT, PNPL_SPLIT, CNTY_NAME) %>%
mutate(CNTY_NAME = paste(CNTY_NAME,"County")) %>%
tbl_strata(
strata = CNTY_NAME,
.tbl_fun =
~ .x %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(VULEOPCT ~ "continuous",
MINORPCT ~ "continuous",
LOWINCPCT ~ "continuous",
UNEMPPCT ~ "continuous",
LESSHSPCT ~ "continuous",
LINGISOPCT ~ "continuous",
UNDER5PCT ~ "continuous",
OVER64PCT ~ "continuous"),
label = list(VULEOPCT ~ "Vulerability Index %",
MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
UNEMPPCT ~ "Unemployed %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Lingustic Isolation %",
UNDER5PCT ~ "Under Age 5 %",
OVER64PCT ~ "Over Age 64 %")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Demographic Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) )
tbl_dem_strat
| Variable | Nassau County County | Suffolk County County | ||||||
|---|---|---|---|---|---|---|---|---|
| All census block groups N = 11341 |
PNPL Score < Median: 0.21 N = 358 (31.6%)1 |
PNPL Score >= Median: 0.21 N = 776 (68.4%)1 |
p-value2 | All census block groups N = 10561 |
PNPL Score < Median: 0.21 N = 737 (69.8%)1 |
PNPL Score >= Median: 0.21 N = 319 (30.2%)1 |
p-value2 | |
| Vulerability Index % | 0.26 (0.18) | 0.24 (0.19) | 0.27 (0.18) | 0.007 | 0.23 (0.16) | 0.21 (0.15) | 0.28 (0.19) | <0.001 |
| Racial Minority % | 0.39 (0.29) | 0.35 (0.30) | 0.40 (0.28) | 0.002 | 0.30 (0.26) | 0.26 (0.22) | 0.39 (0.31) | <0.001 |
| Low Income % | 0.13 (0.13) | 0.13 (0.13) | 0.14 (0.13) | 0.478 | 0.16 (0.13) | 0.16 (0.13) | 0.17 (0.14) | 0.100 |
| Less than HS Ed. % | 0.08 (0.09) | 0.07 (0.09) | 0.09 (0.09) | 0.006 | 0.09 (0.10) | 0.08 (0.08) | 0.11 (0.12) | <0.001 |
| Lingustic Isolation % | 0.05 (0.08) | 0.04 (0.08) | 0.06 (0.08) | <0.001 | 0.03 (0.05) | 0.02 (0.05) | 0.04 (0.06) | <0.001 |
| Unemployed % | 0.04 (0.05) | 0.04 (0.06) | 0.04 (0.05) | 0.310 | 0.04 (0.04) | 0.04 (0.04) | 0.04 (0.05) | 0.837 |
| Under Age 5 % | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.592 | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.221 |
| Over Age 64 % | 0.18 (0.09) | 0.18 (0.09) | 0.18 (0.09) | 0.700 | 0.19 (0.13) | 0.20 (0.14) | 0.17 (0.11) | <0.001 |
| 1 Mean (SD) | ||||||||
| 2 Welch Two Sample t-test | ||||||||
tbl_env <-
df %>%
dplyr::select(PRE1960PCT, PTRAF, PWDIS, PRMP, PTSDF, OZONE, PM25, UST, PNPL_SPLIT) %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(PRE1960PCT ~ "continuous",
PTRAF ~ "continuous",
PWDIS ~ "continuous",
PRMP ~ "continuous",
PTSDF ~ "continuous",
OZONE ~ "continuous",
PM25 ~ "continuous",
UST ~ "continuous"),
label = list(PRE1960PCT ~ "% Houses Built Pre 1960",
PTRAF ~ "Proximity to Traffic",
PWDIS ~ "Proximity to Waterwater Discharge Facility",
PRMP ~ "Proximity to Regulated Management Plan Facility",
PTSDF ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average Seasonal Ozone Concentration",
PM25 ~ "Annual Average PM2.5 Concentration",
UST ~ "Proximity to Underground Storage Tank")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Environmental Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
tbl_env
| Variable | All census block groups N = 21901 |
PNPL Score < Median: 0.21 N = 1095 (50.0%)1 |
PNPL Score >= Median: 0.21 N = 1095 (50.0%)1 |
p-value2 |
|---|---|---|---|---|
| % Houses Built Pre 1960 | 0.53 (0.29) | 0.46 (0.27) | 0.61 (0.29) | <0.001 |
| Proximity to Traffic | 504.84 (685.51) | 387.27 (540.78) | 622.41 (787.50) | <0.001 |
| Proximity to Waterwater Discharge Facility | 0.01 (0.19) | 0.02 (0.19) | 0.01 (0.19) | 0.225 |
| Proximity to Regulated Management Plan Facility | 0.16 (0.21) | 0.09 (0.06) | 0.23 (0.27) | <0.001 |
| Proximity to Hazardous Waste Facility | 2.02 (1.74) | 1.20 (1.03) | 2.84 (1.91) | <0.001 |
| Average Seasonal Ozone Concentration | 43.56 (0.61) | 43.51 (0.70) | 43.61 (0.49) | <0.001 |
| Annual Average PM2.5 Concentration | 7.48 (0.46) | 7.26 (0.44) | 7.70 (0.38) | <0.001 |
| Proximity to Underground Storage Tank | 1.16 (1.98) | 0.91 (1.54) | 1.41 (2.32) | <0.001 |
| 1 Mean (SD) | ||||
| 2 Welch Two Sample t-test | ||||
tbl_env_strat <-
df %>%
dplyr::select(PRE1960PCT, PTRAF, PWDIS, PRMP, PTSDF, OZONE, PM25, UST, PNPL_SPLIT, CNTY_NAME) %>%
mutate(CNTY_NAME = paste(CNTY_NAME,"County")) %>%
tbl_strata(
strata = CNTY_NAME,
.tbl_fun =
~ .x %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(PRE1960PCT ~ "continuous",
PTRAF ~ "continuous",
PWDIS ~ "continuous",
PRMP ~ "continuous",
PTSDF ~ "continuous",
OZONE ~ "continuous",
PM25 ~ "continuous",
UST ~ "continuous"),
label = list(PRE1960PCT ~ "% Houses Built Pre 1960",
PTRAF ~ "Proximity to Traffic",
PWDIS ~ "Proximity to Waterwater Discharge Facility",
PRMP ~ "Proximity to Regulated Management Plan Facility",
PTSDF ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average Seasonal Ozone Concentration",
PM25 ~ "Annual Average PM2.5 Concentration",
UST ~ "Proximity to Underground Storage Tank")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Environmental Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) )
tbl_env_strat
| Variable | Nassau County County | Suffolk County County | ||||||
|---|---|---|---|---|---|---|---|---|
| All census block groups N = 11341 |
PNPL Score < Median: 0.21 N = 358 (31.6%)1 |
PNPL Score >= Median: 0.21 N = 776 (68.4%)1 |
p-value2 | All census block groups N = 10561 |
PNPL Score < Median: 0.21 N = 737 (69.8%)1 |
PNPL Score >= Median: 0.21 N = 319 (30.2%)1 |
p-value2 | |
| % Houses Built Pre 1960 | 0.72 (0.21) | 0.68 (0.22) | 0.74 (0.20) | <0.001 | 0.33 (0.22) | 0.35 (0.23) | 0.28 (0.20) | <0.001 |
| Proximity to Traffic | 664.91 (809.44) | 587.33 (729.43) | 700.70 (841.84) | 0.021 | 332.94 (462.90) | 290.08 (384.30) | 431.97 (595.81) | <0.001 |
| Proximity to Waterwater Discharge Facility | 0.03 (0.27) | 0.06 (0.33) | 0.01 (0.23) | 0.020 | 0.00 (0.00) | 0.00 (0.00) | 0.00 (0.00) | 0.019 |
| Proximity to Regulated Management Plan Facility | 0.23 (0.24) | 0.14 (0.07) | 0.27 (0.28) | <0.001 | 0.09 (0.12) | 0.07 (0.03) | 0.16 (0.19) | <0.001 |
| Proximity to Hazardous Waste Facility | 2.38 (1.72) | 1.44 (0.97) | 2.81 (1.82) | <0.001 | 1.64 (1.67) | 1.09 (1.04) | 2.91 (2.11) | <0.001 |
| Average Seasonal Ozone Concentration | 43.60 (0.51) | 43.50 (0.55) | 43.64 (0.48) | <0.001 | 43.53 (0.70) | 43.52 (0.77) | 43.55 (0.50) | 0.408 |
| Annual Average PM2.5 Concentration | 7.81 (0.25) | 7.62 (0.17) | 7.89 (0.23) | <0.001 | 7.13 (0.38) | 7.09 (0.42) | 7.23 (0.24) | <0.001 |
| Proximity to Underground Storage Tank | 1.71 (2.48) | 1.56 (2.15) | 1.78 (2.62) | 0.129 | 0.57 (0.93) | 0.60 (0.99) | 0.51 (0.77) | 0.126 |
| 1 Mean (SD) | ||||||||
| 2 Welch Two Sample t-test | ||||||||
### Linear Regression
### Adjusted, PTRAF, PWDIS, PRMP, and PTSDF were turned into categorical variabels (1-5) due to having non-linearity
fit.adjusted <- lm(PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT
+ PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df)
summary(fit.adjusted)
##
## Call:
## lm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
## LINGISOPCT + UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q +
## PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8736 -0.4653 -0.1013 0.3984 2.4621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.927103 1.107757 0.837 0.40273
## MINORPCT -0.458205 0.070759 -6.476 0.000000000116 ***
## LOWINCPCT 0.187780 0.125374 1.498 0.13434
## LESSHSPCT 0.110720 0.201946 0.548 0.58357
## LINGISOPCT 0.609317 0.246230 2.475 0.01341 *
## UNDER5PCT 0.364534 0.355793 1.025 0.30568
## OVER64PCT -0.110311 0.135906 -0.812 0.41707
## PRE1960PCT -0.191164 0.062178 -3.074 0.00214 **
## PTRAF_Q 0.031599 0.011500 2.748 0.00605 **
## PWDIS_Q 0.189355 0.012090 15.662 < 0.0000000000000002 ***
## PRMP_Q -0.245570 0.014202 -17.292 < 0.0000000000000002 ***
## PTDF_Q -0.176359 0.011465 -15.383 < 0.0000000000000002 ***
## OZONE -0.161639 0.029262 -5.524 0.000000037112 ***
## PM25 0.711391 0.052906 13.446 < 0.0000000000000002 ***
## UST 0.001443 0.007834 0.184 0.85382
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6514 on 2175 degrees of freedom
## Multiple R-squared: 0.5236, Adjusted R-squared: 0.5206
## F-statistic: 170.8 on 14 and 2175 DF, p-value: < 0.00000000000000022
# Table
NOBS <- length(fit.adjusted$residuals)
library(gtsummary)
fit.adjusted %>%
tbl_regression(
estimate_fun = function(x) style_sigfig(x, digits = 3),
pvalue_fun = function(x) style_pvalue(x, digits = 3),
label = list(MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Linguistic Isolation %",
UNDER5PCT ~ "Under 5%",
OVER64PCT ~ "Over 64%",
PRE1960PCT ~ "Houses Built Pre-1960 %",
PTRAF_Q ~ "Proximity to Traffic (log)",
PWDIS_Q ~ "Proximity to Wastewater Discharge Facility",
PRMP_Q ~ "Proximity to RMP Facility",
PTDF_Q ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average O3 Ambient Air Concentration",
PM25 ~ "Average Annual PM 2.5 Ambient Air Concentration",
UST ~ "Proximity to Underground Storage Facility"
)) %>%
add_global_p(keep = T) %>%
modify_caption(paste("Linear regression results for proximity to Superfund sites (log transformed) by census block group (N = ", NOBS, ")", sep=""))
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| Racial Minority % | -0.458 | -0.597, -0.319 | <0.001 |
| Low Income % | 0.188 | -0.058, 0.434 | 0.134 |
| Less than HS Ed. % | 0.111 | -0.285, 0.507 | 0.584 |
| Linguistic Isolation % | 0.609 | 0.126, 1.09 | 0.013 |
| Under 5% | 0.365 | -0.333, 1.06 | 0.306 |
| Over 64% | -0.110 | -0.377, 0.156 | 0.417 |
| Houses Built Pre-1960 % | -0.191 | -0.313, -0.069 | 0.002 |
| Proximity to Traffic (log) | 0.032 | 0.009, 0.054 | 0.006 |
| Proximity to Wastewater Discharge Facility | 0.189 | 0.166, 0.213 | <0.001 |
| Proximity to RMP Facility | -0.246 | -0.273, -0.218 | <0.001 |
| Proximity to Hazardous Waste Facility | -0.176 | -0.199, -0.154 | <0.001 |
| Average O3 Ambient Air Concentration | -0.162 | -0.219, -0.104 | <0.001 |
| Average Annual PM 2.5 Ambient Air Concentration | 0.711 | 0.608, 0.815 | <0.001 |
| Proximity to Underground Storage Facility | 0.001 | -0.014, 0.017 | 0.854 |
| 1 CI = Confidence Interval | |||
# plot histogram of residuals
hist(resid(fit.adjusted), breaks = 30, col = "grey", main = "Histogram of Residuals")
# create QQ-plot of residuals
qqnorm(resid(fit.adjusted))
qqline(resid(fit.adjusted))
# Fit the linear regression model
model <- glm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q +
PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df)
# Removing variables with low coefficents or those that are insignficant
reduced_model <- step(fit.adjusted, direction = "backward")
## Start: AIC=-1862.75
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Sum of Sq RSS AIC
## - UST 1 0.014 922.79 -1864.7
## - LESSHSPCT 1 0.128 922.91 -1864.4
## - OVER64PCT 1 0.280 923.06 -1864.1
## - UNDER5PCT 1 0.445 923.22 -1863.7
## <none> 922.78 -1862.8
## - LOWINCPCT 1 0.952 923.73 -1862.5
## - LINGISOPCT 1 2.598 925.38 -1858.6
## - PTRAF_Q 1 3.203 925.98 -1857.2
## - PRE1960PCT 1 4.010 926.79 -1855.2
## - OZONE 1 12.946 935.72 -1834.2
## - MINORPCT 1 17.791 940.57 -1822.9
## - PM25 1 76.709 999.49 -1689.9
## - PTDF_Q 1 100.397 1023.18 -1638.6
## - PWDIS_Q 1 104.065 1026.84 -1630.7
## - PRMP_Q 1 126.856 1049.63 -1582.7
##
## Step: AIC=-1864.71
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25
##
## Df Sum of Sq RSS AIC
## - LESSHSPCT 1 0.126 922.92 -1866.4
## - OVER64PCT 1 0.282 923.07 -1866.0
## - UNDER5PCT 1 0.439 923.23 -1865.7
## <none> 922.79 -1864.7
## - LOWINCPCT 1 0.953 923.75 -1864.5
## - LINGISOPCT 1 2.641 925.43 -1860.5
## - PTRAF_Q 1 3.200 925.99 -1859.1
## - PRE1960PCT 1 3.999 926.79 -1857.2
## - OZONE 1 13.787 936.58 -1834.2
## - MINORPCT 1 17.808 940.60 -1824.8
## - PM25 1 79.654 1002.45 -1685.4
## - PTDF_Q 1 100.517 1023.31 -1640.3
## - PWDIS_Q 1 104.950 1027.74 -1630.8
## - PRMP_Q 1 127.430 1050.22 -1583.4
##
## Step: AIC=-1866.41
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LINGISOPCT + UNDER5PCT +
## OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q +
## OZONE + PM25
##
## Df Sum of Sq RSS AIC
## - OVER64PCT 1 0.289 923.21 -1867.7
## - UNDER5PCT 1 0.444 923.36 -1867.4
## <none> 922.92 -1866.4
## - LOWINCPCT 1 1.259 924.18 -1865.4
## - LINGISOPCT 1 3.171 926.09 -1860.9
## - PTRAF_Q 1 3.202 926.12 -1860.8
## - PRE1960PCT 1 4.082 927.00 -1858.8
## - OZONE 1 13.684 936.60 -1836.2
## - MINORPCT 1 19.503 942.42 -1822.6
## - PM25 1 79.948 1002.87 -1686.5
## - PTDF_Q 1 101.304 1024.22 -1640.3
## - PWDIS_Q 1 104.919 1027.84 -1632.6
## - PRMP_Q 1 128.395 1051.31 -1583.2
##
## Step: AIC=-1867.73
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LINGISOPCT + UNDER5PCT +
## PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE +
## PM25
##
## Df Sum of Sq RSS AIC
## - UNDER5PCT 1 0.683 923.89 -1868.1
## <none> 923.21 -1867.7
## - LOWINCPCT 1 1.180 924.39 -1866.9
## - LINGISOPCT 1 3.146 926.35 -1862.3
## - PTRAF_Q 1 3.163 926.37 -1862.2
## - PRE1960PCT 1 3.930 927.14 -1860.4
## - OZONE 1 13.513 936.72 -1837.9
## - MINORPCT 1 19.569 942.78 -1823.8
## - PM25 1 79.696 1002.90 -1688.4
## - PTDF_Q 1 101.704 1024.91 -1640.8
## - PWDIS_Q 1 104.928 1028.14 -1634.0
## - PRMP_Q 1 128.330 1051.54 -1584.7
##
## Step: AIC=-1868.11
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LINGISOPCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25
##
## Df Sum of Sq RSS AIC
## <none> 923.89 -1868.1
## - LOWINCPCT 1 1.307 925.20 -1867.0
## - PTRAF_Q 1 3.082 926.97 -1862.8
## - LINGISOPCT 1 3.136 927.03 -1862.7
## - PRE1960PCT 1 3.795 927.69 -1861.1
## - OZONE 1 13.553 937.44 -1838.2
## - MINORPCT 1 19.096 942.99 -1825.3
## - PM25 1 79.587 1003.48 -1689.1
## - PTDF_Q 1 101.747 1025.64 -1641.3
## - PWDIS_Q 1 104.938 1028.83 -1634.5
## - PRMP_Q 1 128.251 1052.14 -1585.4
summary(reduced_model)
##
## Call:
## lm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LINGISOPCT +
## PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE +
## PM25, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8108 -0.4678 -0.1027 0.3972 2.4349
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.91945 1.08174 0.850 0.39543
## MINORPCT -0.42234 0.06293 -6.711 0.0000000000246 ***
## LOWINCPCT 0.21010 0.11966 1.756 0.07926 .
## LINGISOPCT 0.64417 0.23685 2.720 0.00659 **
## PRE1960PCT -0.18421 0.06157 -2.992 0.00281 **
## PTRAF_Q 0.03073 0.01140 2.696 0.00707 **
## PWDIS_Q 0.18911 0.01202 15.732 < 0.0000000000000002 ***
## PRMP_Q -0.24569 0.01413 -17.392 < 0.0000000000000002 ***
## PTDF_Q -0.17710 0.01143 -15.491 < 0.0000000000000002 ***
## OZONE -0.16096 0.02847 -5.654 0.0000000177607 ***
## PM25 0.70780 0.05166 13.701 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6512 on 2179 degrees of freedom
## Multiple R-squared: 0.523, Adjusted R-squared: 0.5209
## F-statistic: 239 on 10 and 2179 DF, p-value: < 0.00000000000000022
reduced_model %>%
tbl_regression(intercept = T,
estimate_fun = function(x) style_sigfig(x, digits = 2),
pvalue_fun = function(x) style_pvalue(x, digits = 3),
label = list(MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
LINGISOPCT ~ "Linguistic Isolation %",
PRE1960PCT ~ "Houses Built Pre-1960 %",
PTRAF_Q ~ "Proximity to Traffic",
PWDIS_Q ~ "Proximity to Wastewater Discharge Facility",
PRMP_Q ~ "Proximity to RMP Facility",
PTDF_Q ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average O3 Ambient Air Concentration",
PM25 ~ "Average Annual PM 2.5 Ambient Air Concentration" )) %>%
add_global_p(keep = T) %>%
modify_caption(paste("Reduced Linear regression results for proximity to Superfund sites (log transformed) by census block group (N = ", NOBS, ")", sep=""))
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | 0.92 | -1.2, 3.0 | 0.395 |
| Racial Minority % | -0.42 | -0.55, -0.30 | <0.001 |
| Low Income % | 0.21 | -0.02, 0.44 | 0.079 |
| Linguistic Isolation % | 0.64 | 0.18, 1.1 | 0.007 |
| Houses Built Pre-1960 % | -0.18 | -0.30, -0.06 | 0.003 |
| Proximity to Traffic | 0.03 | 0.01, 0.05 | 0.007 |
| Proximity to Wastewater Discharge Facility | 0.19 | 0.17, 0.21 | <0.001 |
| Proximity to RMP Facility | -0.25 | -0.27, -0.22 | <0.001 |
| Proximity to Hazardous Waste Facility | -0.18 | -0.20, -0.15 | <0.001 |
| Average O3 Ambient Air Concentration | -0.16 | -0.22, -0.11 | <0.001 |
| Average Annual PM 2.5 Ambient Air Concentration | 0.71 | 0.61, 0.81 | <0.001 |
| 1 CI = Confidence Interval | |||
# Get the coefficient estimates and confidence intervals
tidy_fit_reduced <- tidy(reduced_model, exponentiate = TRUE, conf.int = TRUE)
tidy_fit_reduced <- tidy(reduced_model, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Facility Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit_reduced %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 2.51 | 12.30 | 1.35 | 1217420362.37 |
| Percent Minority | 0.66 | 1.93 | 1.79 | 2.10 |
| Percent Low Income | 1.23 | 3.42 | 2.66 | 4.76 |
| Percent Limited English Proficiency | 1.90 | 6.69 | 3.32 | 20.70 |
| Percent Pre-1960 Housing | 0.83 | 2.29 | 2.10 | 2.56 |
| Traffic Density Quartile | 1.03 | 2.80 | 2.75 | 2.86 |
| Proximity to Wastewater Discharge Facility Quartile | 1.21 | 3.35 | 3.25 | 3.46 |
| Proximity to RMP Facility Quartile | 0.78 | 2.18 | 2.14 | 2.23 |
| Proximity to Hazardous Waste Facility Quartile | 0.84 | 2.32 | 2.27 | 2.36 |
| Ozone Concentration (ppb) | 0.85 | 2.34 | 2.25 | 2.46 |
| PM2.5 Concentration (ug/m3) | 2.03 | 7.61 | 6.23 | 9.49 |
### Exponentiation Model
# Get the coefficient estimates and confidence intervals
tidy_fit <- tidy(fit.adjusted, exponentiate = TRUE, conf.int = TRUE)
tidy_fit <- tidy(fit.adjusted, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Facility Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Proximity to Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 2.53 | 12.55 | 1.34 | 4335054416.82 |
| Percent Minority | 0.63 | 1.88 | 1.73 | 2.08 |
| Percent Low Income | 1.21 | 3.35 | 2.56 | 4.66 |
| Percent Less than High School Education | 1.12 | 3.06 | 2.12 | 5.26 |
| Percent Limited English Proficiency | 1.84 | 6.30 | 3.10 | 19.69 |
| Percent Under 5 Years Old | 1.44 | 4.22 | 2.05 | 17.99 |
| Percent Over 64 Years Old | 0.90 | 2.46 | 1.99 | 3.22 |
| Percent Pre-1960 Housing | 0.83 | 2.29 | 2.08 | 2.53 |
| Traffic Density Quartile | 1.03 | 2.80 | 2.75 | 2.89 |
| Proximity to Wastewater Discharge Facility Quartile | 1.21 | 3.35 | 3.25 | 3.46 |
| Proximity to RMP Facility Quartile | 0.78 | 2.18 | 2.14 | 2.23 |
| Proximity to Hazardous Waste Facility Quartile | 0.84 | 2.32 | 2.27 | 2.36 |
| Ozone Concentration (ppb) | 0.85 | 2.34 | 2.23 | 2.46 |
| PM2.5 Concentration (ug/m3) | 2.04 | 7.69 | 6.30 | 9.58 |
| Proximity to Underground Storage Tanks | 1.00 | 2.72 | 2.69 | 2.77 |
# Get the coefficient estimates and confidence intervals
tidy_fit <- tidy(fit.adjusted, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Quartile",
term == "PRMP_Q" ~ "Percent in Poverty Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
# Filter significant and positive effect variables
tidy_fit_significant <- tidy_fit %>%
filter(p.value < 0.05, exp_estimate > 1) %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
arrange(desc(exp_estimate))
# Display the results
tidy_fit_significant %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| PM2.5 Concentration (ug/m3) | 2.04 | 7.69 | 6.30 | 9.58 |
| Percent Limited English Proficiency | 1.84 | 6.30 | 3.10 | 19.69 |
| Proximity to Wastewater Discharge Quartile | 1.21 | 3.35 | 3.25 | 3.46 |
| Traffic Density Quartile | 1.03 | 2.80 | 2.75 | 2.89 |
| Ozone Concentration (ppb) | 0.85 | 2.34 | 2.23 | 2.46 |
| Proximity to Hazardous Waste Facility Quartile | 0.84 | 2.32 | 2.27 | 2.36 |
| Percent Pre-1960 Housing | 0.83 | 2.29 | 2.08 | 2.53 |
| Percent in Poverty Quartile | 0.78 | 2.18 | 2.14 | 2.23 |
| Percent Minority | 0.63 | 1.88 | 1.73 | 2.08 |
NassauEJ <- subset(EJScreen, CNTY_NAME == "Nassau County")
df_nassau = as.data.frame(NassauEJ)
fit.adjusted.nassau <- lm(PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT
+ PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df_nassau)
# Obtain model summary
summary(fit.adjusted.nassau)
##
## Call:
## lm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
## LINGISOPCT + UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q +
## PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df_nassau)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.01836 -0.43066 -0.03288 0.42088 2.39012
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.97764 2.31475 -5.174 0.000000270717775821 ***
## MINORPCT -0.77608 0.09451 -8.212 0.000000000000000597 ***
## LOWINCPCT 0.36371 0.19091 1.905 0.0570 .
## LESSHSPCT 0.54359 0.30127 1.804 0.0714 .
## LINGISOPCT 0.24242 0.31480 0.770 0.4414
## UNDER5PCT 0.72408 0.52116 1.389 0.1650
## OVER64PCT -0.01349 0.24772 -0.054 0.9566
## PRE1960PCT 0.20892 0.10379 2.013 0.0444 *
## PTRAF_Q -0.00969 0.01659 -0.584 0.5593
## PWDIS_Q 0.20242 0.01713 11.820 < 0.0000000000000002 ***
## PRMP_Q -0.16811 0.02655 -6.332 0.000000000348690279 ***
## PTDF_Q -0.15018 0.01729 -8.686 < 0.0000000000000002 ***
## OZONE 0.06728 0.05137 1.310 0.1905
## PM25 1.03121 0.09194 11.216 < 0.0000000000000002 ***
## UST 0.01773 0.00880 2.015 0.0441 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6779 on 1119 degrees of freedom
## Multiple R-squared: 0.4278, Adjusted R-squared: 0.4207
## F-statistic: 59.76 on 14 and 1119 DF, p-value: < 0.00000000000000022
# Calculate VIF values
vif(fit.adjusted.nassau)
## MINORPCT LOWINCPCT LESSHSPCT LINGISOPCT UNDER5PCT OVER64PCT PRE1960PCT
## 1.835252 1.553097 1.918606 1.515844 1.078144 1.150854 1.170427
## PTRAF_Q PWDIS_Q PRMP_Q PTDF_Q OZONE PM25 UST
## 1.180701 1.362653 1.669211 1.276865 1.681449 1.312099 1.175993
# Get the coefficient estimates and confidence intervals
tidy_fit_nassau <- tidy(fit.adjusted.nassau, exponentiate = TRUE, conf.int = TRUE)
tidy_fit_nassau <- tidy(fit.adjusted.nassau, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Facility Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Proximity to Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit_nassau %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 0.00 | 1.00 | 1.00 | 1.00 |
| Percent Minority | 0.46 | 1.58 | 1.46 | 1.73 |
| Percent Low Income | 1.44 | 4.22 | 2.69 | 8.08 |
| Percent Less than High School Education | 1.72 | 5.58 | 2.59 | 22.42 |
| Percent Limited English Proficiency | 1.27 | 3.56 | 1.99 | 10.59 |
| Percent Under 5 Years Old | 2.06 | 7.85 | 2.10 | 311.06 |
| Percent Over 64 Years Old | 0.99 | 2.69 | 1.84 | 4.95 |
| Percent Pre-1960 Housing | 1.23 | 3.42 | 2.75 | 4.53 |
| Traffic Density Quartile | 0.99 | 2.69 | 2.61 | 2.77 |
| Proximity to Wastewater Discharge Facility Quartile | 1.22 | 3.39 | 3.25 | 3.56 |
| Proximity to RMP Facility Quartile | 0.85 | 2.34 | 2.23 | 2.44 |
| Proximity to Hazardous Waste Facility Quartile | 0.86 | 2.36 | 2.29 | 2.44 |
| Ozone Concentration (ppb) | 1.07 | 2.92 | 2.64 | 3.25 |
| PM2.5 Concentration (ug/m3) | 2.80 | 16.44 | 10.38 | 28.79 |
| Proximity to Underground Storage Tanks | 1.02 | 2.77 | 2.72 | 2.83 |
### Significant results
# Get the coefficient estimates and confidence intervals
tidy_fit_nassau <- tidy(fit.adjusted.nassau, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Quartile",
term == "PRMP_Q" ~ "Percent in Poverty Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
# Filter significant and positive effect variables
tidy_fit_significant_nassau <- tidy_fit_nassau %>%
filter(p.value < 0.05, exp_estimate > 1) %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
arrange(desc(exp_estimate))
# Display the results
tidy_fit_significant_nassau %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| PM2.5 Concentration (ug/m3) | 2.80 | 16.44 | 10.38 | 28.79 |
| Percent Pre-1960 Housing | 1.23 | 3.42 | 2.75 | 4.53 |
| Proximity to Wastewater Discharge Quartile | 1.22 | 3.39 | 3.25 | 3.56 |
| Underground Storage Tanks | 1.02 | 2.77 | 2.72 | 2.83 |
| Proximity to Hazardous Waste Facility Quartile | 0.86 | 2.36 | 2.29 | 2.44 |
| Percent in Poverty Quartile | 0.85 | 2.34 | 2.23 | 2.44 |
| Percent Minority | 0.46 | 1.58 | 1.46 | 1.73 |
## Reduced Model for Nassau
reduced_model_nassau <- step(fit.adjusted.nassau, direction = "backward")
## Start: AIC=-866.76
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Sum of Sq RSS AIC
## - OVER64PCT 1 0.001 514.25 -868.76
## - PTRAF_Q 1 0.157 514.41 -868.41
## - LINGISOPCT 1 0.273 514.52 -868.16
## - OZONE 1 0.789 515.04 -867.02
## - UNDER5PCT 1 0.887 515.14 -866.81
## <none> 514.25 -866.76
## - LESSHSPCT 1 1.496 515.75 -865.47
## - LOWINCPCT 1 1.668 515.92 -865.09
## - PRE1960PCT 1 1.862 516.11 -864.66
## - UST 1 1.866 516.12 -864.65
## - PRMP_Q 1 18.429 532.68 -828.83
## - MINORPCT 1 30.988 545.24 -802.41
## - PTDF_Q 1 34.672 548.92 -794.77
## - PM25 1 57.811 572.06 -747.95
## - PWDIS_Q 1 64.209 578.46 -735.34
##
## Step: AIC=-868.76
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q +
## OZONE + PM25 + UST
##
## Df Sum of Sq RSS AIC
## - PTRAF_Q 1 0.156 514.41 -870.41
## - LINGISOPCT 1 0.273 514.53 -870.15
## - OZONE 1 0.788 515.04 -869.02
## <none> 514.25 -868.76
## - UNDER5PCT 1 0.951 515.20 -868.66
## - LESSHSPCT 1 1.495 515.75 -867.46
## - LOWINCPCT 1 1.667 515.92 -867.09
## - UST 1 1.872 516.13 -866.64
## - PRE1960PCT 1 1.906 516.16 -866.56
## - PRMP_Q 1 18.470 532.72 -830.74
## - MINORPCT 1 32.096 546.35 -802.10
## - PTDF_Q 1 34.746 549.00 -796.62
## - PM25 1 58.211 572.46 -749.15
## - PWDIS_Q 1 64.219 578.47 -737.31
##
## Step: AIC=-870.41
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + PRE1960PCT + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE +
## PM25 + UST
##
## Df Sum of Sq RSS AIC
## - LINGISOPCT 1 0.300 514.71 -871.75
## - OZONE 1 0.786 515.20 -870.68
## <none> 514.41 -870.41
## - UNDER5PCT 1 0.984 515.39 -870.25
## - LESSHSPCT 1 1.478 515.89 -869.16
## - LOWINCPCT 1 1.658 516.07 -868.76
## - PRE1960PCT 1 1.963 516.37 -868.09
## - UST 1 2.029 516.44 -867.95
## - PRMP_Q 1 19.300 533.71 -830.65
## - MINORPCT 1 32.077 546.49 -803.82
## - PTDF_Q 1 36.376 550.78 -794.93
## - PM25 1 58.258 572.67 -750.75
## - PWDIS_Q 1 64.669 579.08 -738.13
##
## Step: AIC=-871.75
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + UNDER5PCT + PRE1960PCT +
## PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Sum of Sq RSS AIC
## - OZONE 1 0.870 515.58 -871.84
## <none> 514.71 -871.75
## - UNDER5PCT 1 0.980 515.69 -871.59
## - LOWINCPCT 1 1.965 516.67 -869.43
## - PRE1960PCT 1 1.972 516.68 -869.42
## - LESSHSPCT 1 2.099 516.81 -869.14
## - UST 1 2.179 516.89 -868.96
## - PRMP_Q 1 19.142 533.85 -832.34
## - MINORPCT 1 31.826 546.53 -805.72
## - PTDF_Q 1 36.352 551.06 -796.36
## - PM25 1 59.988 574.70 -748.74
## - PWDIS_Q 1 64.391 579.10 -740.08
##
## Step: AIC=-871.84
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + UNDER5PCT + PRE1960PCT +
## PWDIS_Q + PRMP_Q + PTDF_Q + PM25 + UST
##
## Df Sum of Sq RSS AIC
## <none> 515.58 -871.84
## - UNDER5PCT 1 0.945 516.52 -871.76
## - PRE1960PCT 1 1.518 517.10 -870.50
## - UST 1 1.899 517.48 -869.67
## - LESSHSPCT 1 1.972 517.55 -869.51
## - LOWINCPCT 1 2.017 517.60 -869.41
## - PRMP_Q 1 28.221 543.80 -813.41
## - MINORPCT 1 35.144 550.72 -799.06
## - PTDF_Q 1 35.975 551.55 -797.35
## - PM25 1 61.478 577.06 -746.09
## - PWDIS_Q 1 79.838 595.42 -710.57
summary(reduced_model_nassau)
##
## Call:
## lm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
## UNDER5PCT + PRE1960PCT + PWDIS_Q + PRMP_Q + PTDF_Q + PM25 +
## UST, data = df_nassau)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.84426 -0.44494 -0.02027 0.42442 2.39773
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.14761 0.70112 -13.047 < 0.0000000000000002 ***
## MINORPCT -0.78168 0.08934 -8.749 < 0.0000000000000002 ***
## LOWINCPCT 0.39388 0.18791 2.096 0.0363 *
## LESSHSPCT 0.59415 0.28665 2.073 0.0384 *
## UNDER5PCT 0.72722 0.50700 1.434 0.1517
## PRE1960PCT 0.18247 0.10035 1.818 0.0693 .
## PWDIS_Q 0.21055 0.01597 13.187 < 0.0000000000000002 ***
## PRMP_Q -0.18520 0.02362 -7.840 0.0000000000000104 ***
## PTDF_Q -0.14539 0.01642 -8.852 < 0.0000000000000002 ***
## PM25 1.04287 0.09012 11.572 < 0.0000000000000002 ***
## UST 0.01757 0.00864 2.034 0.0422 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6776 on 1123 degrees of freedom
## Multiple R-squared: 0.4263, Adjusted R-squared: 0.4212
## F-statistic: 83.46 on 10 and 1123 DF, p-value: < 0.00000000000000022
# Get the coefficient estimates and confidence intervals
tidy_fit_nassau_reduced <- tidy(reduced_model_nassau, exponentiate = TRUE, conf.int = TRUE)
tidy_fit_nassau_reduced <- tidy(reduced_model_nassau, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Facility Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Proximity to Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit_nassau_reduced %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 0.00 | 1.00 | 1.00 | 1.00 |
| Percent Minority | 0.46 | 1.58 | 1.46 | 1.73 |
| Percent Low Income | 1.48 | 4.39 | 2.80 | 8.50 |
| Percent Less than High School Education | 1.81 | 6.11 | 2.80 | 24.05 |
| Percent Under 5 Years Old | 2.07 | 7.92 | 2.16 | 270.43 |
| Percent Pre-1960 Housing | 1.20 | 3.32 | 2.69 | 4.31 |
| Proximity to Wastewater Discharge Facility Quartile | 1.23 | 3.42 | 3.32 | 3.56 |
| Proximity to RMP Facility Quartile | 0.83 | 2.29 | 2.20 | 2.39 |
| Proximity to Hazardous Waste Facility Quartile | 0.86 | 2.36 | 2.32 | 2.44 |
| PM2.5 Concentration (ug/m3) | 2.84 | 17.12 | 10.80 | 29.67 |
| Proximity to Underground Storage Tanks | 1.02 | 2.77 | 2.72 | 2.83 |
SuffolkEJ <- subset(EJScreen, CNTY_NAME == "Suffolk County")
df_Suffolk = as.data.frame(SuffolkEJ)
fit.adjusted.Suffolk <- lm(PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT
+ PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + UST, data = df_Suffolk)
# Obtain model summary
summary(fit.adjusted.Suffolk)
##
## Call:
## lm(formula = PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT +
## LINGISOPCT + UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q +
## PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + UST, data = df_Suffolk)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.17511 -0.34711 -0.09261 0.23723 2.14710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.96196 1.16077 0.829 0.40745
## MINORPCT 0.13057 0.09857 1.325 0.18558
## LOWINCPCT -0.06567 0.14488 -0.453 0.65048
## LESSHSPCT -0.49640 0.24550 -2.022 0.04343 *
## LINGISOPCT 0.21371 0.37562 0.569 0.56950
## UNDER5PCT -0.38233 0.42412 -0.901 0.36755
## OVER64PCT -0.33442 0.14357 -2.329 0.02004 *
## PRE1960PCT -0.53673 0.07934 -6.765 0.0000000000221 ***
## PTRAF_Q 0.02129 0.01430 1.489 0.13678
## PWDIS_Q 0.02004 0.02058 0.974 0.33047
## PRMP_Q -0.26506 0.01629 -16.272 < 0.0000000000000002 ***
## PTDF_Q -0.24489 0.01399 -17.499 < 0.0000000000000002 ***
## OZONE -0.01676 0.02563 -0.654 0.51322
## UST -0.05473 0.01934 -2.829 0.00475 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.541 on 1042 degrees of freedom
## Multiple R-squared: 0.513, Adjusted R-squared: 0.5069
## F-statistic: 84.44 on 13 and 1042 DF, p-value: < 0.00000000000000022
# Calculate VIF values
vif(fit.adjusted.Suffolk)
## MINORPCT LOWINCPCT LESSHSPCT LINGISOPCT UNDER5PCT OVER64PCT PRE1960PCT
## 2.288209 1.339459 2.048313 1.378826 1.139644 1.325095 1.116388
## PTRAF_Q PWDIS_Q PRMP_Q PTDF_Q OZONE UST
## 1.381084 1.118711 1.336111 1.421702 1.147994 1.164093
# PM2.5 was taken out of the minal due to having a VIF of 9.32
# Get the coefficient estimates and confidence intervals
tidy_fit_Suffolk <- tidy(fit.adjusted.Suffolk, exponentiate = TRUE, conf.int = TRUE)
tidy_fit_Suffolk <- tidy(fit.adjusted.Suffolk, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Facility Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Proximity to Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit_Suffolk %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 2.62 | 13.74 | 1.31 | 122331449863.11 |
| Percent Minority | 1.14 | 3.13 | 2.56 | 3.97 |
| Percent Low Income | 0.94 | 2.56 | 2.01 | 3.46 |
| Percent Less than High School Education | 0.61 | 1.84 | 1.46 | 2.69 |
| Percent Limited English Proficiency | 1.24 | 3.46 | 1.80 | 13.33 |
| Percent Under 5 Years Old | 0.68 | 1.97 | 1.35 | 4.81 |
| Percent Over 64 Years Old | 0.72 | 2.05 | 1.72 | 2.59 |
| Percent Pre-1960 Housing | 0.58 | 1.79 | 1.65 | 1.97 |
| Traffic Density Quartile | 1.02 | 2.77 | 2.69 | 2.86 |
| Proximity to Wastewater Discharge Facility Quartile | 1.02 | 2.77 | 2.66 | 2.89 |
| Proximity to RMP Facility Quartile | 0.77 | 2.16 | 2.10 | 2.20 |
| Proximity to Hazardous Waste Facility Quartile | 0.78 | 2.18 | 2.14 | 2.23 |
| Ozone Concentration (ppb) | 0.98 | 2.66 | 2.56 | 2.80 |
| Proximity to Underground Storage Tanks | 0.95 | 2.59 | 2.48 | 2.66 |
### Significant results
# Get the coefficient estimates and confidence intervals
tidy_fit_Suffolk <- tidy(fit.adjusted.Suffolk, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "MINORPCT" ~ "Percent Minority",
term == "LOWINCPCT" ~ "Percent Low Income",
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "LINGISOPCT" ~ "Percent Limited English Proficiency",
term == "UNDER5PCT" ~ "Percent Under 5 Years Old",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PWDIS_Q" ~ "Proximity to Wastewater Discharge Quartile",
term == "PRMP_Q" ~ "Percent in Poverty Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
# Filter significant and positive effect variables
tidy_fit_significant_Suffolk <- tidy_fit_Suffolk %>%
filter(p.value < 0.05, exp_estimate > 1) %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
arrange(desc(exp_estimate))
# Display the results
tidy_fit_significant_Suffolk %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| Underground Storage Tanks | 0.95 | 2.59 | 2.48 | 2.66 |
| Proximity to Hazardous Waste Facility Quartile | 0.78 | 2.18 | 2.14 | 2.23 |
| Percent in Poverty Quartile | 0.77 | 2.16 | 2.10 | 2.20 |
| Percent Over 64 Years Old | 0.72 | 2.05 | 1.72 | 2.59 |
| Percent Less than High School Education | 0.61 | 1.84 | 1.46 | 2.69 |
| Percent Pre-1960 Housing | 0.58 | 1.79 | 1.65 | 1.97 |
## Reduced Model for Suffolk
reduced_model_Suffolk <- step(fit.adjusted.Suffolk, direction = "backward")
## Start: AIC=-1283.64
## PNPL_LOGAR ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + UST
##
## Df Sum of Sq RSS AIC
## - LOWINCPCT 1 0.060 305.01 -1285.4
## - LINGISOPCT 1 0.095 305.05 -1285.3
## - OZONE 1 0.125 305.08 -1285.2
## - UNDER5PCT 1 0.238 305.19 -1284.8
## - PWDIS_Q 1 0.277 305.23 -1284.7
## - MINORPCT 1 0.514 305.47 -1283.9
## <none> 304.95 -1283.6
## - PTRAF_Q 1 0.649 305.60 -1283.4
## - LESSHSPCT 1 1.197 306.15 -1281.5
## - OVER64PCT 1 1.588 306.54 -1280.2
## - UST 1 2.343 307.30 -1277.6
## - PRE1960PCT 1 13.395 318.35 -1240.2
## - PRMP_Q 1 77.490 382.44 -1046.5
## - PTDF_Q 1 89.617 394.57 -1013.6
##
## Step: AIC=-1285.43
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + LINGISOPCT + UNDER5PCT +
## OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q +
## OZONE + UST
##
## Df Sum of Sq RSS AIC
## - LINGISOPCT 1 0.082 305.10 -1287.2
## - OZONE 1 0.108 305.12 -1287.1
## - UNDER5PCT 1 0.257 305.27 -1286.5
## - PWDIS_Q 1 0.289 305.30 -1286.4
## - MINORPCT 1 0.477 305.49 -1285.8
## <none> 305.01 -1285.4
## - PTRAF_Q 1 0.660 305.67 -1285.2
## - LESSHSPCT 1 1.415 306.43 -1282.5
## - OVER64PCT 1 1.679 306.69 -1281.6
## - UST 1 2.381 307.39 -1279.2
## - PRE1960PCT 1 13.344 318.36 -1242.2
## - PRMP_Q 1 79.224 384.24 -1043.6
## - PTDF_Q 1 89.557 394.57 -1015.6
##
## Step: AIC=-1287.15
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + UNDER5PCT + OVER64PCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + UST
##
## Df Sum of Sq RSS AIC
## - OZONE 1 0.109 305.20 -1288.8
## - UNDER5PCT 1 0.253 305.35 -1288.3
## - PWDIS_Q 1 0.273 305.37 -1288.2
## <none> 305.10 -1287.2
## - MINORPCT 1 0.586 305.68 -1287.1
## - PTRAF_Q 1 0.636 305.73 -1287.0
## - LESSHSPCT 1 1.334 306.43 -1284.5
## - OVER64PCT 1 1.663 306.76 -1283.4
## - UST 1 2.381 307.48 -1280.9
## - PRE1960PCT 1 13.351 318.45 -1243.9
## - PRMP_Q 1 79.203 384.30 -1045.4
## - PTDF_Q 1 89.532 394.63 -1017.4
##
## Step: AIC=-1288.77
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + UNDER5PCT + OVER64PCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + UST
##
## Df Sum of Sq RSS AIC
## - UNDER5PCT 1 0.240 305.44 -1289.9
## - PWDIS_Q 1 0.373 305.58 -1289.5
## <none> 305.20 -1288.8
## - MINORPCT 1 0.601 305.81 -1288.7
## - PTRAF_Q 1 0.668 305.87 -1288.5
## - LESSHSPCT 1 1.290 306.49 -1286.3
## - OVER64PCT 1 1.631 306.84 -1285.1
## - UST 1 2.301 307.51 -1282.8
## - PRE1960PCT 1 14.009 319.21 -1243.4
## - PRMP_Q 1 79.451 384.66 -1046.5
## - PTDF_Q 1 89.652 394.86 -1018.8
##
## Step: AIC=-1289.94
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + OVER64PCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + UST
##
## Df Sum of Sq RSS AIC
## - PWDIS_Q 1 0.367 305.81 -1290.7
## - MINORPCT 1 0.516 305.96 -1290.2
## <none> 305.44 -1289.9
## - PTRAF_Q 1 0.676 306.12 -1289.6
## - LESSHSPCT 1 1.247 306.69 -1287.6
## - OVER64PCT 1 1.431 306.88 -1287.0
## - UST 1 2.307 307.75 -1284.0
## - PRE1960PCT 1 14.140 319.58 -1244.2
## - PRMP_Q 1 79.682 385.13 -1047.2
## - PTDF_Q 1 89.585 395.03 -1020.4
##
## Step: AIC=-1290.68
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + OVER64PCT + PRE1960PCT +
## PTRAF_Q + PRMP_Q + PTDF_Q + UST
##
## Df Sum of Sq RSS AIC
## - PTRAF_Q 1 0.575 306.39 -1290.7
## <none> 305.81 -1290.7
## - MINORPCT 1 0.601 306.41 -1290.6
## - LESSHSPCT 1 1.313 307.12 -1288.2
## - OVER64PCT 1 1.420 307.23 -1287.8
## - UST 1 2.290 308.10 -1284.8
## - PRE1960PCT 1 14.951 320.76 -1242.3
## - PRMP_Q 1 81.375 387.19 -1043.5
## - PTDF_Q 1 89.493 395.30 -1021.6
##
## Step: AIC=-1290.69
## PNPL_LOGAR ~ MINORPCT + LESSHSPCT + OVER64PCT + PRE1960PCT +
## PRMP_Q + PTDF_Q + UST
##
## Df Sum of Sq RSS AIC
## - MINORPCT 1 0.581 306.97 -1290.7
## <none> 306.39 -1290.7
## - OVER64PCT 1 1.332 307.72 -1288.1
## - LESSHSPCT 1 1.364 307.75 -1288.0
## - UST 1 2.982 309.37 -1282.5
## - PRE1960PCT 1 15.453 321.84 -1240.7
## - PRMP_Q 1 81.385 387.77 -1043.9
## - PTDF_Q 1 92.687 399.07 -1013.6
##
## Step: AIC=-1290.69
## PNPL_LOGAR ~ LESSHSPCT + OVER64PCT + PRE1960PCT + PRMP_Q + PTDF_Q +
## UST
##
## Df Sum of Sq RSS AIC
## <none> 306.97 -1290.7
## - LESSHSPCT 1 0.789 307.75 -1290.0
## - OVER64PCT 1 2.125 309.09 -1285.4
## - UST 1 2.899 309.87 -1282.8
## - PRE1960PCT 1 16.497 323.46 -1237.4
## - PRMP_Q 1 83.900 390.87 -1037.5
## - PTDF_Q 1 97.029 403.99 -1002.6
summary(reduced_model_Suffolk)
##
## Call:
## lm(formula = PNPL_LOGAR ~ LESSHSPCT + OVER64PCT + PRE1960PCT +
## PRMP_Q + PTDF_Q + UST, data = df_Suffolk)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.21203 -0.34717 -0.09136 0.24412 2.18213
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.39988 0.08024 4.983 0.000000730179649 ***
## LESSHSPCT -0.29955 0.18247 -1.642 0.10095
## OVER64PCT -0.35501 0.13175 -2.695 0.00716 **
## PRE1960PCT -0.57751 0.07692 -7.508 0.000000000000128 ***
## PRMP_Q -0.26556 0.01568 -16.933 < 0.0000000000000002 ***
## PTDF_Q -0.23903 0.01313 -18.209 < 0.0000000000000002 ***
## UST -0.05893 0.01872 -3.148 0.00169 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.541 on 1049 degrees of freedom
## Multiple R-squared: 0.5098, Adjusted R-squared: 0.507
## F-statistic: 181.8 on 6 and 1049 DF, p-value: < 0.00000000000000022
# Get the coefficient estimates and confidence intervals
tidy_fit_Suffolk_reduced <- tidy(reduced_model_Suffolk, exponentiate = TRUE, conf.int = TRUE)
tidy_fit_Suffolk_reduced <- tidy(reduced_model_Suffolk, exponentiate = TRUE, conf.int = TRUE) %>%
mutate(
term = case_when(
term == "LESSHSPCT" ~ "Percent Less than High School Education",
term == "OVER64PCT" ~ "Percent Over 64 Years Old",
term == "PRE1960PCT" ~ "Percent Pre-1960 Housing",
term == "PTRAF_Q" ~ "Traffic Density Quartile",
term == "PRMP_Q" ~ "Proximity to RMP Facility Quartile",
term == "PTDF_Q" ~ "Proximity to Hazardous Waste Facility Quartile",
term == "OZONE" ~ "Ozone Concentration (ppb)",
term == "PM25" ~ "PM2.5 Concentration (ug/m3)",
term == "UST" ~ "Proximity to Underground Storage Tanks"
),
estimate = round(estimate, 2),
conf.low = round(conf.low, 2),
conf.high = round(conf.high, 2),
exp_estimate = round(exp(estimate), 2),
exp_ci_low = round(exp(conf.low), 2),
exp_ci_high = round(exp(conf.high), 2)
)
tidy_fit_Suffolk_reduced %>%
select(term, estimate, exp_estimate, exp_ci_low, exp_ci_high) %>%
kable(format = "html", align = "c") %>%
kable_styling(full_width = FALSE)
| term | estimate | exp_estimate | exp_ci_low | exp_ci_high |
|---|---|---|---|---|
| NA | 1.49 | 4.44 | 3.56 | 5.75 |
| Percent Less than High School Education | 0.74 | 2.10 | 1.68 | 2.89 |
| Percent Over 64 Years Old | 0.70 | 2.01 | 1.72 | 2.48 |
| Percent Pre-1960 Housing | 0.56 | 1.75 | 1.62 | 1.92 |
| Proximity to RMP Facility Quartile | 0.77 | 2.16 | 2.10 | 2.20 |
| Proximity to Hazardous Waste Facility Quartile | 0.79 | 2.20 | 2.16 | 2.25 |
| Proximity to Underground Storage Tanks | 0.94 | 2.56 | 2.48 | 2.66 |
# Calculate coefficients as percentage change for a 1% increase in independent variable
coef_pct_change <- 100*(exp(coef(fit.adjusted)) - 1)
# Calculate confidence intervals for percentage change coefficients
SE <- sqrt(diag(vcov(fit.adjusted)))
lower_ci <- 100*(exp(coef(fit.adjusted))^(1 - 1.96*SE) - 1)
upper_ci <- 100*(exp(coef(fit.adjusted))^(1 + 1.96*SE) - 1)
# Create a new data frame to display the coefficients and confidence intervals
coef_table <- data.frame(Coefficient = names(coef_pct_change),
Percentage_change = coef_pct_change,
Lower_CI = lower_ci,
Upper_CI = upper_ci)
# Round the values in the table to two decimal places
coef_table[,2:4] <- round(coef_table[,2:4], 2)
# Print the table
print(coef_table)
## Coefficient Percentage_change Lower_CI Upper_CI
## (Intercept) (Intercept) 152.72 -66.24 1791.65
## MINORPCT MINORPCT -36.76 -32.61 -40.65
## LOWINCPCT LOWINCPCT 20.66 15.22 26.35
## LESSHSPCT LESSHSPCT 11.71 6.92 16.71
## LINGISOPCT LINGISOPCT 83.92 37.06 146.79
## UNDER5PCT UNDER5PCT 43.98 11.66 85.66
## OVER64PCT OVER64PCT -10.44 -7.77 -13.04
## PRE1960PCT PRE1960PCT -17.40 -15.45 -19.30
## PTRAF_Q PTRAF_Q 3.21 3.14 3.28
## PWDIS_Q PWDIS_Q 20.85 20.31 21.39
## PRMP_Q PRMP_Q -21.77 -21.24 -22.31
## PTDF_Q PTDF_Q -16.17 -15.84 -16.50
## OZONE OZONE -14.93 -14.13 -15.71
## PM25 PM25 103.68 89.20 119.28
## UST UST 0.14 0.14 0.15
# CREATE LOGISTIC VARIABLE
MEDIAN <- round(median(df$PNPL),4)
df <- df %>%
mutate(PNPL_SPLIT = as.numeric(PNPL >= MEDIAN),
PNPL_SPLIT = factor(PNPL_SPLIT,
levels = 0:1,
labels = c(0, 1)))
tapply(df$PNPL, df$PNPL_SPLIT, range)
## $`0`
## [1] 0.03104137 0.21264430
##
## $`1`
## [1] 0.2128068 5.1370115
fit.logistic <- glm(PNPL_SPLIT ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT
+ PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, data = df, family = binomial)
summary(fit.logistic)
##
## Call:
## glm(formula = PNPL_SPLIT ~ MINORPCT + LOWINCPCT + LESSHSPCT +
## LINGISOPCT + UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q +
## PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST, family = binomial,
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.47516 -0.69978 0.06315 0.63253 2.86338
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 17.671083 5.107511 3.460 0.000541 ***
## MINORPCT -0.724756 0.288209 -2.515 0.011914 *
## LOWINCPCT -0.005107 0.506163 -0.010 0.991949
## LESSHSPCT 0.084824 0.840405 0.101 0.919604
## LINGISOPCT 2.812117 1.076620 2.612 0.009002 **
## UNDER5PCT 0.126327 1.470678 0.086 0.931548
## OVER64PCT -0.132112 0.607200 -0.218 0.827760
## PRE1960PCT -1.064281 0.254760 -4.178 0.00002946252 ***
## PTRAF_Q 0.153802 0.045931 3.349 0.000812 ***
## PWDIS_Q 0.478843 0.052927 9.047 < 0.0000000000000002 ***
## PRMP_Q -0.619451 0.058306 -10.624 < 0.0000000000000002 ***
## PTDF_Q -0.668534 0.047859 -13.969 < 0.0000000000000002 ***
## OZONE -0.799839 0.134573 -5.944 0.00000000279 ***
## PM25 2.554376 0.248990 10.259 < 0.0000000000000002 ***
## UST -0.032458 0.030543 -1.063 0.287923
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3036.0 on 2189 degrees of freedom
## Residual deviance: 1905.6 on 2175 degrees of freedom
## AIC: 1935.6
##
## Number of Fisher Scoring iterations: 5
fit.logistic %>%
tbl_regression(exponentiate = TRUE,
intercept = T,
estimate_fun = function(x) style_sigfig(x, digits = 3),
pvalue_fun = function(x) style_pvalue(x, digits = 3),
label = list(MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Linguistic Isolation %",
UNDER5PCT ~ "Under 5%",
OVER64PCT ~ "Over 64%",
PRE1960PCT ~ "Houses Built Pre-1960 %",
PTRAF_Q ~ "Proximity to Traffic",
PWDIS_Q ~ "Proximity to Wastewater Discharge Facility",
PRMP_Q ~ "Proximity to RMP Facility",
PTDF_Q ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average O3 Ambient Air Concentration",
PM25 ~ "Average Annual PM 2.5 Ambient Air Concentration",
UST ~ "Proximity to Underground Storage Facility"
)) %>%
add_global_p(keep = T) %>%
modify_caption(paste("Logistic regression results for proximity to Superfund sites (split by median) by census block group (N = ", NOBS, ")", sep=""))
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | 47,255,668 | 2,305, 1,158,569,667,952 | <0.001 |
| Racial Minority % | 0.484 | 0.275, 0.851 | 0.012 |
| Low Income % | 0.995 | 0.370, 2.70 | 0.992 |
| Less than HS Ed. % | 1.09 | 0.211, 5.70 | 0.920 |
| Linguistic Isolation % | 16.6 | 2.07, 141 | 0.008 |
| Under 5% | 1.13 | 0.063, 20.3 | 0.932 |
| Over 64% | 0.876 | 0.265, 2.87 | 0.828 |
| Houses Built Pre-1960 % | 0.345 | 0.209, 0.567 | <0.001 |
| Proximity to Traffic | 1.17 | 1.07, 1.28 | <0.001 |
| Proximity to Wastewater Discharge Facility | 1.61 | 1.46, 1.79 | <0.001 |
| Proximity to RMP Facility | 0.538 | 0.480, 0.603 | <0.001 |
| Proximity to Hazardous Waste Facility | 0.512 | 0.466, 0.562 | <0.001 |
| Average O3 Ambient Air Concentration | 0.449 | 0.344, 0.583 | <0.001 |
| Average Annual PM 2.5 Ambient Air Concentration | 12.9 | 7.97, 21.2 | <0.001 |
| Proximity to Underground Storage Facility | 0.968 | 0.912, 1.03 | 0.290 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||
# plot histogram of deviance residuals
hist(resid(fit.logistic), breaks = 30, col = "grey", main = "Histogram of Deviance Residuals")
# create QQ-plot of deviance residuals
qqnorm(resid(fit.logistic))
qqline(resid(fit.logistic))
reduced.logistic.model <- step(fit.logistic, direction = "both")
## Start: AIC=1935.63
## PNPL_SPLIT ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Deviance AIC
## - LOWINCPCT 1 1905.6 1933.6
## - UNDER5PCT 1 1905.6 1933.6
## - LESSHSPCT 1 1905.6 1933.6
## - OVER64PCT 1 1905.7 1933.7
## - UST 1 1906.8 1934.8
## <none> 1905.6 1935.6
## - MINORPCT 1 1912.0 1940.0
## - LINGISOPCT 1 1912.7 1940.7
## - PTRAF_Q 1 1917.0 1945.0
## - PRE1960PCT 1 1923.4 1951.4
## - OZONE 1 1943.3 1971.3
## - PWDIS_Q 1 1996.5 2024.5
## - PRMP_Q 1 2026.4 2054.4
## - PM25 1 2030.1 2058.1
## - PTDF_Q 1 2125.7 2153.7
##
## Step: AIC=1933.63
## PNPL_SPLIT ~ MINORPCT + LESSHSPCT + LINGISOPCT + UNDER5PCT +
## OVER64PCT + PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q +
## OZONE + PM25 + UST
##
## Df Deviance AIC
## - UNDER5PCT 1 1905.6 1931.6
## - LESSHSPCT 1 1905.7 1931.7
## - OVER64PCT 1 1905.7 1931.7
## - UST 1 1906.8 1932.8
## <none> 1905.6 1933.6
## + LOWINCPCT 1 1905.6 1935.6
## - MINORPCT 1 1912.1 1938.1
## - LINGISOPCT 1 1912.8 1938.8
## - PTRAF_Q 1 1917.0 1943.0
## - PRE1960PCT 1 1923.5 1949.5
## - OZONE 1 1943.3 1969.3
## - PWDIS_Q 1 1996.5 2022.5
## - PRMP_Q 1 2026.8 2052.8
## - PM25 1 2030.2 2056.2
## - PTDF_Q 1 2125.8 2151.8
##
## Step: AIC=1931.64
## PNPL_SPLIT ~ MINORPCT + LESSHSPCT + LINGISOPCT + OVER64PCT +
## PRE1960PCT + PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE +
## PM25 + UST
##
## Df Deviance AIC
## - LESSHSPCT 1 1905.7 1929.7
## - OVER64PCT 1 1905.7 1929.7
## - UST 1 1906.8 1930.8
## <none> 1905.6 1931.6
## + UNDER5PCT 1 1905.6 1933.6
## + LOWINCPCT 1 1905.6 1933.6
## - MINORPCT 1 1912.1 1936.1
## - LINGISOPCT 1 1912.8 1936.8
## - PTRAF_Q 1 1917.0 1941.0
## - PRE1960PCT 1 1923.5 1947.5
## - OZONE 1 1943.3 1967.3
## - PWDIS_Q 1 1996.6 2020.6
## - PRMP_Q 1 2026.9 2050.9
## - PM25 1 2030.4 2054.4
## - PTDF_Q 1 2125.9 2149.9
##
## Step: AIC=1929.65
## PNPL_SPLIT ~ MINORPCT + LINGISOPCT + OVER64PCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Deviance AIC
## - OVER64PCT 1 1905.7 1927.7
## - UST 1 1906.8 1928.8
## <none> 1905.7 1929.7
## + LESSHSPCT 1 1905.6 1931.6
## + UNDER5PCT 1 1905.7 1931.7
## + LOWINCPCT 1 1905.7 1931.7
## - MINORPCT 1 1913.8 1935.8
## - LINGISOPCT 1 1914.1 1936.1
## - PTRAF_Q 1 1917.1 1939.1
## - PRE1960PCT 1 1923.6 1945.6
## - OZONE 1 1943.3 1965.3
## - PWDIS_Q 1 1996.8 2018.8
## - PRMP_Q 1 2027.0 2049.0
## - PM25 1 2032.0 2054.0
## - PTDF_Q 1 2127.2 2149.2
##
## Step: AIC=1927.71
## PNPL_SPLIT ~ MINORPCT + LINGISOPCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25 + UST
##
## Df Deviance AIC
## - UST 1 1906.8 1926.8
## <none> 1905.7 1927.7
## + OVER64PCT 1 1905.7 1929.7
## + UNDER5PCT 1 1905.7 1929.7
## + LESSHSPCT 1 1905.7 1929.7
## + LOWINCPCT 1 1905.7 1929.7
## - MINORPCT 1 1914.0 1934.0
## - LINGISOPCT 1 1914.2 1934.2
## - PTRAF_Q 1 1917.1 1937.1
## - PRE1960PCT 1 1923.8 1943.8
## - OZONE 1 1943.3 1963.3
## - PWDIS_Q 1 1997.2 2017.2
## - PRMP_Q 1 2027.1 2047.1
## - PM25 1 2033.2 2053.2
## - PTDF_Q 1 2127.3 2147.3
##
## Step: AIC=1926.83
## PNPL_SPLIT ~ MINORPCT + LINGISOPCT + PRE1960PCT + PTRAF_Q + PWDIS_Q +
## PRMP_Q + PTDF_Q + OZONE + PM25
##
## Df Deviance AIC
## <none> 1906.8 1926.8
## + UST 1 1905.7 1927.7
## + OVER64PCT 1 1906.8 1928.8
## + UNDER5PCT 1 1906.8 1928.8
## + LESSHSPCT 1 1906.8 1928.8
## + LOWINCPCT 1 1906.8 1928.8
## - LINGISOPCT 1 1914.9 1932.9
## - MINORPCT 1 1915.5 1933.5
## - PTRAF_Q 1 1919.4 1937.4
## - PRE1960PCT 1 1925.8 1943.8
## - OZONE 1 1943.3 1961.3
## - PWDIS_Q 1 2001.1 2019.1
## - PRMP_Q 1 2028.9 2046.9
## - PM25 1 2033.5 2051.5
## - PTDF_Q 1 2128.1 2146.1
summary(reduced.logistic.model)
##
## Call:
## glm(formula = PNPL_SPLIT ~ MINORPCT + LINGISOPCT + PRE1960PCT +
## PTRAF_Q + PWDIS_Q + PRMP_Q + PTDF_Q + OZONE + PM25, family = binomial,
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.46498 -0.70946 0.06019 0.63019 2.86550
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 16.76018 5.02570 3.335 0.000853 ***
## MINORPCT -0.70857 0.24173 -2.931 0.003376 **
## LINGISOPCT 2.77585 0.99877 2.779 0.005448 **
## PRE1960PCT -1.07665 0.25002 -4.306 0.00001660717 ***
## PTRAF_Q 0.15988 0.04552 3.512 0.000444 ***
## PWDIS_Q 0.48547 0.05267 9.217 < 0.0000000000000002 ***
## PRMP_Q -0.62141 0.05820 -10.677 < 0.0000000000000002 ***
## PTDF_Q -0.66862 0.04777 -13.997 < 0.0000000000000002 ***
## OZONE -0.77337 0.13223 -5.849 0.00000000496 ***
## PM25 2.51042 0.24270 10.344 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3036.0 on 2189 degrees of freedom
## Residual deviance: 1906.8 on 2180 degrees of freedom
## AIC: 1926.8
##
## Number of Fisher Scoring iterations: 5
reduced.logistic.model %>%
tbl_regression(exponentiate = TRUE,
estimate_fun = function(x) style_sigfig(x, digits = 3),
pvalue_fun = function(x) style_pvalue(x, digits = 3),
label = list(MINORPCT ~ "Racial Minority %",
LINGISOPCT ~ "Linguistic Isolation %",
PRE1960PCT ~ "Houses Built Pre-1960 %",
PTRAF_Q ~ "Proximity to Traffic",
PWDIS_Q ~ "Proximity to Wastewater Discharge Facility",
PRMP_Q ~ "Proximity to RMP Facility",
PTDF_Q ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average O3 Ambient Air Concentration",
PM25 ~ "Average Annual PM 2.5 Ambient Air Concentration" )) %>%
add_global_p(keep = T) %>%
modify_caption(paste("Reduced logistic regression results for proximity to Superfund sites (split by median) by census block group (N = ", NOBS, ")", sep=""))
| Characteristic | OR1 | 95% CI1 | p-value |
|---|---|---|---|
| Racial Minority % | 0.492 | 0.306, 0.790 | 0.003 |
| Linguistic Isolation % | 16.1 | 2.34, 118 | 0.004 |
| Houses Built Pre-1960 % | 0.341 | 0.208, 0.555 | <0.001 |
| Proximity to Traffic | 1.17 | 1.07, 1.28 | <0.001 |
| Proximity to Wastewater Discharge Facility | 1.62 | 1.47, 1.80 | <0.001 |
| Proximity to RMP Facility | 0.537 | 0.479, 0.601 | <0.001 |
| Proximity to Hazardous Waste Facility | 0.512 | 0.466, 0.562 | <0.001 |
| Average O3 Ambient Air Concentration | 0.461 | 0.355, 0.596 | <0.001 |
| Average Annual PM 2.5 Ambient Air Concentration | 12.3 | 7.72, 20.0 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||