# Changing NA values to zero
df["PWDIS"][is.na(df["PWDIS"])] <- 0
df["UST"][is.na(df["UST"])] <- 0
df["PTRAF"][is.na(df["PTRAF"])] <- 0
#PNPL Standardized Split
MEDIAN <- round(median(df$PNPL),4)
LABEL0 <- paste("PNPL Score < Median:", round(MEDIAN,2))
LABEL1 <- paste("PNPL Score >= Median:", round(MEDIAN,2))
df <- df %>%
mutate(PNPL_SPLIT = as.numeric(PNPL >= MEDIAN),
PNPL_SPLIT = factor(PNPL_SPLIT,
levels = 0:1,
labels = c(LABEL0, LABEL1)))
tapply(df$PNPL, df$PNPL_SPLIT, range)
## $`PNPL Score < Median: 0.21`
## [1] 0.03104137 0.21204710
##
## $`PNPL Score >= Median: 0.21`
## [1] 0.212644 5.137011
Demographic Factors
tbl_dem <-
df %>%
dplyr::select(VULEOPCT, MINORPCT, LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT, PNPL_SPLIT) %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(VULEOPCT ~ "continuous",
MINORPCT ~ "continuous",
LOWINCPCT ~ "continuous",
UNEMPPCT ~ "continuous",
LESSHSPCT ~ "continuous",
LINGISOPCT ~ "continuous",
UNDER5PCT ~ "continuous",
OVER64PCT ~ "continuous"),
label = list(VULEOPCT ~ "Vulerability Index %",
MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
UNEMPPCT ~ "Unemployed %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Lingustic Isolation %",
UNDER5PCT ~ "Under Age 5 %",
OVER64PCT ~ "Over Age 64 %")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Demographic Characteristics") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
tbl_dem
| Variable | All census block groups N = 21911 |
PNPL Score < Median: 0.21 N = 1094 (49.9%)1 |
PNPL Score >= Median: 0.21 N = 1097 (50.1%)1 |
p-value2 |
|---|---|---|---|---|
| Vulerability Index % | 0.25 (0.17) | 0.22 (0.16) | 0.27 (0.18) | <0.001 |
| Racial Minority % | 0.34 (0.28) | 0.29 (0.25) | 0.40 (0.29) | <0.001 |
| Low Income % | 0.15 (0.13) | 0.15 (0.13) | 0.15 (0.14) | 0.818 |
| Less than HS Ed. % | 0.08 (0.09) | 0.08 (0.08) | 0.09 (0.10) | <0.001 |
| Lingustic Isolation % | 0.04 (0.07) | 0.03 (0.06) | 0.05 (0.07) | <0.001 |
| Unemployed % | 0.04 (0.05) | 0.04 (0.05) | 0.04 (0.05) | 0.240 |
| Under Age 5 % | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.122 |
| Over Age 64 % | 0.19 (0.11) | 0.19 (0.13) | 0.18 (0.09) | 0.002 |
| 1 Mean (SD) | ||||
| 2 Welch Two Sample t-test | ||||
Demographic Factors - Stratified
tbl_dem_strat <-
df %>%
dplyr::select(VULEOPCT, MINORPCT, LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT, PNPL_SPLIT, CNTY_NAME) %>%
mutate(CNTY_NAME = paste(CNTY_NAME,"County")) %>%
tbl_strata(
strata = CNTY_NAME,
.tbl_fun =
~ .x %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(VULEOPCT ~ "continuous",
MINORPCT ~ "continuous",
LOWINCPCT ~ "continuous",
UNEMPPCT ~ "continuous",
LESSHSPCT ~ "continuous",
LINGISOPCT ~ "continuous",
UNDER5PCT ~ "continuous",
OVER64PCT ~ "continuous"),
label = list(VULEOPCT ~ "Vulerability Index %",
MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
UNEMPPCT ~ "Unemployed %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Lingustic Isolation %",
UNDER5PCT ~ "Under Age 5 %",
OVER64PCT ~ "Over Age 64 %")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Demographic Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) )
tbl_dem_strat
| Variable | Nassau County County | Suffolk County County | ||||||
|---|---|---|---|---|---|---|---|---|
| All census block groups N = 11341 |
PNPL Score < Median: 0.21 N = 357 (31.5%)1 |
PNPL Score >= Median: 0.21 N = 777 (68.5%)1 |
p-value2 | All census block groups N = 10571 |
PNPL Score < Median: 0.21 N = 737 (69.7%)1 |
PNPL Score >= Median: 0.21 N = 320 (30.3%)1 |
p-value2 | |
| Vulerability Index % | 0.26 (0.18) | 0.24 (0.19) | 0.27 (0.18) | 0.008 | 0.23 (0.16) | 0.21 (0.15) | 0.28 (0.19) | <0.001 |
| Racial Minority % | 0.39 (0.29) | 0.35 (0.30) | 0.40 (0.28) | 0.002 | 0.30 (0.26) | 0.26 (0.22) | 0.39 (0.31) | <0.001 |
| Low Income % | 0.13 (0.13) | 0.13 (0.13) | 0.14 (0.13) | 0.508 | 0.16 (0.13) | 0.16 (0.13) | 0.17 (0.14) | 0.106 |
| Less than HS Ed. % | 0.08 (0.09) | 0.07 (0.09) | 0.09 (0.09) | 0.007 | 0.09 (0.10) | 0.08 (0.08) | 0.11 (0.12) | <0.001 |
| Lingustic Isolation % | 0.05 (0.08) | 0.04 (0.08) | 0.06 (0.08) | <0.001 | 0.03 (0.05) | 0.02 (0.05) | 0.04 (0.06) | <0.001 |
| Unemployed % | 0.04 (0.05) | 0.04 (0.06) | 0.04 (0.05) | 0.340 | 0.04 (0.04) | 0.04 (0.04) | 0.04 (0.05) | 0.900 |
| Under Age 5 % | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.589 | 0.05 (0.04) | 0.05 (0.04) | 0.05 (0.04) | 0.255 |
| Over Age 64 % | 0.18 (0.09) | 0.18 (0.09) | 0.18 (0.09) | 0.739 | 0.19 (0.13) | 0.20 (0.14) | 0.17 (0.11) | <0.001 |
| 1 Mean (SD) | ||||||||
| 2 Welch Two Sample t-test | ||||||||
Environmental Factors
tbl_env <-
df %>%
dplyr::select(PRE1960PCT, PTRAF, PWDIS, PRMP, PTSDF, OZONE, PM25, UST, PNPL_SPLIT) %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(PRE1960PCT ~ "continuous",
PTRAF ~ "continuous",
PWDIS ~ "continuous",
PRMP ~ "continuous",
PTSDF ~ "continuous",
OZONE ~ "continuous",
PM25 ~ "continuous",
UST ~ "continuous"),
label = list(PRE1960PCT ~ "% Houses Built Pre 1960",
PTRAF ~ "Proximity to Traffic",
PWDIS ~ "Proximity to Waterwater Discharge Facility",
PRMP ~ "Proximity to Regulated Management Plan Facility",
PTSDF ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average Seasonal Ozone Concentration",
PM25 ~ "Annual Average PM2.5 Concentration",
UST ~ "Proximity to Underground Storage Tank")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Environmental Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
)
tbl_env
| Variable | All census block groups N = 21911 |
PNPL Score < Median: 0.21 N = 1094 (49.9%)1 |
PNPL Score >= Median: 0.21 N = 1097 (50.1%)1 |
p-value2 |
|---|---|---|---|---|
| % Houses Built Pre 1960 | 0.53 (0.29) | 0.46 (0.27) | 0.61 (0.29) | <0.001 |
| Proximity to Traffic | 504.61 (685.44) | 387.41 (541.02) | 621.48 (787.09) | <0.001 |
| Proximity to Waterwater Discharge Facility | 0.01 (0.19) | 0.02 (0.19) | 0.01 (0.19) | 0.223 |
| Proximity to Regulated Management Plan Facility | 0.16 (0.21) | 0.09 (0.06) | 0.23 (0.27) | <0.001 |
| Proximity to Hazardous Waste Facility | 2.02 (1.74) | 1.20 (1.03) | 2.84 (1.90) | <0.001 |
| Average Seasonal Ozone Concentration | 43.56 (0.61) | 43.51 (0.70) | 43.61 (0.49) | <0.001 |
| Annual Average PM2.5 Concentration | 7.48 (0.47) | 7.26 (0.44) | 7.70 (0.38) | <0.001 |
| Proximity to Underground Storage Tank | 1.16 (1.98) | 0.91 (1.54) | 1.41 (2.31) | <0.001 |
| 1 Mean (SD) | ||||
| 2 Welch Two Sample t-test | ||||
Environmental Factors-Stratified
tbl_env_strat <-
df %>%
dplyr::select(PRE1960PCT, PTRAF, PWDIS, PRMP, PTSDF, OZONE, PM25, UST, PNPL_SPLIT, CNTY_NAME) %>%
mutate(CNTY_NAME = paste(CNTY_NAME,"County")) %>%
tbl_strata(
strata = CNTY_NAME,
.tbl_fun =
~ .x %>%
tbl_summary(
# The "by" variable
by =PNPL_SPLIT,
statistic = list(all_continuous() ~ "{mean} ({sd})"),
digits = list(all_continuous() ~ c(2, 2)),
type = list(PRE1960PCT ~ "continuous",
PTRAF ~ "continuous",
PWDIS ~ "continuous",
PRMP ~ "continuous",
PTSDF ~ "continuous",
OZONE ~ "continuous",
PM25 ~ "continuous",
UST ~ "continuous"),
label = list(PRE1960PCT ~ "% Houses Built Pre 1960",
PTRAF ~ "Proximity to Traffic",
PWDIS ~ "Proximity to Waterwater Discharge Facility",
PRMP ~ "Proximity to Regulated Management Plan Facility",
PTSDF ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Average Seasonal Ozone Concentration",
PM25 ~ "Annual Average PM2.5 Concentration",
UST ~ "Proximity to Underground Storage Tank")
) %>%
modify_header(
label = "**Variable**",
# The following adds the % to the column total label
# <br> is the location of a line break
all_stat_cols() ~ "**{level}**<br>N = {n} ({style_percent(p, digits=1)}%)"
) %>%
modify_caption("Environmental Characteristics - Stratified by County") %>%
bold_labels() %>%
# Include an "overall" column
add_overall(
last = FALSE,
# The ** make it bold
col_label = "**All census block groups**<br>N = {N}"
) %>%
add_p(
test = list(all_continuous() ~ "t.test",
all_categorical() ~ "chisq.test"),
pvalue_fun = function(x) style_pvalue(x, digits = 3)
) )
tbl_env_strat
| Variable | Nassau County County | Suffolk County County | ||||||
|---|---|---|---|---|---|---|---|---|
| All census block groups N = 11341 |
PNPL Score < Median: 0.21 N = 357 (31.5%)1 |
PNPL Score >= Median: 0.21 N = 777 (68.5%)1 |
p-value2 | All census block groups N = 10571 |
PNPL Score < Median: 0.21 N = 737 (69.7%)1 |
PNPL Score >= Median: 0.21 N = 320 (30.3%)1 |
p-value2 | |
| % Houses Built Pre 1960 | 0.72 (0.21) | 0.68 (0.22) | 0.74 (0.20) | <0.001 | 0.33 (0.22) | 0.35 (0.23) | 0.28 (0.20) | <0.001 |
| Proximity to Traffic | 664.91 (809.44) | 588.88 (729.87) | 699.84 (841.64) | 0.024 | 332.63 (462.80) | 289.83 (384.43) | 431.21 (595.03) | <0.001 |
| Proximity to Waterwater Discharge Facility | 0.03 (0.27) | 0.06 (0.33) | 0.01 (0.23) | 0.020 | 0.00 (0.00) | 0.00 (0.00) | 0.00 (0.00) | 0.019 |
| Proximity to Regulated Management Plan Facility | 0.23 (0.24) | 0.14 (0.07) | 0.27 (0.28) | <0.001 | 0.09 (0.12) | 0.07 (0.03) | 0.16 (0.19) | <0.001 |
| Proximity to Hazardous Waste Facility | 2.38 (1.72) | 1.44 (0.96) | 2.81 (1.81) | <0.001 | 1.64 (1.67) | 1.08 (1.03) | 2.91 (2.11) | <0.001 |
| Average Seasonal Ozone Concentration | 43.60 (0.51) | 43.50 (0.55) | 43.64 (0.48) | <0.001 | 43.53 (0.70) | 43.52 (0.77) | 43.55 (0.50) | 0.398 |
| Annual Average PM2.5 Concentration | 7.81 (0.25) | 7.62 (0.17) | 7.89 (0.23) | <0.001 | 7.13 (0.38) | 7.09 (0.42) | 7.23 (0.24) | <0.001 |
| Proximity to Underground Storage Tank | 1.71 (2.48) | 1.56 (2.15) | 1.78 (2.62) | 0.131 | 0.57 (0.93) | 0.60 (0.99) | 0.51 (0.77) | 0.116 |
| 1 Mean (SD) | ||||||||
| 2 Welch Two Sample t-test | ||||||||
Correlation Matrix
matrix <- subset(df, select = c(VULEOPCT, MINORPCT,LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT
,PRE1960PCT, PTRAF,
PWDIS, PRMP, PTSDF, OZONE, PM25, UST))
matrix.cor <- round(cor(matrix), 1)
All Variables - Heat Map
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(matrix.cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE
)
Demographic Variables - Heat Map
#Demographic Correlation Matrix
matrix_dem <- subset(df, select = c(VULEOPCT, MINORPCT,LOWINCPCT, LESSHSPCT, LINGISOPCT, UNEMPPCT, UNDER5PCT, OVER64PCT))
matrix_dem.cor <- round(cor(matrix_dem), 1)
p.mat <- cor.mtest(matrix_dem)
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(matrix_dem.cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE
)
Environmental Variables - Heat Map
# Environmental Correlation Matrix
matrix_env <- subset(df, select = c(PRE1960PCT, PTRAF,
PWDIS, PRMP, PTSDF, OZONE, PM25, UST))
matrix_env.cor <- round(cor(matrix_env), 1)
p.mat <- cor.mtest(matrix_env)
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))
corrplot(matrix_env.cor, method="color", col=col(200),
type="upper", order="hclust",
addCoef.col = "black", # Add coefficient of correlation
tl.col="black", tl.srt=45, #Text label color and rotation
# Combine with significance
p.mat = p.mat, sig.level = 0.05, insig = "blank",
# hide correlation coefficient on the principal diagonal
diag=FALSE
)
Creating PNPL Log Transformation
#Vulnerability PCT
### Fairly normal
hist(df$VULEOPCT, main = "Distribution of VULEOPCT") #2
#MINOR PCT
### Non-normal
hist(df$MINORPCT, main = "Distribution of MINORPCT") #3
#LOW INCOME PCT
### Skew Right
hist(df$LOWINCPCT, main = "Distribution of LOWINCPCT") #4
#LESSHSPCT
### Skew Right
hist(df$LESSHSPCT, main = "Distribution of LESSHSPCT") #5
#LINGUISTIC ISOLATION
### Skew Right
hist(df$LINGISOPCT, main = "Distribution of LINGISOPCT") #6
#UNEMPPCT
### Skew left
hist(df$UNEMPPCT, main = "Distribution of UNEMPPCT") #7
#UNDER 5 PCT
### Looks normal
hist(df$UNDER5PCT, main = "Distribution of UNDER5PCT") #8
#OVER 64 PCT
### Looks normal
hist(df$OVER64PCT, main = "Distribution of OVER64PCT") #9
#PRE1960 PCT
### Skewed right
hist(df$PRE1960PCT, main = "Distribution of PRE1960PCT") #10
#PTRAF
### Skewed left
hist(df$PTRAF, main = "Distribution of PTRAF") #11
#PWDIS
### Skewed left
hist(df$PWDIS, main = "Distribution of PWDIS") #12
#PRMP
### Skewed left
hist(df$PRMP, main = "Distribution of PRMP") #13
#PTSDF
### Skewed left
hist(df$PTSDF, main = "Distribution of PTSDF") #14
#Ozone
### Looks fairly normal
hist(df$OZONE, main = "Distribution of OZONE") #15
#PM2.5
### Slight skew right
hist(df$PM25, main = "Distribution of PM25") #16
#UST
### Skewed left
hist(df$UST, main = "Distribution of UST") #17
# MINORPCT
fit.MINORPCT <- lm(PNPL_LOG ~ MINORPCT, data = df)
# LWINCPCT
fit.LWINCPCT <- lm(PNPL_LOG ~ LOWINCPCT, data = df)
# LESHSPCT
fit.LESHSPCT <- lm(PNPL_LOG ~ LESSHSPCT, data = df)
# LNGISPCT
fit.LNGISPCT <- lm(PNPL_LOG ~ LINGISOPCT, data = df)
# UNDR5PCT
fit.UNDR5PCT <- lm(PNPL_LOG ~ UNDER5PCT, data = df)
# OVR64PCT
fit.OVR64PCT <- lm(PNPL_LOG ~ OVER64PCT, data = df)
tab_model(fit.MINORPCT, fit.LWINCPCT, fit.LESHSPCT, fit.LNGISPCT, fit.UNDR5PCT, fit.OVR64PCT)
| Â | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | -1.51 | -1.58 – -1.45 | <0.001 | -1.30 | -1.36 – -1.24 | <0.001 | -1.39 | -1.44 – -1.33 | <0.001 | -1.40 | -1.45 – -1.36 | <0.001 | -1.38 | -1.44 – -1.32 | <0.001 | -1.18 | -1.25 – -1.10 | <0.001 |
| MINORPCT | 0.55 | 0.41 – 0.69 | <0.001 | |||||||||||||||
| LOWINCPCT | -0.18 | -0.47 – 0.12 | 0.241 | |||||||||||||||
| LESSHSPCT | 0.72 | 0.30 – 1.13 | 0.001 | |||||||||||||||
| LINGISOPCT | 1.94 | 1.37 – 2.51 | <0.001 | |||||||||||||||
| UNDER5PCT | 1.08 | 0.12 – 2.04 | 0.027 | |||||||||||||||
| OVER64PCT | -0.78 | -1.13 – -0.43 | <0.001 | |||||||||||||||
| Observations | 2191 | 2191 | 2191 | 2191 | 2191 | 2191 | ||||||||||||
| R2 / R2 adjusted | 0.026 / 0.025 | 0.001 / 0.000 | 0.005 / 0.005 | 0.020 / 0.019 | 0.002 / 0.002 | 0.009 / 0.008 | ||||||||||||
# PRE1960 PCT
fit.PRE1960 <- lm(PNPL_LOG ~ PRE1960PCT, data = df)
# PTRAF
fit.PTRAF <- lm(PNPL_LOG ~ PTRAF, data = df)
# PRMP
fit.PRMP <- lm(PNPL_LOG ~ PRMP, data = df)
# PTSDF
fit.PTSDF <- lm(PNPL_LOG ~ PTSDF, data = df)
# OZONE
fit.OZONE <- lm(PNPL_LOG ~ OZONE, data = df)
# PM2.5
fit.PM25 <- lm(PNPL_LOG ~ PM25, data = df)
# UST
fit.UST <- lm(PNPL_LOG ~ UST, data = df)
tab_model(fit.PRE1960, fit.PTRAF, fit.PRMP, fit.PTSDF, fit.OZONE, fit.PM25, fit.UST)
| Â | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | PNPL_LOG | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | -1.86 | -1.94 – -1.79 | <0.001 | -1.44 | -1.49 – -1.39 | <0.001 | -1.63 | -1.68 – -1.59 | <0.001 | -1.83 | -1.88 – -1.78 | <0.001 | -12.12 | -14.91 – -9.33 | <0.001 | -9.70 | -10.23 – -9.17 | <0.001 | -1.41 | -1.45 – -1.36 | <0.001 |
| PRE1960PCT | 1.01 | 0.88 – 1.14 | <0.001 | ||||||||||||||||||
| PTRAF | 0.00 | 0.00 – 0.00 | <0.001 | ||||||||||||||||||
| PRMP | 1.90 | 1.73 – 2.08 | <0.001 | ||||||||||||||||||
| PTSDF | 0.25 | 0.23 – 0.27 | <0.001 | ||||||||||||||||||
| OZONE | 0.25 | 0.18 – 0.31 | <0.001 | ||||||||||||||||||
| PM25 | 1.12 | 1.05 – 1.19 | <0.001 | ||||||||||||||||||
| UST | 0.07 | 0.05 – 0.09 | <0.001 | ||||||||||||||||||
| Observations | 2191 | 2191 | 2191 | 2191 | 2191 | 2191 | 2191 | ||||||||||||||
| R2 / R2 adjusted | 0.099 / 0.098 | 0.027 / 0.027 | 0.172 / 0.172 | 0.213 / 0.212 | 0.026 / 0.025 | 0.307 / 0.306 | 0.021 / 0.021 | ||||||||||||||
fit.adjusted <- lm(PNPL_LOG ~ MINORPCT + LOWINCPCT + LESSHSPCT +
LINGISOPCT + UNDER5PCT + OVER64PCT
+ PRE1960PCT + PTRAF + PWDIS + PRMP + PTSDF + OZONE + PM25+ UST, data = df)
summary(fit.adjusted)
##
## Call:
## lm(formula = PNPL_LOG ~ MINORPCT + LOWINCPCT + LESSHSPCT + LINGISOPCT +
## UNDER5PCT + OVER64PCT + PRE1960PCT + PTRAF + PWDIS + PRMP +
## PTSDF + OZONE + PM25 + UST, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0698 -0.4849 -0.1283 0.3930 2.7427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.406e-01 1.171e+00 -0.547 0.5844
## MINORPCT -4.005e-01 7.480e-02 -5.354 9.51e-08 ***
## LOWINCPCT -8.677e-02 1.319e-01 -0.658 0.5107
## LESSHSPCT -4.589e-02 2.135e-01 -0.215 0.8298
## LINGISOPCT 2.696e-01 2.599e-01 1.037 0.2997
## UNDER5PCT 2.710e-01 3.741e-01 0.724 0.4690
## OVER64PCT -1.329e-01 1.430e-01 -0.929 0.3528
## PRE1960PCT -1.394e-01 6.442e-02 -2.164 0.0305 *
## PTRAF -2.906e-05 2.271e-05 -1.280 0.2007
## PWDIS -1.843e-01 7.710e-02 -2.391 0.0169 *
## PRMP 1.295e+00 8.096e-02 16.001 < 2e-16 ***
## PTSDF 1.730e-01 9.338e-03 18.528 < 2e-16 ***
## OZONE -1.876e-01 3.064e-02 -6.122 1.09e-09 ***
## PM25 9.616e-01 5.085e-02 18.912 < 2e-16 ***
## UST -1.828e-02 8.139e-03 -2.246 0.0248 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6855 on 2176 degrees of freedom
## Multiple R-squared: 0.4727, Adjusted R-squared: 0.4693
## F-statistic: 139.3 on 14 and 2176 DF, p-value: < 2.2e-16
fit.adjusted %>%
tbl_regression(intercept = T,
estimate_fun = function(x) style_sigfig(x, digits = 3),
pvalue_fun = function(x) style_pvalue(x, digits = 3),
label = list(
MINORPCT ~ "Racial Minority %",
LOWINCPCT ~ "Low Income %",
LESSHSPCT ~ "Less than HS Ed. %",
LINGISOPCT ~ "Lingustic Isolation %",
UNDER5PCT ~ "Under Age 5 %",
OVER64PCT ~ "Over Age 64 %",
PRE1960PCT ~ "Housing Units Pre1960 %",
PTRAF ~ "Proximity to Traffic",
PWDIS ~ "Prxomity to Wastewater Discharge Facility",
PRMP ~ "Proximity to RMP Facility",
PTSDF ~ "Proximity to Hazardous Waste Facility",
OZONE ~ "Ozone Concentration",
PM25 ~ "PM 2.5 Concentration",
UST ~ "Proximity to Underground Storage Tank"
)) %>%
add_global_p(keep = T) %>%
modify_caption("Adjusted Linear Regression Results for Block Group Proximity to NPl Site")
| Characteristic | Beta | 95% CI1 | p-value |
|---|---|---|---|
| (Intercept) | -0.641 | -2.94, 1.66 | 0.584 |
| Racial Minority % | -0.401 | -0.547, -0.254 | <0.001 |
| Low Income % | -0.087 | -0.345, 0.172 | 0.511 |
| Less than HS Ed. % | -0.046 | -0.465, 0.373 | 0.830 |
| Lingustic Isolation % | 0.270 | -0.240, 0.779 | 0.300 |
| Under Age 5 % | 0.271 | -0.463, 1.00 | 0.469 |
| Over Age 64 % | -0.133 | -0.413, 0.148 | 0.353 |
| Housing Units Pre1960 % | -0.139 | -0.266, -0.013 | 0.031 |
| Proximity to Traffic | 0.000 | 0.000, 0.000 | 0.201 |
| Prxomity to Wastewater Discharge Facility | -0.184 | -0.336, -0.033 | 0.017 |
| Proximity to RMP Facility | 1.30 | 1.14, 1.45 | <0.001 |
| Proximity to Hazardous Waste Facility | 0.173 | 0.155, 0.191 | <0.001 |
| Ozone Concentration | -0.188 | -0.248, -0.127 | <0.001 |
| PM 2.5 Concentration | 0.962 | 0.862, 1.06 | <0.001 |
| Proximity to Underground Storage Tank | -0.018 | -0.034, -0.002 | 0.025 |
| 1 CI = Confidence Interval | |||
## [1] "The sample size is 2191 and there are 14 predictors."
## [1] "There are 156.5 observations per predictor."
## MINORPCT LOWINCPCT LESSHSPCT LINGISOPCT UNDER5PCT OVER64PCT PRE1960PCT
## 1.993136 1.433285 1.910932 1.458653 1.098889 1.195903 1.642841
## PTRAF PWDIS PRMP PTSDF OZONE PM25 UST
## 1.128826 1.014517 1.283995 1.224700 1.614204 2.606815 1.211848