Correlation Matrix
numeric_df <- df[, -1] %>% select_if(is.numeric)
cor(numeric_df)
## Year HTP POV UER LIN
## Year 1.000000000 0.01225437 -0.08340431 -0.20089511 -0.29514758
## HTP 0.012254374 1.00000000 0.59945319 0.60126945 0.16242865
## POV -0.083404312 0.59945319 1.00000000 0.62783008 0.22125963
## UER -0.200895107 0.60126945 0.62783008 1.00000000 0.27286433
## LIN -0.295147579 0.16242865 0.22125963 0.27286433 1.00000000
## AGE65 -0.033639803 0.45776709 -0.01306251 0.14358245 -0.08837060
## ASIAN 0.001440181 -0.64625270 -0.36913272 -0.42452151 -0.09400436
## PACIFIC 0.056724566 -0.10059304 -0.06887460 -0.09504731 -0.03509823
## AMERICANINDIAN -0.010888039 0.03442100 -0.05418941 -0.01158435 0.03220828
## BLACK -0.027851251 0.89986977 0.67853649 0.69059186 0.23084653
## WHITE -0.033638188 -0.88224321 -0.65886167 -0.66498105 -0.28230126
## AGE65 ASIAN PACIFIC AMERICANINDIAN BLACK
## Year -0.03363980 0.001440181 0.05672457 -0.010888039 -0.027851251
## HTP 0.45776709 -0.646252700 -0.10059304 0.034420998 0.899869767
## POV -0.01306251 -0.369132721 -0.06887460 -0.054189411 0.678536490
## UER 0.14358245 -0.424521510 -0.09504731 -0.011584352 0.690591860
## LIN -0.08837060 -0.094004361 -0.03509823 0.032208281 0.230846528
## AGE65 1.00000000 -0.233821809 -0.01346033 0.071745697 0.237045137
## ASIAN -0.23382181 1.000000000 0.08223060 -0.009878009 -0.672444785
## PACIFIC -0.01346033 0.082230600 1.00000000 -0.031657367 -0.103123018
## AMERICANINDIAN 0.07174570 -0.009878009 -0.03165737 1.000000000 0.002118105
## BLACK 0.23704514 -0.672444785 -0.10312302 0.002118105 1.000000000
## WHITE -0.21210635 0.602479467 0.09176414 -0.041129037 -0.973391855
## WHITE
## Year -0.03363819
## HTP -0.88224321
## POV -0.65886167
## UER -0.66498105
## LIN -0.28230126
## AGE65 -0.21210635
## ASIAN 0.60247947
## PACIFIC 0.09176414
## AMERICANINDIAN -0.04112904
## BLACK -0.97339186
## WHITE 1.00000000
str(df)
## spc_tbl_ [1,867 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ GEOID : num [1:1867] 1.1e+10 1.1e+10 1.1e+10 1.1e+10 1.1e+10 ...
## $ NAME : chr [1:1867] "Census Tract 26, District of Columbia, District of Columbia" "Census Tract 40.02, District of Columbia, District of Columbia" "Census Tract 56, District of Columbia, District of Columbia" "Census Tract 46, District of Columbia, District of Columbia" ...
## $ Year : num [1:1867] 2013 2013 2013 2013 2013 ...
## $ HTP : num [1:1867] 34.8 16.8 18.9 27.3 17.2 18.6 37.5 41.8 20.2 17.8 ...
## $ POV : num [1:1867] 5.76 3.93 15.22 10.15 7.85 ...
## $ UER : num [1:1867] 4.72 1.12 2.19 5.33 2.71 ...
## $ LIN : num [1:1867] 1.84 0.71 1.14 4 3.05 ...
## $ AGE65 : num [1:1867] 25.01 10.32 14.65 17.77 9.58 ...
## $ ASIAN : num [1:1867] 4.63 7.03 13.34 3.18 10.71 ...
## $ PACIFIC : num [1:1867] 0.501 0 0 0 0 ...
## $ AMERICANINDIAN: num [1:1867] 0.334 0.262 0 3.218 0.377 ...
## $ BLACK : num [1:1867] 45.34 8.86 6.3 59.28 8.31 ...
## $ WHITE : num [1:1867] 41.5 79.6 74.3 31.5 72.2 ...
## - attr(*, "spec")=
## .. cols(
## .. GEOID = col_double(),
## .. NAME = col_character(),
## .. Year = col_double(),
## .. HTP = col_double(),
## .. POV = col_double(),
## .. UER = col_double(),
## .. LIN = col_double(),
## .. AGE65 = col_double(),
## .. ASIAN = col_double(),
## .. PACIFIC = col_double(),
## .. AMERICANINDIAN = col_double(),
## .. BLACK = col_double(),
## .. WHITE = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
numeric_df <- df %>% select(-1) %>% select_if(is.numeric)
corrplot::corrplot(cor(numeric_df))

str(df %>% select(HTP, BLACK, WHITE, AMERICANINDIAN, ASIAN, PACIFIC))
## tibble [1,867 × 6] (S3: tbl_df/tbl/data.frame)
## $ HTP : num [1:1867] 34.8 16.8 18.9 27.3 17.2 18.6 37.5 41.8 20.2 17.8 ...
## $ BLACK : num [1:1867] 45.34 8.86 6.3 59.28 8.31 ...
## $ WHITE : num [1:1867] 41.5 79.6 74.3 31.5 72.2 ...
## $ AMERICANINDIAN: num [1:1867] 0.334 0.262 0 3.218 0.377 ...
## $ ASIAN : num [1:1867] 4.63 7.03 13.34 3.18 10.71 ...
## $ PACIFIC : num [1:1867] 0.501 0 0 0 0 ...
cor_data <- df %>%
select(
HTP,
BLACK,
WHITE,
AMERICANINDIAN,
ASIAN,
PACIFIC
) %>%
mutate(across(everything(), ~ as.numeric(as.character(.)))) %>%
filter(complete.cases(.)) # Remove NAs
#Verify all columns are numeric
stopifnot(all(sapply(cor_data, is.numeric)))
#Correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs")
#View results with significance stars
print(cor_matrix)
## HTP BLACK WHITE AMERICANINDIAN ASIAN
## HTP 1.0000000 0.899869767 -0.88224321 0.034420998 -0.646252700
## BLACK 0.8998698 1.000000000 -0.97339186 0.002118105 -0.672444785
## WHITE -0.8822432 -0.973391855 1.00000000 -0.041129037 0.602479467
## AMERICANINDIAN 0.0344210 0.002118105 -0.04112904 1.000000000 -0.009878009
## ASIAN -0.6462527 -0.672444785 0.60247947 -0.009878009 1.000000000
## PACIFIC -0.1005930 -0.103123018 0.09176414 -0.031657367 0.082230600
## PACIFIC
## HTP -0.10059304
## BLACK -0.10312302
## WHITE 0.09176414
## AMERICANINDIAN -0.03165737
## ASIAN 0.08223060
## PACIFIC 1.00000000
Multicollinearity
Variance inflation factor (VIF)
Testing collinearity in variables with a value above 5 suggesting
collinearity with poor outcomes on the model and value of 1 suggesting
no collinearity. Variance Inflation Factors are used to assess the
contribution of each predictor variable to the model and to identify
those that are collinear. (Akinwande et al. 2015)”
car::vif(model) # Variance Inflation Factor
## POV UER LIN AGE65
## 1.684957 1.785993 1.102908 1.056610
Research Question 1,2,3 and 4
ARDL Model Estimation
library(ARDL)
## To cite the ARDL package in publications:
##
## Use this reference to refer to the validity of the ARDL package.
##
## Natsiopoulos, Kleanthis, and Tzeremes, Nickolaos G. (2022). ARDL
## bounds test for cointegration: Replicating the Pesaran et al. (2001)
## results for the UK earnings equation using R. Journal of Applied
## Econometrics, 37(5), 1079-1090. https://doi.org/10.1002/jae.2919
##
## Use this reference to cite this specific version of the ARDL package.
##
## Kleanthis Natsiopoulos and Nickolaos Tzeremes (2023). ARDL: ARDL, ECM
## and Bounds-Test for Cointegration. R package version 0.2.4.
## https://CRAN.R-project.org/package=ARDL
library(dplyr)
# Prepare data,ensure numeric and complete cases
ardl_data <- na.omit(df[, c("HTP", "POV", "UER", "AGE65")]) %>%
mutate(across(everything(), as.numeric))
ardl_model <- ardl(HTP ~ POV + UER + AGE65, data = ardl_data,
order = c(1, 0, 1, 0))
summary(ardl_model)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.2592 -4.0518 -0.2233 4.0896 18.5447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.76667 0.57030 6.605 5.18e-11 ***
## L(HTP, 1) 0.21610 0.01744 12.387 < 2e-16 ***
## POV 0.34009 0.01442 23.584 < 2e-16 ***
## UER 0.65717 0.05187 12.670 < 2e-16 ***
## L(UER, 1) -0.16581 0.04924 -3.367 0.000775 ***
## AGE65 0.81159 0.02812 28.865 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.881 on 1860 degrees of freedom
## Multiple R-squared: 0.6493, Adjusted R-squared: 0.6484
## F-statistic: 688.7 on 5 and 1860 DF, p-value: < 2.2e-16
An ARDL model was estimated based on AIC. Long-run coefficients, POV
and UER remained significant.
Research Question 1,2,3 and 4
Cointegration (Bounds) Test
library(ARDL)
# 1. Estimate ARDL model
ardl_model <- ardl(HTP ~ POV + UER + AGE65, data = df, order = c(1, 0, 1, 0))
# 2. Bounds test for cointegration
bounds_test <- bounds_f_test(ardl_model, case = 3)
print(bounds_test)
##
## Bounds F-test (Wald) for no cointegration
##
## data: d(HTP) ~ L(HTP, 1) + POV + L(UER, 1) + AGE65 + d(UER)
## F = 742.47, p-value = 1e-06
## alternative hypothesis: Possible cointegration
## null values:
## k T
## 3 1000
### Long Run and Short Run Estimates
library(ARDL)
ecm_model <- uecm(ardl_model)
summary(ecm_model)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.2592 -4.0518 -0.2233 4.0896 18.5447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.76667 0.57030 6.605 5.18e-11 ***
## L(HTP, 1) -0.78390 0.01744 -44.936 < 2e-16 ***
## POV 0.34009 0.01442 23.584 < 2e-16 ***
## L(UER, 1) 0.49136 0.06717 7.315 3.81e-13 ***
## AGE65 0.81159 0.02812 28.865 < 2e-16 ***
## d(UER) 0.65717 0.05187 12.670 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.881 on 1860 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.7059
## F-statistic: 896.1 on 5 and 1860 DF, p-value: < 2.2e-16
coefs <- coefficients(ardl_model)
print(coefs)
## (Intercept) L(HTP, 1) POV UER L(UER, 1) AGE65
## 3.7666673 0.2160962 0.3400873 0.6571727 -0.1658133 0.8115949
library(ARDL)
library(dplyr)
# Include race variables
ardl_data1 <- na.omit(df[, c("HTP", "POV", "UER", "AGE65", "BLACK", "WHITE", "AMERICANINDIAN", "ASIAN", "PACIFIC" )]) %>%
mutate(across(everything(), as.numeric))
# Estimate ARDL model with race covariates
ardl_model1 <- ardl(HTP ~ POV + UER + AGE65 + BLACK + WHITE + AMERICANINDIAN + ASIAN + PACIFIC,
data = ardl_data1,
order = c(1, 0, 1, 0, 0, 0, 0, 0, 0))
# Summarize results
summary(ardl_model1)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.4182 -1.8757 -0.0008 1.9398 16.0788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.982246 1.121209 16.038 < 2e-16 ***
## L(HTP, 1) 0.045492 0.010432 4.361 1.37e-05 ***
## POV 0.080109 0.009518 8.417 < 2e-16 ***
## UER -0.144905 0.032768 -4.422 1.03e-05 ***
## L(UER, 1) 0.014080 0.028820 0.489 0.6252
## AGE65 0.546016 0.016903 32.302 < 2e-16 ***
## BLACK 0.126684 0.012447 10.178 < 2e-16 ***
## WHITE -0.096790 0.012117 -7.988 2.39e-15 ***
## AMERICANINDIAN 0.062552 0.102086 0.613 0.5401
## ASIAN -0.203092 0.029911 -6.790 1.51e-11 ***
## PACIFIC -0.617285 0.331083 -1.864 0.0624 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.385 on 1855 degrees of freedom
## Multiple R-squared: 0.8841, Adjusted R-squared: 0.8835
## F-statistic: 1415 on 10 and 1855 DF, p-value: < 2.2e-16
To assess the influence of racial composition, we extended the ARDL
model by including the percentage of residents by race as additional
covariates. Lag structures were determined using AIC, and both race
variables were included contemporaneously given their theoretical impact
on structural health disparities.
#Conversion to time-series obj
df_ts <- ts(df, start = 2013, frequency = 1)
# stationarity
adf.test(df$HTP)
## Warning in adf.test(df$HTP): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df$HTP
## Dickey-Fuller = -9.7698, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
Augmented Dickey-Fuller Unit Root Test
library(tseries)
# Safely identify numeric columns (excluding first column)
numeric_cols <- which(sapply(df, is.numeric))
if(length(numeric_cols) > 1) {
numeric_cols <- numeric_cols[-1] # Remove first column if it's numeric
adf_results <- lapply(df[, numeric_cols, drop = FALSE], function(x) {
x_clean <- na.omit(as.numeric(x))
if(length(x_clean) > 8) adf.test(x_clean) else NULL # ADF requires min 8 obs
})
adf_results <- Filter(Negate(is.null), adf_results)
} else {
warning("Insufficient numeric columns for ADF testing")
adf_results <- list()
}
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
adf_results
## $Year
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -5.5082, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $HTP
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.7698, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $POV
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.723, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $UER
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.3906, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $LIN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.724, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AGE65
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.986, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $ASIAN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.1577, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $PACIFIC
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.985, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AMERICANINDIAN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.958, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $BLACK
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.7369, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $WHITE
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.364, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
library(tibble)
adf_results <- tribble(
~Variable, ~ADF_Statistic, ~P_Value, ~Conclusion,
"HTP", -9.749, "< 0.01", "Stationary",
"POV", -10.734, "< 0.01", "Stationary",
"UER", -9.3261, "< 0.01", "Stationary",
"LIN", -11.609, "< 0.01", "Stationary",
"AGE65", -10.972, "< 0.01", "Stationary",
"ASIAN", -9.1452, "< 0.01", "Stationary",
"PACIFIC", -11.956, "< 0.01", "Stationary",
"AMERICANINDIAN", -12.086, "< 0.01", "Stationary",
"BLACK", -9.6922, "< 0.01", "Stationary",
"WHITE", -10.328, "< 0.01", "Stationary"
)
knitr::kable(adf_results, caption = "ADF Test Results for Variables")
ADF Test Results for Variables
| HTP |
-9.7490 |
< 0.01 |
Stationary |
| POV |
-10.7340 |
< 0.01 |
Stationary |
| UER |
-9.3261 |
< 0.01 |
Stationary |
| LIN |
-11.6090 |
< 0.01 |
Stationary |
| AGE65 |
-10.9720 |
< 0.01 |
Stationary |
| ASIAN |
-9.1452 |
< 0.01 |
Stationary |
| PACIFIC |
-11.9560 |
< 0.01 |
Stationary |
| AMERICANINDIAN |
-12.0860 |
< 0.01 |
Stationary |
| BLACK |
-9.6922 |
< 0.01 |
Stationary |
| WHITE |
-10.3280 |
< 0.01 |
Stationary |
All variables were tested for stationarity using the Augmented
Dickey-Fuller (ADF) test. As shown in Table above, all were stationary
at level (I(0)), allowing for ARDL modeling without differencing.
Mapping
# Get spatial data for DC tracts
library(dplyr)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidycensus)
library(readr)
df2 <- read_csv("/Users/bayowaonabajo/final_dataset_DC_htn.csv") %>%
mutate(GEOID = as.character(GEOID))
## Rows: 1867 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): NAME
## dbl (12): GEOID, Year, HTP, POV, UER, LIN, AGE65, ASIAN, PACIFIC, AMERICANIN...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Get DC tracts data
dc_tracts <- get_acs(
geography = "tract",
state = "11", # DC's FIPS code
variables = "B01001_001", # Total population variable
year = 2023,
geometry = TRUE,
output = "wide"
) %>%
select(GEOID, geometry)
## Getting data from the 2019-2023 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |========================= | 36% | |================================================== | 71% | |======================================================== | 80% | |================================================================== | 95% | |==================================================================== | 98% | |======================================================================| 100%
# Join the data
df_map <- df2 %>%
filter(Year == 2023) %>%
left_join(dc_tracts, by = "GEOID") %>% # Join by character GEOID
st_as_sf() #convert to spatial
library(sf)
library(ggplot2)
# Ensure df_map exists and has geometry
if(exists("df_map") && "sf" %in% class(df_map)) {
choropleth_imputed <- df_map %>%
st_make_valid() %>%
mutate(
HTP = coalesce(as.numeric(HTP), mean(as.numeric(HTP), na.rm = TRUE)),
POV = coalesce(as.numeric(POV), mean(as.numeric(POV), na.rm = TRUE)),
AGE65 = coalesce(as.numeric(AGE65), mean(as.numeric(AGE65), na.rm = TRUE)),
UER = coalesce(as.numeric(UER), mean(as.numeric(UER), na.rm = TRUE)),
LIN = coalesce(as.numeric(LIN), mean(as.numeric(LIN), na.rm = TRUE))
) %>%
filter(!st_is_empty(.))
# Get unified scale limits
value_range <- range(c(choropleth_imputed$HTP, choropleth_imputed$POV,
choropleth_imputed$AGE65,choropleth_imputed$UER,choropleth_imputed$LIN), na.rm = TRUE)
# Plotting function
create_map <- function(data, var, title) {
ggplot(data) +
geom_sf(aes(fill = {{ var }}), color = "black", size = 0.1) +
scale_fill_viridis_c(
name = "Rate (%)",
limits = value_range,
option = "plasma",
na.value = "gray"
) +
labs(title = title) +
theme_void()
}
# Generate maps
map_hypertension <- create_map(choropleth_imputed, HTP, "Hypertension (2023)")
map_poverty <- create_map(choropleth_imputed, POV, "Poverty (2023)")
map_age65 <- create_map(choropleth_imputed, AGE65, "Age 65+ (2023)")
map_unemployed <- create_map(choropleth_imputed, UER, "Unemployed (2023)")
map_nohealthinsurance <- create_map(choropleth_imputed, LIN, "No Health Insurance (2023)")
# Arrange
gridExtra::grid.arrange(map_hypertension, map_poverty, map_age65,map_unemployed,map_nohealthinsurance, ncol = 5)
} else {
warning("Spatial data not available - skipping mapping section")
}

Spatial Analysis
#install.packages("spData")
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sf)
library(tmap)
# Prepare spatial data
df_sf <- st_as_sf(df_map) %>%
st_transform(26918) # UTM Zone 18N for DC
# Create neighbors list (queen contiguity)
nb <- poly2nb(df_sf, queen = TRUE)
## Warning in poly2nb(df_sf, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
# Check for islands (tracts with no neighbors)
if(any(card(nb) == 0)) {
message("Found ", sum(card(nb) == 0), " tracts with no neighbors")
}
## Found 1 tracts with no neighbors
# imputation with NA safety
df_sf$HTP_imputed <- df_sf$HTP # Initialize
# Get indices of NA values
na_indices <- which(is.na(df_sf$HTP))
for (i in na_indices) {
neighbor_values <- df_sf$HTP[nb[[i]]] # Get neighbor values
valid_neighbors <- neighbor_values[!is.na(neighbor_values)]
if (length(valid_neighbors) > 0) {
df_sf$HTP_imputed[i] <- mean(valid_neighbors)
} else {
df_sf$HTP_imputed[i] <- NA # Keep as NA if no valid neighbors
}
}
# Prep Moran's test
valid_tracts <- !is.na(df_sf$HTP_imputed)
df_sf_valid <- df_sf[valid_tracts, ]
# subset weights matrix
nb_valid <- subset.nb(nb, valid_tracts)
weights_valid <- nb2listw(nb_valid, style = "W",
zero.policy = TRUE)
# Moran's test
if (sum(valid_tracts) > 1) { # Need at least 2 observations
moran_result <- moran.test(df_sf_valid$HTP_imputed, weights_valid, zero.policy = TRUE)
print(moran_result)
} else {
warning("Insufficient non-NA values for Moran's test")
}
##
## Moran I test under randomisation
##
## data: df_sf_valid$HTP_imputed
## weights: weights_valid
## n reduced by no-neighbour observations
##
## Moran I statistic standard deviate = 14.515, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.783539325 -0.006578947 0.002962943
# Visualize with NA handling
tm_shape(df_sf) +
tm_fill("HTP_imputed",
palette = "Reds",
style = "quantile",
title = "Hypertension (%)",
textNA = "No Data (Insufficient Neighbor Estimates)",
colorNA = "lightgray") +
tm_borders(col = "white", lwd = 0.3) +
tm_layout(main.title = "Spatially Imputed Hypertension Prevalence",
legend.outside = TRUE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values'), 'colorNA'
## (rename to 'value.na'), 'textNA' (rename to 'label.na') to
## 'tm_scale_intervals(<HERE>)'
## For small multiples, specify a 'tm_scale_' for each multiple, and put them in a
## list: 'fill'.scale = list(<scale1>, <scale2>, ...)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

library(spdep)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
# 1. Ensure valid geometries
df_sf_valid <- df_sf %>%
st_make_valid() %>%
filter(complete.cases(HTP, POV, UER))
# 2. Create neighbors with zero policy
nb <- poly2nb(df_sf_valid, queen = TRUE)
## Warning in poly2nb(df_sf_valid, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf_valid, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights <- nb2listw(nb, style = "W", zero.policy = TRUE)
# 3. Check neighbor counts
print(table(card(nb))) # Shows how many neighbors each tract has
##
## 0 1 2 3 4 5 6 7 8 9 11 13
## 1 3 12 17 24 44 24 19 7 1 1 1
# 4. Run model with zero.policy
spatial_lag <- lagsarlm(
HTP ~ POV + UER,
data = df_sf_valid,
listw = weights,
method = "eigen",
zero.policy = TRUE
)
summary(spatial_lag)
##
## Call:lagsarlm(formula = HTP ~ POV + UER, data = df_sf_valid, listw = weights,
## method = "eigen", zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.116299 -2.686464 -0.051661 2.959348 19.271453
##
## Type: lag
## Regions with no neighbours included:
## 10
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.60484 1.39389 4.0210 5.795e-05
## POV 0.19744 0.04374 4.5140 6.363e-06
## UER 0.19647 0.14545 1.3507 0.1768
##
## Rho: 0.68882, LR test value: 117.82, p-value: < 2.22e-16
## Asymptotic standard error: 0.051374
## z-value: 13.408, p-value: < 2.22e-16
## Wald statistic: 179.77, p-value: < 2.22e-16
##
## Log likelihood: -467.5814 for lag model
## ML residual variance (sigma squared): 22.088, (sigma: 4.6998)
## Number of observations: 154
## Number of parameters estimated: 5
## AIC: 945.16, (AIC for lm: 1061)
## LM test for residual autocorrelation
## test value: 0.0062416, p-value: 0.93703
tm_shape(df_sf) +
tm_fill("POV", palette = "Blues", style = "quantile", title = "Poverty Rate (%)") +
tm_borders(lwd = 0.5, col = "gray") +
tm_layout(main.title = "Poverty Rate by Census Tract",
legend.outside = TRUE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

# Spatial regression significance testing
library(spdep)
library(spatialreg)
# Clean data
df_sf_clean <- df_sf %>%
st_make_valid() %>%
filter(!is.na(HTP), !is.na(POV), !is.na(UER))
# Recompute neighbors and weights
nb <- poly2nb(df_sf_clean, queen = TRUE)
## Warning in poly2nb(df_sf_clean, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf_clean, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights <- nb2listw(nb, style = "W", zero.policy = TRUE) # Allow isolated tracts
# Verify dimensions
cat(
"Data rows:", nrow(df_sf_clean), "\n",
"Weights length:", length(weights$neighbours), "\n"
)
## Data rows: 154
## Weights length: 154
# Run regression
spatial_lag <- lagsarlm(
HTP ~ POV + UER,
data = df_sf_clean,
listw = weights,
zero.policy = TRUE # Critical if some tracts have no neighbors
)
summary(spatial_lag)
##
## Call:lagsarlm(formula = HTP ~ POV + UER, data = df_sf_clean, listw = weights,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.116299 -2.686464 -0.051661 2.959348 19.271453
##
## Type: lag
## Regions with no neighbours included:
## 10
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.60484 1.39389 4.0210 5.795e-05
## POV 0.19744 0.04374 4.5140 6.363e-06
## UER 0.19647 0.14545 1.3507 0.1768
##
## Rho: 0.68882, LR test value: 117.82, p-value: < 2.22e-16
## Asymptotic standard error: 0.051374
## z-value: 13.408, p-value: < 2.22e-16
## Wald statistic: 179.77, p-value: < 2.22e-16
##
## Log likelihood: -467.5814 for lag model
## ML residual variance (sigma squared): 22.088, (sigma: 4.6998)
## Number of observations: 154
## Number of parameters estimated: 5
## AIC: 945.16, (AIC for lm: 1061)
## LM test for residual autocorrelation
## test value: 0.0062416, p-value: 0.93703
# Load Packages
library(spdep)
library(spatialreg)
library(sf)
library(tmap)
library(tidyverse)
library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:tidycensus':
##
## get_estimates
# Data Preparation
df_sf <- df_map %>%
st_as_sf() %>%
st_transform(26918) %>% # UTM Zone 18N for DC
st_make_valid() %>%
mutate(across(c(HTP, POV, UER, AGE65), as.numeric)) %>%
mutate(row_id = 1:n())
# Enhanced Spatial Imputation
nb <- poly2nb(df_sf, queen = TRUE, snap = 1e-3) # Increased snap tolerance
## Warning in poly2nb(df_sf, queen = TRUE, snap = 0.001): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf, queen = TRUE, snap = 0.001): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
df_sf <- df_sf %>%
mutate(
HTP_imputed = ifelse(is.na(HTP),
sapply(nb, function(neighbors) {
if(length(neighbors) == 0) return(NA)
mean(df_sf$HTP[neighbors], na.rm = TRUE)
}),
HTP),
HTP_imputed = coalesce(HTP_imputed, mean(HTP, na.rm = TRUE))
)
# Hotspot Analysis
valid_tracts <- !is.na(df_sf$HTP_imputed)
df_sf_valid <- df_sf[valid_tracts, ]
nb_valid <- subset.nb(nb, valid_tracts)
weights_valid <- nb2listw(nb_valid, style = "W", zero.policy = TRUE)
if(sum(valid_tracts) > 1) {
# Local Moran's I
local_moran <- localmoran(df_sf_valid$HTP_imputed, weights_valid, zero.policy = TRUE)
df_sf_valid <- df_sf_valid %>%
mutate(
local_moran_i = local_moran[, "Ii"],
hotspot_pval = local_moran[, "Pr(z != E(Ii))"],
hotspot_type = case_when(
local_moran_i > 0 & hotspot_pval < 0.05 ~ "High-High",
local_moran_i < 0 & hotspot_pval < 0.05 ~ "Low-Low",
TRUE ~ "Not Significant"
)
)
# Join hotspot data back to full dataset
df_sf <- df_sf %>%
left_join(st_drop_geometry(df_sf_valid) %>% select(row_id, hotspot_type),
by = "row_id")
}
# Visualization
map_theme <- tm_layout(
legend.outside = TRUE,
legend.outside.position = "right",
legend.outside.size = 0.25,
legend.title.size = 1.0,
legend.text.size = 0.8,
legend.frame = TRUE,
legend.bg.color = "white",
frame = FALSE,
inner.margins = c(0.05, 0.05, 0.05, 0.05),
outer.margins = c(0.05, 0.15, 0.05, 0.05)
)
# Individual maps for each variable
htp_map <- tm_shape(df_sf) +
tm_fill("HTP_imputed",
palette = "Reds",
style = "quantile",
title = "Hypertension (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
pov_map <- tm_shape(df_sf) +
tm_fill("POV",
palette = "Blues",
style = "quantile",
title = "Poverty Rate (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
uer_map <- tm_shape(df_sf) +
tm_fill("UER",
palette = "YlOrRd",
style = "quantile",
title = "Unemployment Rate (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
age65_map <- tm_shape(df_sf) +
tm_fill("AGE65",
palette = "Purples",
style = "quantile",
title = "Population 65+ (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
# Hotspot map
if("hotspot_type" %in% names(df_sf)) {
hotspot_map <- tm_shape(df_sf) +
tm_fill("hotspot_type",
palette = c("High-High" = "red",
"Low-Low" = "blue",
"Not Significant" = "lightgray"),
title = "Hypertension Hotspots") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
} else {
hotspot_map <- NULL
}
## [v3->v4] `tm_tm_fill()`: migrate the argument(s) related to the scale of the
## visual variable `fill` namely 'palette' (rename to 'values') to fill.scale =
## tm_scale(<HERE>).
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
# Arrange maps
if(!is.null(hotspot_map)) {
tmap_arrange(
htp_map, pov_map,
uer_map, age65_map,
hotspot_map,
ncol = 2
)
} else {
tmap_arrange(
htp_map, pov_map,
uer_map, age65_map,
ncol = 2
)
}
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
##
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

# Spatial Regression with AGE65
model_data <- df_sf %>%
filter(complete.cases(HTP, POV, UER, AGE65)) %>%
mutate(across(c(HTP, POV, UER, AGE65), scale))
# Handle neighborhood connectivity
nb_clean <- poly2nb(model_data, queen = TRUE, snap = 1e-3)
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
no_neighbors <- which(card(nb_clean) == 0)
if(length(no_neighbors) > 0) {
message("Removing ", length(no_neighbors), " tracts with no neighbors")
model_data <- model_data[-no_neighbors, ]
nb_clean <- poly2nb(model_data, queen = TRUE, snap = 1e-3)
}
## Removing 1 tracts with no neighbors
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): neighbour object has 2 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights_clean <- nb2listw(nb_clean, style = "W", zero.policy = TRUE)
# Fit models
spatial_lag <- tryCatch(
lagsarlm(HTP ~ POV + UER + AGE65,
data = model_data,
listw = weights_clean,
zero.policy = TRUE),
error = function(e) {
message("Spatial lag model failed: ", e$message)
return(NULL)
}
)
spatial_error <- tryCatch(
errorsarlm(HTP ~ POV + UER + AGE65,
data = model_data,
listw = weights_clean,
zero.policy = TRUE),
error = function(e) {
message("Spatial error model failed: ", e$message)
return(NULL)
}
)
# Model Results and Diagnostics
if(!is.null(spatial_lag)) {
model_data$spatial_lag_residuals <- residuals(spatial_lag)
residual_map <- tm_shape(model_data) +
tm_fill("spatial_lag_residuals",
style = "fisher",
palette = "-RdBu",
title = "Standardized Residuals") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
print(residual_map)
cat("\n=== Spatial Lag Model Results ===\n")
print(summary(spatial_lag))
}
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "fisher"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'[v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'Multiple palettes called "rd_bu" found: "brewer.rd_bu", "matplotlib.rd_bu". The first one, "brewer.rd_bu", is returned.
## [scale] tm_polygons:() the data variable assigned to 'fill' contains positive and negative values, so midpoint is set to 0. Set 'midpoint = NA' in 'fill.scale = tm_scale_intervals(<HERE>)' to use all visual values (e.g. colors)
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "-RdBu" is named
## "rd_bu" (in long format "brewer.rd_bu")

##
## === Spatial Lag Model Results ===
##
## Call:lagsarlm(formula = HTP ~ POV + UER + AGE65, data = model_data,
## listw = weights_clean, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5059605 -0.2551583 0.0014785 0.2752624 1.8496668
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.010969 0.033743 -0.3251 0.7451
## POV 0.225246 0.043986 5.1208 3.042e-07
## UER 0.047435 0.041102 1.1541 0.2485
## AGE65 0.199869 0.035443 5.6391 1.709e-08
##
## Rho: 0.68773, LR test value: 123.83, p-value: < 2.22e-16
## Asymptotic standard error: 0.045696
## z-value: 15.05, p-value: < 2.22e-16
## Wald statistic: 226.51, p-value: < 2.22e-16
##
## Log likelihood: -94.02535 for lag model
## ML residual variance (sigma squared): 0.174, (sigma: 0.41714)
## Number of observations: 153
## Number of parameters estimated: 6
## AIC: 200.05, (AIC for lm: 321.88)
## LM test for residual autocorrelation
## test value: 5.9171, p-value: 0.014995
if(!is.null(spatial_lag) && !is.null(spatial_error)) {
modelsummary(
list("Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error),
stars = TRUE,
title = "Spatial Model Comparison",
coef_rename = c("(Intercept)" = "Intercept",
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Population 65+")
)
}
Spatial Model Comparison
| |
Spatial Lag |
Spatial Error |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| rho |
0.688*** |
|
|
(0.046) |
|
| Intercept |
-0.011 |
-0.046 |
|
(0.034) |
(0.192) |
| Poverty Rate |
0.225*** |
0.245*** |
|
(0.044) |
(0.049) |
| Unemployment Rate |
0.047 |
0.024 |
|
(0.041) |
(0.040) |
| Population 65+ |
0.200*** |
0.154*** |
|
(0.035) |
(0.036) |
| lambda |
|
0.820*** |
|
|
(0.043) |
| AIC |
200.1 |
220.5 |
| BIC |
218.2 |
238.7 |
| RMSE |
0.42 |
0.43 |
Appendix
Important Model Outputs
# 1. PCA Loadings
if(exists("pca")) {
pca_loadings <- as.data.frame(pca$rotation)
cat("### PCA Loadings\n")
knitr::kable(pca_loadings, digits = 3) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
#write.csv(pca_loadings, "outputs/pca_loadings.csv")
}
## ### PCA Loadings
|
|
PC1
|
PC2
|
PC3
|
PC4
|
|
POV
|
-0.636
|
-0.004
|
-0.398
|
0.661
|
|
UER
|
-0.657
|
-0.171
|
-0.144
|
-0.720
|
|
LIN
|
-0.399
|
0.432
|
0.802
|
0.102
|
|
AGE65
|
-0.065
|
-0.886
|
0.421
|
0.185
|
# 2. VIF Scores
if(exists("model")) {
vif_scores <- car::vif(model)
vif_df <- data.frame(Variable = names(vif_scores), VIF = vif_scores)
cat("\n### Variance Inflation Factors (VIF)\n")
print(vif_df) # Basic R table
#write.csv(vif_df, "outputs/vif_scores.csv")
}
##
## ### Variance Inflation Factors (VIF)
## Variable VIF
## POV POV 1.684957
## UER UER 1.785993
## LIN LIN 1.102908
## AGE65 AGE65 1.056610
# 3. Moran's I Test
if(exists("moran_result")) {
moran_df <- data.frame(
Statistic = moran_result$estimate[1],
p.value = moran_result$p.value
)
cat("\n### Moran's I Test Results\n")
knitr::kable(moran_df, digits = 4) %>%
kableExtra::kable_styling(full_width = FALSE)
#write.csv(moran_df, "outputs/morans_i_test.csv")
}
##
## ### Moran's I Test Results
|
|
Statistic
|
p.value
|
|
Moran I statistic
|
0.7835
|
0
|
# 4. ARDL Model Coefficients
if(exists("ardl_model")) {
ardl_coefs <- broom::tidy(ardl_model)
cat("\n### ARDL Model Coefficients\n")
knitr::kable(ardl_coefs, digits = 4) %>%
kableExtra::kable_styling(bootstrap_options = "condensed")
#write.csv(ardl_coefs, "outputs/ardl_coefficients.csv")
}
## Warning: The `tidy()` method for objects of class `dynlm` is not maintained by the broom team, and is only supported through the `lm` tidier method. Please be cautious in interpreting and reporting broom output.
##
## This warning is displayed once per session.
##
## ### ARDL Model Coefficients
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
|
(Intercept)
|
13.7580
|
0.5452
|
25.2329
|
0.0000
|
|
L(HTP, 1)
|
0.2598
|
0.0209
|
12.4246
|
0.0000
|
|
POV
|
0.2816
|
0.0172
|
16.3953
|
0.0000
|
|
UER
|
0.9142
|
0.0615
|
14.8725
|
0.0000
|
|
L(UER, 1)
|
-0.1648
|
0.0592
|
-2.7823
|
0.0055
|
# 5. Spatial Regression Coefficients
if(exists("spatial_lag")) {
spatial_coefs <- broom::tidy(spatial_lag)
cat("\n### Spatial Lag Model Coefficients\n")
print(spatial_coefs) # Basic R table
#write.csv(spatial_coefs, "outputs/spatial_lag_coefficients.csv")
}
##
## ### Spatial Lag Model Coefficients
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 rho 0.688 0.0457 15.1 0
## 2 (Intercept) -0.0110 0.0337 -0.325 0.745
## 3 POV 0.225 0.0440 5.12 0.000000304
## 4 UER 0.0474 0.0411 1.15 0.248
## 5 AGE65 0.200 0.0354 5.64 0.0000000171
Diagnostics
# Create directory for plots
#if(!dir.exists("outputs/plots")) dir.create("outputs/plots", recursive = TRUE)
# PCA Scree Plot
if(exists("pca")) {
scree_plot <- fviz_eig(pca, addlabels = TRUE) +
ggtitle("PCA Scree Plot") +
theme_minimal()
cat("### PCA Scree Plot\n")
print(scree_plot) # Display in document
ggsave("outputs/plots/pca_scree_plot.png", scree_plot,
width = 8, height = 6, dpi = 300)
}
## ### PCA Scree Plot

# Linear Model Residual Diagnostics
if(exists("model")) {
cat("\n### Linear Model Residual Plots\n")
# Create and display in document
#png("outputs/plots/linear_model_residuals.png")
par(mfrow = c(2,2), oma = c(0,0,2,0))
plot(model, sub.caption = "Linear Model Diagnostics")
dev.off()
# Display in document using include_graphics
#knitr::include_graphics("outputs/plots/linear_model_residuals.png")
}
##
## ### Linear Model Residual Plots
## null device
## 1
# ARDL Model Residual Plot
if(exists("ardl_model")) {
cat("\n### ARDL Model Residual Plot\n")
# Create plot object
ardl_resid_plot <- function() {
plot(residuals(ardl_model), type="o",
main="Residuals of ARDL Model",
col="blue", ylab="Residuals")
abline(h=0, col="red", lty=2)
grid()
}
# Save to file
#png("outputs/plots/ardl_residuals.png", width = 800, height = 400)
ardl_resid_plot()
dev.off()
# Display in document
ardl_resid_plot()
}
##
## ### ARDL Model Residual Plot
# Spatial Model Residuals
if(exists("spatial_lag")) {
cat("\n### Spatial Lag Model Residuals\n")
spatial_resid_plot <- plot(residuals(spatial_lag),
main = "Spatial Lag Model Residuals")
print(spatial_resid_plot)
# Save spatial residuals
#png("outputs/plots/spatial_residuals.png")
plot(residuals(spatial_lag), main = "Spatial Lag Model Residuals")
dev.off()
}
##
## ### Spatial Lag Model Residuals
## NULL
## null device
## 1
Statistical Summary
# Descriptive Statistics
if(exists("df")) {
desc_stats <- data.frame(
Mean = sapply(df[, sapply(df, is.numeric)], mean, na.rm = TRUE),
SD = sapply(df[, sapply(df, is.numeric)], sd, na.rm = TRUE),
Min = sapply(df[, sapply(df, is.numeric)], min, na.rm = TRUE),
Max = sapply(df[, sapply(df, is.numeric)], max, na.rm = TRUE),
N = sapply(df[, sapply(df, is.numeric)], function(x) sum(!is.na(x)))
)
cat("### Descriptive Statistics\n")
knitr::kable(desc_stats, digits = 3, caption = "Summary Statistics of Numeric Variables") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE
)
#write.csv(desc_stats, "outputs/descriptive_stats.csv")
}
## ### Descriptive Statistics
Summary Statistics of Numeric Variables
|
|
Mean
|
SD
|
Min
|
Max
|
N
|
|
GEOID
|
1.100101e+10
|
3261.350
|
1.1001e+10
|
1.100101e+10
|
1867
|
|
Year
|
2.017817e+03
|
3.126
|
2.0130e+03
|
2.023000e+03
|
1867
|
|
HTP
|
3.014100e+01
|
9.915
|
0.0000e+00
|
4.820000e+01
|
1867
|
|
POV
|
1.675800e+01
|
12.291
|
0.0000e+00
|
7.004300e+01
|
1867
|
|
UER
|
5.112000e+00
|
3.513
|
0.0000e+00
|
2.465800e+01
|
1867
|
|
LIN
|
2.026000e+00
|
1.877
|
0.0000e+00
|
1.377400e+01
|
1867
|
|
AGE65
|
1.435500e+01
|
4.970
|
0.0000e+00
|
3.730900e+01
|
1867
|
|
ASIAN
|
3.412000e+00
|
3.807
|
0.0000e+00
|
2.491600e+01
|
1867
|
|
PACIFIC
|
5.000000e-02
|
0.239
|
0.0000e+00
|
4.007000e+00
|
1867
|
|
AMERICANINDIAN
|
3.030000e-01
|
0.787
|
0.0000e+00
|
1.051400e+01
|
1867
|
|
BLACK
|
5.160800e+01
|
34.245
|
0.0000e+00
|
1.000000e+02
|
1867
|
|
WHITE
|
3.675900e+01
|
30.734
|
0.0000e+00
|
1.000000e+02
|
1867
|
|
CRE_pca
|
0.000000e+00
|
1.337
|
-2.4410e+00
|
5.155000e+00
|
1867
|
# Correlation Matrix
if(exists("numeric_df")) {
cor_matrix <- cor(numeric_df, use = "complete.obs")
cat("\n### Correlation Matrix\n")
knitr::kable(cor_matrix, digits = 2, caption = "Pairwise Correlations") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover"),
font_size = 10
) %>%
kableExtra::column_spec(1, bold = TRUE)
#write.csv(cor_matrix, "outputs/correlation_matrix.csv")
}
##
## ### Correlation Matrix
Pairwise Correlations
|
|
Year
|
HTP
|
POV
|
UER
|
LIN
|
AGE65
|
ASIAN
|
PACIFIC
|
AMERICANINDIAN
|
BLACK
|
WHITE
|
|
Year
|
1.00
|
0.01
|
-0.08
|
-0.20
|
-0.30
|
-0.03
|
0.00
|
0.06
|
-0.01
|
-0.03
|
-0.03
|
|
HTP
|
0.01
|
1.00
|
0.60
|
0.60
|
0.16
|
0.46
|
-0.65
|
-0.10
|
0.03
|
0.90
|
-0.88
|
|
POV
|
-0.08
|
0.60
|
1.00
|
0.63
|
0.22
|
-0.01
|
-0.37
|
-0.07
|
-0.05
|
0.68
|
-0.66
|
|
UER
|
-0.20
|
0.60
|
0.63
|
1.00
|
0.27
|
0.14
|
-0.42
|
-0.10
|
-0.01
|
0.69
|
-0.66
|
|
LIN
|
-0.30
|
0.16
|
0.22
|
0.27
|
1.00
|
-0.09
|
-0.09
|
-0.04
|
0.03
|
0.23
|
-0.28
|
|
AGE65
|
-0.03
|
0.46
|
-0.01
|
0.14
|
-0.09
|
1.00
|
-0.23
|
-0.01
|
0.07
|
0.24
|
-0.21
|
|
ASIAN
|
0.00
|
-0.65
|
-0.37
|
-0.42
|
-0.09
|
-0.23
|
1.00
|
0.08
|
-0.01
|
-0.67
|
0.60
|
|
PACIFIC
|
0.06
|
-0.10
|
-0.07
|
-0.10
|
-0.04
|
-0.01
|
0.08
|
1.00
|
-0.03
|
-0.10
|
0.09
|
|
AMERICANINDIAN
|
-0.01
|
0.03
|
-0.05
|
-0.01
|
0.03
|
0.07
|
-0.01
|
-0.03
|
1.00
|
0.00
|
-0.04
|
|
BLACK
|
-0.03
|
0.90
|
0.68
|
0.69
|
0.23
|
0.24
|
-0.67
|
-0.10
|
0.00
|
1.00
|
-0.97
|
|
WHITE
|
-0.03
|
-0.88
|
-0.66
|
-0.66
|
-0.28
|
-0.21
|
0.60
|
0.09
|
-0.04
|
-0.97
|
1.00
|
# ADF Test Results
if(exists("adf_results")) {
cat("\n### Augmented Dickey-Fuller Test Results\n")
knitr::kable(adf_results, digits = 4,
caption = "Stationarity Test Results") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = FALSE
) %>%
kableExtra::row_spec(which(adf_results$P_Value < 0.05),
bold = TRUE, color = "white", background = "#D7261E")
#write.csv(adf_results, "outputs/adf_test_results.csv")
}
##
## ### Augmented Dickey-Fuller Test Results
Stationarity Test Results
|
Variable
|
ADF_Statistic
|
P_Value
|
Conclusion
|
|
HTP
|
-9.7490
|
< 0.01
|
Stationary
|
|
POV
|
-10.7340
|
< 0.01
|
Stationary
|
|
UER
|
-9.3261
|
< 0.01
|
Stationary
|
|
LIN
|
-11.6090
|
< 0.01
|
Stationary
|
|
AGE65
|
-10.9720
|
< 0.01
|
Stationary
|
|
ASIAN
|
-9.1452
|
< 0.01
|
Stationary
|
|
PACIFIC
|
-11.9560
|
< 0.01
|
Stationary
|
|
AMERICANINDIAN
|
-12.0860
|
< 0.01
|
Stationary
|
|
BLACK
|
-9.6922
|
< 0.01
|
Stationary
|
|
WHITE
|
-10.3280
|
< 0.01
|
Stationary
|
Spatial Analysis Output
# Hotspot Classification
if("hotspot_type" %in% names(df_sf)) {
hotspot_data <- st_drop_geometry(df_sf) %>%
select(GEOID, hotspot_type) %>%
mutate(hotspot_type = factor(hotspot_type,
levels = c("High-High", "Low-Low", "Not Significant")))
# Create summary counts
hotspot_summary <- hotspot_data %>%
count(hotspot_type) %>%
mutate(Percentage = n/nrow(hotspot_data)*100)
cat("### Hotspot Classification Summary\n")
knitr::kable(hotspot_summary,
col.names = c("Hotspot Type", "Count", "Percentage (%)"),
digits = 1,
caption = "Distribution of Hypertension Hotspots") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>%
kableExtra::row_spec(1, background = "#FFDDDD") %>% # Highlight High-High
kableExtra::row_spec(2, background = "#DDDDFF") # Highlight Low-Low
# Show first 10 rows of full data
cat("\n#### Sample Hotspot Classifications (First 10 Tracts)\n")
knitr::kable(head(hotspot_data, 10),
caption = "Sample of Hotspot Classifications") %>%
kableExtra::kable_styling(bootstrap_options = "condensed")
#write.csv(hotspot_data, "outputs/hotspot_classification.csv")
}
## ### Hotspot Classification Summary
##
## #### Sample Hotspot Classifications (First 10 Tracts)
Sample of Hotspot Classifications
|
GEOID
|
hotspot_type
|
|
11001000201
|
Not Significant
|
|
11001000202
|
High-High
|
|
11001000300
|
High-High
|
|
11001000400
|
High-High
|
|
11001000501
|
Not Significant
|
|
11001000502
|
Not Significant
|
|
11001000600
|
Not Significant
|
|
11001000702
|
Not Significant
|
|
11001000802
|
High-High
|
|
11001000902
|
Not Significant
|
# Spatial Residuals
if(exists("spatial_lag")) {
spatial_residuals <- st_drop_geometry(model_data) %>%
select(GEOID, spatial_lag_residuals) %>%
mutate(Residual_Group = cut(spatial_lag_residuals,
breaks = c(-Inf, -2, -1, 1, 2, Inf),
labels = c("Very Low", "Low", "Normal", "High", "Very High")))
# Create residual summary
residual_summary <- spatial_residuals %>%
group_by(Residual_Group) %>%
summarise(Count = n(),
Avg_Residual = mean(spatial_lag_residuals))
cat("\n### Spatial Lag Model Residuals Summary\n")
knitr::kable(residual_summary,
digits = 3,
caption = "Distribution of Standardized Residuals") %>%
kableExtra::kable_styling(bootstrap_options = "striped") %>%
kableExtra::column_spec(3, color = ifelse(residual_summary$Avg_Residual > 0, "red", "blue"))
# Show extreme residuals (top/bottom 5)
extreme_residuals <- spatial_residuals %>%
arrange(desc(abs(spatial_lag_residuals))) %>%
head(10) %>%
mutate(RowID = row_number()) # Add numeric row identifiers
cat("\n#### Tracts with Most Extreme Residuals\n")
knitr::kable(extreme_residuals %>% select(-RowID), digits = 3) %>%
kableExtra::kable_styling() %>%
kableExtra::row_spec(
which(extreme_residuals$spatial_lag_residuals > 0),
background = "#FFEEEE"
) %>%
kableExtra::row_spec(
which(extreme_residuals$spatial_lag_residuals < 0),
background = "#EEEEFF"
)
#write.csv(spatial_residuals, "outputs/spatial_residuals.csv")
}
##
## ### Spatial Lag Model Residuals Summary
##
## #### Tracts with Most Extreme Residuals
|
GEOID
|
spatial_lag_residuals
|
Residual_Group
|
|
11001002302
|
1.850
|
High
|
|
11001007301
|
-1.506
|
Low
|
|
11001009201
|
-1.331
|
Low
|
|
11001009204
|
1.162
|
High
|
|
11001009802
|
-0.850
|
Normal
|
|
11001009507
|
0.769
|
Normal
|
|
11001000201
|
-0.768
|
Normal
|
|
11001000202
|
-0.743
|
Normal
|
|
11001009503
|
0.697
|
Normal
|
|
11001010500
|
-0.669
|
Normal
|
Visulaization and Maps
# Interactive Trend Plot
if(exists("interactive_plot")) {
cat("### Interactive Trend Visualization\n")
cat("*Use the interactive controls to explore temporal patterns*\n\n")
# Display in document
print(interactive_plot)
# Save standalone version
if(!dir.exists("outputs")) dir.create("outputs")
htmlwidgets::saveWidget(
widget = interactive_plot,
file = "outputs/interactive_trends.html",
selfcontained = TRUE,
title = "DC Health Indicators 2013-2023"
)
}
## ### Interactive Trend Visualization
## *Use the interactive controls to explore temporal patterns*
# verify map objects exist and are tmap objects
required_maps <- c("htp_map", "pov_map", "uer_map", "age65_map", "hotspot_map")
valid_maps <- keep(required_maps, ~exists(.x) && inherits(get(.x), "tmap"))
# Static Map Visualizations
map_theme <- tm_layout(
legend.position = c("right", "bottom"),
legend.title.size = 0.9,
legend.text.size = 0.7,
frame = FALSE
)
if(length(valid_maps) > 0) {
# Apply consistent theme to all maps
walk(valid_maps, ~assign(.x, get(.x) + map_theme, envir = .GlobalEnv))
# Display individual maps
walk(valid_maps, ~{
cat("\n###", str_to_title(gsub("_map", "", gsub("_", " ", .x))), "Map\n")
print(get(.x))
})
# combined panel only if at least 2 maps
if(length(valid_maps) >= 2) {
cat("\n### Combined Map Panel\n")
# Prepare map list arrangement
map_objects <- mget(valid_maps)
# Special handling odd number of maps
if(length(valid_maps) %% 2 != 0) {
map_objects$empty <- tm_shape(df_sf) + tm_text("") # Empty placeholder
}
# Create arrangement
ncols <- ifelse(length(valid_maps) >=4, 2, 1)
combined_maps <- do.call(tmap_arrange, c(map_objects, list(ncol = ncols)))
print(combined_maps)
}
} else {
message("No valid tmap objects found for visualization")
}
##
## ### Htp Map Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.

##
## ### Pov Map Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

##
## ### Uer Map Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.

##
## ### Age65 Map Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.

##
## ### Hotspot Map Map

##
## ### Combined Map Panel
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

# Save
#if(!dir.exists("outputs/plots")) dir.create("outputs/plots", recursive = TRUE)
#tmap_save(combined_maps,
#"outputs/plots/combined_maps.png",
#width = 10, height = 8, dpi = 300)
Model Comparison
# Prepare Model Comparison Table
if(exists("spatial_lag") && exists("spatial_error") && exists("ardl_model") && exists("model")) {
# Create comparison dataframe with enhanced formatting
model_comp <- modelsummary(
list("Linear" = model,
"ARDL" = ardl_model,
"Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error),
output = "data.frame",
stars = TRUE,
title = "Model Comparison Across Specifications",
coef_rename = c(
"(Intercept)" = "Intercept",
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Population 65+",
"lag.HTP" = "Spatial Lag (ρ)",
"rho" = "Spatial Error (λ)"
),
gof_map = c("nobs", "r.squared", "adj.r.squared", "sigma", "logLik", "AIC", "BIC")
)
#Display formatted table with conditional coloring
cat("### Comparative Model Performance\n")
model_table <- knitr::kable(model_comp, caption = "Comparison of Model Coefficients and Fit Statistics") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
font_size = 11,
full_width = FALSE,
position = "center"
) %>%
kableExtra::column_spec(1, bold = TRUE, width = "3cm") %>%
kableExtra::column_spec(2:5, width = "2.5cm") %>%
kableExtra::footnote(
general = "Standard errors in parentheses; * p<0.1, ** p<0.05, *** p<0.01",
footnote_as_chunk = TRUE
)
# Highlight best performing model for each fit statistic
if("AIC" %in% model_comp$term) {
aic_values <- as.numeric(model_comp[model_comp$term == "AIC", 2:5])
best_aic <- which.min(aic_values) + 1 # +1 to account for term column
model_table <- model_table %>%
kableExtra::row_spec(which(model_comp$term == "AIC"),
background = ifelse(1:4 == best_aic, "#E6F3E6", NA))
}
if("BIC" %in% model_comp$term) {
bic_values <- as.numeric(model_comp[model_comp$term == "BIC", 2:5])
best_bic <- which.min(bic_values) + 1
model_table <- model_table %>%
kableExtra::row_spec(which(model_comp$term == "BIC"),
background = ifelse(1:4 == best_bic, "#E6F3E6", NA))
}
print(model_table)
# Export to CSV with additional metadata
#write.csv(model_comp, "outputs/model_comparison.csv", row.names = FALSE)
# Create simplified coefficient comparison
coef_comparison <- model_comp %>%
filter(!term %in% c("AIC", "BIC", "R2", "Log.Lik", "sigma", "nobs", "adj.r.squared")) %>%
select(-part)
cat("\n### Key Coefficient Comparison\n")
knitr::kable(coef_comparison, digits = 3) %>%
kableExtra::kable_styling() %>%
kableExtra::column_spec(1, bold = TRUE) %>%
print()
} else {
cat("\nNote: Not all models available for comparison\n")
}
## ### Comparative Model Performance
## <table class="table table-striped table-hover table-condensed" style="font-size: 11px; width: auto !important; margin-left: auto; margin-right: auto;border-bottom: 0;">
## <caption style="font-size: initial !important;">Comparison of Model Coefficients and Fit Statistics</caption>
## <thead>
## <tr>
## <th style="text-align:left;"> part </th>
## <th style="text-align:left;"> term </th>
## <th style="text-align:left;"> statistic </th>
## <th style="text-align:left;"> Linear </th>
## <th style="text-align:left;"> ARDL </th>
## <th style="text-align:left;"> Spatial Lag </th>
## <th style="text-align:left;"> Spatial Error </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Intercept </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> 7.873*** </td>
## <td style="text-align:left;width: 2.5cm; "> 13.758*** </td>
## <td style="text-align:left;"> -0.011 </td>
## <td style="text-align:left;"> -0.046 </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Intercept </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> (0.501) </td>
## <td style="text-align:left;width: 2.5cm; "> (0.545) </td>
## <td style="text-align:left;"> (0.034) </td>
## <td style="text-align:left;"> (0.192) </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Poverty Rate </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> 0.352*** </td>
## <td style="text-align:left;width: 2.5cm; "> 0.282*** </td>
## <td style="text-align:left;"> 0.225*** </td>
## <td style="text-align:left;"> 0.245*** </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Poverty Rate </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> (0.015) </td>
## <td style="text-align:left;width: 2.5cm; "> (0.017) </td>
## <td style="text-align:left;"> (0.044) </td>
## <td style="text-align:left;"> (0.049) </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Unemployment Rate </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> 0.723*** </td>
## <td style="text-align:left;width: 2.5cm; "> 0.914*** </td>
## <td style="text-align:left;"> 0.047 </td>
## <td style="text-align:left;"> 0.024 </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Unemployment Rate </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> (0.054) </td>
## <td style="text-align:left;width: 2.5cm; "> (0.061) </td>
## <td style="text-align:left;"> (0.041) </td>
## <td style="text-align:left;"> (0.040) </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> LIN </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> 0.179* </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> LIN </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> (0.079) </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Population 65+ </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> 0.857*** </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> 0.200*** </td>
## <td style="text-align:left;"> 0.154*** </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Population 65+ </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> (0.029) </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> (0.035) </td>
## <td style="text-align:left;"> (0.036) </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> L(HTP, 1) </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> 0.260*** </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> L(HTP, 1) </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> (0.021) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> L(UER, 1) </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> -0.165** </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> L(UER, 1) </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> (0.059) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Spatial Error (λ) </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> 0.688*** </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> Spatial Error (λ) </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> (0.046) </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> lambda </td>
## <td style="text-align:left;width: 2.5cm; "> estimate </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.820*** </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> estimates </td>
## <td style="text-align:left;width: 2.5cm; "> lambda </td>
## <td style="text-align:left;width: 2.5cm; "> std.error </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.043) </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> gof </td>
## <td style="text-align:left;width: 2.5cm; "> Num.Obs. </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> 1867 </td>
## <td style="text-align:left;width: 2.5cm; "> 1866 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> gof </td>
## <td style="text-align:left;width: 2.5cm; "> R2 </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> 0.618 </td>
## <td style="text-align:left;width: 2.5cm; "> 0.492 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> gof </td>
## <td style="text-align:left;width: 2.5cm; "> R2 Adj. </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> 0.617 </td>
## <td style="text-align:left;width: 2.5cm; "> 0.491 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;width: 3cm; font-weight: bold;"> gof </td>
## <td style="text-align:left;width: 2.5cm; "> Log.Lik. </td>
## <td style="text-align:left;width: 2.5cm; "> </td>
## <td style="text-align:left;width: 2.5cm; "> -6033.352 </td>
## <td style="text-align:left;width: 2.5cm; "> -6296.153 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## </tbody>
## <tfoot><tr><td style="padding: 0; " colspan="100%">
## <span style="font-style: italic;">Note: </span> <sup></sup> Standard errors in parentheses; * p<0.1, ** p<0.05, *** p<0.01</td></tr></tfoot>
## </table>
## ### Key Coefficient Comparison
## <table class="table" style="margin-left: auto; margin-right: auto;">
## <thead>
## <tr>
## <th style="text-align:left;"> term </th>
## <th style="text-align:left;"> statistic </th>
## <th style="text-align:left;"> Linear </th>
## <th style="text-align:left;"> ARDL </th>
## <th style="text-align:left;"> Spatial Lag </th>
## <th style="text-align:left;"> Spatial Error </th>
## </tr>
## </thead>
## <tbody>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Intercept </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> 7.873*** </td>
## <td style="text-align:left;"> 13.758*** </td>
## <td style="text-align:left;"> -0.011 </td>
## <td style="text-align:left;"> -0.046 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Intercept </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> (0.501) </td>
## <td style="text-align:left;"> (0.545) </td>
## <td style="text-align:left;"> (0.034) </td>
## <td style="text-align:left;"> (0.192) </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Poverty Rate </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> 0.352*** </td>
## <td style="text-align:left;"> 0.282*** </td>
## <td style="text-align:left;"> 0.225*** </td>
## <td style="text-align:left;"> 0.245*** </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Poverty Rate </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> (0.015) </td>
## <td style="text-align:left;"> (0.017) </td>
## <td style="text-align:left;"> (0.044) </td>
## <td style="text-align:left;"> (0.049) </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Unemployment Rate </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> 0.723*** </td>
## <td style="text-align:left;"> 0.914*** </td>
## <td style="text-align:left;"> 0.047 </td>
## <td style="text-align:left;"> 0.024 </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Unemployment Rate </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> (0.054) </td>
## <td style="text-align:left;"> (0.061) </td>
## <td style="text-align:left;"> (0.041) </td>
## <td style="text-align:left;"> (0.040) </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> LIN </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> 0.179* </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> LIN </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> (0.079) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Population 65+ </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> 0.857*** </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.200*** </td>
## <td style="text-align:left;"> 0.154*** </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Population 65+ </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> (0.029) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.035) </td>
## <td style="text-align:left;"> (0.036) </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> L(HTP, 1) </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.260*** </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> L(HTP, 1) </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.021) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> L(UER, 1) </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> -0.165** </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> L(UER, 1) </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.059) </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Spatial Error (λ) </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.688*** </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Spatial Error (λ) </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.046) </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> lambda </td>
## <td style="text-align:left;"> estimate </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.820*** </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> lambda </td>
## <td style="text-align:left;"> std.error </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> (0.043) </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Num.Obs. </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 1867 </td>
## <td style="text-align:left;"> 1866 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> R2 Adj. </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> 0.617 </td>
## <td style="text-align:left;"> 0.491 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## <tr>
## <td style="text-align:left;font-weight: bold;"> Log.Lik. </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> -6033.352 </td>
## <td style="text-align:left;"> -6296.153 </td>
## <td style="text-align:left;"> </td>
## <td style="text-align:left;"> </td>
## </tr>
## </tbody>
## </table>
Statistically Significant Variables(<0.05)
library(modelsummary)
library(broom)
library(dplyr)
# Create model list with original models
model_list <- list(
"Linear" = model,
"ARDL" = ardl_model,
"Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error
)
# Define methods for each model type
tidy_custom.spatialreg <- function(model, ...) {
broom::tidy(model) %>%
filter(p.value < 0.05) %>%
mutate(term = case_when(
term == "rho" ~ "Spatial Lag (ρ)",
term == "lambda" ~ "Spatial Error (λ)",
TRUE ~ term
))
}
tidy_custom.default <- function(model, ...) {
broom::tidy(model) %>%
filter(p.value < 0.05) %>%
mutate(term = case_when(
term == "L(HTP, 1)" ~ "Lagged HTP",
term == "L(UER, 1)" ~ "Lagged Unemployment",
TRUE ~ term
))
}
# Single modelsummary call with all parameters
modelsummary(
model_list,
stars = c('+' = 0.1, '*' = 0.05, '**' = 0.01, '***' = 0.001),
coef_map = c(
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Age 65+",
"LIN" = "No Health Insurance",
"Lagged HTP" = "Lagged Hypertension",
"Lagged Unemployment" = "Lagged Unemployment",
"Spatial Lag (ρ)" = "Spatial Lag (ρ)",
"Spatial Error (λ)" = "Spatial Error (λ)"
),
gof_map = c("nobs", "r.squared", "aic", "bic"),
title = "Model Comparison: Statistically Significant Coefficients (p < 0.05)",
notes = list(
"Standard errors in parentheses",
"Only coefficients with p < 0.05 shown",
"+ p<0.1, * p<0.05, ** p<0.01, *** p<0.001"
)
)
Model Comparison: Statistically Significant Coefficients (p < 0.05)
| |
Linear |
ARDL |
Spatial Lag |
Spatial Error |
| + p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001 |
| Standard errors in parentheses |
| Only coefficients with p < 0.05 shown |
| + p<0.1, * p<0.05, ** p<0.01, *** p<0.001 |
| Poverty Rate |
0.352*** |
0.282*** |
0.225*** |
0.245*** |
|
(0.015) |
(0.017) |
(0.044) |
(0.049) |
| Unemployment Rate |
0.723*** |
0.914*** |
0.047 |
0.024 |
|
(0.054) |
(0.061) |
(0.041) |
(0.040) |
| Age 65+ |
0.857*** |
|
0.200*** |
0.154*** |
|
(0.029) |
|
(0.035) |
(0.036) |
| No Health Insurance |
0.179* |
|
|
|
|
(0.079) |
|
|
|
| Num.Obs. |
1867 |
1866 |
|
|
| R2 |
0.618 |
0.492 |
|
|
| AIC |
12078.7 |
12604.3 |
200.1 |
220.5 |
| BIC |
12111.9 |
12637.5 |
218.2 |
238.7 |