Data Collation and Exploration
Hypertension Data (CDC)
#Install
#install.packages(c("dLagM", "urca", "lmtest", "car", "ggplot2", "tseries"))
#install.packages("tsbox")
#install.packages("ARDL")
#install.packages(c("tidycensus", "tidyverse", "sf"))
library(dLagM)
## Loading required package: nardl
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: dynlm
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(urca)
library(lmtest)
library(car)
## Loading required package: carData
library(ggplot2)
library(tseries)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tsbox)
library(ARDL)
## To cite the ARDL package in publications:
##
## Use this reference to refer to the validity of the ARDL package.
##
## Natsiopoulos, Kleanthis, and Tzeremes, Nickolaos G. (2022). ARDL
## bounds test for cointegration: Replicating the Pesaran et al. (2001)
## results for the UK earnings equation using R. Journal of Applied
## Econometrics, 37(5), 1079-1090. https://doi.org/10.1002/jae.2919
##
## Use this reference to cite this specific version of the ARDL package.
##
## Kleanthis Natsiopoulos and Nickolaos Tzeremes (2023). ARDL: ARDL, ECM
## and Bounds-Test for Cointegration. R package version 0.2.4.
## https://CRAN.R-project.org/package=ARDL
library(tidycensus)
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tidyverse)
library(readr)
ACS Socioeconomic Variables
library(tidycensus)
library(dplyr)
library(purrr)
# Census API key
api_key <- Sys.getenv("CENSUS_API_KEY")
census_api_key(api_key, overwrite = TRUE)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
variables <- c(
poverty_rate = "B17001_002",
unemployment_rate = "B23025_005",
no_health_insurance = "B27010_033",
education_less_than_hs = "B15003_002",
median_age = "B01002_001",
black_population = "B02001_003",
white_population = "B02001_002",
american_indian_alaska_native_alone = "B02001_004",
asian_alone = "B02001_005",
native_hawaiian_other_pacific_islander_alone = "B02001_006",
male_65_69 = "B01001_017",
male_70_74 = "B01001_018",
male_75_79 = "B01001_019",
male_80_84 = "B01001_020",
male_85plus = "B01001_021",
female_65_69 = "B01001_041",
female_70_74 = "B01001_042",
female_75_79 = "B01001_043",
female_80_84 = "B01001_044",
female_85plus = "B01001_045"
)
years <- 2013:2024
get_acs_year <- function(year) {
tryCatch({
data <- get_acs(
geography = "tract",
year = year,
state = "11",
survey = "acs5",
variables = variables,
summary_var = "B01001_001",
output = "wide",
geometry = FALSE
)
data$year <- year
return(data)
}, error = function(e) {
message(paste("Error getting data for year", year, ":", e$message))
return(NULL)
})
}
# Get data and calculate rates
dc_ACS_all_years <- map_df(years, possibly(get_acs_year, otherwise = NULL)) %>%
mutate(
# Calculate total 65+
total_65plus = male_65_69E + male_70_74E + male_75_79E + male_80_84E + male_85plusE +
female_65_69E + female_70_74E + female_75_79E + female_80_84E + female_85plusE,
# Calculate rates using total population (B01001_001E) as the denominator
# Basic rates
poverty_rate_pct = (poverty_rateE / summary_est) * 100,
unemployment_rate_pct = (unemployment_rateE / summary_est) * 100,
no_health_insurance_pct = (no_health_insuranceE / summary_est) * 100,
education_less_than_hs_pct = (education_less_than_hsE / summary_est) * 100,
# race percentages
black_population_pct = (black_populationE / summary_est) * 100,
white_population_pct = (white_populationE / summary_est) * 100,
american_indian_pct = (american_indian_alaska_native_aloneE / summary_est) * 100,
asian_pct = (asian_aloneE / summary_est) * 100,
pacific_islander_pct = (native_hawaiian_other_pacific_islander_aloneE / summary_est) * 100,
# Age 65+ percentage
pct_65plus = (total_65plus / summary_est) * 100,
# Race percentages among the 65+ population (note: numerators are total race counts, not 65+ race counts, so values can exceed 100%)
black_65plus_pct = ifelse(total_65plus > 0, (black_populationE / total_65plus) * 100, NA),
white_65plus_pct = ifelse(total_65plus > 0, (white_populationE / total_65plus) * 100, NA),
asian_65plus_pct = ifelse(total_65plus > 0, (asian_aloneE / total_65plus) * 100, NA),
)
## Getting data from the 2009-2013 5-year ACS
## Getting data from the 2010-2014 5-year ACS
## Getting data from the 2011-2015 5-year ACS
## Getting data from the 2012-2016 5-year ACS
## Getting data from the 2013-2017 5-year ACS
## Getting data from the 2014-2018 5-year ACS
## Getting data from the 2015-2019 5-year ACS
## Getting data from the 2016-2020 5-year ACS
## Getting data from the 2017-2021 5-year ACS
## Getting data from the 2018-2022 5-year ACS
## Getting data from the 2019-2023 5-year ACS
## Getting data from the 2020-2024 5-year ACS
## Error getting data for year 2024 : Your API call has errors. The API message returned is HTTP Status 404 – Not Found.
# View results
glimpse(dc_ACS_all_years)
## Rows: 2,077
## Columns: 59
## $ GEOID <chr> "11001002600", "11001004…
## $ NAME <chr> "Census Tract 26, Distri…
## $ poverty_rateE <dbl> 138, 105, 932, 284, 458,…
## $ poverty_rateM <dbl> 91, 52, 234, 173, 155, 2…
## $ unemployment_rateE <dbl> 113, 30, 134, 149, 158, …
## $ unemployment_rateM <dbl> 46, 24, 164, 87, 106, 10…
## $ no_health_insuranceE <dbl> 44, 19, 70, 112, 178, 52…
## $ no_health_insuranceM <dbl> 29, 22, 56, 69, 137, 28,…
## $ education_less_than_hsE <dbl> 2, 0, 80, 20, 0, 15, 40,…
## $ education_less_than_hsM <dbl> 5, 12, 53, 29, 17, 22, 4…
## $ median_ageE <dbl> 47.5, 34.2, 28.5, 34.1, …
## $ median_ageM <dbl> 2.9, 1.6, 4.4, 3.0, 1.9,…
## $ black_populationE <dbl> 1086, 237, 386, 1658, 48…
## $ black_populationM <dbl> 295, 133, 159, 337, 208,…
## $ white_populationE <dbl> 993, 2130, 4547, 882, 42…
## $ white_populationM <dbl> 112, 286, 662, 203, 488,…
## $ american_indian_alaska_native_aloneE <dbl> 8, 7, 0, 90, 22, 0, 0, 0…
## $ american_indian_alaska_native_aloneM <dbl> 11, 11, 17, 109, 36, 12,…
## $ asian_aloneE <dbl> 111, 188, 817, 89, 625, …
## $ asian_aloneM <dbl> 62, 111, 325, 66, 162, 9…
## $ native_hawaiian_other_pacific_islander_aloneE <dbl> 12, 0, 0, 0, 0, 0, 0, 0,…
## $ native_hawaiian_other_pacific_islander_aloneM <dbl> 19, 12, 17, 12, 17, 12, …
## $ male_65_69E <dbl> 89, 56, 127, 104, 170, 1…
## $ male_65_69M <dbl> 43, 31, 103, 85, 86, 73,…
## $ male_70_74E <dbl> 26, 51, 81, 43, 63, 33, …
## $ male_70_74M <dbl> 19, 42, 82, 37, 51, 35, …
## $ male_75_79E <dbl> 7, 17, 72, 101, 47, 34, …
## $ male_75_79M <dbl> 11, 18, 49, 79, 49, 41, …
## $ male_80_84E <dbl> 50, 6, 93, 0, 60, 9, 15,…
## $ male_80_84M <dbl> 21, 9, 80, 12, 49, 13, 2…
## $ male_85plusE <dbl> 56, 7, 90, 0, 62, 60, 36…
## $ male_85plusM <dbl> 27, 11, 74, 12, 75, 49, …
## $ female_65_69E <dbl> 140, 49, 170, 107, 27, 2…
## $ female_65_69M <dbl> 49, 29, 85, 84, 30, 64, …
## $ female_70_74E <dbl> 51, 14, 84, 26, 35, 24, …
## $ female_70_74M <dbl> 29, 16, 62, 22, 38, 20, …
## $ female_75_79E <dbl> 100, 30, 44, 62, 55, 0, …
## $ female_75_79M <dbl> 51, 24, 39, 79, 53, 12, …
## $ female_80_84E <dbl> 41, 34, 26, 18, 22, 14, …
## $ female_80_84M <dbl> 24, 27, 31, 23, 25, 15, …
## $ female_85plusE <dbl> 39, 12, 110, 36, 18, 21,…
## $ female_85plusM <dbl> 29, 12, 65, 55, 30, 22, …
## $ summary_est <dbl> 2395, 2675, 6123, 2797, …
## $ summary_moe <dbl> 262, 254, 815, 320, 542,…
## $ year <int> 2013, 2013, 2013, 2013, …
## $ total_65plus <dbl> 599, 276, 897, 497, 559,…
## $ poverty_rate_pct <dbl> 5.762004, 3.925234, 15.2…
## $ unemployment_rate_pct <dbl> 4.718163, 1.121495, 2.18…
## $ no_health_insurance_pct <dbl> 1.8371608, 0.7102804, 1.…
## $ education_less_than_hs_pct <dbl> 0.08350731, 0.00000000, …
## $ black_population_pct <dbl> 45.344468, 8.859813, 6.3…
## $ white_population_pct <dbl> 41.461378, 79.626168, 74…
## $ american_indian_pct <dbl> 0.33402923, 0.26168224, …
## $ asian_pct <dbl> 4.6346555, 7.0280374, 13…
## $ pacific_islander_pct <dbl> 0.5010438, 0.0000000, 0.…
## $ pct_65plus <dbl> 25.010438, 10.317757, 14…
## $ black_65plus_pct <dbl> 181.30217, 85.86957, 43.…
## $ white_65plus_pct <dbl> 165.77629, 771.73913, 50…
## $ asian_65plus_pct <dbl> 18.530885, 68.115942, 91…
View(dc_ACS_all_years)
# write.csv(dc_ACS_all_years, "dc_acs_rates_2013_2024.csv", row.names = FALSE)
Hypertension Data: BRFSS (CDC)
# Read the CSV file
brfss_data <- read_csv("/Users/bayowaonabajo/Downloads/Behavioral_Risk_Factor_Surveillance_System__BRFSS__Age-Adjusted_Prevalence_Data__2011_to_present_.csv")
## Rows: 80280 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (21): Locationabbr, Locationdesc, Class, Topic, Question, Response, Brea...
## dbl (6): Year, Sample_Size, Data_value, Confidence_limit_Low, Confidence_li...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
dc_hypertension <- brfss_data %>%
filter(Locationdesc == "District of Columbia",
Year >= 2013 & Year <= 2024,
Topic == "High Blood Pressure",
Question == "Adults who have been told they have high blood pressure (variable calculated from one or more BRFSS questions)") %>%
select(Year, Locationdesc, Response, Data_value,Sample_Size,Data_value_unit,Data_value_type,DataSource, Confidence_limit_Low, Confidence_limit_High, Geolocation)
# View the results
View(dc_hypertension)
library(tidyverse)
library(readr)
# Read the CSV file
HtnPrevalence_datadc <- read_csv("/Users/bayowaonabajo/Downloads/indicator_data_download_20250703.csv", col_types = cols(
`Indicator Name` = col_character(),
`What Is This Indicator` = col_character(),
`Location Type` = col_character(),
`Location` = col_character(),
`Indicator Rate Value` = col_double(),
`Indicator Rate Value Units` = col_character(),
`Rate Lower Confidence Interval` = col_double(),
`Rate Upper Confidence Interval` = col_double(),
`Indicator Count Value` = col_skip(),
`Indicator Count Value Units` = col_skip(),
`Count Lower Confidence Interval` = col_skip(),
`Count Upper Confidence Interval` = col_skip(),
`Indicator Value Unstable` = col_character(),
`Period of Measure` = col_integer(),
`Data Source` = col_character(),
`Technical Note` = col_character(),
`Breakout Title` = col_skip(),
`Breakout Category` = col_skip(),
`Breakout Subcategory` = col_skip(),
`Breakout Rate Value` = col_skip(),
`Breakout Rate Value Units` = col_skip(),
`Breakout Rate Lower Confidence Interval` = col_skip(),
`Breakout Rate Upper Confidence Interval` = col_skip(),
`Breakout Count Value` = col_skip(),
`Breakout Count Value Units` = col_skip(),
`Breakout Count Lower Confidence Interval` = col_skip(),
`Breakout Count Upper Confidence Interval` = col_skip(),
`Breakout Unstable` = col_skip(),
`Breakout Footer` = col_skip()
)
)
View(HtnPrevalence_datadc)
Hypertension Data by Census Tracts
# Clean the necessary columns
htn_tract_clean <- HtnPrevalence_datadc %>%
filter(`Indicator Name` == "High Blood Pressure Prevalence",
`Location Type` == "Census Tract") %>%
rename(
GEOID = Location,
year = `Period of Measure`,
HTP = `Indicator Rate Value`
) %>%
mutate(
GEOID = as.character(GEOID),
year = as.integer(year)
) %>%
select(GEOID, year, HTP)
# Check structure
glimpse(htn_tract_clean)
## Rows: 867
## Columns: 3
## $ GEOID <chr> "11001000100", "11001000100", "11001000100", "11001000100", "110…
## $ year <int> 2019, 2017, 2015, 2013, 2021, 2019, 2017, 2015, 2013, 2021, 2019…
## $ HTP <dbl> 23.1, 21.8, 21.7, 23.8, 8.4, 8.6, 11.7, 9.1, 10.2, 19.3, 16.8, 1…
Data Cleaning
acs_summary <- dc_ACS_all_years %>%
group_by(year) %>%
summarise(
POV = mean(poverty_rate_pct, na.rm = TRUE),
UER = mean(unemployment_rate_pct, na.rm = TRUE),
LIN = mean(no_health_insurance_pct, na.rm = TRUE),
AGE65 = mean(pct_65plus, na.rm = TRUE),
ASIAN = mean(asian_pct, na.rm = TRUE),
PACIFIC = mean(pacific_islander_pct, na.rm = TRUE),
AMERICANINDIAN = mean(american_indian_pct, na.rm = TRUE),
BLACK = mean(black_population_pct, na.rm = TRUE),
WHITE = mean(white_population_pct, na.rm = TRUE)
)
Hypertension Data Interpolation for Missing Years
library(tidyr)
library(zoo)
# Create a full tract-year panel and linearly interpolate missing years
htn_tract_full <- htn_tract_clean %>%
complete(GEOID, year = 2013:2024) %>%
group_by(GEOID) %>%
arrange(year) %>%
mutate(HTP_interp = na.approx(HTP, year, rule = 2)) %>%
ungroup()
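As a quick sanity check on the interpolation, one tract can be inspected directly; a minimal sketch using a GEOID that appears in the cleaned data (output omitted):
# Observed years keep their HTP values; gap years are filled in HTP_interp
htn_tract_full %>%
  filter(GEOID == "11001000100") %>%
  arrange(year) %>%
  print(n = 12)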
Merge and Clean Dataset
dc_ACS_all_years <- dc_ACS_all_years %>%
mutate(GEOID = as.character(GEOID))
df1 <- dc_ACS_all_years %>%
left_join(htn_tract_full, by = c("GEOID", "year"))
View(df1)
# Clean table for analysis
df2 <- df1 %>%
select(
GEOID,
NAME,
year,
HTP_interp, # Interpolated hypertension prevalence
poverty_rate_pct, # POV
unemployment_rate_pct, # UER
no_health_insurance_pct, # LIN
pct_65plus, # AGE65
asian_pct, # ASIAN
pacific_islander_pct, # PACIFIC
american_indian_pct, # AMERICANINDIAN
black_population_pct, # BLACK
white_population_pct # WHITE
) %>%
rename(
Year = year,
HTP = HTP_interp,
POV = poverty_rate_pct,
UER = unemployment_rate_pct,
LIN = no_health_insurance_pct,
AGE65 = pct_65plus,
ASIAN = asian_pct,
PACIFIC = pacific_islander_pct,
AMERICANINDIAN = american_indian_pct,
BLACK = black_population_pct,
WHITE = white_population_pct
)
View(df2)
Final Dataset
library(dplyr)
# Check column types first
str(df2)
## tibble [2,077 × 13] (S3: tbl_df/tbl/data.frame)
## $ GEOID : chr [1:2077] "11001002600" "11001004002" "11001005600" "11001004600" ...
## $ NAME : chr [1:2077] "Census Tract 26, District of Columbia, District of Columbia" "Census Tract 40.02, District of Columbia, District of Columbia" "Census Tract 56, District of Columbia, District of Columbia" "Census Tract 46, District of Columbia, District of Columbia" ...
## $ Year : int [1:2077] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ HTP : num [1:2077] 34.8 16.8 18.9 27.3 17.2 18.6 37.5 41.8 20.2 17.8 ...
## $ POV : num [1:2077] 5.76 3.93 15.22 10.15 7.85 ...
## $ UER : num [1:2077] 4.72 1.12 2.19 5.33 2.71 ...
## $ LIN : num [1:2077] 1.84 0.71 1.14 4 3.05 ...
## $ AGE65 : num [1:2077] 25.01 10.32 14.65 17.77 9.58 ...
## $ ASIAN : num [1:2077] 4.63 7.03 13.34 3.18 10.71 ...
## $ PACIFIC : num [1:2077] 0.501 0 0 0 0 ...
## $ AMERICANINDIAN: num [1:2077] 0.334 0.262 0 3.218 0.377 ...
## $ BLACK : num [1:2077] 45.34 8.86 6.3 59.28 8.31 ...
## $ WHITE : num [1:2077] 41.5 79.6 74.3 31.5 72.2 ...
# Select only numeric columns for missing analysis
numeric_cols <- df2 %>%
select(where(is.numeric)) # Only numeric
# Calculate missing data properly
total_rows <- nrow(numeric_cols)
missing_counts <- colSums(is.na(numeric_cols))
missing_percentages <- (missing_counts / total_rows) * 100
# Print results sorted by most missing
missing_percentages %>%
sort(decreasing = TRUE) %>%
print()
## HTP POV UER LIN AGE65
## 10.01444391 0.09629273 0.09629273 0.09629273 0.09629273
## ASIAN PACIFIC AMERICANINDIAN BLACK WHITE
## 0.09629273 0.09629273 0.09629273 0.09629273 0.09629273
## Year
## 0.00000000
# Remove rows with NAs (only from numeric columns)
df <- df2 %>%
filter(complete.cases(numeric_cols))
# Verify
glimpse(df)
## Rows: 1,867
## Columns: 13
## $ GEOID <chr> "11001002600", "11001004002", "11001005600", "110010046…
## $ NAME <chr> "Census Tract 26, District of Columbia, District of Col…
## $ Year <int> 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2013, 2…
## $ HTP <dbl> 34.8, 16.8, 18.9, 27.3, 17.2, 18.6, 37.5, 41.8, 20.2, 1…
## $ POV <dbl> 5.762004, 3.925234, 15.221297, 10.153736, 7.849186, 21.…
## $ UER <dbl> 4.718163, 1.121495, 2.188470, 5.327136, 2.707798, 7.658…
## $ LIN <dbl> 1.8371608, 0.7102804, 1.1432304, 4.0042903, 3.0505570, …
## $ AGE65 <dbl> 25.010438, 10.317757, 14.649682, 17.769038, 9.580120, 1…
## $ ASIAN <dbl> 4.6346555, 7.0280374, 13.3431325, 3.1819807, 10.7112254…
## $ PACIFIC <dbl> 0.5010438, 0.0000000, 0.0000000, 0.0000000, 0.0000000, …
## $ AMERICANINDIAN <dbl> 0.33402923, 0.26168224, 0.00000000, 3.21773329, 0.37703…
## $ BLACK <dbl> 45.344468, 8.859813, 6.304099, 59.277798, 8.311911, 27.…
## $ WHITE <dbl> 41.461378, 79.626168, 74.260983, 31.533786, 72.219366, …
View(df)
#write.csv(df, "final_dataset_DC_htn.csv", row.names = FALSE)
Time Series Plot of Variables
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Calculate yearly averages
df_yearly <- df %>%
group_by(Year) %>%
summarise(
HTP = mean(HTP, na.rm = TRUE),
POV = mean(POV, na.rm = TRUE),
UER = mean(UER, na.rm = TRUE),
LIN = mean(LIN, na.rm = TRUE),
AGE65 = mean(AGE65, na.rm = TRUE)
)
# Create and store the interactive plot
interactive_plot <- plot_ly(df_yearly, x = ~Year) %>%
add_lines(y = ~HTP, name = "Hypertension (%)", line = list(color = "red")) %>%
add_lines(y = ~POV, name = "Poverty (%)", line = list(color = "blue")) %>%
add_lines(y = ~UER, name = "Unemployment (%)", line = list(color = "green")) %>%
add_lines(y = ~LIN, name = "No Insurance (%)", line = list(color = "purple")) %>%
add_lines(y = ~AGE65, name = "Age 65+ (%)", line = list(color = "orange")) %>%
layout(
title = "DC Yearly Averages (2013-2023)",
xaxis = list(title = "Year", dtick = 1),
yaxis = list(title = "Percentage (%)"),
hovermode = "x unified"
)
# Display the plot
interactive_plot
Statistical Analysis
Descriptive Statistics
summary(df[,-1])
## NAME Year HTP POV
## Length:1867 Min. :2013 Min. : 0.00 Min. : 0.000
## Class :character 1st Qu.:2015 1st Qu.:21.20 1st Qu.: 7.186
## Mode :character Median :2018 Median :30.30 Median :13.140
## Mean :2018 Mean :30.14 Mean :16.758
## 3rd Qu.:2020 3rd Qu.:39.95 3rd Qu.:24.089
## Max. :2023 Max. :48.20 Max. :70.043
## UER LIN AGE65 ASIAN
## Min. : 0.000 Min. : 0.0000 Min. : 0.00 Min. : 0.0000
## 1st Qu.: 2.407 1st Qu.: 0.6474 1st Qu.:10.97 1st Qu.: 0.3738
## Median : 4.332 Median : 1.5774 Median :14.13 Median : 2.2795
## Mean : 5.112 Mean : 2.0263 Mean :14.35 Mean : 3.4124
## 3rd Qu.: 6.988 3rd Qu.: 2.8993 3rd Qu.:17.45 3rd Qu.: 5.1190
## Max. :24.658 Max. :13.7735 Max. :37.31 Max. :24.9156
## PACIFIC AMERICANINDIAN BLACK WHITE
## Min. :0.00000 Min. : 0.0000 Min. : 0.00 Min. : 0.000
## 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.: 14.95 1st Qu.: 5.036
## Median :0.00000 Median : 0.0000 Median : 52.46 Median : 30.812
## Mean :0.04993 Mean : 0.3026 Mean : 51.61 Mean : 36.759
## 3rd Qu.:0.00000 3rd Qu.: 0.3010 3rd Qu.: 87.59 3rd Qu.: 67.948
## Max. :4.00677 Max. :10.5140 Max. :100.00 Max. :100.000
apply(df %>% select(where(is.numeric)), 2, sd)
##           Year            HTP            POV            UER            LIN
##      3.1262055      9.9154904     12.2909372      3.5131603      1.8767898
##          AGE65          ASIAN        PACIFIC AMERICANINDIAN          BLACK
##      4.9703869      3.8074132      0.2388013      0.7869290     34.2446089
##          WHITE
##     30.7340430
In 2013, the average age-adjusted hypertension prevalence (HTP) across
D.C. census tracts was 26.4%, with tract values ranging from 0.0% to 44.4%.
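The 2013 figures cited above can be recomputed directly from the final dataset; a minimal sketch (output omitted):
# Recompute the 2013 tract-level HTP summary
df %>%
  filter(Year == 2013) %>%
  summarise(
    mean_HTP = mean(HTP),
    min_HTP = min(HTP),
    max_HTP = max(HTP)
  )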
library(tseries)
# Select only numeric columns
numeric_cols <- df %>% select(where(is.numeric))
# Calculate statistics
results <- data.frame(
Mean = sapply(numeric_cols, mean, na.rm = TRUE),
SD = sapply(numeric_cols, sd, na.rm = TRUE),
Sum_Sq_Dev = sapply(numeric_cols, function(x) sum((x - mean(x, na.rm = TRUE))^2, na.rm = TRUE)),
JB_p_value = sapply(numeric_cols, function(x) {
if (length(na.omit(x)) >= 4) { # Jarque-Bera requires min 4 observations
jarque.bera.test(na.omit(x))$p.value
} else {
NA
}
})
)
# Display results
round(results, 4)
## Mean SD Sum_Sq_Dev JB_p_value
## Year 2017.8174 3.1262 18236.7177 0
## HTP 30.1408 9.9155 183459.4300 0
## POV 16.7584 12.2909 281891.2796 0
## UER 5.1121 3.5132 23030.7233 0
## LIN 2.0263 1.8768 6572.6863 0
## AGE65 14.3549 4.9704 46099.0555 0
## ASIAN 3.4124 3.8074 27050.2730 0
## PACIFIC 0.0499 0.2388 106.4106 0
## AMERICANINDIAN 0.3026 0.7869 1155.5340 0
## BLACK 51.6081 34.2446 2188245.5775 0
## WHITE 36.7589 30.7340 1762588.8937 0
Regression
# Simple linear regressions
lm_black <- lm(HTP ~ BLACK, data = df)
summary(lm_black)
##
## Call:
## lm(formula = HTP ~ BLACK, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.0516 -2.3945 0.1209 2.5586 16.2096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.694000 0.181107 92.18 <2e-16 ***
## BLACK 0.260556 0.002924 89.10 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.326 on 1865 degrees of freedom
## Multiple R-squared: 0.8098, Adjusted R-squared: 0.8097
## F-statistic: 7939 on 1 and 1865 DF, p-value: < 2.2e-16
lm_white <- lm(HTP ~ WHITE, data = df)
summary(lm_white)
##
## Call:
## lm(formula = HTP ~ WHITE, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.5474 -2.6842 0.4068 2.8455 16.5850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.603546 0.168502 240.97 <2e-16 ***
## WHITE -0.284631 0.003517 -80.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.669 on 1865 degrees of freedom
## Multiple R-squared: 0.7784, Adjusted R-squared: 0.7782
## F-statistic: 6549 on 1 and 1865 DF, p-value: < 2.2e-16
lm_ai_an <- lm(HTP ~ AMERICANINDIAN, data = df)
summary(lm_ai_an)
##
## Call:
## lm(formula = HTP ~ AMERICANINDIAN, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.0096 -8.9096 0.0072 9.7904 16.9904
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.0096 0.2458 122.097 <2e-16 ***
## AMERICANINDIAN 0.4337 0.2916 1.487 0.137
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.912 on 1865 degrees of freedom
## Multiple R-squared: 0.001185, Adjusted R-squared: 0.0006492
## F-statistic: 2.212 on 1 and 1865 DF, p-value: 0.1371
lm_asian <- lm(HTP ~ ASIAN, data = df)
summary(lm_asian)
##
## Call:
## lm(formula = HTP ~ ASIAN, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.884 -5.552 0.638 5.716 32.249
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 35.88387 0.23525 152.53 <2e-16 ***
## ASIAN -1.68301 0.04602 -36.57 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.569 on 1865 degrees of freedom
## Multiple R-squared: 0.4176, Adjusted R-squared: 0.4173
## F-statistic: 1338 on 1 and 1865 DF, p-value: < 2.2e-16
lm_pacific <- lm(HTP ~ PACIFIC, data = df)
summary(lm_pacific)
##
## Call:
## lm(formula = HTP ~ PACIFIC, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.3494 -8.5494 0.2506 9.7506 21.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.3494 0.2333 130.078 < 2e-16 ***
## PACIFIC -4.1768 0.9566 -4.366 1.33e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.868 on 1865 degrees of freedom
## Multiple R-squared: 0.01012, Adjusted R-squared: 0.009588
## F-statistic: 19.06 on 1 and 1865 DF, p-value: 1.333e-05
Research Questions 1, 2, 3, and 4
# Linear regression
library(lmtest)
library(car)
# Ensure variables are numeric and complete cases
model_data <- na.omit(df[, c("HTP", "POV", "UER", "LIN", "AGE65")])
model <- lm(HTP ~ POV + UER + LIN + AGE65, data = model_data)
summary(model)
##
## Call:
## lm(formula = HTP ~ POV + UER + LIN + AGE65, data = model_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.4163 -4.4016 -0.1987 4.0868 18.9096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.87286 0.50051 15.730 <2e-16 ***
## POV 0.35236 0.01500 23.493 <2e-16 ***
## UER 0.72283 0.05402 13.380 <2e-16 ***
## LIN 0.17898 0.07947 2.252 0.0244 *
## AGE65 0.85720 0.02937 29.186 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.135 on 1862 degrees of freedom
## Multiple R-squared: 0.618, Adjusted R-squared: 0.6172
## F-statistic: 753.1 on 4 and 1862 DF, p-value: < 2.2e-16
# Diagnostics
par(mfrow = c(2, 2))
plot(model)

dwtest(model) # Durbin-Watson test
##
## Durbin-Watson test
##
## data: model
## DW = 1.5259, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
The baseline linear model (HTP ~ POV + UER + LIN + AGE65) shows that
poverty (β = 0.352), unemployment (β = 0.723), and the share of
residents aged 65+ (β = 0.857) are all highly significant (p < 0.001),
while lack of health insurance is significant at the 5% level (β =
0.179, p = 0.024). A standardized-coefficient comparison is sketched below.
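Because the predictors are on different scales, relative importance is easier to judge from standardized coefficients; a minimal sketch (output omitted):
# Refit on z-scored variables so coefficient magnitudes are directly comparable
model_std <- lm(HTP ~ POV + UER + LIN + AGE65,
                data = as.data.frame(scale(model_data)))
summary(model_std)$coefficients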
Correlation Matrix
numeric_df <- df[, -1] %>% select_if(is.numeric)
cor(numeric_df)
## Year HTP POV UER LIN
## Year 1.000000000 0.01225437 -0.08340431 -0.20089511 -0.29514758
## HTP 0.012254374 1.00000000 0.59945319 0.60126945 0.16242865
## POV -0.083404312 0.59945319 1.00000000 0.62783008 0.22125963
## UER -0.200895107 0.60126945 0.62783008 1.00000000 0.27286433
## LIN -0.295147579 0.16242865 0.22125963 0.27286433 1.00000000
## AGE65 -0.033639803 0.45776709 -0.01306251 0.14358245 -0.08837060
## ASIAN 0.001440181 -0.64625270 -0.36913272 -0.42452151 -0.09400436
## PACIFIC 0.056724566 -0.10059304 -0.06887460 -0.09504731 -0.03509823
## AMERICANINDIAN -0.010888039 0.03442100 -0.05418941 -0.01158435 0.03220828
## BLACK -0.027851251 0.89986977 0.67853649 0.69059186 0.23084653
## WHITE -0.033638188 -0.88224321 -0.65886167 -0.66498105 -0.28230126
## AGE65 ASIAN PACIFIC AMERICANINDIAN BLACK
## Year -0.03363980 0.001440181 0.05672457 -0.010888039 -0.027851251
## HTP 0.45776709 -0.646252700 -0.10059304 0.034420998 0.899869767
## POV -0.01306251 -0.369132721 -0.06887460 -0.054189411 0.678536490
## UER 0.14358245 -0.424521510 -0.09504731 -0.011584352 0.690591860
## LIN -0.08837060 -0.094004361 -0.03509823 0.032208281 0.230846528
## AGE65 1.00000000 -0.233821809 -0.01346033 0.071745697 0.237045137
## ASIAN -0.23382181 1.000000000 0.08223060 -0.009878009 -0.672444785
## PACIFIC -0.01346033 0.082230600 1.00000000 -0.031657367 -0.103123018
## AMERICANINDIAN 0.07174570 -0.009878009 -0.03165737 1.000000000 0.002118105
## BLACK 0.23704514 -0.672444785 -0.10312302 0.002118105 1.000000000
## WHITE -0.21210635 0.602479467 0.09176414 -0.041129037 -0.973391855
## WHITE
## Year -0.03363819
## HTP -0.88224321
## POV -0.65886167
## UER -0.66498105
## LIN -0.28230126
## AGE65 -0.21210635
## ASIAN 0.60247947
## PACIFIC 0.09176414
## AMERICANINDIAN -0.04112904
## BLACK -0.97339186
## WHITE 1.00000000
str(df)
## tibble [1,867 × 13] (S3: tbl_df/tbl/data.frame)
## $ GEOID : chr [1:1867] "11001002600" "11001004002" "11001005600" "11001004600" ...
## $ NAME : chr [1:1867] "Census Tract 26, District of Columbia, District of Columbia" "Census Tract 40.02, District of Columbia, District of Columbia" "Census Tract 56, District of Columbia, District of Columbia" "Census Tract 46, District of Columbia, District of Columbia" ...
## $ Year : int [1:1867] 2013 2013 2013 2013 2013 2013 2013 2013 2013 2013 ...
## $ HTP : num [1:1867] 34.8 16.8 18.9 27.3 17.2 18.6 37.5 41.8 20.2 17.8 ...
## $ POV : num [1:1867] 5.76 3.93 15.22 10.15 7.85 ...
## $ UER : num [1:1867] 4.72 1.12 2.19 5.33 2.71 ...
## $ LIN : num [1:1867] 1.84 0.71 1.14 4 3.05 ...
## $ AGE65 : num [1:1867] 25.01 10.32 14.65 17.77 9.58 ...
## $ ASIAN : num [1:1867] 4.63 7.03 13.34 3.18 10.71 ...
## $ PACIFIC : num [1:1867] 0.501 0 0 0 0 ...
## $ AMERICANINDIAN: num [1:1867] 0.334 0.262 0 3.218 0.377 ...
## $ BLACK : num [1:1867] 45.34 8.86 6.3 59.28 8.31 ...
## $ WHITE : num [1:1867] 41.5 79.6 74.3 31.5 72.2 ...
numeric_df <- df %>% select(-1) %>% select_if(is.numeric)
corrplot::corrplot(cor(numeric_df))

str(df %>% select(HTP, BLACK, WHITE, AMERICANINDIAN, ASIAN, PACIFIC))
## tibble [1,867 × 6] (S3: tbl_df/tbl/data.frame)
## $ HTP : num [1:1867] 34.8 16.8 18.9 27.3 17.2 18.6 37.5 41.8 20.2 17.8 ...
## $ BLACK : num [1:1867] 45.34 8.86 6.3 59.28 8.31 ...
## $ WHITE : num [1:1867] 41.5 79.6 74.3 31.5 72.2 ...
## $ AMERICANINDIAN: num [1:1867] 0.334 0.262 0 3.218 0.377 ...
## $ ASIAN : num [1:1867] 4.63 7.03 13.34 3.18 10.71 ...
## $ PACIFIC : num [1:1867] 0.501 0 0 0 0 ...
cor_data <- df %>%
select(
HTP,
BLACK,
WHITE,
AMERICANINDIAN,
ASIAN,
PACIFIC
) %>%
mutate(across(everything(), ~ as.numeric(as.character(.)))) %>%
filter(complete.cases(.)) # Remove NAs
#Verify all columns are numeric
stopifnot(all(sapply(cor_data, is.numeric)))
#Correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs")
#View results with significance stars
print(cor_matrix)
## HTP BLACK WHITE AMERICANINDIAN ASIAN
## HTP 1.0000000 0.899869767 -0.88224321 0.034420998 -0.646252700
## BLACK 0.8998698 1.000000000 -0.97339186 0.002118105 -0.672444785
## WHITE -0.8822432 -0.973391855 1.00000000 -0.041129037 0.602479467
## AMERICANINDIAN 0.0344210 0.002118105 -0.04112904 1.000000000 -0.009878009
## ASIAN -0.6462527 -0.672444785 0.60247947 -0.009878009 1.000000000
## PACIFIC -0.1005930 -0.103123018 0.09176414 -0.031657367 0.082230600
## PACIFIC
## HTP -0.10059304
## BLACK -0.10312302
## WHITE 0.09176414
## AMERICANINDIAN -0.03165737
## ASIAN 0.08223060
## PACIFIC 1.00000000
Multicollinearity
Variance inflation factor (VIF)
Variance inflation factors are used to assess the contribution of each
predictor variable to the model and to identify those that are
collinear: a value of 1 suggests no collinearity, while values above 5
suggest collinearity severe enough to degrade the model (Akinwande et
al. 2015).
car::vif(model) # Variance Inflation Factor
## POV UER LIN AGE65
## 1.684957 1.785993 1.102908 1.056610
Research Questions 1, 2, 3, and 4
ARDL Model Estimation
library(ARDL)
library(dplyr)
# Prepare data: ensure numeric columns and complete cases
ardl_data <- na.omit(df[, c("HTP", "POV", "UER", "AGE65")]) %>%
mutate(across(everything(), as.numeric))
ardl_model <- ardl(HTP ~ POV + UER + AGE65, data = ardl_data,
order = c(1, 0, 1, 0))
summary(ardl_model)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.2592 -4.0518 -0.2233 4.0896 18.5447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.76667 0.57030 6.605 5.18e-11 ***
## L(HTP, 1) 0.21610 0.01744 12.387 < 2e-16 ***
## POV 0.34009 0.01442 23.584 < 2e-16 ***
## UER 0.65717 0.05187 12.670 < 2e-16 ***
## L(UER, 1) -0.16581 0.04924 -3.367 0.000775 ***
## AGE65 0.81159 0.02812 28.865 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.881 on 1860 degrees of freedom
## Multiple R-squared: 0.6493, Adjusted R-squared: 0.6484
## F-statistic: 688.7 on 5 and 1860 DF, p-value: < 2.2e-16
An ARDL(1, 0, 1, 0) model was estimated. The coefficients on POV, UER
(contemporaneous and lagged), and AGE65 are all significant, and the
lagged dependent variable indicates moderate persistence in HTP. An
AIC-based order search is sketched below.
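The lag order above was fixed at (1, 0, 1, 0); if AIC-based selection is preferred, the ARDL package can search orders automatically. A sketch (the max_order = 2 cap is an arbitrary choice):
# Search lag orders up to 2 per variable, selecting the combination with minimum AIC
auto_model <- auto_ardl(HTP ~ POV + UER + AGE65, data = ardl_data,
                        max_order = 2, selection = "AIC")
auto_model$best_order # AIC-selected lag order
summary(auto_model$best_model)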
Research Questions 1, 2, 3, and 4
Cointegration (Bounds) Test
library(ARDL)
# 1. Estimate ARDL model
ardl_model <- ardl(HTP ~ POV + UER + AGE65, data = df, order = c(1, 0, 1, 0))
# 2. Bounds test for cointegration
bounds_test <- bounds_f_test(ardl_model, case = 3)
print(bounds_test)
##
## Bounds F-test (Wald) for no cointegration
##
## data: d(HTP) ~ L(HTP, 1) + POV + L(UER, 1) + AGE65 + d(UER)
## F = 742.47, p-value = 1e-06
## alternative hypothesis: Possible cointegration
## null values:
## k T
## 3 1000
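As a complement to the F-bounds test, the t-bounds test on the lagged dependent variable can be run under the same case; a minimal sketch (output omitted):
# t-bounds test (case 3: unrestricted intercept, no trend)
bounds_t_test(ardl_model, case = 3)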
Long Run and Short Run Estimates
library(ARDL)
ecm_model <- uecm(ardl_model)
summary(ecm_model)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.2592 -4.0518 -0.2233 4.0896 18.5447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.76667 0.57030 6.605 5.18e-11 ***
## L(HTP, 1) -0.78390 0.01744 -44.936 < 2e-16 ***
## POV 0.34009 0.01442 23.584 < 2e-16 ***
## L(UER, 1) 0.49136 0.06717 7.315 3.81e-13 ***
## AGE65 0.81159 0.02812 28.865 < 2e-16 ***
## d(UER) 0.65717 0.05187 12.670 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.881 on 1860 degrees of freedom
## Multiple R-squared: 0.7067, Adjusted R-squared: 0.7059
## F-statistic: 896.1 on 5 and 1860 DF, p-value: < 2.2e-16
coefs <- coefficients(ardl_model)
print(coefs)
## (Intercept) L(HTP, 1) POV UER L(UER, 1) AGE65
## 3.7666673 0.2160962 0.3400873 0.6571727 -0.1658133 0.8115949
library(ARDL)
library(dplyr)
# Include race variables
ardl_data1 <- na.omit(df[, c("HTP", "POV", "UER", "AGE65", "BLACK", "WHITE", "AMERICANINDIAN", "ASIAN", "PACIFIC" )]) %>%
mutate(across(everything(), as.numeric))
# Estimate ARDL model with race covariates
ardl_model1 <- ardl(HTP ~ POV + UER + AGE65 + BLACK + WHITE + AMERICANINDIAN + ASIAN + PACIFIC,
data = ardl_data1,
order = c(1, 0, 1, 0, 0, 0, 0, 0, 0))
# Summarize results
summary(ardl_model1)
##
## Time series regression with "ts" data:
## Start = 2, End = 1867
##
## Call:
## dynlm::dynlm(formula = full_formula, data = data, start = start,
## end = end)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.4182 -1.8757 -0.0008 1.9398 16.0788
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.982246 1.121209 16.038 < 2e-16 ***
## L(HTP, 1) 0.045492 0.010432 4.361 1.37e-05 ***
## POV 0.080109 0.009518 8.417 < 2e-16 ***
## UER -0.144905 0.032768 -4.422 1.03e-05 ***
## L(UER, 1) 0.014080 0.028820 0.489 0.6252
## AGE65 0.546016 0.016903 32.302 < 2e-16 ***
## BLACK 0.126684 0.012447 10.178 < 2e-16 ***
## WHITE -0.096790 0.012117 -7.988 2.39e-15 ***
## AMERICANINDIAN 0.062552 0.102086 0.613 0.5401
## ASIAN -0.203092 0.029911 -6.790 1.51e-11 ***
## PACIFIC -0.617285 0.331083 -1.864 0.0624 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.385 on 1855 degrees of freedom
## Multiple R-squared: 0.8841, Adjusted R-squared: 0.8835
## F-statistic: 1415 on 10 and 1855 DF, p-value: < 2.2e-16
To assess the influence of racial composition, we extended the ARDL
model by adding the percentage of residents in each racial group as
covariates. The race variables enter contemporaneously (lag order 0),
reflecting their theoretical role in structural health disparities. The
implied long-run effects are sketched below.
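The long-run effects implied by these estimates can be recovered with ARDL::multipliers; a minimal sketch (output omitted):
# Long-run multipliers for the race-augmented model
multipliers(ardl_model1, type = "lr")
# Short-run (impact) multipliers
multipliers(ardl_model1, type = "sr")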
# Convert to a time-series object (note: df is a tract-year panel, so the pooled tests below treat it as a single series)
df_ts <- ts(df, start = 2013, frequency = 1)
# stationarity
adf.test(df$HTP)
## Warning in adf.test(df$HTP): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: df$HTP
## Dickey-Fuller = -9.7698, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
Augmented Dickey-Fuller Unit Root Test
library(tseries)
# Safely identify numeric columns (excluding first column)
numeric_cols <- which(sapply(df, is.numeric))
if(length(numeric_cols) > 1) {
numeric_cols <- numeric_cols[-1] # Drop the first numeric column (Year)
adf_results <- lapply(df[, numeric_cols, drop = FALSE], function(x) {
x_clean <- na.omit(as.numeric(x))
if(length(x_clean) > 8) adf.test(x_clean) else NULL # ADF requires min 8 obs
})
adf_results <- Filter(Negate(is.null), adf_results)
} else {
warning("Insufficient numeric columns for ADF testing")
adf_results <- list()
}
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
## Warning in adf.test(x_clean): p-value smaller than printed p-value
adf_results
## $HTP
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.7698, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $POV
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.723, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $UER
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.3906, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $LIN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.724, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AGE65
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.986, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $ASIAN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.1577, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $PACIFIC
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.985, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $AMERICANINDIAN
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -11.958, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $BLACK
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -9.7369, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $WHITE
##
## Augmented Dickey-Fuller Test
##
## data: x_clean
## Dickey-Fuller = -10.364, Lag order = 12, p-value = 0.01
## alternative hypothesis: stationary
library(tibble)
# Summary table of the ADF results printed above
adf_table <- tribble(
  ~Variable, ~ADF_Statistic, ~P_Value, ~Conclusion,
  "HTP", -9.7698, "< 0.01", "Stationary",
  "POV", -10.723, "< 0.01", "Stationary",
  "UER", -9.3906, "< 0.01", "Stationary",
  "LIN", -11.724, "< 0.01", "Stationary",
  "AGE65", -10.986, "< 0.01", "Stationary",
  "ASIAN", -9.1577, "< 0.01", "Stationary",
  "PACIFIC", -11.985, "< 0.01", "Stationary",
  "AMERICANINDIAN", -11.958, "< 0.01", "Stationary",
  "BLACK", -9.7369, "< 0.01", "Stationary",
  "WHITE", -10.364, "< 0.01", "Stationary"
)
knitr::kable(adf_table, caption = "ADF Test Results for Variables")
ADF Test Results for Variables

| Variable       | ADF_Statistic | P_Value | Conclusion |
|----------------|---------------|---------|------------|
| HTP            | -9.7698       | < 0.01  | Stationary |
| POV            | -10.7230      | < 0.01  | Stationary |
| UER            | -9.3906       | < 0.01  | Stationary |
| LIN            | -11.7240      | < 0.01  | Stationary |
| AGE65          | -10.9860      | < 0.01  | Stationary |
| ASIAN          | -9.1577       | < 0.01  | Stationary |
| PACIFIC        | -11.9850      | < 0.01  | Stationary |
| AMERICANINDIAN | -11.9580      | < 0.01  | Stationary |
| BLACK          | -9.7369       | < 0.01  | Stationary |
| WHITE          | -10.3640      | < 0.01  | Stationary |
All variables were tested for stationarity using the Augmented
Dickey-Fuller (ADF) test. As shown in the table above, all are
stationary in levels (I(0)), so ARDL modeling can proceed without
differencing. A KPSS cross-check is sketched below.
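As a robustness check, the KPSS test, which reverses the null hypothesis (stationarity is the null), can be applied to the same columns; a minimal sketch (output omitted):
# KPSS test per numeric column; here SMALL p-values indicate non-stationarity
kpss_pvals <- sapply(df[, numeric_cols, drop = FALSE], function(x) {
  tseries::kpss.test(na.omit(as.numeric(x)))$p.value
})
kpss_pvals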
Research Questions 1 and 2
Normality
library(tseries)
# Estimate
ardl_model <- ardl(HTP ~ POV + UER, data = df, order = c(1, 0, 1))
# Test residual normality
residuals <- residuals(ardl_model)
jarque.bera.test(residuals)
##
## Jarque Bera Test
##
## data: residuals
## X-squared = 6.9882, df = 2, p-value = 0.03038
Research Questions 1 and 2
Heteroskedasticity
library(lmtest)
# Estimate the ARDL model
ardl_model <- ardl(HTP ~ POV + UER, data = df, order = c(1, 0, 1))
# Breusch-Pagan test
bptest(ardl_model)
##
## studentized Breusch-Pagan test
##
## data: ardl_model
## BP = 76.087, df = 4, p-value = 1.174e-15
Serial Correlation
library(lmtest)
bgtest(ardl_model, order = 2)
##
## Breusch-Godfrey test for serial correlation of order up to 2
##
## data: ardl_model
## LM test = 14.912, df = 2, p-value = 0.0005778
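Given the heteroskedasticity (Breusch-Pagan) and serial correlation (Breusch-Godfrey) detected above, coefficient inference can be repeated with heteroskedasticity- and autocorrelation-consistent (HAC) standard errors. A sketch, assuming the ardl object's underlying lm structure works with the sandwich package:
library(sandwich)
# Newey-West (HAC) standard errors; lag = 2 mirrors the BG test order above
coeftest(ardl_model, vcov. = NeweyWest(ardl_model, lag = 2))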
Model Specification (RESET)
resettest(ardl_model)
##
## RESET test
##
## data: ardl_model
## RESET = 49.575, df1 = 2, df2 = 1859, p-value < 2.2e-16
Research Questions 1, 2, 3, and 4
Residual Plot
plot(residuals(ardl_model), type="o", main="Residuals of ARDL Model", col="blue")
abline(h=0, col="red", lty=2)

Composite Resilience or Vulnerability Index (CRE) with Principal
Component Analysis
The independent variables are the CRE factors (POV, UER, LIN, AGE65);
the dependent variable is HTP.
Composite resilience factors on hypertension:
\[
\text{HTP} \sim \text{CRE}
\]
To address the multidimensionality of socioeconomic vulnerability, we
constructed a composite index using Principal Component Analysis (PCA).
The PCA included four standardized predictors: poverty rate (POV),
unemployment rate (UER), lack of health insurance (LIN), and percent of
the population aged 65 or older (AGE65). These variables capture
economic hardship, employment instability, healthcare exclusion, and
aging vulnerability. The loadings show that POV and UER contribute most
to the first component, followed by LIN and AGE65. This component was
interpreted as a Composite Resilience Estimate (CRE), with higher
values denoting greater structural vulnerability. The CRE was then used
as an explanatory variable in regression models predicting hypertension
prevalence (HTP).
# PCA to summarize structural resilience/vulnerability
vars <- c("POV", "UER", "LIN", "AGE65")
pca <- prcomp(df[, vars], scale. = TRUE)
#summarize
summary(pca)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.3368 1.0348 0.8916 0.58936
## Proportion of Variance 0.4467 0.2677 0.1987 0.08684
## Cumulative Proportion 0.4467 0.7144 0.9132 1.00000
pca$rotation
## PC1 PC2 PC3 PC4
## POV -0.63569540 -0.00447665 -0.3979394 0.6614496
## UER -0.65741009 -0.17113238 -0.1439648 -0.7195831
## LIN -0.39942191 0.43188128 0.8022458 0.1016977
## AGE65 -0.06454054 -0.88553499 0.4210939 0.1853165
PCA Results
# First principal component as the CRE (PC1 loads negatively on POV/UER, so the raw score falls as vulnerability rises; hence the negative coefficient below)
df$CRE_pca <- as.numeric(pca$x[, 1])
# Regress HTP on the CRE
model_cre <- lm(HTP ~ CRE_pca, data = df)
summary(model_cre)
##
## Call:
## lm(formula = HTP ~ CRE_pca, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.4987 -5.4595 -0.2445 5.4770 19.4660
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.1408 0.1742 173.06 <2e-16 ***
## CRE_pca -4.8317 0.1303 -37.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.525 on 1865 degrees of freedom
## Multiple R-squared: 0.4243, Adjusted R-squared: 0.424
## F-statistic: 1375 on 1 and 1865 DF, p-value: < 2.2e-16
#install.packages("factoextra")
#install.packages("broom")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(broom)
library(tidyverse)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# PCA for Composite Resilience Estimate
cre_vars <- c("POV", "UER", "LIN", "AGE65")
# Perform PCA
pca <- prcomp(df[, cre_vars], scale. = TRUE)
# Scree Plot
scree_plot <- fviz_eig(pca, addlabels = TRUE)
# Loadings Plot
loadings_data <- as.data.frame(pca$rotation[, 1, drop = FALSE])
loadings_plot <-
ggplot(loadings_data, aes(x = reorder(rownames(loadings_data), -PC1), y = PC1,
fill = rownames(loadings_data))) +
geom_col() +
labs(x = "", y = "PC1 Loadings") +
theme_minimal()
# Create the CRE index (negate PC1 so higher values indicate greater vulnerability)
df$CRE_pca <- -pca$x[, 1]
# Plot HTP vs. CRE
cre_htp_plot <-
ggplot(df, aes(x = CRE_pca, y = HTP)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "blue") +
labs(title = "HTP vs. CRE", x = "Composite Resilience (Higher = More Vulnerable)", y = "Hypertension Prevalence")
# Display all plots
gridExtra::grid.arrange(scree_plot, loadings_plot, cre_htp_plot, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'

# Regression model
cre_model <- lm(HTP ~ CRE_pca, data = df)
broom::tidy(cre_model) %>% kable(digits = 3)
| term        | estimate | std.error | statistic | p.value |
|-------------|----------|-----------|-----------|---------|
| (Intercept) | 30.141   | 0.174     | 173.063   | 0       |
| CRE_pca     | 4.832    | 0.130     | 37.076    | 0       |
Spatial Analysis
#install.packages("spData")
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sf)
library(tmap)
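Note that df_map is not created in this section; it is assumed to be a tract-level object pairing the analysis data with DC census-tract geometries. A minimal sketch of how it could be built with tigris (the package choice, the 2020 vintage, and the single-year filter are all assumptions):
# Hypothetical construction of df_map: tract polygons joined to one year of data
# library(tigris)
# dc_tracts <- tracts(state = "11", year = 2020, cb = TRUE)
# df_map <- dc_tracts %>%
#   select(GEOID, geometry) %>%
#   left_join(filter(df, Year == 2021), by = "GEOID")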
# Prepare spatial data
df_sf <- st_as_sf(df_map) %>%
st_transform(26918) # UTM Zone 18N for DC
# Create neighbors list (queen contiguity)
nb <- poly2nb(df_sf, queen = TRUE)
# Neighbor-mean imputation with NA safety
df_sf$HTP_imputed <- df_sf$HTP # Initialize
# Get indices of NA values
na_indices <- which(is.na(df_sf$HTP))
for (i in na_indices) {
neighbor_values <- df_sf$HTP[nb[[i]]] # Get neighbor values
valid_neighbors <- neighbor_values[!is.na(neighbor_values)]
if (length(valid_neighbors) > 0) {
df_sf$HTP_imputed[i] <- mean(valid_neighbors)
}
}
# Prep Moran's test
valid_tracts <- !is.na(df_sf$HTP_imputed)
df_sf_valid <- df_sf[valid_tracts, ]
# subset weights matrix
nb_valid <- subset.nb(nb, valid_tracts)
weights_valid <- nb2listw(nb_valid, style = "W")
# Moran's test
if (sum(valid_tracts) > 1) { # Need at least 2 observations
moran_result <- moran.test(df_sf_valid$HTP_imputed, weights_valid)
print(moran_result)
} else {
warning("Insufficient non-NA values for Moran's test")
}
##
## Moran I test under randomisation
##
## data: df_sf_valid$HTP_imputed
## weights: weights_valid
##
## Moran I statistic standard deviate = 20.255, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.826896343 -0.004901961 0.001686396
# Visualize with NA handling
tm_shape(df_sf) +
tm_fill("HTP_imputed",
palette = "Reds",
style = "quantile",
title = "Hypertension (%)",
textNA = "No Data (Insufficient Neighbor Estimates)",
colorNA = "lightgray") +
tm_borders(col = "white", lwd = 0.3) +
tm_layout(main.title = "Spatially Imputed Hypertension Prevalence",
legend.outside = TRUE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values'), 'colorNA'
## (rename to 'value.na'), 'textNA' (rename to 'label.na') to
## 'tm_scale_intervals(<HERE>)'
## For small multiples, specify a 'tm_scale_' for each multiple, and put them in a
## list: 'fill'.scale = list(<scale1>, <scale2>, ...)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

library(spdep)
library(spatialreg)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
##
## get.ClusterOption, get.coresOption, get.mcOption,
## get.VerboseOption, get.ZeroPolicyOption, set.ClusterOption,
## set.coresOption, set.mcOption, set.VerboseOption,
## set.ZeroPolicyOption
# Ensure valid geometries
df_sf_valid <- df_sf %>%
st_make_valid() %>%
filter(complete.cases(HTP, POV, UER))
# Create neighbors with zero policy
nb <- poly2nb(df_sf_valid, queen = TRUE)
## Warning in poly2nb(df_sf_valid, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf_valid, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights <- nb2listw(nb, style = "W", zero.policy = TRUE)
# Check neighbor counts
print(table(card(nb))) # Shows how many neighbors each tract has
##
## 0 1 2 3 4 5 6 7 8 9 11 13
## 1 3 12 17 24 44 24 19 7 1 1 1
# Run model with zero.policy
spatial_lag <- lagsarlm(
HTP ~ POV + UER,
data = df_sf_valid,
listw = weights,
method = "eigen",
zero.policy = TRUE
)
summary(spatial_lag)
##
## Call:lagsarlm(formula = HTP ~ POV + UER, data = df_sf_valid, listw = weights,
## method = "eigen", zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.116299 -2.686464 -0.051661 2.959348 19.271453
##
## Type: lag
## Regions with no neighbours included:
## 10
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.60484 1.39389 4.0210 5.795e-05
## POV 0.19744 0.04374 4.5140 6.363e-06
## UER 0.19647 0.14545 1.3507 0.1768
##
## Rho: 0.68882, LR test value: 117.82, p-value: < 2.22e-16
## Asymptotic standard error: 0.051374
## z-value: 13.408, p-value: < 2.22e-16
## Wald statistic: 179.77, p-value: < 2.22e-16
##
## Log likelihood: -467.5814 for lag model
## ML residual variance (sigma squared): 22.088, (sigma: 4.6998)
## Number of observations: 154
## Number of parameters estimated: 5
## AIC: 945.16, (AIC for lm: 1061)
## LM test for residual autocorrelation
## test value: 0.0062416, p-value: 0.93703
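In a spatial lag model the coefficients are not marginal effects, because a change in a covariate propagates through the spatial multiplier (rho). Direct, indirect (spillover), and total impacts can be decomposed with spatialreg::impacts; a minimal sketch (output omitted):
# Decompose effects into direct, indirect, and total impacts
impacts(spatial_lag, listw = weights)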
tm_shape(df_sf) +
tm_fill("POV", palette = "Blues", style = "quantile", title = "Poverty Rate (%)") +
tm_borders(lwd = 0.5, col = "gray") +
tm_layout(main.title = "Poverty Rate by Census Tract",
legend.outside = TRUE)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
## [v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(main.title = )`
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

# Spatial regression significance testing
library(spdep)
library(spatialreg)
# Clean data
df_sf_clean <- df_sf %>%
st_make_valid() %>%
filter(!is.na(HTP), !is.na(POV), !is.na(UER))
# Recompute neighbors and weights
nb <- poly2nb(df_sf_clean, queen = TRUE)
## Warning in poly2nb(df_sf_clean, queen = TRUE): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(df_sf_clean, queen = TRUE): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights <- nb2listw(nb, style = "W", zero.policy = TRUE) # Allow isolated tracts
# Verify dimensions
cat(
"Data rows:", nrow(df_sf_clean), "\n",
"Weights length:", length(weights$neighbours), "\n"
)
## Data rows: 154
## Weights length: 154
# Run regression
spatial_lag <- lagsarlm(
HTP ~ POV + UER,
data = df_sf_clean,
listw = weights,
zero.policy = TRUE # Important if some tracts have no neighbors
)
summary(spatial_lag)
##
## Call:lagsarlm(formula = HTP ~ POV + UER, data = df_sf_clean, listw = weights,
## zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.116299 -2.686464 -0.051661 2.959348 19.271453
##
## Type: lag
## Regions with no neighbours included:
## 10
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.60484 1.39389 4.0210 5.795e-05
## POV 0.19744 0.04374 4.5140 6.363e-06
## UER 0.19647 0.14545 1.3507 0.1768
##
## Rho: 0.68882, LR test value: 117.82, p-value: < 2.22e-16
## Asymptotic standard error: 0.051374
## z-value: 13.408, p-value: < 2.22e-16
## Wald statistic: 179.77, p-value: < 2.22e-16
##
## Log likelihood: -467.5814 for lag model
## ML residual variance (sigma squared): 22.088, (sigma: 4.6998)
## Number of observations: 154
## Number of parameters estimated: 5
## AIC: 945.16, (AIC for lm: 1061)
## LM test for residual autocorrelation
## test value: 0.0062416, p-value: 0.93703
# Load Packages
library(spdep)
library(spatialreg)
library(sf)
library(tmap)
library(tidyverse)
library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:tidycensus':
##
## get_estimates
# Data Preparation
df_sf <- df_map %>%
st_as_sf() %>%
st_transform(26918) %>% # UTM Zone 18N for DC
st_make_valid() %>%
mutate(across(c(HTP, POV, UER, AGE65), as.numeric)) %>%
mutate(row_id = 1:n())
# Enhanced Spatial Imputation
nb <- poly2nb(df_sf, queen = TRUE, snap = 1e-3) # Increased snap tolerance
df_sf <- df_sf %>%
mutate(
HTP_imputed = ifelse(is.na(HTP),
sapply(nb, function(neighbors) {
if(length(neighbors) == 0) return(NA)
mean(df_sf$HTP[neighbors], na.rm = TRUE)
}),
HTP),
HTP_imputed = coalesce(HTP_imputed, mean(HTP, na.rm = TRUE))
)
# Hotspot Analysis
valid_tracts <- !is.na(df_sf$HTP_imputed)
df_sf_valid <- df_sf[valid_tracts, ]
nb_valid <- subset.nb(nb, valid_tracts)
weights_valid <- nb2listw(nb_valid, style = "W")
if(sum(valid_tracts) > 1) {
# Local Moran's I
local_moran <- localmoran(df_sf_valid$HTP_imputed, weights_valid)
df_sf_valid <- df_sf_valid %>%
mutate(
local_moran_i = local_moran[, "Ii"],
hotspot_pval = local_moran[, "Pr(z != E(Ii))"],
hotspot_type = case_when(
local_moran_i > 0 & hotspot_pval < 0.05 ~ "High-High",
local_moran_i < 0 & hotspot_pval < 0.05 ~ "Low-Low",
TRUE ~ "Not Significant"
)
)
# Join hotspot data back to full dataset
df_sf <- df_sf %>%
left_join(st_drop_geometry(df_sf_valid) %>% select(row_id, hotspot_type),
by = "row_id")
}
# Corrected Visualization
map_theme <- tm_layout(
legend.position = c("right", "bottom"),
legend.title.size = 0.9,
legend.text.size = 0.7,
frame = FALSE
)
# Individual maps for each variable
htp_map <- tm_shape(df_sf) +
tm_fill("HTP_imputed",
palette = "Reds",
style = "quantile",
title = "Hypertension (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "quantile"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
pov_map <- tm_shape(df_sf) +
tm_fill("POV",
palette = "Blues",
style = "quantile",
title = "Poverty Rate (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
uer_map <- tm_shape(df_sf) +
tm_fill("UER",
palette = "YlOrRd",
style = "quantile",
title = "Unemployment Rate (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
age65_map <- tm_shape(df_sf) +
tm_fill("AGE65",
palette = "Purples",
style = "quantile",
title = "Population 65+ (%)") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
# Hotspot map
if("hotspot_type" %in% names(df_sf)) {
hotspot_map <- tm_shape(df_sf) +
tm_fill("hotspot_type",
palette = c("High-High" = "red",
"Low-Low" = "blue",
"Not Significant" = "lightgray"),
title = "Hypertension Hotspots") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
} else {
hotspot_map <- NULL
}
## [v3->v4] `tm_tm_fill()`: migrate the argument(s) related to the scale of the
## visual variable `fill` namely 'palette' (rename to 'values') to fill.scale =
## tm_scale(<HERE>).
## [v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
# Arrange maps
if(!is.null(hotspot_map)) {
tmap_arrange(
htp_map, pov_map,
uer_map, age65_map,
hotspot_map,
ncol = 2
)
} else {
tmap_arrange(
htp_map, pov_map,
uer_map, age65_map,
ncol = 2
)
}
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

# Spatial Regression with AGE65
model_data <- df_sf %>%
filter(complete.cases(HTP, POV, UER, AGE65)) %>%
mutate(across(c(HTP, POV, UER, AGE65), ~ as.numeric(scale(.x)))) # drop the matrix class that scale() returns
# Handle neighborhood connectivity
nb_clean <- poly2nb(model_data, queen = TRUE, snap = 1e-3)
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): some observations have no neighbours;
## if this seems unexpected, try increasing the snap argument.
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): neighbour object has 3 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
no_neighbors <- which(card(nb_clean) == 0)
if(length(no_neighbors) > 0) {
message("Removing ", length(no_neighbors), " tracts with no neighbors")
model_data <- model_data[-no_neighbors, ]
nb_clean <- poly2nb(model_data, queen = TRUE, snap = 1e-3)
}
## Removing 1 tracts with no neighbors
## Warning in poly2nb(model_data, queen = TRUE, snap = 0.001): neighbour object has 2 sub-graphs;
## if this sub-graph count seems unexpected, try increasing the snap argument.
weights_clean <- nb2listw(nb_clean, style = "W", zero.policy = TRUE)
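The sub-graph warnings above mean the contiguity graph is not fully connected; a short check on nb_clean makes that structure explicit:
summary(card(nb_clean)) # distribution of neighbor counts per tract
n.comp.nb(nb_clean)$nc # number of disconnected sub-graphs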
# Fit models
spatial_lag <- tryCatch(
lagsarlm(HTP ~ POV + UER + AGE65,
data = model_data,
listw = weights_clean,
zero.policy = TRUE),
error = function(e) {
message("Spatial lag model failed: ", e$message)
return(NULL)
}
)
spatial_error <- tryCatch(
errorsarlm(HTP ~ POV + UER + AGE65,
data = model_data,
listw = weights_clean,
zero.policy = TRUE),
error = function(e) {
message("Spatial error model failed: ", e$message)
return(NULL)
}
)
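Before interpreting either specification, Lagrange Multiplier diagnostics on an OLS baseline indicate which form of spatial dependence the data favor. A sketch using the classic spdep interface (recent releases rename lm.LMtests() to lm.RStests()):
ols_base <- lm(HTP ~ POV + UER + AGE65, data = model_data)
lm.LMtests(ols_base, listw = weights_clean,
test = c("LMlag", "LMerr", "RLMlag", "RLMerr"))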
# Model Results and Diagnostics
if(!is.null(spatial_lag)) {
model_data$spatial_lag_residuals <- residuals(spatial_lag)
residual_map <- tm_shape(model_data) +
tm_fill("spatial_lag_residuals",
style = "fisher",
palette = "-RdBu",
title = "Standardized Residuals") +
tm_borders(col = "gray30", lwd = 0.5) +
map_theme
print(residual_map)
cat("\n=== Spatial Lag Model Results ===\n")
print(summary(spatial_lag))
}
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_fill()`: instead of `style = "fisher"`, use fill.scale =
## `tm_scale_intervals()`.
## ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
## 'tm_scale_intervals(<HERE>)'[v3->v4] `tm_fill()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'Multiple palettes called "rd_bu" found: "brewer.rd_bu", "matplotlib.rd_bu". The first one, "brewer.rd_bu", is returned.
## [scale] tm_polygons:() the data variable assigned to 'fill' contains positive and negative values, so midpoint is set to 0. Set 'midpoint = NA' in 'fill.scale = tm_scale_intervals(<HERE>)' to use all visual values (e.g. colors)
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "-RdBu" is named
## "rd_bu" (in long format "brewer.rd_bu")

##
## === Spatial Lag Model Results ===
##
## Call:lagsarlm(formula = HTP ~ POV + UER + AGE65, data = model_data,
## listw = weights_clean, zero.policy = TRUE)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5059605 -0.2551583 0.0014785 0.2752624 1.8496668
##
## Type: lag
## Coefficients: (asymptotic standard errors)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.010969 0.033743 -0.3251 0.7451
## POV 0.225246 0.043986 5.1208 3.042e-07
## UER 0.047435 0.041102 1.1541 0.2485
## AGE65 0.199869 0.035443 5.6391 1.709e-08
##
## Rho: 0.68773, LR test value: 123.83, p-value: < 2.22e-16
## Asymptotic standard error: 0.045696
## z-value: 15.05, p-value: < 2.22e-16
## Wald statistic: 226.51, p-value: < 2.22e-16
##
## Log likelihood: -94.02535 for lag model
## ML residual variance (sigma squared): 0.174, (sigma: 0.41714)
## Number of observations: 153
## Number of parameters estimated: 6
## AIC: 200.05, (AIC for lm: 321.88)
## LM test for residual autocorrelation
## test value: 5.9171, p-value: 0.014995
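Because rho is large and significant, the coefficients above are not marginal effects: a covariate change in one tract propagates to its neighbors through the spatial multiplier. A sketch with spatialreg::impacts() (assuming the spatial_lag and weights_clean objects above) decomposes each effect:
library(spatialreg)
impacts(spatial_lag, listw = weights_clean) # direct, indirect (spillover), and total effects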
if(!is.null(spatial_lag) && !is.null(spatial_error)) {
modelsummary(
list("Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error),
stars = TRUE,
title = "Spatial Model Comparison",
coef_rename = c("(Intercept)" = "Intercept",
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Population 65+")
)
}
Spatial Model Comparison

| | Spatial Lag | Spatial Error |
|---|---|---|
| rho | 0.688*** (0.046) | |
| Intercept | -0.011 (0.034) | -0.046 (0.192) |
| Poverty Rate | 0.225*** (0.044) | 0.245*** (0.049) |
| Unemployment Rate | 0.047 (0.041) | 0.024 (0.040) |
| Population 65+ | 0.200*** (0.035) | 0.154*** (0.036) |
| lambda | | 0.820*** (0.043) |
| AIC | 200.1 | 220.5 |
| BIC | 218.2 | 238.7 |
| RMSE | 0.42 | 0.43 |

+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
Appendix
Important Model Outputs
# 1. PCA Loadings
if(exists("pca")) {
pca_loadings <- as.data.frame(pca$rotation)
cat("### PCA Loadings\n")
knitr::kable(pca_loadings, digits = 3) %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
#write.csv(pca_loadings, "outputs/pca_loadings.csv")
}
## ### PCA Loadings
| | PC1 | PC2 | PC3 | PC4 |
|---|---|---|---|---|
| POV | -0.636 | -0.004 | -0.398 | 0.661 |
| UER | -0.657 | -0.171 | -0.144 | -0.720 |
| LIN | -0.399 | 0.432 | 0.802 | 0.102 |
| AGE65 | -0.065 | -0.886 | 0.421 | 0.185 |
# 2. VIF Scores
if(exists("model")) {
vif_scores <- car::vif(model)
vif_df <- data.frame(Variable = names(vif_scores), VIF = vif_scores)
cat("\n### Variance Inflation Factors (VIF)\n")
print(vif_df) # Basic R table
#write.csv(vif_df, "outputs/vif_scores.csv")
}
##
## ### Variance Inflation Factors (VIF)
## Variable VIF
## POV POV 1.684957
## UER UER 1.785993
## LIN LIN 1.102908
## AGE65 AGE65 1.056610
# 3. Moran's I Test
if(exists("moran_result")) {
moran_df <- data.frame(
Statistic = moran_result$estimate[1],
p.value = moran_result$p.value
)
cat("\n### Moran's I Test Results\n")
knitr::kable(moran_df, digits = 4) %>%
kableExtra::kable_styling(full_width = FALSE)
#write.csv(moran_df, "outputs/morans_i_test.csv")
}
##
## ### Moran's I Test Results
| | Statistic | p.value |
|---|---|---|
| Moran I statistic | 0.8269 | 0 |
# 4. ARDL Model Coefficients
if(exists("ardl_model")) {
ardl_coefs <- broom::tidy(ardl_model)
cat("\n### ARDL Model Coefficients\n")
knitr::kable(ardl_coefs, digits = 4) %>%
kableExtra::kable_styling(bootstrap_options = "condensed")
#write.csv(ardl_coefs, "outputs/ardl_coefficients.csv")
}
## Warning: The `tidy()` method for objects of class `dynlm` is not maintained by the broom team, and is only supported through the `lm` tidier method. Please be cautious in interpreting and reporting broom output.
##
## This warning is displayed once per session.
##
## ### ARDL Model Coefficients
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 13.7580 | 0.5452 | 25.2329 | 0.0000 |
| L(HTP, 1) | 0.2598 | 0.0209 | 12.4246 | 0.0000 |
| POV | 0.2816 | 0.0172 | 16.3953 | 0.0000 |
| UER | 0.9142 | 0.0615 | 14.8725 | 0.0000 |
| L(UER, 1) | -0.1648 | 0.0592 | -2.7823 | 0.0055 |
# 5. Spatial Regression Coefficients
if(exists("spatial_lag")) {
spatial_coefs <- broom::tidy(spatial_lag)
cat("\n### Spatial Lag Model Coefficients\n")
print(spatial_coefs) # Basic R table
#write.csv(spatial_coefs, "outputs/spatial_lag_coefficients.csv")
}
##
## ### Spatial Lag Model Coefficients
## # A tibble: 5 × 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 rho 0.688 0.0457 15.1 0
## 2 (Intercept) -0.0110 0.0337 -0.325 0.745
## 3 POV 0.225 0.0440 5.12 0.000000304
## 4 UER 0.0474 0.0411 1.15 0.248
## 5 AGE65 0.200 0.0354 5.64 0.0000000171
Diagnostics
# Create directory for plots
if(!dir.exists("outputs/plots")) dir.create("outputs/plots", recursive = TRUE)
# PCA Scree Plot
if(exists("pca")) {
scree_plot <- fviz_eig(pca, addlabels = TRUE) +
ggtitle("PCA Scree Plot") +
theme_minimal()
cat("### PCA Scree Plot\n")
print(scree_plot) # Display in document
ggsave("outputs/plots/pca_scree_plot.png", scree_plot,
width = 8, height = 6, dpi = 300)
}
## ### PCA Scree Plot

# Linear Model Residual Diagnostics
if(exists("model")) {
cat("\n### Linear Model Residual Plots\n")
# Create and display in document
par(mfrow = c(2,2), oma = c(0,0,2,0))
plot(model, sub.caption = "Linear Model Diagnostics")
dev.off()
# Display in document using include_graphics
#knitr::include_graphics("outputs/plots/linear_model_residuals.png")
}
##
## ### Linear Model Residual Plots
# ARDL Model Residual Plot
if(exists("ardl_model")) {
cat("\n### ARDL Model Residual Plot\n")
# Wrap the plot in a function so it can be drawn once for saving and once for display
ardl_resid_plot <- function() {
plot(residuals(ardl_model), type="o",
main="Residuals of ARDL Model",
col="blue", ylab="Residuals")
abline(h=0, col="red", lty=2)
grid()
}
# Save to file (a png() device is needed; path follows the outputs/plots convention above)
png("outputs/plots/ardl_residuals.png", width = 800, height = 600)
ardl_resid_plot()
dev.off()
# Display in document
ardl_resid_plot()
}
##
## ### ARDL Model Residual Plot
# Spatial Model Residuals
if(exists("spatial_lag")) {
cat("\n### Spatial Lag Model Residuals\n")
# plot() draws as a side effect and returns NULL, so plot once and avoid
# print()-ing the return value or closing the device afterwards
plot(residuals(spatial_lag), main = "Spatial Lag Model Residuals")
}
##
## ### Spatial Lag Model Residuals
Statistical Summary
# Descriptive Statistics
if(exists("df")) {
desc_stats <- data.frame(
Mean = sapply(df[, sapply(df, is.numeric)], mean, na.rm = TRUE),
SD = sapply(df[, sapply(df, is.numeric)], sd, na.rm = TRUE),
Min = sapply(df[, sapply(df, is.numeric)], min, na.rm = TRUE),
Max = sapply(df[, sapply(df, is.numeric)], max, na.rm = TRUE),
N = sapply(df[, sapply(df, is.numeric)], function(x) sum(!is.na(x)))
)
cat("### Descriptive Statistics\n")
knitr::kable(desc_stats, digits = 3, caption = "Summary Statistics of Numeric Variables") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE
)
#write.csv(desc_stats, "outputs/descriptive_stats.csv")
}
## ### Descriptive Statistics
Summary Statistics of Numeric Variables

| Variable | Mean | SD | Min | Max | N |
|---|---|---|---|---|---|
| Year | 2017.817 | 3.126 | 2013.000 | 2023.000 | 1867 |
| HTP | 30.141 | 9.915 | 0.000 | 48.200 | 1867 |
| POV | 16.758 | 12.291 | 0.000 | 70.043 | 1867 |
| UER | 5.112 | 3.513 | 0.000 | 24.658 | 1867 |
| LIN | 2.026 | 1.877 | 0.000 | 13.774 | 1867 |
| AGE65 | 14.355 | 4.970 | 0.000 | 37.309 | 1867 |
| ASIAN | 3.412 | 3.807 | 0.000 | 24.916 | 1867 |
| PACIFIC | 0.050 | 0.239 | 0.000 | 4.007 | 1867 |
| AMERICANINDIAN | 0.303 | 0.787 | 0.000 | 10.514 | 1867 |
| BLACK | 51.608 | 34.245 | 0.000 | 100.000 | 1867 |
| WHITE | 36.759 | 30.734 | 0.000 | 100.000 | 1867 |
| CRE_pca | 0.000 | 1.337 | -2.441 | 5.155 | 1867 |
# Correlation Matrix
if(exists("numeric_df")) {
cor_matrix <- cor(numeric_df, use = "complete.obs")
cat("\n### Correlation Matrix\n")
knitr::kable(cor_matrix, digits = 2, caption = "Pairwise Correlations") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover"),
font_size = 10
) %>%
kableExtra::column_spec(1, bold = TRUE)
#write.csv(cor_matrix, "outputs/correlation_matrix.csv")
}
##
## ### Correlation Matrix
Pairwise Correlations

| | Year | HTP | POV | UER | LIN | AGE65 | ASIAN | PACIFIC | AMERICANINDIAN | BLACK | WHITE |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Year | 1.00 | 0.01 | -0.08 | -0.20 | -0.30 | -0.03 | 0.00 | 0.06 | -0.01 | -0.03 | -0.03 |
| HTP | 0.01 | 1.00 | 0.60 | 0.60 | 0.16 | 0.46 | -0.65 | -0.10 | 0.03 | 0.90 | -0.88 |
| POV | -0.08 | 0.60 | 1.00 | 0.63 | 0.22 | -0.01 | -0.37 | -0.07 | -0.05 | 0.68 | -0.66 |
| UER | -0.20 | 0.60 | 0.63 | 1.00 | 0.27 | 0.14 | -0.42 | -0.10 | -0.01 | 0.69 | -0.66 |
| LIN | -0.30 | 0.16 | 0.22 | 0.27 | 1.00 | -0.09 | -0.09 | -0.04 | 0.03 | 0.23 | -0.28 |
| AGE65 | -0.03 | 0.46 | -0.01 | 0.14 | -0.09 | 1.00 | -0.23 | -0.01 | 0.07 | 0.24 | -0.21 |
| ASIAN | 0.00 | -0.65 | -0.37 | -0.42 | -0.09 | -0.23 | 1.00 | 0.08 | -0.01 | -0.67 | 0.60 |
| PACIFIC | 0.06 | -0.10 | -0.07 | -0.10 | -0.04 | -0.01 | 0.08 | 1.00 | -0.03 | -0.10 | 0.09 |
| AMERICANINDIAN | -0.01 | 0.03 | -0.05 | -0.01 | 0.03 | 0.07 | -0.01 | -0.03 | 1.00 | 0.00 | -0.04 |
| BLACK | -0.03 | 0.90 | 0.68 | 0.69 | 0.23 | 0.24 | -0.67 | -0.10 | 0.00 | 1.00 | -0.97 |
| WHITE | -0.03 | -0.88 | -0.66 | -0.66 | -0.28 | -0.21 | 0.60 | 0.09 | -0.04 | -0.97 | 1.00 |
# ADF Test Results
if(exists("adf_results")) {
cat("\n### Augmented Dickey-Fuller Test Results\n")
knitr::kable(adf_results, digits = 4,
caption = "Stationarity Test Results") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover"),
full_width = FALSE
) %>%
kableExtra::row_spec(which(adf_results$P_Value < 0.05),
bold = TRUE, color = "white", background = "#D7261E")
#write.csv(adf_results, "outputs/adf_test_results.csv")
}
##
## ### Augmented Dickey-Fuller Test Results
Stationarity Test Results

| Variable | ADF_Statistic | P_Value | Conclusion |
|---|---|---|---|
| HTP | -9.7490 | < 0.01 | Stationary |
| POV | -10.7340 | < 0.01 | Stationary |
| UER | -9.3261 | < 0.01 | Stationary |
| LIN | -11.6090 | < 0.01 | Stationary |
| AGE65 | -10.9720 | < 0.01 | Stationary |
| ASIAN | -9.1452 | < 0.01 | Stationary |
| PACIFIC | -11.9560 | < 0.01 | Stationary |
| AMERICANINDIAN | -12.0860 | < 0.01 | Stationary |
| BLACK | -9.6922 | < 0.01 | Stationary |
| WHITE | -10.3280 | < 0.01 | Stationary |
Spatial Analysis Output
# Hotspot Classification
if("hotspot_type" %in% names(df_sf)) {
hotspot_data <- st_drop_geometry(df_sf) %>%
select(GEOID, hotspot_type) %>%
mutate(hotspot_type = factor(hotspot_type,
levels = c("High-High", "Low-Low", "Not Significant")))
# Create summary counts
hotspot_summary <- hotspot_data %>%
count(hotspot_type) %>%
mutate(Percentage = n/nrow(hotspot_data)*100)
cat("### Hotspot Classification Summary\n")
print(
knitr::kable(hotspot_summary,
col.names = c("Hotspot Type", "Count", "Percentage (%)"),
digits = 1,
caption = "Distribution of Hypertension Hotspots") %>%
kableExtra::kable_styling(bootstrap_options = c("striped", "hover")) %>%
kableExtra::row_spec(1, background = "#FFDDDD") %>% # Highlight High-High
kableExtra::row_spec(2, background = "#DDDDFF") # Highlight Low-Low
) # print() needed: only the chunk's last value is auto-rendered
# Show first 10 rows of full data
cat("\n#### Sample Hotspot Classifications (First 10 Tracts)\n")
knitr::kable(head(hotspot_data, 10),
caption = "Sample of Hotspot Classifications") %>%
kableExtra::kable_styling(bootstrap_options = "condensed")
#write.csv(hotspot_data, "outputs/hotspot_classification.csv")
}
## ### Hotspot Classification Summary
##
## #### Sample Hotspot Classifications (First 10 Tracts)
Sample of Hotspot Classifications

| GEOID | hotspot_type |
|---|---|
| 11001000101 | Not Significant |
| 11001000102 | High-High |
| 11001000201 | Not Significant |
| 11001000202 | High-High |
| 11001000300 | High-High |
| 11001000400 | High-High |
| 11001000501 | Not Significant |
| 11001000502 | Not Significant |
| 11001000600 | Not Significant |
| 11001000702 | Not Significant |
# Spatial Residuals
if(exists("spatial_lag")) {
spatial_residuals <- st_drop_geometry(model_data) %>%
select(GEOID, spatial_lag_residuals) %>%
mutate(Residual_Group = cut(spatial_lag_residuals,
breaks = c(-Inf, -2, -1, 1, 2, Inf),
labels = c("Very Low", "Low", "Normal", "High", "Very High")))
# Create residual summary
residual_summary <- spatial_residuals %>%
group_by(Residual_Group) %>%
summarise(Count = n(),
Avg_Residual = mean(spatial_lag_residuals))
cat("\n### Spatial Lag Model Residuals Summary\n")
print(
knitr::kable(residual_summary,
digits = 3,
caption = "Distribution of Standardized Residuals") %>%
kableExtra::kable_styling(bootstrap_options = "striped") %>%
kableExtra::column_spec(3, color = ifelse(residual_summary$Avg_Residual > 0, "red", "blue"))
) # print() needed so this intermediate table renders
# Show extreme residuals (top/bottom 5)
extreme_residuals <- spatial_residuals %>%
arrange(desc(abs(spatial_lag_residuals))) %>%
head(10) %>%
mutate(RowID = row_number()) # Add numeric row identifiers
cat("\n#### Tracts with Most Extreme Residuals\n")
knitr::kable(extreme_residuals %>% select(-RowID), digits = 3) %>%
kableExtra::kable_styling() %>%
kableExtra::row_spec(
which(extreme_residuals$spatial_lag_residuals > 0),
background = "#FFEEEE"
) %>%
kableExtra::row_spec(
which(extreme_residuals$spatial_lag_residuals < 0),
background = "#EEEEFF"
)
#write.csv(spatial_residuals, "outputs/spatial_residuals.csv")
}
##
## ### Spatial Lag Model Residuals Summary
##
## #### Tracts with Most Extreme Residuals
| GEOID | spatial_lag_residuals | Residual_Group |
|---|---|---|
| 11001002302 | 1.850 | High |
| 11001007301 | -1.506 | Low |
| 11001009201 | -1.331 | Low |
| 11001009204 | 1.162 | High |
| 11001009802 | -0.850 | Normal |
| 11001009507 | 0.769 | Normal |
| 11001000201 | -0.768 | Normal |
| 11001000202 | -0.743 | Normal |
| 11001009503 | 0.697 | Normal |
| 11001010500 | -0.669 | Normal |
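The lag-model summary reported a significant LM test for residual autocorrelation; a direct follow-up (a sketch on the objects above) applies Moran's test to the residuals themselves:
moran.test(model_data$spatial_lag_residuals, weights_clean, zero.policy = TRUE)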
Visualization and Maps
# Interactive Trend Plot
if(exists("interactive_plot")) {
cat("### Interactive Trend Visualization\n")
cat("*Use the interactive controls to explore temporal patterns*\n\n")
# Display in document
print(interactive_plot)
# Save standalone version
if(!dir.exists("outputs")) dir.create("outputs")
htmlwidgets::saveWidget(
widget = interactive_plot,
file = "outputs/interactive_trends.html",
selfcontained = TRUE,
title = "DC Health Indicators 2013-2023"
)
}
## ### Interactive Trend Visualization
## *Use the interactive controls to explore temporal patterns*
# verify map objects exist and are tmap objects
required_maps <- c("htp_map", "pov_map", "uer_map", "age65_map", "hotspot_map")
valid_maps <- keep(required_maps, ~exists(.x) && inherits(get(.x), "tmap"))
# Static Map Visualizations
map_theme <- tm_layout(
legend.position = c("right", "bottom"),
legend.title.size = 0.9,
legend.text.size = 0.7,
frame = FALSE
)
if(length(valid_maps) > 0) {
# Apply consistent theme to all maps
walk(valid_maps, ~assign(.x, get(.x) + map_theme, envir = .GlobalEnv))
# Display individual maps
walk(valid_maps, ~{
cat("\n###", str_to_title(gsub("_map", "", gsub("_", " ", .x))), "Map\n")
print(get(.x))
})
# combined panel only if at least 2 maps
if(length(valid_maps) >= 2) {
cat("\n### Combined Map Panel\n")
# Prepare map list arrangement
map_objects <- mget(valid_maps)
# Special handling odd number of maps
if(length(valid_maps) %% 2 != 0) {
map_objects$empty <- tm_shape(df_sf) + tm_text("") # Empty placeholder
}
# Create arrangement
ncols <- ifelse(length(valid_maps) >=4, 2, 1)
combined_maps <- do.call(tmap_arrange, c(map_objects, list(ncol = ncols)))
print(combined_maps)
}
} else {
message("No valid tmap objects found for visualization")
}
##
## ### Htp Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.

##
## ### Pov Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

##
## ### Uer Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.

##
## ### Age65 Map
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.

##
## ### Hotspot Map

##
## ### Combined Map Panel
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"
## Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
## "brewer.blues"
## Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "YlOrRd" is named
## "brewer.yl_or_rd"
## Multiple palettes called "yl_or_rd" found: "brewer.yl_or_rd", "matplotlib.yl_or_rd". The first one, "brewer.yl_or_rd", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
## [cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Purples" is named
## "brewer.purples"
## Multiple palettes called "purples" found: "brewer.purples", "matplotlib.purples". The first one, "brewer.purples", is returned.
##
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

Model Comparison
# Prepare Model Comparison Table
if(exists("spatial_lag") && exists("spatial_error") && exists("ardl_model") && exists("model")) {
# Create comparison dataframe with enhanced formatting
model_comp <- modelsummary(
list("Linear" = model,
"ARDL" = ardl_model,
"Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error),
output = "data.frame",
stars = TRUE,
title = "Model Comparison Across Specifications",
coef_rename = c(
"(Intercept)" = "Intercept",
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Population 65+",
"lag.HTP" = "Spatial Lag (ρ)",
"rho" = "Spatial Error (λ)"
),
gof_map = c("nobs", "r.squared", "adj.r.squared", "sigma", "logLik", "AIC", "BIC")
)
#Display formatted table with conditional coloring
cat("### Comparative Model Performance\n")
model_table <- knitr::kable(model_comp, caption = "Comparison of Model Coefficients and Fit Statistics") %>%
kableExtra::kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
font_size = 11,
full_width = FALSE,
position = "center"
) %>%
kableExtra::column_spec(1, bold = TRUE, width = "3cm") %>%
kableExtra::column_spec(2:5, width = "2.5cm") %>%
kableExtra::footnote(
general = "Standard errors in parentheses; * p<0.1, ** p<0.05, *** p<0.01",
footnote_as_chunk = TRUE
)
# Highlight best performing model for each fit statistic
if("AIC" %in% model_comp$term) {
aic_values <- as.numeric(model_comp[model_comp$term == "AIC", 2:5])
best_aic <- which.min(aic_values) + 1 # +1 to account for term column
model_table <- model_table %>%
kableExtra::row_spec(which(model_comp$term == "AIC"),
background = ifelse(1:4 == best_aic, "#E6F3E6", NA))
}
if("BIC" %in% model_comp$term) {
bic_values <- as.numeric(model_comp[model_comp$term == "BIC", 2:5])
best_bic <- which.min(bic_values) + 1
model_table <- model_table %>%
kableExtra::row_spec(which(model_comp$term == "BIC"),
background = ifelse(1:4 == best_bic, "#E6F3E6", NA))
}
print(model_table)
# Export to CSV with additional metadata
#write.csv(model_comp, "outputs/model_comparison.csv", row.names = FALSE)
# Create simplified coefficient comparison
coef_comparison <- model_comp %>%
filter(!term %in% c("Num.Obs.", "R2", "R2 Adj.", "Log.Lik.", "AIC", "BIC", "RMSE")) %>%
select(-part)
cat("\n### Key Coefficient Comparison\n")
knitr::kable(coef_comparison, digits = 3) %>%
kableExtra::kable_styling() %>%
kableExtra::column_spec(1, bold = TRUE) %>%
print()
} else {
cat("\nNote: Not all models available for comparison\n")
}
## ### Comparative Model Performance

Comparison of Model Coefficients and Fit Statistics

| | Linear | ARDL | Spatial Lag | Spatial Error |
|---|---|---|---|---|
| Intercept | 7.873*** (0.501) | 13.758*** (0.545) | -0.011 (0.034) | -0.046 (0.192) |
| Poverty Rate | 0.352*** (0.015) | 0.282*** (0.017) | 0.225*** (0.044) | 0.245*** (0.049) |
| Unemployment Rate | 0.723*** (0.054) | 0.914*** (0.061) | 0.047 (0.041) | 0.024 (0.040) |
| LIN | 0.179* (0.079) | | | |
| Population 65+ | 0.857*** (0.029) | | 0.200*** (0.035) | 0.154*** (0.036) |
| L(HTP, 1) | | 0.260*** (0.021) | | |
| L(UER, 1) | | -0.165** (0.059) | | |
| Spatial Lag (ρ) | | | 0.688*** (0.046) | |
| Spatial Error (λ) | | | | 0.820*** (0.043) |
| Num.Obs. | 1867 | 1866 | | |
| R2 | 0.618 | 0.492 | | |
| R2 Adj. | 0.617 | 0.491 | | |
| Log.Lik. | -6033.352 | -6296.153 | | |

Note: Standard errors in parentheses; * p<0.1, ** p<0.05, *** p<0.01

## ### Key Coefficient Comparison

| term | Linear | ARDL | Spatial Lag | Spatial Error |
|---|---|---|---|---|
| Intercept | 7.873*** (0.501) | 13.758*** (0.545) | -0.011 (0.034) | -0.046 (0.192) |
| Poverty Rate | 0.352*** (0.015) | 0.282*** (0.017) | 0.225*** (0.044) | 0.245*** (0.049) |
| Unemployment Rate | 0.723*** (0.054) | 0.914*** (0.061) | 0.047 (0.041) | 0.024 (0.040) |
| LIN | 0.179* (0.079) | | | |
| Population 65+ | 0.857*** (0.029) | | 0.200*** (0.035) | 0.154*** (0.036) |
| L(HTP, 1) | | 0.260*** (0.021) | | |
| L(UER, 1) | | -0.165** (0.059) | | |
| Spatial Lag (ρ) | | | 0.688*** (0.046) | |
| Spatial Error (λ) | | | | 0.820*** (0.043) |
Statistically Significant Variables (p < 0.05)
library(modelsummary)
library(broom)
library(dplyr)
# Create model list with original models
model_list <- list(
"Linear" = model,
"ARDL" = ardl_model,
"Spatial Lag" = spatial_lag,
"Spatial Error" = spatial_error
)
# Define methods for each model type; modelsummary dispatches on the object's
# class, and lagsarlm()/errorsarlm() return objects of class "Sarlm"
tidy_custom.Sarlm <- function(model, ...) {
broom::tidy(model) %>%
filter(p.value < 0.05) %>%
mutate(term = case_when(
term == "rho" ~ "Spatial Lag (ρ)",
term == "lambda" ~ "Spatial Error (λ)",
TRUE ~ term
))
}
tidy_custom.default <- function(model, ...) {
broom::tidy(model) %>%
filter(p.value < 0.05) %>%
mutate(term = case_when(
term == "L(HTP, 1)" ~ "Lagged HTP",
term == "L(UER, 1)" ~ "Lagged Unemployment",
TRUE ~ term
))
}
# Single modelsummary call with all parameters
modelsummary(
model_list,
stars = c('+' = 0.1, '*' = 0.05, '**' = 0.01, '***' = 0.001),
coef_map = c(
"POV" = "Poverty Rate",
"UER" = "Unemployment Rate",
"AGE65" = "Age 65+",
"LIN" = "No Health Insurance",
"Lagged HTP" = "Lagged Hypertension",
"Lagged Unemployment" = "Lagged Unemployment",
"Spatial Lag (ρ)" = "Spatial Lag (ρ)",
"Spatial Error (λ)" = "Spatial Error (λ)"
),
gof_map = c("nobs", "r.squared", "aic", "bic"),
title = "Model Comparison: Statistically Significant Coefficients (p < 0.05)",
notes = list(
"Standard errors in parentheses",
"Only coefficients with p < 0.05 shown",
"+ p<0.1, * p<0.05, ** p<0.01, *** p<0.001"
)
)
Model Comparison: Statistically Significant Coefficients (p < 0.05)

| | Linear | ARDL | Spatial Lag | Spatial Error |
|---|---|---|---|---|
| Poverty Rate | 0.352*** (0.015) | 0.282*** (0.017) | 0.225*** (0.044) | 0.245*** (0.049) |
| Unemployment Rate | 0.723*** (0.054) | 0.914*** (0.061) | 0.047 (0.041) | 0.024 (0.040) |
| Age 65+ | 0.857*** (0.029) | | 0.200*** (0.035) | 0.154*** (0.036) |
| No Health Insurance | 0.179* (0.079) | | | |
| Num.Obs. | 1867 | 1866 | | |
| R2 | 0.618 | 0.492 | | |
| AIC | 12078.7 | 12604.3 | 200.1 | 220.5 |
| BIC | 12111.9 | 12637.5 | 218.2 | 238.7 |

Standard errors in parentheses. Only coefficients with p < 0.05 shown. + p<0.1, * p<0.05, ** p<0.01, *** p<0.001

Note that the spatial models are fit on a single standardized cross-section of 153 tracts, so their AIC/BIC values are not on the same scale as the Linear and ARDL models estimated on the full 1,867-observation panel; compare fit only within each pair.