library(readr)
library(dplyr)
library(stringr)
library(tidyverse)
library(arrow)
library(gt)
library(DBI)
## Warning: package 'DBI' was built under R version 4.3.3
library(duckdb)
library(dplyr)
library(stringr)
con <- dbConnect(duckdb::duckdb(), dbdir = ":memory:")
dbExecute(con, "
CREATE OR REPLACE VIEW ip AS
SELECT *
FROM read_parquet('D:/thcic_parquet_IP/**/*.parquet', union_by_name=true);
")
## [1] 0
diag_cols_vec <- c("PRINC_DIAG_CODE", paste0("OTH_DIAG_CODE_", 1:24))
dx_list_sql <- paste(diag_cols_vec, collapse = ",")
write_codes(con, "codes_fall", fall_diags)
write_codes(con, "codes_home_scene", at_home_scene_ICD10s)
write_codes(con, "codes_hip", hip_frac_diags)
write_codes(con, "codes_tbi", TBI_ICD10s)
write_codes(con, "codes_adrd", ICD10_ADRD_NORC)
write_codes(con, "codes_covid", ICD10_COVID_conf_or_exposed)
write_codes(con, "codes_htn", HTN_ICD10s)
write_codes(con, "codes_hf", HF_ICD10s)
write_codes(con, "codes_t2dm", T2DM_ICD10s)
dbWriteTable(con, "icd_to_ccsr", ICD_DX_TO_CCSR, overwrite = TRUE)
dbExecute(con, "CREATE INDEX IF NOT EXISTS idx_icd_to_ccsr ON icd_to_ccsr(ICD10);")
## [1] 0
print("INPATIENT HOSPITALIZATIONS *FROM* ER (2021-2024)")
## [1] "INPATIENT HOSPITALIZATIONS *FROM* ER (2021-2024)"
dbGetQuery(con, "
SELECT
COUNT(*) AS n_all,
SUM(has_ADRD) AS n_adrd,
COUNT(*) - SUM(has_ADRD) AS n_noadrd
FROM ip;
")
## n_all n_adrd n_noadrd
## 1 4272704 622357 3650347
ip_denoms_all <- dbGetQuery(con, "
SELECT
20212024 AS year,
COUNT(*) AS n_all,
SUM(has_ADRD) AS n_adrd,
COUNT(*) - SUM(has_ADRD) AS n_noadrd
FROM ip;
")
ip_denoms <- dbGetQuery(con, "
SELECT
year,
COUNT(*) AS n_all,
SUM(has_ADRD) AS n_adrd,
COUNT(*) - SUM(has_ADRD) AS n_noadrd
FROM ip
GROUP BY year
ORDER BY year;
")
ip_denoms
## year n_all n_adrd n_noadrd
## 1 2021 983472 146190 837282
## 2 2022 1033097 148291 884806
## 3 2023 1103112 159723 943389
## 4 2024 1153023 168153 984870
ip_denoms <- dbGetQuery(con, "
SELECT
year,
COUNT(*) AS n_all,
SUM(has_ADRD) AS n_adrd,
COUNT(*) - SUM(has_ADRD) AS n_noadrd
FROM ip
GROUP BY year
ORDER BY year;
")
library(readxl)
library(dplyr)
X21to24_TX_cost_to_charge_ratios <- read_excel("21to24_TX_cost_to_charge_ratios.xlsx") %>%
rename(CCR = 8) %>%
mutate(YEAR = as.integer(YEAR))
avg_TX_CCRs <- X21to24_TX_cost_to_charge_ratios %>%
group_by(YEAR) %>%
summarise(
mean_CCR = mean(CCR, na.rm = TRUE),
median_CCR = median(CCR, na.rm = TRUE),
n_facilities = n(),
.groups = "drop"
)
##note!! we've got a 550-ish per year, but 670ish for THCIC set
###// WARNING
##// WARNING
## ORIGINAL ABSTRACT APPROACH BELOW (REQUIRES PULLING THE DATA FROM THE LAKE)
dbGetQuery(con, "SELECT * FROM ip;") ->TX_IP_21to24
## UPDATED TO ACTUALLY USE THE FALL CODES; HERE YOU GO, BUCKO
## THE ABOVE SQL/LAKE APPROACH IS GOOD, BUT IS ESPECIALLY USEFUL HERE IF YOU NEED TO SANITY CHECK THE NUMBER OF IP RECORDS (HATE THE 2024 DATA SO MUCH)
average_covered = mean(c(0.82, 0.72))
#derived from the State losses from Medicare/Medicaid
#Medicare: 82%, Medicaid: 72%
# source: https://www.tha.org/wp-content/uploads/2025/02/2025-Hospital-Payment-Sources.pdf
# 91% of all patients with a fall-related diagnosis were insured by Medicare, Medicare Advantage, Medicaid, or were uninsured — all but the latter of which are payer types that typically reimburse hospitals at 72–82% of actual care costs4.
## NEED TO BUILD NEW LOSS KEY FOR HUMAN-READABLE LABELS
pmt_src_key <- c("09" = "Self Pay",
"HM" = "Health Maintenance Organization",
"10" = "Central Certification",
"LI" = "Liability",
"11" = "Other Non-Federal Programs",
"LM" = "Liability Medical",
"12" = "Preferred Provider Organization (PPO)",
"MA" = "Medicare Part A",
"13" = "Point of Service (POS)",
"MB" = "Medicare Part B",
"14" = "Exclusive Provider Organization (EPO)",
"MC" = "Medicaid",
"15" = "Indemnity Insurance",
"TV" = "Title V",
"16" = "HMO Medicare Risk",
"OF" = "Other Federal Program",
"AM" = "Automobile Medical",
"VA" = "Veterans Administration Plan",
"BL" = "Blue Cross / Blue Shield",
"WC" = "Workers' Compensation Health Claim",
"CH" = "CHAMPUS",
"ZZ" = "Charity / Indigent / Unknown",
"CI" = "Commercial Insurance",
"DS" = "Disability Insurance")
# this is the *code* vector you want to use in %in% for human readable
pmt_src_loss_key <- unname(pmt_src_key[loss_leaders])
pmt_src_loss_key
## [1] "Medicaid" "Medicare Part A"
## [3] "Medicare Part B" "HMO Medicare Risk"
## [5] "Veterans Administration Plan"
TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>% mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key, "loss", "regular"), log_charge = log(TOTAL_CHARGES)) %>% .$loss %>% table(.) %>% as.data.frame(.) %>% dplyr::rename(., insurance=1) %>% mutate(prop = Freq/sum(Freq))
## insurance Freq prop
## 1 loss 81293 0.891097
## 2 regular 9935 0.108903
# These “loss-leading” coverage types incurred, on average, ~$20,000 more in hospital charges per case than other payer groups.
cost_table<- TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key,
"loss", "regular"),
log_charge = log(TOTAL_CHARGES), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(loss) %>%
summarise(
median_log_charge = median(log_charge, na.rm = TRUE),
mad_log_charge = mad(log_charge, na.rm = TRUE),
mean_lengthofstay = mean(LENGTH_OF_STAY, na.rm=TRUE),
sd_lengthofstay = sd(LENGTH_OF_STAY, na.rm=TRUE)
) %>%
mutate(
upper_staylength = mean_lengthofstay + 2*sd_lengthofstay,
lower_staylength = mean_lengthofstay - 2*sd_lengthofstay,
## original code was median, now is mean
median_charge_dollars = exp(median_log_charge)
)
#total amount of "loss" per patient with lossy insurance
#// "The median cost of care for those with a fall-related injury and a loss-leading insurance, such as Medicare or Medicaid, was $86,828, resulting in an approximate loss of $19,970 per patient seen".
cost_table %>% filter(loss == "loss") %>% mutate(net_loss_profit = (1-average_covered) * median_charge_dollars ) %>% .$net_loss_profit
## [1] 19641.98
####### TOTAL CUMULATIVE COST OF CARE FOR THOSE WITH HIP FRAC OR TBI
#cumulative
##// "A total of $17.5M was charged across all years for all records with a hip fracture and/or TBI..."
TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>% .$TOTAL_CHARGES %>% sum(., na.rm=TRUE) %>% scales::comma(.)
## [1] "21,413,064,578"
#with fall code (possible undercount; fall codes hit-or-miss)
##// "...of which an esimated $9.4M was attributable to fall..."
TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>% .$TOTAL_CHARGES %>% sum(., na.rm=TRUE) %>% scales::comma(.)
## [1] "10,014,089,431"
# # cumulative with lossy insurance only
# TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>% mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key, "loss", "regular"), log_charge = log(TOTAL_CHARGES)) %>% filter(loss == "loss") %>% .$TOTAL_CHARGES %>% sum(., na.rm=TRUE) %>% scales::comma(.)
#
# #annual average
# #$4B
# TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>% mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key, "loss", "regular"), log_charge = log(TOTAL_CHARGES)) %>% filter(loss == "loss") %>% .$TOTAL_CHARGES %>% {sum(., na.rm=TRUE)/4} %>% scales::comma(.)
## average costs across PHRs by loss/regular
##### NOTE: ONLY USE MEDIAN VALUES NOW
avg_fall_costs_PHR<- TX_IP_21to24 %>%
mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key,
"loss", "regular"),
log_charge = log(TOTAL_CHARGES)) %>%
group_by(loss, PUBLIC_HEALTH_REGION) %>%
summarise(
median_log_charge = median(log_charge, na.rm = TRUE),
mad_log_charge = mad(log_charge, na.rm = TRUE),
mean_lengthofstay = mean(LENGTH_OF_STAY, na.rm=TRUE),
sd_lengthofstay = sd(LENGTH_OF_STAY, na.rm=TRUE)
) %>%
mutate(
upper_staylength = mean_lengthofstay + 2*sd_lengthofstay,
lower_staylength = mean_lengthofstay - 2*sd_lengthofstay,
upper_log_charge = median_log_charge + 2 * mad_log_charge,
lower_log_charge = median_log_charge - 2 * mad_log_charge,
median_charge_dollars = exp(median_log_charge)
)
## `summarise()` has grouped output by 'loss'. You can override using the
## `.groups` argument.
## across all TX and all PHRs, but not grouped by loss status
# TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1) %>%
# mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key,
# "loss", "regular"),
# log_charge = log(TOTAL_CHARGES), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
# summarise(
# mean_log_charge = mean(log_charge, na.rm = TRUE),
# sd_log_charge = sd(log_charge, na.rm = TRUE),
# mean_lengthofstay = mean(LENGTH_OF_STAY, na.rm=TRUE),
# sd_lengthofstay = sd(LENGTH_OF_STAY, na.rm=TRUE)
# )
# avg_fall_costs_PHR<- TX_IP_21to24 %>%
# mutate(PUBLIC_HEALTH_REGION = case_when(
# .$PUBLIC_HEALTH_REGION == "1" ~ "01",
# .$PUBLIC_HEALTH_REGION == "2" ~ "02",
# .$PUBLIC_HEALTH_REGION == "3" ~ "03",
# .$PUBLIC_HEALTH_REGION == "4" ~ "04",
# .$PUBLIC_HEALTH_REGION == "5" ~ "05",
# .$PUBLIC_HEALTH_REGION == "6" ~ "06",
# .$PUBLIC_HEALTH_REGION == "7" ~ "07",
# .$PUBLIC_HEALTH_REGION == "8" ~ "08",
# .$PUBLIC_HEALTH_REGION == "9" ~ "09",
# .default = PUBLIC_HEALTH_REGION
# ), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
#
# filter(has_hip_fracture == 1 | has_TBI == 1) %>%
# mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key,
# "loss", "regular"),
# log_charge = log(TOTAL_CHARGES)) %>%
# group_by(PUBLIC_HEALTH_REGION) %>%
# summarise(
# mean_log_charge = mean(log_charge, na.rm = TRUE),
# sd_log_charge = sd(log_charge, na.rm = TRUE),
# mean_lengthofstay = mean(LENGTH_OF_STAY, na.rm=TRUE),
# sd_lengthofstay = sd(LENGTH_OF_STAY, na.rm=TRUE)
# ) %>%
# mutate(
# upper_staylength = mean_lengthofstay + 2*sd_lengthofstay,
# lower_staylength = mean_lengthofstay - 2*sd_lengthofstay,
# upper_log_charge = mean_log_charge + 2 * sd_log_charge,
# lower_log_charge = mean_log_charge - 2 * sd_log_charge,
# mean_charge_dollars = exp(mean_log_charge + 0.5 * sd_log_charge^2), ## original code was median, now is mean
# upper_dollars = exp(mean_log_charge + 2 * sd_log_charge),
# lower_dollars = exp(mean_log_charge - 2 * sd_log_charge)
# )
avg_fall_costs_sex<- TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key,
"loss", "regular"),
log_charge = log(TOTAL_CHARGES),
LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(loss, SEX_CODE) %>%
summarise(
median_log_charge = median(log_charge, na.rm = TRUE),
mad_log_charge = mad(log_charge, na.rm = TRUE),
mean_lengthofstay = mean(LENGTH_OF_STAY, na.rm=TRUE),
sd_lengthofstay = sd(LENGTH_OF_STAY, na.rm=TRUE)
) %>%
mutate(
upper_staylength = mean_lengthofstay + 2*sd_lengthofstay,
lower_staylength = mean_lengthofstay - 2*sd_lengthofstay,
upper_log_charge = median_log_charge + 2 * mad_log_charge,
lower_log_charge = median_log_charge - 2 * mad_log_charge,
median_charge_dollars = exp(median_log_charge)
)
## `summarise()` has grouped output by 'loss'. You can override using the
## `.groups` argument.
## total amounts of profit loss per patient by sex
#by median
avg_fall_costs_sex %>% filter(loss == "loss") %>% mutate(net_loss_profit = median_charge_dollars * (1-average_covered)) %>% dplyr::select(loss, SEX_CODE, median_charge_dollars, net_loss_profit)
## # A tibble: 4 × 4
## # Groups: loss [1]
## loss SEX_CODE median_charge_dollars net_loss_profit
## <chr> <chr> <dbl> <dbl>
## 1 loss F 84497. 19434.
## 2 loss M 86266. 19841.
## 3 loss U 84366. 19404.
## 4 loss <NA> 89875. 20671.
### DR. Lee wants investigation of differences in length of stay by age
# install.packages("nortest", repos = "https://cloud.r-project.org")
# install.packages("rstatix", repos = "https://cloud.r-project.org")
library(nortest)
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
## check if LOS distro is normal
#check visually
TX_IP_21to24 %>% filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>% mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
.$LENGTH_OF_STAY %>% table(.) %>% plot(.)
#check w/ normality test
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>% mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%.$LENGTH_OF_STAY %>% ad.test(.)
##
## Anderson-Darling normality test
##
## data: .
## A = 4929.8, p-value < 2.2e-16
##check median LOS values and the MAD for PHRs and Sex/Age
TX_IP_21to24 %>%
mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(PUBLIC_HEALTH_REGION) %>%
summarise(
count = n(),
mean_LOS = mean(LENGTH_OF_STAY, na.rm = TRUE),
sd_LOS = sd(LENGTH_OF_STAY, na.rm=TRUE),
median_LOS = median(LENGTH_OF_STAY, na.rm = TRUE),
MAD_LOS = mad(LENGTH_OF_STAY)
) %>%
mutate(upper_LOS = mean_LOS + 2*sd_LOS,
lower_LOS = mean_LOS - 2*sd_LOS) %>% tibble(.) %>% View(.)
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>% mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(age_years, SEX_CODE) %>%
summarise(
count = n(),
mean_LOS = mean(LENGTH_OF_STAY, na.rm = TRUE),
sd_LOS = sd(LENGTH_OF_STAY, na.rm=TRUE),
median_LOS = median(LENGTH_OF_STAY, na.rm = TRUE),
MAD_LOS = mad(LENGTH_OF_STAY)
) %>%
mutate(upper_LOS = mean_LOS + 2*sd_LOS,
lower_LOS = mean_LOS - 2*sd_LOS) %>% tibble(.)
## `summarise()` has grouped output by 'age_years'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 9
## age_years SEX_CODE count mean_LOS sd_LOS median_LOS MAD_LOS upper_LOS
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 65-69 F 4574 5.56 5.33 4 2.97 16.2
## 2 65-69 M 3067 6.50 7.33 5 2.97 21.2
## 3 70-74 F 7426 5.52 5.32 4 2.97 16.2
## 4 70-74 M 4353 6.48 7.54 5 2.97 21.5
## 5 75-79 F 10250 5.53 4.54 5 2.97 14.6
## 6 75-79 M 5718 6.22 5.37 5 2.97 17.0
## 7 80-84 F 11736 5.60 4.15 5 2.97 13.9
## 8 80-84 M 6148 6.20 5.17 5 2.97 16.5
## 9 85-89 F 10883 5.44 3.59 5 2.97 12.6
## 10 85-89 M 5368 6.08 5.03 5 2.97 16.1
## 11 90+ F 9404 5.47 3.64 5 2.97 12.7
## 12 90+ M 3866 5.73 4.31 5 2.97 14.4
## # ℹ 1 more variable: lower_LOS <dbl>
## wilcox and Kruskal to see if LOS between groups is significantly different
#SEX
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>% rstatix::wilcox_test(LENGTH_OF_STAY ~ SEX_CODE)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 LENGTH_OF_STAY F M 54273 28520 741063950. 4.32e-24
#PHRs
#PHR: LOS significance
TX_IP_21to24 %>%
mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>% rstatix::kruskal_test(LENGTH_OF_STAY ~ PUBLIC_HEALTH_REGION)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 LENGTH_OF_STAY 91228 902. 10 2.20e-187 Kruskal-Wallis
#PHR: log charge significance
TX_IP_21to24 %>%
mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY), log_charge = log(TOTAL_CHARGES)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
rstatix::kruskal_test(log_charge ~ PUBLIC_HEALTH_REGION)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 log_charge 91228 3905. 10 0 Kruskal-Wallis
### OKAY, it was, but by how much?
# install.packages("coin", repos = "https://cloud.r-project.org")
library("coin")
## Warning: package 'coin' was built under R version 4.3.3
## Loading required package: survival
## Warning: package 'survival' was built under R version 4.3.3
##
## Attaching package: 'coin'
##
## The following objects are masked from 'package:rstatix':
##
## chisq_test, friedman_test, kruskal_test, sign_test, wilcox_test
#SEX: LOS eff size
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>%
filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>% wilcox_effsize(LENGTH_OF_STAY ~ SEX_CODE)
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 LENGTH_OF_STAY F M 0.0352 54273 28520 small
#PHR: LOS eff size
TX_IP_21to24 %>% mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>% rstatix::kruskal_effsize(LENGTH_OF_STAY ~ PUBLIC_HEALTH_REGION)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 LENGTH_OF_STAY 91228 0.00978 eta2[H] small
#PHR: log charge eff size
TX_IP_21to24 %>%
mutate(PUBLIC_HEALTH_REGION = as.numeric(PUBLIC_HEALTH_REGION) %>% as.character(.), LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY), log_charge = log(TOTAL_CHARGES)) %>%
filter(has_hip_fracture == 1 | has_TBI == 1) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
rstatix::kruskal_effsize(log_charge ~ PUBLIC_HEALTH_REGION)
## # A tibble: 1 × 5
## .y. n effsize method magnitude
## * <chr> <int> <dbl> <chr> <ord>
## 1 log_charge 91228 0.0427 eta2[H] small
#baby: 0.03 between SEX using Wilcox effsize
#0.00664 H-value for PHR using Kruskal effsize
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>%mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY))%>%
group_by(age_years, SEX_CODE) %>%
summarise(
count = n(),
mean_LOS = mean(LENGTH_OF_STAY, na.rm = TRUE),
sd_LOS = sd(LENGTH_OF_STAY, na.rm=TRUE),
median_LOS = median(LENGTH_OF_STAY, na.rm = TRUE),
MAD_LOS = mad(LENGTH_OF_STAY)
) %>%
mutate(upper_LOS = mean_LOS + 2*sd_LOS,
lower_LOS = mean_LOS - 2*sd_LOS) %>% tibble(.)
## `summarise()` has grouped output by 'age_years'. You can override using the
## `.groups` argument.
## # A tibble: 12 × 9
## age_years SEX_CODE count mean_LOS sd_LOS median_LOS MAD_LOS upper_LOS
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 65-69 F 4574 5.56 5.33 4 2.97 16.2
## 2 65-69 M 3067 6.50 7.33 5 2.97 21.2
## 3 70-74 F 7426 5.52 5.32 4 2.97 16.2
## 4 70-74 M 4353 6.48 7.54 5 2.97 21.5
## 5 75-79 F 10250 5.53 4.54 5 2.97 14.6
## 6 75-79 M 5718 6.22 5.37 5 2.97 17.0
## 7 80-84 F 11736 5.60 4.15 5 2.97 13.9
## 8 80-84 M 6148 6.20 5.17 5 2.97 16.5
## 9 85-89 F 10883 5.44 3.59 5 2.97 12.6
## 10 85-89 M 5368 6.08 5.03 5 2.97 16.1
## 11 90+ F 9404 5.47 3.64 5 2.97 12.7
## 12 90+ M 3866 5.73 4.31 5 2.97 14.4
## # ℹ 1 more variable: lower_LOS <dbl>
### NOTE: does a "weighted" LOS by age group change much?
#by age group
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(age_years, SEX_CODE) %>%
summarise(
count = n(),
median_LOS = median(LENGTH_OF_STAY, na.rm = TRUE)
) %>%
ungroup() %>%
group_by(age_years) %>%
summarise(
weighted_median_LOS = sum(median_LOS * count) / sum(count)
)
## `summarise()` has grouped output by 'age_years'. You can override using the
## `.groups` argument.
## # A tibble: 6 × 2
## age_years weighted_median_LOS
## <chr> <dbl>
## 1 65-69 4.40
## 2 70-74 4.37
## 3 75-79 5
## 4 80-84 5
## 5 85-89 5
## 6 90+ 5
#by sex
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>%
mutate(LENGTH_OF_STAY = as.numeric(LENGTH_OF_STAY)) %>%
group_by(age_years, SEX_CODE) %>%
summarise(
count = n(),
median_LOS = median(LENGTH_OF_STAY, na.rm = TRUE)
) %>%
ungroup() %>%
group_by(SEX_CODE) %>%
summarise(
weighted_median_LOS = sum(median_LOS * count) / sum(count)
)
## `summarise()` has grouped output by 'age_years'. You can override using the
## `.groups` argument.
## # A tibble: 2 × 2
## SEX_CODE weighted_median_LOS
## <chr> <dbl>
## 1 F 4.78
## 2 M 5
#answer: nope
## visualization of LOS distro by age group via boxplot
TX_IP_21to24 %>%
filter(has_hip_fracture == 1 | has_TBI == 1, SEX_CODE %in% c("F","M")) %>% filter(is_fall == 1) %>% ggplot(., aes(x=age_years, y=LENGTH_OF_STAY, group = SEX_CODE, fill = SEX_CODE)) + geom_boxplot() + facet_wrap(~age_years) + labs(y="") + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank())
04/06/2026 DAWN OF THE RISING DOCTORS BORN IN THE CRUCIBLE OF DR. LIU’S ABSTRACT STUFF REBORN AS A MORE COMPETENT VERSION OF THE ORIGINAL (PROBABLY)
BEHOLD
pmt_src_key <- c("09" = "Self Pay",
"9" = "Self Pay",
"HM" = "Health Maintenance Organization",
"10" = "Central Certification",
"LI" = "Liability",
"11" = "Other Non-Federal Programs",
"LM" = "Liability Medical",
"12" = "Preferred Provider Organization (PPO)",
"MA" = "Medicare Part A",
"13" = "Point of Service (POS)",
"MB" = "Medicare Part B",
"14" = "Exclusive Provider Organization (EPO)",
"MC" = "Medicaid",
"15" = "Indemnity Insurance",
"TV" = "Title V",
"16" = "HMO Medicare Risk",
"OF" = "Other Federal Program",
"AM" = "Automobile Medical",
"VA" = "Veterans Administration Plan",
"BL" = "Blue Cross / Blue Shield",
"WC" = "Workers' Compensation Health Claim",
"CH" = "CHAMPUS",
"ZZ" = "Charity / Indigent / Unknown",
"CI" = "Commercial Insurance",
"DS" = "Disability Insurance")
# this is the *code* vector you want to use in %in% for human readable
pmt_src_loss_key <- unname(pmt_src_key[loss_leaders])
pmt_src_loss_key
## [1] "Medicaid" "Medicare Part A"
## [3] "Medicare Part B" "HMO Medicare Risk"
## [5] "Veterans Administration Plan"
dbGetQuery(con, "SELECT
has_ADRD,
year,
FIRST_PAYMENT_SRC,
age_years,
Sex,
Race_1,
Ethnicity_1,
has_TBI,
has_hip_fracture,
routine_DC_home,
is_fall,
SOURCE_OF_ADMISSION,
PAT_STATUS_desc,
TOTAL_CHARGES,
FROM ip;") ->TX_IP_logcols_21to24
factor_vars <- c(
"has_ADRD",
"age_years",
"Sex",
"Race_1",
"Ethnicity_1",
"has_TBI",
"has_hip_fracture",
"PAT_STATUS_desc",
"routine_DC_home",
"SOURCE_OF_ADMISSION",
"is_fall"
)
TX_IP_logcols_21to24 <- TX_IP_logcols_21to24 |>
mutate(across(all_of(factor_vars), as.factor))
## reference level establishment (inpatient)
TX_IP_logcols_21to24 <- TX_IP_logcols_21to24 %>%
mutate(
Race_1 = fct_relevel(Race_1, "White"),
Ethnicity_1 = fct_relevel(Ethnicity_1, "Non-Hispanic"),
Sex = fct_relevel(Sex, "Male"),
age_years = fct_relevel(age_years, "65-69"),
has_ADRD = factor(has_ADRD, levels = c(0,1)),
has_TBI = factor(has_TBI, levels = c(0, 1)),
is_fall = factor(is_fall, levels = c(0, 1)),
routine_DC_home = factor(routine_DC_home, levels = c(0, 1)),
has_hip_fracture = factor(has_hip_fracture, levels = c(0, 1)),
PAT_STATUS_desc = fct_relevel(PAT_STATUS_desc, "Discharged to home or self-care (routine discharge)")
)
TX_IP_logcols_21to24<- TX_IP_logcols_21to24 %>% mutate(loss = ifelse(FIRST_PAYMENT_SRC %in% pmt_src_loss_key, "loss", "regular"))
TX_IP_logcols_21to24<- left_join(TX_IP_logcols_21to24, avg_TX_CCRs, by=c("year"="YEAR")) %>% mutate(hosp_cost = TOTAL_CHARGES * median_CCR, expec_reimb = ifelse(loss == "loss", hosp_cost * average_covered, TOTAL_CHARGES), expec_gain = expec_reimb-hosp_cost)
####
TX_IP_logcols_21to24 %>% filter(has_ADRD == 1 & is_fall == 1) %>% .$TOTAL_CHARGES %>% sum(.) %>% scales::comma(.)
## [1] "6,084,374,833"
##
adrd_fall_hospcost<- TX_IP_logcols_21to24 %>% filter(has_ADRD == 1 & is_fall == 1 & loss == "loss") %>% .$hosp_cost %>% sum(.)
print("adrd_fall_hospcost")
## [1] "adrd_fall_hospcost"
adrd_fall_hospcost %>% scales::comma(.)
## [1] "2,662,122,002"
adrd_fall_hosp_reimburse<- TX_IP_logcols_21to24 %>% filter(has_ADRD == 1 & is_fall == 1 & loss == "loss") %>% .$expec_reimb %>% sum(.)
print("adrd_fall_hosp_reimburse")
## [1] "adrd_fall_hosp_reimburse"
adrd_fall_hosp_reimburse %>% scales::comma(.)
## [1] "2,049,833,942"
{adrd_fall_hospcost - adrd_fall_hosp_reimburse} %>% as.numeric(.) %>% scales::comma(.)
## [1] "612,288,061"
# From 2021 to 2024, $6.08B were charged to hospitalized patients with ADRD who experienced a fall. Of these, $5.6B were charged to patients with a loss-leading insurance. The total cost of care across all years for those with a "lossy" public source insurance was approximately $2.69B of which only $2.07B, on average, was expected to be reimbursed. Altogether, this resulted in a net loss of $618M for all hospitals across Texas, not including prospective profit.
TX_IP_logcols_21to24 %>% filter(has_ADRD == 1 & is_fall == 1) %>% .$hosp_cost %>% sum(.) %>% scales::comma(.)
## [1] "2,903,244,724"
## of the
TX_IP_logcols_21to24 %>% filter(has_ADRD == 1 & is_fall == 1) %>% .$expec_reimb %>% sum(.) %>% scales::comma(.)
## [1] "2,555,423,168"
####
glm(TOTAL_CHARGES ~ has_hip_fracture*has_TBI*loss + age_years + Race_1+Ethnicity_1+Sex, family = Gamma(link = "log"), data = TX_IP_logcols_21to24 %>% filter(TOTAL_CHARGES > 0 & has_ADRD ==1 & is_fall == 1)) ->ip_glm_logcharge_fall
library(marginaleffects)
mf <- model.frame(ip_glm_logcharge_fall)
preds<- avg_predictions(ip_glm_logcharge_fall, by = c("has_hip_fracture", "has_TBI", "loss"), type = "response", newdata = mf)
preds
##
## has_hip_fracture has_TBI loss Estimate Std. Error z Pr(>|z|) S
## 0 0 loss 88915 471 188.67 <0.001 Inf
## 0 0 regular 84413 1441 58.57 <0.001 Inf
## 0 1 loss 102697 1343 76.47 <0.001 Inf
## 0 1 regular 87259 3828 22.79 <0.001 379.7
## 1 0 loss 120106 960 125.14 <0.001 Inf
## 1 0 regular 114596 3006 38.12 <0.001 Inf
## 1 1 loss 151790 9130 16.63 <0.001 203.8
## 1 1 regular 88331 18331 4.82 <0.001 19.4
## 2.5 % 97.5 %
## 87992 89839
## 81588 87237
## 100065 105329
## 79756 94762
## 118225 121987
## 108704 120487
## 133896 169685
## 52403 124258
##
## Type: response
preds <- preds %>%
mutate(group = case_when(
has_hip_fracture == 0 & has_TBI == 0 ~ "None",
has_hip_fracture == 0 & has_TBI == 1 ~ "TBI only",
has_hip_fracture == 1 & has_TBI == 0 ~ "Hip fracture only",
TRUE ~ "Both"
))
ratio_df <- preds %>%
mutate(group = case_when(
has_hip_fracture == 0 & has_TBI == 0 ~ "None",
has_hip_fracture == 0 & has_TBI == 1 ~ "TBI only",
has_hip_fracture == 1 & has_TBI == 0 ~ "Hip fracture only",
TRUE ~ "Both"
)) %>%
select(group, loss, estimate) %>%
pivot_wider(names_from = loss, values_from = estimate) %>%
mutate(ratio = loss / regular)
ggplot(ratio_df, aes(x = group, y = ratio)) +
geom_hline(yintercept = 1, linetype = "dashed", color = "gray40") +
geom_point(size = 4, color = "#D55E00") +
geom_text(aes(label = round(ratio, 2)), vjust = -1) +
labs(
y = "Loss / Regular Predicted Charges",
x = "Injury Profile"
) +
ylim(0.8, max(ratio_df$ratio) * 1.2)
###
cmp <- comparisons(
ip_glm_logcharge_fall,
variables = "loss",
by = c("has_hip_fracture", "has_TBI"),
type = "response"
)
cmp
##
## has_hip_fracture has_TBI Estimate Std. Error z Pr(>|z|) S 2.5 %
## 0 0 -8160 1467 -5.56 < 0.001 25.2 -11034
## 0 1 -18416 3949 -4.66 < 0.001 18.3 -26155
## 1 0 -9643 3065 -3.15 0.00165 9.2 -15650
## 1 1 -63180 20523 -3.08 0.00208 8.9 -103405
## 97.5 %
## -5285
## -10676
## -3636
## -22955
##
## Term: loss
## Type: response
## Comparison: regular - loss
library(dplyr)
library(gt)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:coin':
##
## pvalue
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
cmp_tbl <- cmp %>%
mutate(
`Injury profile` = case_when(
has_hip_fracture == 0 & has_TBI == 0 ~ "No HF/TBI",
has_hip_fracture == 1 & has_TBI == 0 ~ "Hip fracture only",
has_hip_fracture == 0 & has_TBI == 1 ~ "TBI only",
has_hip_fracture == 1 & has_TBI == 1 ~ "Hip fracture + TBI"
),
`Adjusted difference` = abs(estimate),
`95% CI` = paste0(
dollar(abs(`conf.high`), accuracy = 1),
" to ",
dollar(abs(`conf.low`), accuracy = 1)
),
`P value` = case_when(
`p.value` < 0.001 ~ "<0.001",
TRUE ~ formatC(`p.value`, format = "f", digits = 3)
)
) %>%
select(
`Injury profile`,
`Adjusted difference`,
`95% CI`,
`P value`
)
gt_tbl <- cmp_tbl %>%
gt() %>%
fmt_currency(
columns = `Adjusted difference`,
currency = "USD",
decimals = 0
) %>%
cols_label(
`Injury profile` = "Injury profile",
`Adjusted difference` = md("**Higher charges in loss-associated payer group**"),
`95% CI` = "95% CI",
`P value` = md("*p*")
) %>%
tab_header(
title = md("**Adjusted Charge Differences by Injury Profile**"),
subtitle = "Model-based comparison of loss-associated vs other payers"
) %>%
tab_source_note(
source_note = md("Values are absolute differences from the adjusted model; comparison originally estimated as regular minus loss.")
) %>%
cols_align(
align = "left",
columns = `Injury profile`
) %>%
cols_align(
align = "center",
columns = c(`Adjusted difference`, `95% CI`, `P value`)
) %>%
tab_style(
style = list(
cell_text(weight = "bold")
),
locations = cells_column_labels(everything())
) %>%
opt_row_striping() %>%
tab_options(
table.font.size = px(18),
data_row.padding = px(8),
heading.title.font.size = px(24),
heading.subtitle.font.size = px(16),
source_notes.font.size = px(12),
table.border.top.width = px(2),
table.border.bottom.width = px(2)
)
gt_tbl
| Adjusted Charge Differences by Injury Profile | |||
| Model-based comparison of loss-associated vs other payers | |||
| Injury profile | Higher charges in loss-associated payer group | 95% CI | p |
|---|---|---|---|
| No HF/TBI | $8,160 | $5,285 to $11,034 | <0.001 |
| TBI only | $18,416 | $10,676 to $26,155 | <0.001 |
| Hip fracture only | $9,643 | $3,636 to $15,650 | 0.002 |
| Hip fracture + TBI | $63,180 | $22,955 to $103,405 | 0.002 |
| Values are absolute differences from the adjusted model; comparison originally estimated as regular minus loss. | |||
gtsave(gt_tbl, "poster_table.png")
## file:///C:/Users/DANIEL~1.PIN/AppData/Local/Temp/RtmpsPa0vu/filecf027143303.html screenshot completed
##
library(dplyr)
library(ggplot2)
library(forcats)
library(scales)
plot_df <- preds %>%
mutate(
group = case_when(
group %in% c("None") ~ "No HF/TBI",
group %in% c("Hip fracture only") ~ "Hip fracture only",
group %in% c("TBI only") ~ "TBI only",
group %in% c("Both") ~ "Hip fracture + TBI",
TRUE ~ as.character(group)
),
group = factor(
group,
levels = c("No HF/TBI", "Hip fracture only", "TBI only", "Hip fracture + TBI")
),
loss = recode(
loss,
"loss" = "Loss-associated payer",
"regular" = "Other payer"
)
)
# label the rightmost points directly so the legend can go away
label_df <- plot_df %>%
filter(group == "Hip fracture + TBI")
poster_chart<- ggplot(
plot_df,
aes(x = group, y = estimate, color = loss, group = loss)
) +
geom_line(linewidth = 1.1, position = position_dodge(width = 0.20)) +
geom_point(size = 3.8, position = position_dodge(width = 0.20)) +
geom_errorbar(
aes(ymin = conf.low, ymax = conf.high),
width = 0.10,
linewidth = 0.8,
position = position_dodge(width = 0.20)
) +
geom_text(
data = label_df,
aes(label = loss),
hjust = -0.15,
size = 4.2,
show.legend = FALSE,
position = position_dodge(width = 0.20)
) +
annotate(
"text",
x = 3.5,
y = max(plot_df$conf.high, na.rm = TRUE) * 0.93,
label = "~$63K gap\nlargest difference",
fontface = "bold",
size = 4.4
) +
scale_y_continuous(
labels = label_dollar(scale = 1),
expand = expansion(mult = c(0.05, 0.18))
) +
coord_cartesian(clip = "off") +
labs(
title = "Loss-Associated Payers Have Higher Predicted Charges Across Injury Profiles",
x = NULL,
y = "Predicted Hospital Charges"
) +
theme_minimal(base_size = 15) +
theme(
legend.position = "none",
plot.title = element_text(face = "bold", size = 17),
plot.subtitle = element_text(size = 12),
axis.text.x = element_text(size = 12, face = "bold"),
axis.text.y = element_text(size = 11),
axis.title.y = element_text(size = 13, face = "bold"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
plot.margin = margin(15, 80, 15, 15)
)
poster_chart