Show code
str(params, give.attr = F)
## List of 4
## $ rollout_start: chr "2023-08-01"
## $ rollout_end : chr "2023-10-01"
## $ ignore_before: chr "2022-08-01"
## $ ignore_after : chr "2024-10-01"This report is run with the following parameters
str(params, give.attr = F)
## List of 4
## $ rollout_start: chr "2023-08-01"
## $ rollout_end : chr "2023-10-01"
## $ ignore_before: chr "2022-08-01"
## $ ignore_after : chr "2024-10-01"You can skip over this section, it’s just where I import the data
This comes from a Google Sheet where I store info for each hospital. There is important data on the sheet that governs the steps below
library(googlesheets4)
options(gargle_oauth_email = TRUE)
gs4_auth(cache = TRUE)
if(!exists("hospitals")){
hospitals_raw <- paste0("https://docs.google.com/",
"spreadsheets/d/",
"1zZ1o1HhzOPrbj2KBdGsaO4rJvLDPMWqN0ogjMCNHfGw/") %>%
read_sheet(sheet = "CHG Hospitals")
hospitals <- hospitals_raw
rm(hospitals_raw)
}
# Used to join get the names of the hospitals in the NHSN data
CCNs <- hospitals %>%
mutate(NHSN_abbv = case_when(!is.na(NHSN_abbv) ~ NHSN_abbv,
CMS_ID=="510031" ~ "STF",
CMS_ID=="510029" ~ "TMH")) %>%
distinct(NHSN_abbv, CMS_ID)CLABSI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCentralLine_AssociatedBSI_1.2020_10.2024.xlsm",
col_types = c("text", "numeric", "numeric",
"numeric", "numeric", "numeric",
"text", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "numeric")) %>%
filter(!is.na(location)) %>%
rename(Month=summaryYM) %>%
mutate(Month = str_replace(Month, "M", "-"),
Month = ymd(str_glue("{Month}-15"))) %>%
left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
select(Hosp=NHSN_abbv, everything())
CLABSI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCentralLine_associatedBSI 1.2020_10.2024.xlsm",
col_types = c("text", "numeric", "numeric",
"numeric", "numeric", "numeric",
"text", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "numeric")) %>%
filter(!is.na(location)) %>%
rename(Month=summaryYM) %>%
mutate(Month = str_replace(Month, "M", "-"),
Month = ymd(str_glue("{Month}-15"))) %>%
left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
select(Hosp=NHSN_abbv, everything())
CLABSI <- list(CLABSI_ACH, CLABSI_CAH) %>%
# Bind into a single table
list_rbind() %>%
# Clean up
mutate(Hosp = as.factor(Hosp),
Month = Month,
Inf_pred = as.numeric(numPred),
Inf_obs = as.numeric(infCount),
CL_days = as.numeric(numcldays),
locationType = as.factor(locationType),
locCDC = as.character(locCDC),
location = location,
facType = as.factor(facType),
# .keep="none" is the same as transmute, keeps only new columns
.keep="none") %>%
# Clean up CDC location
mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>%
relocate(Hosp, Month, Inf_pred, Inf_obs, CL_days, location) %>%
# Ignore data before & after
filter(Month > ymd(params$ignore_before)) %>%
filter(Month < ymd(params$ignore_after))
rm(CLABSI_ACH, CLABSI_CAH)
# CLABSI %>% rmarkdown::paged_table()CAUTI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020-10.2024.xlsm",
col_types = c("date", "numeric", "numeric",
"numeric", "numeric", "numeric",
"text", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "numeric")) %>%
filter(!is.na(location)) %>%
rename(Month=summaryYM) %>%
mutate(Month = as.Date(Month) + days(14)) %>%
left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
select(Hosp=NHSN_abbv, everything())
CAUTI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020_10.2024.xlsm",
col_types = c("date", "numeric", "numeric",
"numeric", "numeric", "numeric",
"text", "numeric", "text", "text",
"text", "text", "text", "text", "text",
"text", "numeric")) %>%
filter(!is.na(location)) %>%
rename(Month=summaryYM) %>%
mutate(Month = as.Date(Month) + days(14)) %>%
left_join(CCNs, by=c("ccn"="CMS_ID"))%>%
select(Hosp=NHSN_abbv, everything())
CAUTI <- list(CAUTI_ACH, CAUTI_CAH) %>%
# Bind into a single table
list_rbind() %>%
# Clean up
mutate(Hosp = as.factor(Hosp),
Month = Month,
Inf_pred = as.numeric(numPred),
Inf_obs = as.numeric(infCount),
cath_days = as.numeric(numucathdays),
locationType = as.factor(locationType),
locCDC = as.character(loccdc),
location = location,
facType = as.factor(facType),
# .keep="none" is the same as transmute, keeps only new columns
.keep="none") %>%
# Clean up CDC location
mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>%
relocate(Hosp, Month, Inf_pred, Inf_obs, cath_days, location) %>%
# Ignore data before & after
filter(Month > ymd(params$ignore_before)) %>%
filter(Month < ymd(params$ignore_after))
rm(CAUTI_ACH, CAUTI_CAH)
# CAUTI %>% rmarkdown::paged_table()MRSA_ACH <- readxl::read_excel("NHSN data/ACH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx",
col_types = c("text", "date", "numeric",
"numeric", "numeric", "skip", "skip",
"skip", "skip", "text", "text", "text",
"text", "text")) %>%
filter(!is.na(ccn)) %>%
rename(Month=summaryYM) %>%
mutate(Month = as.Date(Month) + days(14)) %>%
left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
select(Hosp=NHSN_abbv, everything())
MRSA_CAH <- readxl::read_excel("NHSN data/CAH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx",
col_types = c("text", "date", "numeric",
"numeric", "numeric", "skip", "skip",
"skip", "skip", "text", "text", "text")) %>%
filter(!is.na(ccn)) %>%
rename(Month=summaryYM) %>%
mutate(Month = as.Date(Month) + days(14)) %>%
left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
select(Hosp=NHSN_abbv, everything())
MRSA <- list(MRSA_ACH, MRSA_CAH) %>%
# Bind into a single table
list_rbind() %>%
# Clean up
mutate(Hosp = as.factor(Hosp),
Month = Month,
Inf_pred = as.numeric(numPred),
Inf_obs = as.numeric(MRSA_bldIncCount),
MRSA_days = as.numeric(numpatdays),
facType = as.factor(facType),
# .keep="none" is the same as transmute, keeps only new columns
.keep="none") %>%
relocate(Hosp, Month, Inf_pred, Inf_obs, MRSA_days) %>%
# Ignore data before & after
filter(Month > ymd(params$ignore_before)) %>%
filter(Month < ymd(params$ignore_after))
rm(MRSA_ACH, MRSA_CAH)CHG data was obtained from the CHG compliance dashboard. There are a few ways to split the data, outlined below. You can limit the dashboard by CHG indication (step 1) and expand it to show department level data (step 2)
I have a few versions of the CHG data downloaded, and because it only shows the last 12 months, the versions aren’t consistent. The files I have available are below
| File ID | Date exported | Filters | By department |
|---|---|---|---|
| File A | 10/18/2024 | Central lines only | No |
| File B1 | 12/05/2024 | None | Yes |
| File B2 | 12/05/2024 | Central lines only | Yes |
| File B3 | 12/05/2024 | Foleys only | Yes |
For now, I’m using the old version that I did in the prior analysis (File A), just so I can reproduce my prior work. Ideally, I could get access to the raw data (going back to implementation) and avoid this mess
CHG_raw <- "TableauData/2024-10-18 CHG CL only/All Overall Rate by Hospital and Department_data.csv" %>%
read_csv() %>%
select(Hospital, Date=`Week of Census Time`,
Denominator=`All Lines Denominator`,
Numerator=`All Lines Numerator`) %>%
mutate(Date = mdy(Date)) %>%
group_by(Hospital, Date) %>%
summarise(Denominator = sum(Denominator, na.rm = T),
Numerator = sum(Numerator, na.rm = T)) %>%
ungroup() %>%
# Remove most recent week
filter(Date != ymd("2024-10-13"))Because it is reported weekly (and everything else is reported monthly), we need to convert the weekly data into monthly data. Some weeks span the course of multiple months1. In these cases, we will group them with whichever month that the majority of their days fell under
When you extract the data from Tableau, the dates it lists are all Sundays. It doesn’t look like it’s explicitly documented anywhere, but I assume that this means the week starts on Sunday, meaning the date provided is day 1 of 7 for that week
week_to_month <- function(date, beginning=T) {
require(lubridate)
# Make sure you are given only one date, not a vector
stopifnot(length(date)==1)
# Calculate the midpoint of the week, which is 3 days away from the start
# (or end) date
if(beginning) {
mid_date <- date + days(3)
} else({
# Otherwise, treat the given day as the end of the week
mid_date <- date - days(3)
})
# Extract the month and the year from this date
new_month <- month(mid_date)
new_year <- year(mid_date)
# Create a new date and return
new_date <- ymd(paste0(new_year, "-", new_month, "-15"))
return(new_date)
}
CHG_hosp <- CHG_raw %>%
# Needed to only process one date at a time
rowwise() %>%
mutate(Month = week_to_month(Date)) %>%
ungroup() %>%
group_by(Hospital, Month) %>%
summarise(Denominator =sum(Denominator , na.rm = T),
Numerator =sum(Numerator , na.rm = T))
# rm(CHG_clean)
# CHG_monthly %>% rmarkdown::paged_table()
# mutate(Hospital = as.factor(Hospital)) %>%
# DT::datatable(filter="top", rownames = F)I’ll also grab the newer export with CHG compliance by department (file 2B in the table above). This doesn’t go back as far as the old CHG data (file A), but is more granular
CHG_dept <- read_csv("TableauData/2024-12-05/CHG_byDept_CentralLines_2024-12-05.csv") %>%
select(Hospital, Department,
Date=`Week of Census Time`,
Denominator=`All Lines Denominator`,
Numerator=`All Lines Numerator`) %>%
mutate(Date = mdy(Date)) %>%
# Needed to only process one date at a time
rowwise() %>%
mutate(Month = week_to_month(Date)) %>%
ungroup() %>%
group_by(Hospital, Department, Month) %>%
summarise(Denominator =sum(Denominator , na.rm = T),
Numerator =sum(Numerator , na.rm = T)) %>%
ungroup()
CHG_dept <- CHG_dept %>%
filter(Month!=ymd("2024-12-15"))The figure below shows the timespans of the various datasets used for this run
# max(CAUTI$Month)
# max(CLABSI$Month)
# max(CHG_hosp$Month)
# max(CHG_dept$Month)
tibble(dataset = c("CLABSI", "CAUTI", "MRSA", "CHG_hosp", "CHG_dept"),
min = c(min(CLABSI$Month), min(CAUTI$Month), min(MRSA$Month),
min(CHG_hosp$Month), min(CHG_dept$Month)),
max = c(max(CLABSI$Month), max(CAUTI$Month), max(MRSA$Month),
max(CHG_hosp$Month), max(CHG_dept$Month))) %>%
ggplot(aes(y=dataset)) +
geom_linerange(aes(xmin=min, xmax=max)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") This is the section where I aggregate and match the data to prepare it for the final analysis.
The NHSN exports reports HAIs down to the “ward/unit” level (called location in the export) and these locations can be further classified by “acuity” (using the CDC’s location code; locCDC). An example for Princeton is shown below
CLABSI %>%
filter(Hosp=="PRN") %>%
# group_by(Hosp, locCDC, locationType, location) %>%
# summarise(NHSN_n = n(),
# NHSN_min = min(Month),
# NHSN_max = max(Month))
filter(year(Month)>=2023) %>%
mutate(Quarter = str_glue("{year(Month)}_Q{quarter(Month)}")) %>%
group_by(Hosp, locCDC, locationType, location, Quarter) %>%
summarise(#n_obs = n(),
# Predicted = sum(Inf_pred),
Observed = sum(Inf_obs, na.rm = T),
.groups = "drop") %>%
pivot_wider(names_from = Quarter, values_from = Observed) %>%
gt() %>%
fmt_missing() %>%
tab_spanner_delim("_") %>%
tab_header(title="Quarterly observed CLABSIs at PRN, by unit") %>%
opt_stylize(6, color = "gray")| Quarterly observed CLABSIs at PRN, by unit | ||||||||||
| Hosp | locCDC | locationType | location | 2023 | 2024 | |||||
|---|---|---|---|---|---|---|---|---|---|---|
| Q1 | Q2 | Q3 | Q4 | Q1 | Q2 | Q3 | ||||
| PRN | CC:MS | CC | CCU | 1 | 2 | 1 | 4 | 1 | 1 | 0 |
| PRN | CC:MS | CC | ICU | 1 | 0 | 1 | 0 | 0 | 0 | 1 |
| PRN | MIXED:ALL | OTHER | 3E | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| PRN | STEP | STEP | 2WSD | — | — | — | — | 0 | 0 | 0 |
| PRN | WARD:LD_PP | WARD | WC | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| PRN | WARD:M | WARD | 2W | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| PRN | WARD:M | WARD | 3WPCU | 2 | 0 | 1 | 0 | 0 | 1 | 0 |
| PRN | WARD:M | WARD | 4E | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
| PRN | WARD:MS | WARD | 3SORTHO | 1 | 0 | 0 | 0 | 0 | 1 | 0 |
Eventually, I want to match the CHG departmental level data to the NHSN location level data, but that’s going to take quite a bit of work. So for now, I’m going to aggregate the observed & predicted HAIs at a hospital level, which I do in the next section
# This is where aggregated NHSN & CHG data will live temporarily
temp_NSHS <- list()
temp_CHG <- list()
# This is where lookup tables will live
temp_lookup <- list()
# This is where matched data (final products) will go
matched_data <- list()
## Aggregate across departments (by hospital), creating df's
## like the prior runs
temp_NSHS[["ByHosp"]][["CLABSI"]] <- CLABSI %>%
group_by(Hosp, Month) %>%
summarise(CLABSI_pred = sum(Inf_pred),
CLABSI_obs = sum(Inf_obs),
days_CL = sum(CL_days),
.groups = "drop")
temp_NSHS[["ByHosp"]][["CAUTI"]] <- CAUTI %>%
group_by(Hosp, Month) %>%
summarise(CAUTI_pred = sum(Inf_pred),
CAUTI_obs = sum(Inf_obs),
days_foley = sum(cath_days),
.groups = "drop")
temp_NSHS[["ByHosp"]][["MRSA"]] <- MRSA %>%
group_by(Hosp, Month) %>%
summarise(MRSA_pred = sum(Inf_pred),
MRSA_obs = sum(Inf_obs),
days_MRSA = sum(MRSA_days),
.groups = "drop")
## Match the CHG data to the NHSN abbreviations, and aggregate
## based off the NHSN abbreviation (this collapses CH/FMT into WVUH)
temp_CHG[["ByHosp"]] <- CHG_hosp %>%
# Join to the GS data to match keys
left_join(select(hospitals, NHSN_abbv, CHG_abbv),
by=c("Hospital"="CHG_abbv")) %>%
# Drop data if the `NHSN_abbv` is NA
filter(!is.na(NHSN_abbv)) %>%
# Aggregate data on NHSN abbreviation
group_by(NHSN_abbv, Month) %>%
summarise(Denominator =sum(Denominator , na.rm = T),
Numerator =sum(Numerator , na.rm = T),
.groups = "drop") %>%
rename(Hosp=NHSN_abbv, Date=Month)
## Create a table which will be the backbone of the matched
## dataset. It uses `crossing` to ensure all permutations
## of dates and hospitals are included
temp_lookup[["crossing"]] <- crossing(
# Vector with all of the hospitals included. Note that this uses
# the name format from NHSN (although the data comes from a CHG
# table, we converted the abbreviations to the NHSN format in the
# step above)
Hosp = temp_CHG[["ByHosp"]]$Hosp,
# This is a vector of all of the months included in the analysis
# across both the CHG dataset and the NHSN dataset. By using the
# crossing function, in ensures that we have rows for dates that
# may be missing ing one of the datasets (e.g. early days of the
# CHG dataset)
Date = c(temp_CHG[["ByHosp"]]$Date,
temp_NSHS[["ByHosp"]][["CLABSI"]]$Month,
temp_NSHS[["ByHosp"]][["CAUTI"]]$Month,
temp_NSHS[["ByHosp"]][["MRSA"]]$Month))
## Using the crossing df as a backbone, join the CHG and HAI data together
matched_data[["all_by_hosp"]] <- temp_lookup[["crossing"]] %>%
left_join(temp_NSHS[["ByHosp"]][["CLABSI"]], by = join_by(Hosp, Date==Month)) %>%
left_join(temp_NSHS[["ByHosp"]][["CAUTI"]], by = join_by(Hosp, Date==Month)) %>%
left_join(temp_NSHS[["ByHosp"]][["MRSA"]], by = join_by(Hosp, Date==Month)) %>%
left_join(temp_CHG[["ByHosp"]], by = join_by(Hosp, Date)) %>%
## Annotate the era
mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before",
Date > ymd(params$rollout_end) ~ "After",
TRUE ~ "During"),
Era = as_factor(Era))
## To annotate the names, we need to make minor adjustments
## to the data from the Google Sheet, namely recoding Ruby/FMT/CH
temp_lookup[["names_full"]] <- hospitals %>%
select(NHSN_abbv, name_short, hosp_type) %>%
# Recode the ones that are under WVUH
mutate(name_short = if_else(NHSN_abbv == "WVUH",
"Ruby & Fairmont",name_short)) %>%
unique()
matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
left_join(temp_lookup[["names_full"]], join_by(Hosp==NHSN_abbv))There are a few months where there were no central line days or foley days (mainly for St Francis). Since there can be no CLABSI or CAUTI during these time periods, I counted that data as missing (and thus it is ignored)
# Verify no infections observed when they don't have lines
testthat::expect_equal({matched_data[["all_by_hosp"]] %>%
filter(days_CL==0) %>%
pull(CLABSI_obs) %>%
max()}, 0)
testthat::expect_equal({matched_data[["all_by_hosp"]] %>%
filter(days_foley==0) %>%
pull(CAUTI_obs) %>%
max()}, 0)
matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs),
CAUTI_obs = if_else(days_foley==0, NA, CAUTI_obs)) The Critical Access Hospitals (CAHs) represent an important segment here, as they serve a distinct patient populations compared to larger hopsitals. From a modeling standpoint, they are also unique in that they have much lower rates of infections, and the predicted number of HAIs are calculated differently by NHSN.
## First make a list of the CAHs & small hospitals
temp_lookup[["CAHs"]] <- hospitals %>%
filter(hosp_type=="Critical access") %>%
pull(NHSN_abbv)
matched_data[["small_hospitals"]] <- c(temp_lookup[["CAHs"]], "WTZ")
## Pull out the small hospitals and aggregate them
temp_merging <- matched_data[["all_by_hosp"]] %>%
# Starting with the matched data from the last step, limit to the
# hospitals identified as small
filter(Hosp %in% matched_data[["small_hospitals"]]) %>%
# Reaggregate the data for these hospitals
group_by(Date) %>%
summarise(CLABSI_pred = sum(CLABSI_pred, na.rm = T),
CLABSI_obs = sum(CLABSI_obs, na.rm = T),
days_CL = sum(days_CL, na.rm = T),
CAUTI_pred = sum(CAUTI_pred, na.rm = T),
CAUTI_obs = sum(CAUTI_obs, na.rm = T),
days_foley = sum(days_foley, na.rm = T),
MRSA_pred = sum(MRSA_pred, na.rm = T),
MRSA_obs = sum(MRSA_obs, na.rm = T),
days_MRSA = sum(days_MRSA, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
Numerator = sum(Numerator, na.rm = T),
.groups = "drop") %>%
# Becuase implicit zeros may have been introduced my
# the `summarise` function, handle them here
mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs),
CAUTI_obs = if_else(days_foley==0, NA, CAUTI_obs)) %>%
# If both numerator & denominator are 0, assume that is
# from missing data
mutate(missing_CHG = Denominator==0 & Numerator == 0,
Denominator = if_else(missing_CHG, NA, Denominator),
Numerator = if_else(missing_CHG, NA, Numerator)) %>%
select(-missing_CHG) %>%
## Annotate the era
mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before",
Date > ymd(params$rollout_end) ~ "After",
TRUE ~ "During"),
Era = as_factor(Era)) %>%
# Reintroduce the names
mutate(Hosp = "Small",
name_short = "Small hospitals",
hosp_type = "CAH / small") %>%
relocate(Hosp)
## Now get the other (non-small) hospitals and bind to the
## aggregated data
matched_data[["small_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
filter(!Hosp %in% matched_data[["small_hospitals"]]) %>%
bind_rows(temp_merging) To accommodate this unique set of hospitals in the analysis, we will aggregate their monthly numbers together and treat them as one “hospital”. Although WTZ isn’t classified as a critical access hospital, it has a smaller capacity (bed size < 50) so we will include it with the CAHs. Specifically, we will treat BRN, BRX, GRMC, GMH, HRS, JAX, JMC, PVH, STJ, SMR, and WTZ as small hospitals.
## Clean up enviroment, do some final touch ups
rm(temp_merging, temp_CHG, temp_lookup, temp_NSHS,
CLABSI, CAUTI, MRSA)
matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
mutate(CHG_rate = Numerator / Denominator) %>%
relocate(CHG_rate, .before = Denominator) %>%
# Ignore data before & after
filter(Date > ymd(params$ignore_before)) %>%
filter(Date < ymd(params$ignore_after)) %>%
# Ignore STF
filter(Hosp!="STF")
matched_data[["small_by_hosp"]] <- matched_data[["small_by_hosp"]] %>%
mutate(CHG_rate = Numerator / Denominator) %>%
relocate(CHG_rate, .before = Denominator) %>%
# Ignore data before & after
filter(Date > ymd(params$ignore_before)) %>%
filter(Date < ymd(params$ignore_after)) %>%
# Ignore STF
filter(Hosp!="STF")To get a feel for what we are looking at, this shows the time span that is covered by the data I have available. We should have equal numbers before and after the implementation period. The datasets for HAIs (CLABSI, CAUTI, MRSA) come from NHSN and the CHG dataset is extracted from Epic documentation (via Enterprise Analytics’s Tableau dashboard)
matched_data$small_by_hosp %>%
group_by(Date, Era) %>%
summarise(CLABSI = sum(!is.na(CLABSI_obs),na.rm = T),
CAUTI = sum(!is.na(CAUTI_pred),na.rm = T),
MRSA = sum(!is.na(MRSA_pred),na.rm = T),
CHG = sum(!is.na(CHG_rate),na.rm = T),
n = n()) %>%
# group_by(Hosp, Date) %>%
pivot_longer(cols=c(CLABSI, CAUTI, MRSA, CHG),
names_to = "Dataset",
values_to = "n_obs") %>%
mutate(pct_obs = n_obs / n) %>%
filter(n_obs > 0) %>%
ggplot(aes(x=Date, y=Dataset)) +
geom_line() +
geom_point(aes(color=Era)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
theme_bw()temp_numbers <- list(
n_hosp = n_groups(group_by(matched_data$small_by_hosp, Hosp)),
CLABSI = list(
n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs))),
n_before = nrow(filter(matched_data$small_by_hosp,
!is.na(CLABSI_obs), Era=="Before")),
n_during = nrow(filter(matched_data$small_by_hosp,
!is.na(CLABSI_obs), Era=="During")),
n_after = nrow(filter(matched_data$small_by_hosp,
!is.na(CLABSI_obs), Era=="After"))
),
CAUTI = list(
n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs))),
n_before = nrow(filter(matched_data$small_by_hosp,
!is.na(CAUTI_obs), Era=="Before")),
n_during = nrow(filter(matched_data$small_by_hosp,
!is.na(CAUTI_obs), Era=="During")),
n_after = nrow(filter(matched_data$small_by_hosp,
!is.na(CAUTI_obs), Era=="After"))
),
MRSA = list(
n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs))),
n_before = nrow(filter(matched_data$small_by_hosp,
!is.na(MRSA_obs), Era=="Before")),
n_during = nrow(filter(matched_data$small_by_hosp,
!is.na(MRSA_obs), Era=="During")),
n_after = nrow(filter(matched_data$small_by_hosp,
!is.na(MRSA_obs), Era=="After"))
),
CHG_hosp = list(
n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate))),
start = {matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>% pull(Date) %>% min() %>%
strftime(format="%b %Y")},
end = {matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>% pull(Date) %>% max() %>%
strftime(format="%b %Y")},
n_before = nrow(filter(matched_data$small_by_hosp,
!is.na(CHG_rate), Era=="Before")),
n_during = nrow(filter(matched_data$small_by_hosp,
!is.na(CHG_rate), Era=="During")),
n_after = nrow(filter(matched_data$small_by_hosp,
!is.na(CHG_rate), Era=="After"))
),
CHG_dept = list(
n_obs = nrow(CHG_dept),
start = {CHG_dept %>% pull(Month) %>% min() %>%
strftime(format="%b %Y")},
end = {CHG_dept %>% pull(Month) %>% max() %>%
strftime(format="%b %Y")}
),
Dates = list(
before = list(
start = {matched_data$small_by_hosp %>%
filter(Era=="Before") %>% pull(Date) %>% min() %>%
strftime(format="%b %Y")},
end = {matched_data$small_by_hosp %>%
filter(Era=="Before") %>% pull(Date) %>% max() %>%
strftime(format="%b %Y")},
len = {matched_data$small_by_hosp %>%
filter(Era=="Before") %>% pull(Date) %>%
unique() %>% length()}
),
during = list(
start = {matched_data$small_by_hosp %>%
filter(Era=="During") %>% pull(Date) %>% min() %>%
strftime(format="%b %Y")},
end = {matched_data$small_by_hosp %>%
filter(Era=="During") %>% pull(Date) %>% max() %>%
strftime(format="%b %Y")},
len = {matched_data$small_by_hosp %>%
filter(Era=="During") %>% pull(Date) %>%
unique() %>% length()}
),
after = list(
start = {matched_data$small_by_hosp %>%
filter(Era=="After") %>% pull(Date) %>% min() %>%
strftime(format="%b %Y")},
end = {matched_data$small_by_hosp %>%
filter(Era=="After") %>% pull(Date) %>% max() %>%
strftime(format="%b %Y")},
len = {matched_data$small_by_hosp %>%
filter(Era=="After") %>% pull(Date) %>%
unique() %>% length()}
)
)
)The data generally spans three era, the before era (Aug 2022 to Jul 2023; 12 months), the implementation era (Aug 2023 to Sep 2023; 2 months), and the after era (Oct 2023 to Sep 2024; 12 months).
Across the 10 hospitals included (keeping in mind that we aggregated 11 of the smaller hospitals into one “small hospital”), there are 260 observations for CLABSIs (120 in the before era, 20 in the implementation period, and 120 in the after era), 260 observations for CAUTIs (120 in the before era, 20 in the implementation period, and 120 in the after era), and 258 observations for LabID MRSA (120 in the before era, 20 in the implementation period, and 118 in the after era).
The CHG data is a little more complicated. I have one dataset that shows CHG compliance at a hospital level. The hospital level CHG data has 114 observations from Oct 2023 to Sep 2024 (0 in the before era, 0 in the implementation period, and 114 in the after era). I also have CHG data at a departmental level that has 1255 observations but this data spans from Jan 2024 observations Nov 2024 (meaning we are missing some of the early data). We will explore both, but keep in mind that they are looking at slightly different time periods.
To start, we will look at the number of HAIs that occurred monthly by each hospital. The colored bars represent the observed HAIs (the very small ones at the bottom of the Y-axis are zero infections), and the horizontal black solid line are the predicted number of infections as calculated by the NHSN. There is also a grey dashed vertical line that marks the implementation phase of the system wide CHG roll out.
The next two figures show the rates of CLABSIs (per thousand central line days). The first figure (below) shows the overall rate across the entire system
matched_data$small_by_hosp %>%
group_by(Date, Era) %>%
summarise(obs_rate = 1000 * sum(CLABSI_obs) / sum(days_CL),
pred_rate = 1000 * sum(CLABSI_pred) / sum(days_CL)) %>%
ggplot(aes(x=Date, y=obs_rate)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
sjPlot::scale_fill_sjplot(palette = "circus") +
# sjPlot::scale_color_sjplot(palette = "circus") +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="CLABSI Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="CLABSI Rate\n(per 1,000 CL days)",
x="Month",
fill="",
caption = "CL = central line") +
theme_bw()This figure breaks it down by hospital, still showing the rate (per thousand central line days)
matched_data$small_by_hosp %>%
mutate(obs_rate = 1000 * CLABSI_obs / days_CL,
pred_rate = 1000 * CLABSI_pred / days_CL) %>%
ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(color=hosp_type),alpha=0.7) +
facet_wrap("name_short", ncol=3) +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="CLABSI Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="CLABSI Rate\n(per 1,000 CL days)",
x="Month",
fill="Hospital\nType",
caption = "CL = central line\nCAH = Critical Access Hospital") +
theme_bw()This section follows the same format as before, but with CAUTIs. The first two figures show the rates of CAUTIs (per thousand foley days), across the entire system and by each hospital (respectively).
matched_data$small_by_hosp %>%
group_by(Date, Era) %>%
summarise(obs_rate = 1000 * sum(CAUTI_obs) / sum(days_foley),
pred_rate = 1000 * sum(CAUTI_pred) / sum(days_foley)) %>%
ggplot(aes(x=Date, y=obs_rate)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
sjPlot::scale_fill_sjplot(palette = "circus") +
# sjPlot::scale_color_sjplot(palette = "circus") +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="CAUTI Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="CAUTI Rate\n(per 1,000 foley days)",
x="Month",
fill="") +
theme_bw()Berkeley is quite impressive with their change in the rates of CAUTIs (before and after)
matched_data$small_by_hosp %>%
mutate(obs_rate = 1000 * CAUTI_obs / days_foley,
pred_rate = 1000 * CAUTI_pred / days_foley) %>%
ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(color=hosp_type),alpha=0.7) +
facet_wrap("name_short", ncol=3) +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="CAUTI Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="CAUTI Rate\n(per 1,000 foley days)",
x="Month",
fill="Hospital\nType",
caption = "CAH = Critical Access Hospital") +
theme_bw()This section follows the same format as before, but with Lab ID MRSA. The first two figures show the rates of MRSA (per 10,000 patient days), across the entire system and by each hospital (respectively).
matched_data$small_by_hosp %>%
group_by(Date, Era) %>%
summarise(obs_rate = 10000 * sum(MRSA_obs) / sum(days_MRSA),
pred_rate = 10000 * sum(MRSA_pred) / sum(days_MRSA)) %>%
ggplot(aes(x=Date, y=obs_rate)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
sjPlot::scale_fill_sjplot(palette = "circus") +
# sjPlot::scale_color_sjplot(palette = "circus") +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="MRSA Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="MRSA Rate\n(per 10,000 patient days)",
x="Month",
fill="") +
theme_bw()Again, the rate by each hospital
matched_data$small_by_hosp %>%
mutate(obs_rate = 10000 * MRSA_obs / days_MRSA,
pred_rate = 10000 * MRSA_pred / days_MRSA) %>%
ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
geom_vline(xintercept = ymd(params$rollout_start),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_vline(xintercept = ymd(params$rollout_end),
alpha=0.5, linetype = "dashed",
color="grey30") +
geom_line(aes(y=pred_rate)) +
geom_col(aes(color=hosp_type),alpha=0.7) +
facet_wrap("name_short", ncol=3) +
guides(color="none") +
scale_x_date(date_labels = "%b\n'%y") +
labs(title="MRSA Rates: Observed vs Predicted",
subtitle="Black line solid is predicted number from NHSN",
y="MRSA Rate\n(per 10,000 patient days)",
x="Month",
fill="Hospital\nType",
caption = "CAH = Critical Access Hospital") +
theme_bw()This table shows each hospital’s average2 rates of healthcare associated infections broken down before and after CHG bathing was implemented. The column labeled with the percent sign (%) is the relative change after implementing CHG bathing (\(\frac{After - Before}{Before}\)).
matched_data$small_by_hosp %>%
filter(Era!="During") %>%
group_by(name_short, Era) %>%
summarise(CAUTI_mean = mean(10000 * CAUTI_obs / days_foley, na.rm = T),
CAUTI_sd = sd(10000 * CAUTI_obs / days_foley, na.rm = T),
# CAUTI_n.mean = mean(CAUTI_obs, na.rm = T),
# CAUTI_n.sd = sd(CAUTI_obs, na.rm = T),
# CLABSI_n.mean = mean(CLABSI_obs, na.rm = T),
# CLABSI_n.sd = sd(CLABSI_obs, na.rm = T),
CLABSI_mean = mean(10000 * CLABSI_obs / days_CL, na.rm = T),
CLABSI_sd = sd(10000 * CLABSI_obs / days_CL, na.rm = T),
MRSA_mean = mean(100000 * MRSA_obs / days_MRSA, na.rm = T),
MRSA_sd = sd(100000 * MRSA_obs / days_MRSA, na.rm = T),
.groups = "drop") %>%
pivot_longer(cols=-c(name_short, Era),
names_to = c("HAI", ".value"),
names_pattern = "(.*)_(.*)") %>%
pivot_wider(names_from = Era,
values_from = c(mean, sd),
names_glue = "{Era}_{.value}",
names_vary = "slowest") %>%
pivot_wider(names_from = HAI,
values_from = -c(HAI, name_short),
names_glue = "{HAI}.{.value}",
names_vary = "slowest") %>%
# Add the percent change
mutate(CAUTI.pct = (CAUTI.After_mean - CAUTI.Before_mean)/CAUTI.Before_mean) %>%
relocate(CAUTI.pct, .after = CAUTI.After_sd) %>%
mutate(CLABSI.pct = (CLABSI.After_mean - CLABSI.Before_mean)/CLABSI.Before_mean) %>%
relocate(CLABSI.pct, .after = CLABSI.After_sd) %>%
mutate(MRSA.pct = (MRSA.After_mean - MRSA.Before_mean)/MRSA.Before_mean) %>%
relocate(MRSA.pct, .after = MRSA.After_sd) %>%
mutate(CAUTI.pct = if_else(!is.finite(CAUTI.pct), NA, CAUTI.pct),
CLABSI.pct = if_else(!is.finite(CLABSI.pct), NA, CLABSI.pct),
MRSA.pct = if_else(!is.finite(MRSA.pct), NA, MRSA.pct)) %>%
gt(rowname_col = "name_short") %>%
cols_merge_uncert(CLABSI.Before_mean, CLABSI.Before_sd) %>%
cols_merge_uncert(CLABSI.After_mean, CLABSI.After_sd) %>%
cols_merge_uncert(CAUTI.Before_mean, CAUTI.Before_sd) %>%
cols_merge_uncert(CAUTI.After_mean, CAUTI.After_sd) %>%
cols_merge_uncert(MRSA.Before_mean, MRSA.Before_sd) %>%
cols_merge_uncert(MRSA.After_mean, MRSA.After_sd) %>%
tab_spanner_delim(delim = ".") %>%
gt::cols_label(CAUTI.Before_mean = "Before",
CAUTI.After_mean = "After",
CLABSI.Before_mean = "Before",
CLABSI.After_mean = "After",
MRSA.Before_mean = "Before",
MRSA.After_mean = "After",
CAUTI.pct = "%",
CLABSI.pct = "%",
MRSA.pct = "%") %>%
fmt_number(decimals = 1) %>%
fmt_percent(ends_with("pct"), decimals = 0, force_sign =T) %>%
fmt_missing() %>%
# data_color(
# columns = CAUTI.pct,
# target_columns = CAUTI.pct,
# method = "numeric",
# palette = "viridis",
# domain = c(-1, 1)
# ) %>%
# fmt_number() %>%
# data_color(
# columns = CLABSI.pct,
# target_columns = CLABSI.pct,
# method = "numeric",
# palette = "viridis",
# domain = c(-1, 1)
# ) %>%
# cols_hide(CAUTIdiff)
data_color(
columns = CAUTI.pct,
reverse = T,
na_color = "white",
domain = c(-1,1),
palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
) %>%
data_color(
columns = CLABSI.pct,
reverse = T,
na_color = "white",
domain = c(-1,1),
palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
) %>%
data_color(
columns = MRSA.pct,
reverse = T,
na_color = "white",
domain = c(-2.5,2.5),
palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
) %>%
# gtExtras::gt_hulk_col_numeric(
# columns = CAUTI.pct,
# reverse = T,
# domain = c(-1,1),
# palette = "RdYlBu",
# # pal_type = "continuous"
# ) %>%
# gtExtras::gt_hulk_col_numeric(
# columns = CLABSI.pct,
# reverse = T,
# domain = c(-1,1),
# palette = "RdYlBu",
# # pal_type = "continuous"
# ) %>%
# gtExtras::gt_hulk_col_numeric(
# columns = MRSA.pct,
# reverse = T,
# domain = c(-2.5,2.5),
# palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
# ) %>%
tab_header(
title = md('Rates of HAIs before & after CHG bathing'),
subtitle = "Expressed as mean +/- SD" ) %>%
opt_stylize(2, color="gray") %>%
tab_footnote(
footnote = "Per 10,000 foley/central line days",
locations = cells_column_spanners(spanners = contains("CLABSI")
)) %>%
tab_footnote(
footnote = "Per 10,000 foley/central line days",
locations = cells_column_spanners(spanners = contains("CAUTI")
)) %>%
tab_footnote(
footnote = "Per 100,000 patient days",
locations = cells_column_spanners(spanners = contains("MRSA")
)) | Rates of HAIs before & after CHG bathing | |||||||||
| Expressed as mean +/- SD | |||||||||
| CAUTI1 | CLABSI1 | MRSA2 | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Before | After | % | Before | After | % | Before | After | % | |
| Berkeley | 50.0 ± 27.1 | 7.1 ± 17.3 | −86% | 1.9 ± 6.7 | 0.0 ± 0.0 | −100% | 4.4 ± 10.3 | 4.5 ± 10.5 | +3% |
| Camden Clark | 6.8 ± 8.4 | 5.0 ± 7.7 | −27% | 0.0 ± 0.0 | 6.7 ± 9.2 | — | 1.8 ± 6.1 | 2.9 ± 6.7 | +64% |
| Princeton | 5.7 ± 7.5 | 4.6 ± 6.9 | −19% | 17.9 ± 17.8 | 15.6 ± 12.6 | −12% | 11.6 ± 19.0 | 7.3 ± 13.3 | −37% |
| Reynolds | 0.0 ± 0.0 | 3.9 ± 13.4 | — | 0.0 ± 0.0 | 0.0 ± 0.0 | — | 0.0 ± 0.0 | 0.0 ± 0.0 | — |
| Ruby & Fairmont | 9.2 ± 7.3 | 13.0 ± 8.2 | +41% | 10.9 ± 6.0 | 8.1 ± 5.7 | −26% | 8.5 ± 8.1 | 3.6 ± 3.5 | −58% |
| Small hospitals | 12.9 ± 14.0 | 3.5 ± 8.6 | −73% | 5.0 ± 11.9 | 5.5 ± 12.8 | +9% | 1.9 ± 6.7 | 4.1 ± 9.5 | +110% |
| Thomas | 7.8 ± 8.2 | 9.3 ± 9.2 | +20% | 15.2 ± 12.1 | 3.0 ± 5.5 | −80% | 2.1 ± 7.2 | 6.6 ± 10.6 | +215% |
| UHC | 11.0 ± 8.1 | 10.0 ± 9.5 | −9% | 7.0 ± 8.3 | 3.1 ± 6.0 | −56% | 2.9 ± 6.9 | 1.4 ± 5.0 | −51% |
| Uniontown | 7.4 ± 8.8 | 5.6 ± 8.2 | −25% | 23.8 ± 23.2 | 7.1 ± 18.0 | −70% | 12.3 ± 18.2 | 10.9 ± 16.1 | −12% |
| Wheeling | 2.6 ± 5.0 | 3.7 ± 7.4 | +40% | 5.1 ± 9.5 | 3.8 ± 7.3 | −25% | 4.0 ± 9.4 | 0.0 ± 0.0 | −100% |
| 1 Per 10,000 foley/central line days | |||||||||
| 2 Per 100,000 patient days | |||||||||
Now we look at CHG compliance. We will break it apart various ways (by time, by hospital, etc) so there there is some redundancy across these subsections
The first figure shows compliance trends over time, beginning at the end of the rollout period (Oct 2023). The overall system wide compliance is shown as the bold solid line and the smaller black lines show the trends for each of the hospitals
# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
group_by(Date) %>%
summarise(Numerator = sum(Numerator, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
CHG_rate = Numerator / Denominator)
matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
ggplot(aes(x=Date, y=CHG_rate)) +
ggrepel::geom_label_repel(aes(label=name_short),
nudge_y = -.05,
nudge_x = 25,
arrow = arrow(length = unit(0.02, "npc")),
data=filter(matched_data$small_by_hosp,
Date==ymd("2024-04-15") & Hosp=="BMC")) +
ggrepel::geom_label_repel(aes(label=name_short),
nudge_y = +.05,
nudge_x = -25,
arrow = arrow(length = unit(0.02, "npc")),
data=filter(matched_data$small_by_hosp,
Date==ymd("2024-03-15") & Hosp=="WHL")) +
# geom_point(aes(size=cut_grp), alpha=.5) +
geom_line(aes(group=Hosp, linetype=hosp_type), alpha=0.5) +
geom_smooth(se = F, span=.5,
data=df_temp) +
# stat_smooth(geom="line", alpha=0.75) +
# geom_line(alpha=0.5) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
scale_linetype_manual(values=c("Academic"="twodash",
"CAH / small"="dotted",
"Community" = "solid")) +
theme_bw() +
labs(title = "System wide CHG compliance vs time",
y = "CHG compliance",
linetype = "Hospital\nType")
rm(df_temp)
# matched_data$small_by_hosp %>%
# filter(!is.na(CHG_rate)) %>%
# filter(CHG_rate!=0) %>%
# filter(Hosp %in% c("BMC", "WHL", "UTN")) %>%
# mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
# ggplot(aes(x=Date, y=CHG_rate)) +
# geom_line(aes(group=Hosp, color=Hosp), alpha=0.5)A few patterns emerge at first glance:
Over time, compliance seems to go up but only to a point before it plateaus
This is true for community hospitals like Wheeling (that reached a high plateau early) and Berkeley (that reached a lower plateau later on)
It also looks to be the case for CAH and our academic hospital (Ruby)
Different hospitals plateau at different levels
By one year after roll out, the community and CAH generally ended at or above the system wide goal (80%) but the largest hospital, our academic hospital did not reach it’s goal and brings down the average for the entire system
For further details, below is an interactive figure that shows each hospital’s trend
# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
group_by(Date) %>%
summarise(Numerator = sum(Numerator, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
CHG_rate = Numerator / Denominator)
x <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) +
geom_line(aes(color=Hosp), alpha=0.5) +
geom_smooth(se = F, span=.5, color="black",
data=df_temp) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
theme_bw() +
labs(title = "Hospital's CHG compliance vs time",
y = "CHG compliance")
plotly::ggplotly(x)rm(df_temp, x)If you want to dynamically explore the data yourself, in the interactive figure below you can:
Hover over points to view the actual numbers at each point, including the exact compliance rate, date, and denominator (number of patient-days eligible for CHG bathing)
Clicking on each hospital’s name/abbreviation in the figure’s legend will show/hide it from view (double clicking will isolate that particular hospital). This can be helpful for comparing specific hospitals
# matched_data$all_by_hosp %>%
# filter(!is.na(CHG_rate)) %>%
# ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) +
# geom_line() + facet_wrap("Hosp")
#
# matched_data$small_by_hosp %>%
# # filter(!is.na(CHG_rate)) %>%
# filter(CHG_rate!=0) %>%
# ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) +
# # geom_smooth(se = F, span=.5, alpha=0.25) +
# stat_smooth(geom="line", span=.5, alpha=0.5) +
# # geom_line(alpha=0.5) +
# scale_y_continuous(limits = c(NA, 1), labels = scales::percent)
matched_data$small_by_hosp %>%
# filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=Hosp)) +
# geom_smooth(se = F, span=.5, alpha=0.25) +
# geom_point(aes(linetype=hosp_type)) +
stat_smooth(geom="line", alpha=0.75) +
# geom_line(alpha=0.5) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
theme_bw()Critical access hospitals3 are of particular interest. Recall Figure 8 (slightly modified below) which showed an apparent dip in compliance early after the rollout period, then sustained increased compliance until it plateaus.
# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
group_by(Date) %>%
summarise(Numerator = sum(Numerator, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
CHG_rate = Numerator / Denominator)
matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
# filter(Hosp=="Small") %>%
mutate(grp = if_else(Hosp=="Small", "CAH / Small", "All others")) %>%
ggplot(aes(x=Date, y=CHG_rate)) +
geom_line(aes(group=Hosp, color=hosp_type), alpha=0.7) +
geom_smooth(se = F, span=.5, color="black",
data=df_temp) +
# stat_smooth(geom="line", alpha=0.75) +
# geom_line(alpha=0.5) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
scale_color_manual(values=c("CAH / small"="red",
"Community"="grey80",
"Academic"="lightblue")) +
theme_bw() +
labs(y = "CHG compliance",
color = "")
rm(df_temp)To dive into this a little more, I’ve de-aggregated the “CAH / Small hospital” group in the figure below. Similar to Figure 9 the thick black line is the system wide average, but to make it (slightly less) busy I’ve omitted the community and academic hospitals. Because the compliance rate for the “CAH / Small hospital” group in the main analysis is a weighted average4 of these various hospitals, the transparency of each line is reflective of that’s hospital’s average number of opportunities for CHG bathing
# Pooled aveages
df_avg_all <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
group_by(Date) %>%
summarise(Numerator = sum(Numerator, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
CHG_rate = Numerator / Denominator)
df_avg_small <- matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
filter(Hosp %in% matched_data$small_hospitals) %>%
group_by(Date) %>%
summarise(Numerator = sum(Numerator, na.rm = T),
Denominator = sum(Denominator, na.rm = T),
CHG_rate = Numerator / Denominator)
x <- matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
filter(Hosp %in% matched_data$small_hospitals) %>%
group_by(Hosp) %>%
mutate(avg_denom = mean(Denominator, na.rm = T)) %>%
ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) +
geom_line(aes(color=Hosp, alpha=avg_denom)) +
# geom_smooth(se = F, span=.5, color="grey40", linetype="dotted",
# data=df_avg_small) +
geom_smooth(se = F, span=.5, color="black",
data=df_avg_all) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
theme_bw() +
guides(alpha=F) +
labs(title = "CHG compliance (CAH only) vs time",
y = "CHG compliance",
color="CAH")
plotly::ggplotly(x)rm(df_temp, df_avg_small, df_avg_all)The takeaways I have here are:
The three CAH hospitals that contribute the most in terms of CHG opportunities are GRMC, PVH, SMR, and GMH
Among these four hospitals, they all have improved their compliance over time and are at or above the system wide average
Notably, Grant (GMH) didn’t report data for 10/2023 (but did start reporting in 11/2023) and started out with the lowest compliance when they did report data
BRX and WTZ struggle a little more than the rest of the group, especially compared to JAX (despite all three having comparable numbers of CHG opportunities)
JMC actually has been going down over time, a trend not seen for any other hospital
BRN and HRS have very few opportunities (see avg_denom when you hover over their lines) but generally do a great/excellent job when they do have opportunities (Harrison is actually a straight line at 100%)
Next we’ll look at how compliance varies by the number of CHG bathing opportunities (the number of patient days that are eligble for CHG bathing) for each hospital. There are two reasons to explore this relationship
As mentioned above, the rate can be very sensitive to one missed opportunity, if the denominator is small. This is particularly true for the smaller and critical access hospitals
There might be some a priori reasons to expect that the amount opportunities could influence compliance
One one hand, it’s possible that facilities who see fewer patients who are eligible for CHG bathing might not have CHG bathing ingrained into their workflow (it may not be front of mind for the healthcare providers)
Conversely, more opportunities for bathing are also more opportunities to miss bathing. Are there issues with scaling?
When I say “CHG bathing opportunities”, I’m refering to the denominator in the Tableau dashboard. So this section is looking at “CHG compliance rate” (\(Rate = \frac{Numerator}{Denominator} \times 100\)) vs \(Denominator\) .
The table below displays the same information from Figure 10 (looking at CAH only). The table is sorted by hospital, starting with those who have the fewest opportunities and ending with those who have the most opportunities. By scanning from left to right (’23 Q4 to ’24 Q3), you can see compliance went up over time (the colors move from purple to green), which is what we saw in Figure 10. Early on (2023 Q4 and 2024 Q1) it appears that hospitals with more opportunities did worse than those with fewer opportunities, but by then end (Q2 & Q3 of 2024) that trend seems to go away.
# matched_data$all_by_hosp %>%
# filter(!is.na(CHG_rate)) %>%
# filter(CHG_rate!=0) %>%
# filter(Hosp %in% matched_data$small_hospitals) %>%
# group_by(Hosp) %>%
# summarise(denom = mean(Denominator, na.rm = T),
# denom_avg = mean(Denominator, na.rm = T),
# denom_sd = sd(Denominator, na.rm = T),
#
# CHG_avg = mean(CHG_rate, na.rm = T),
# CHG_sd = sd(CHG_rate, na.rm = T),
# # denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
# # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T),
# # denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
#
# ) %>%
# arrange(denom) %>%
# print(n=100) %>%
# gt() %>%
# cols_hide(denom) %>%
# cols_merge_uncert(denom_avg, denom_sd) %>%
# cols_merge_uncert(CHG_avg, CHG_sd) %>%
# fmt_number() %>%
# fmt_percent(starts_with("CHG"), decimals = 1) %>%
# gtExtras::gt_hulk_col_numeric(
# columns = CHG_avg,
# reverse = T,
# # domain = c(-1,1),
# # palette = "BuPu",
# # pal_type = "continuous"
# ) %>%
# cols_label(denom_avg="CHG opportunities",
# CHG_avg="CHG compliance",
# Hosp="Hospital") %>%
# tab_header(
# title = md('Average CHG opportunities & compliance for CAH'),
# subtitle = "Expressed as mean +/- SD" ) %>%
# opt_stylize(2, color="gray")
t1 <- matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
filter(Hosp %in% matched_data$small_hospitals) %>%
group_by(Hosp, name_short) %>%
summarise(Overall.denom_avg = mean(Denominator, na.rm = T),
Overall.denom_sd = sd(Denominator, na.rm = T),
Overall.CHG_avg = mean(CHG_rate, na.rm = T),
Overall.CHG_sd = sd(CHG_rate, na.rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
# denom_avg = qwraps2::mean_sd(Denominator, na_rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
) %>%
arrange(Overall.denom_avg)
t2 <- matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
filter(Hosp %in% matched_data$small_hospitals) %>%
mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
q = str_remove(q, "20"),
q = str_glue("'{q}")) %>%
# count(Date, q) %>%
group_by(Hosp, q) %>%
summarise(denom_avg = mean(Denominator, na.rm = T),
denom_sd = sd(Denominator, na.rm = T),
CHG_avg = mean(CHG_rate, na.rm = T),
CHG_sd = sd(CHG_rate, na.rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
# denom_avg = qwraps2::mean_sd(Denominator, na_rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
) %>%
pivot_wider(names_from = q,
values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd),
names_vary = "slowest",
names_glue = "{q}.{.value}")
inner_join(t1, t2, by="Hosp") %>%
mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>%
select(-name_short) %>%
ungroup() %>%
gt(rowname_col = "Hosp") %>%
tab_spanner_delim(delim = ".") %>%
fmt_missing() %>%
fmt_number(decimals = 1) %>%
fmt_percent(contains("CHG"), decimals = 0) %>%
cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>%
cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>%
cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>%
cols_merge_uncert(`'23 Q4.CHG_avg`, `'23 Q4.CHG_sd`) %>%
cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>%
cols_merge_uncert(`'24 Q1.CHG_avg`, `'24 Q1.CHG_sd`) %>%
cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>%
cols_merge_uncert(`'24 Q2.CHG_avg`, `'24 Q2.CHG_sd`) %>%
cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>%
cols_merge_uncert(`'24 Q3.CHG_avg`, `'24 Q3.CHG_sd`) %>%
cols_label(Overall.denom_avg ="Eligible Days",
`'23 Q4.denom_avg`="Eligible Days",
`'24 Q1.denom_avg`="Eligible Days",
`'24 Q2.denom_avg`="Eligible Days",
`'24 Q3.denom_avg`="Eligible Days",
Overall.CHG_avg ="Compliance",
`'23 Q4.CHG_avg`="Compliance",
`'24 Q1.CHG_avg`="Compliance",
`'24 Q2.CHG_avg`="Compliance",
`'24 Q3.CHG_avg`="Compliance",
Hosp="Hospital") %>%
# gtExtras::gt_hulk_col_numeric(columns = contains("CHG"),domain = c(0,1)) %>%
data_color(
columns = contains("CHG"),
palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
na_color = "white",
domain = c(0,1)) %>%
data_color(
columns = Overall.CHG_avg,
method = "numeric",
palette = "PuOr",
# palette = "YlGnBu",
reverse = T
) %>%
tab_header(
title = md('**Small/CAH:** Average CHG opportunities & compliance'),
subtitle = "Expressed as mean +/- SD" ) %>%
opt_stylize(2, color="gray") | Small/CAH: Average CHG opportunities & compliance | ||||||||||
| Expressed as mean +/- SD | ||||||||||
| Overall | '23 Q4 | '24 Q1 | '24 Q2 | '24 Q3 | ||||||
|---|---|---|---|---|---|---|---|---|---|---|
| Eligible Days | Compliance | Eligible Days | Compliance | Eligible Days | Compliance | Eligible Days | Compliance | Eligible Days | Compliance | |
| BRN (Barnesville) | 2.3 ± 1.7 | 92% ± 14% | 3.3 ± 2.1 | 93% ± 12% | 2.0 ± 1.4 | 83% ± 24% | 1.0 ± 0.0 | 100% ± 0% | — | — |
| HRS (Harrison) | 5.1 ± 6.1 | 100% ± 0% | 1.5 ± 0.7 | 100% ± 0% | 10.0 ± 11.3 | 100% ± 0% | 8.0 | 100% | 2.5 ± 0.7 | 100% ± 0% |
| WTZ (Wetzel) | 11.0 ± 8.6 | 84% ± 18% | 2.5 ± 2.1 | 100% ± 0% | 14.0 ± 1.7 | 77% ± 21% | 18.0 ± 9.6 | 86% ± 10% | 6.7 ± 9.0 | 79% ± 26% |
| BRX (Braxton) | 11.5 ± 8.6 | 71% ± 23% | 20.3 ± 13.3 | 80% ± 10% | 6.7 ± 3.5 | 51% ± 34% | 10.5 ± 4.9 | 71% ± 20% | 8.3 ± 1.5 | 82% ± 16% |
| JAX (Jackson) | 16.2 ± 9.8 | 95% ± 7% | 13.0 ± 7.1 | 91% ± 5% | 19.7 ± 13.6 | 93% ± 12% | 16.3 ± 11.8 | 100% ± 0% | 14.7 ± 10.0 | 95% ± 8% |
| JMC (Jefferson) | 27.3 ± 17.5 | 73% ± 13% | 42.0 ± 17.1 | 84% ± 7% | 30.0 ± 21.8 | 77% ± 6% | 25.0 ± 12.5 | 73% ± 17% | 12.3 ± 8.7 | 58% ± 3% |
| STJ (St Joes) | 27.8 ± 14.1 | 66% ± 18% | 24.7 ± 10.3 | 50% ± 17% | 29.0 ± 26.5 | 60% ± 20% | 31.7 ± 15.6 | 80% ± 12% | 26.0 ± 2.6 | 73% ± 3% |
| SMR (Summersville) | 50.9 ± 28.7 | 73% ± 14% | 58.7 ± 14.5 | 63% ± 9% | 76.7 ± 45.9 | 60% ± 8% | 29.3 ± 2.9 | 83% ± 14% | 39.0 ± 14.0 | 84% ± 1% |
| PVH (Potomac Valley) | 66.8 ± 18.7 | 77% ± 10% | 73.0 ± 33.8 | 69% ± 8% | 76.7 ± 15.1 | 71% ± 6% | 60.0 ± 11.5 | 80% ± 3% | 57.3 ± 4.0 | 90% ± 7% |
| GMH (Grant) | 95.6 ± 35.7 | 53% ± 37% | 119.0 ± 29.7 | 7% ± 1% | 128.5 ± 37.5 | 19% ± 12% | 65.3 ± 38.4 | 77% ± 9% | 88.3 ± 5.9 | 84% ± 17% |
| GRMC (Garrett) | 144.3 ± 39.0 | 72% ± 22% | 128.3 ± 23.2 | 44% ± 6% | 163.3 ± 64.7 | 66% ± 11% | 131.3 ± 24.6 | 83% ± 15% | 154.3 ± 41.2 | 97% ± 1% |
rm(t1, t2)
Across all hospitals the the system there are maybe hints at decreasing compliance as the number of opportunities increase, shown by the black trend lines below. However, the variation in CHG opportunities are mostly clustered by hospital along the X-axis, so perhaps it’s really the hospital that is accounting for the differences.
# matched_data$small_by_hosp %>%
# ggplot(aes(x=Denominator, y=CHG_rate)) +
# geom_point() + geom_smooth() +
# facet_wrap("hosp_type", scales = "free")
x <- matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic",
hosp_type=="Community" ~ "Community",
TRUE ~ "CAH / small")) %>%
mutate(Month = strftime(Date, format="%b %Y")) %>%
ggplot(aes(x=Denominator, y=CHG_rate)) +
geom_smooth(span=.9, color="black", se=F) +
geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) +
facet_wrap("hosp_type", scales = "free_x") +
guides(shape=F) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
theme_bw() +
labs(title = "CHG compliance vs CHG opportunities",
y = "CHG compliance",
x="CHG opportunities")
plotly::ggplotly(x)rm(x)Importantly, Figure 11 above doesn’t account for time (though you can hover over each point to view the month). If we stratify by quarter, any trend seems to go away, again implying that if there are differences in compliance by opportunities, it’s those differences are probably occuring at a hospital level (i.e. hospital is a confounding factor for this relationship)
matched_data$all_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic",
hosp_type=="Community" ~ "Community",
TRUE ~ "CAH / small")) %>%
mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
q = str_remove(q, "20"),
q = str_glue("'{q}")) %>%
mutate(Month = strftime(Date, format="%b %Y")) %>%
ggplot(aes(x=Denominator, y=CHG_rate)) +
geom_smooth(span=.9, color="black", se=F, alpha=.5) +
geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) +
facet_grid(q ~ hosp_type, scales = "free_x") +
guides(shape=F, color=F) +
scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
theme_bw() +
labs(title = "CHG compliance vs CHG opportunities",
y = "CHG compliance",
x="CHG opportunities")We will start by looking at a table similar to Table 2, but now for all hospitals5. As we saw graphically in Figure 8 (in the Over time section), compliance rates increase over time until they plateau. Most striking is WVUH, with it’s high number of opportunities and plateau at such a low number compared to all of the other facilities.
t1 <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
group_by(Hosp, name_short) %>%
summarise(Overall.denom_avg = mean(Denominator, na.rm = T),
Overall.denom_sd = sd(Denominator, na.rm = T),
Overall.CHG_avg = mean(CHG_rate, na.rm = T),
Overall.CHG_sd = sd(CHG_rate, na.rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
# denom_avg = qwraps2::mean_sd(Denominator, na_rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
) %>%
arrange(Overall.denom_avg)
t2 <- matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
q = str_remove(q, "20"),
q = str_glue("'{q}")) %>%
# count(Date, q) %>%
group_by(Hosp, q) %>%
summarise(denom_avg = mean(Denominator, na.rm = T),
denom_sd = sd(Denominator, na.rm = T),
CHG_avg = mean(CHG_rate, na.rm = T),
CHG_sd = sd(CHG_rate, na.rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
# denom_avg = qwraps2::mean_sd(Denominator, na_rm = T),
# denom_med = qwraps2::median_iqr(Denominator, na_rm = T),
) %>%
pivot_wider(names_from = q,
values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd),
names_vary = "slowest",
names_glue = "{q}.{.value}")
inner_join(t1, t2, by="Hosp") %>%
mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>%
select(-name_short) %>%
ungroup() %>%
select(-contains("denom")) %>%
gt(rowname_col = "Hosp") %>%
# tab_spanner_delim(delim = ".") %>%
fmt_missing() %>%
fmt_number(decimals = 1) %>%
fmt_percent(contains("CHG"), decimals = 0) %>%
# cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>%
cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>%
# cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>%
cols_merge_uncert(`'23 Q4.CHG_avg`, `'23 Q4.CHG_sd`) %>%
# cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>%
cols_merge_uncert(`'24 Q1.CHG_avg`, `'24 Q1.CHG_sd`) %>%
# cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>%
cols_merge_uncert(`'24 Q2.CHG_avg`, `'24 Q2.CHG_sd`) %>%
# cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>%
cols_merge_uncert(`'24 Q3.CHG_avg`, `'24 Q3.CHG_sd`) %>%
cols_label(Overall.CHG_avg ="Overall",
`'23 Q4.CHG_avg`="2023 Q4",
`'24 Q1.CHG_avg`="2024 Q1",
`'24 Q2.CHG_avg`="2024 Q2",
`'24 Q3.CHG_avg`="2024 Q3",
# Overall.CHG_avg ="Compliance",
# `'23 Q4.CHG_avg`="Compliance",
# `'24 Q1.CHG_avg`="Compliance",
# `'24 Q2.CHG_avg`="Compliance",
# `'24 Q3.CHG_avg`="Compliance",
# Overall.denom_avg ="Eligible Days",
# `'23 Q4.denom_avg`="Eligible Days",
# `'24 Q1.denom_avg`="Eligible Days",
# `'24 Q2.denom_avg`="Eligible Days",
# `'24 Q3.denom_avg`="Eligible Days",
Hosp="Hospital") %>%
# gtExtras::gt_hulk_col_numeric(
# columns = contains("CHG"),
# domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0),
# "CHG_rate"), na.rm = T) ,1)) %>%
data_color(
columns = contains("CHG"),
palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
na_color = "white",
domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0),
"CHG_rate"), na.rm = T) ,1)) %>%
data_color(
columns = Overall.CHG_avg,
method = "numeric",
palette = "PuOr", reverse = F
# palette = "YlGnBu", reverse = T
) %>%
tab_header(
title = md('**CHG compliance** by hospital'),
subtitle = "Expressed as mean +/- SD" ) %>%
opt_stylize(2, color="gray") | CHG compliance by hospital | |||||
| Expressed as mean +/- SD | |||||
| Overall | 2023 Q4 | 2024 Q1 | 2024 Q2 | 2024 Q3 | |
|---|---|---|---|---|---|
| RMH (Reynolds) | 74% ± 9% | 65% ± 4% | 69% ± 3% | 82% ± 5% | 82% ± 3% |
| Small (Small hospitals) | 70% ± 17% | 52% ± 6% | 56% ± 5% | 83% ± 3% | 88% ± 4% |
| UTN (Uniontown) | 72% ± 8% | 62% ± 6% | 74% ± 1% | 72% ± 4% | 81% ± 4% |
| BMC (Berkeley) | 66% ± 6% | 60% ± 3% | 63% ± 2% | 68% ± 7% | 71% ± 2% |
| PRN (Princeton) | 80% ± 5% | 74% ± 3% | 79% ± 4% | 81% ± 2% | 85% ± 3% |
| TMH (Thomas) | 86% ± 5% | — | — | 81% ± 6% | 89% ± 2% |
| WHL (Wheeling) | 86% ± 8% | 74% ± 6% | 89% ± 2% | 90% ± 2% | 90% ± 2% |
| UHC (UHC) | 80% ± 4% | 74% ± 1% | 80% ± 1% | 81% ± 1% | 85% ± 1% |
| CCMC (Camden Clark) | 78% ± 12% | 63% ± 3% | 72% ± 5% | 87% ± 6% | 91% ± 0% |
| WVUH (Ruby & Fairmont) | 62% ± 7% | 53% ± 1% | 62% ± 5% | 67% ± 1% | 67% ± 3% |
rm(t1, t2)This can be shown graphically, in the boxplots below
matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
# mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community",
# hosp_type=="Community" ~ "Academic / Community",
# TRUE ~ "CAH / small")) %>%
mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
q = str_remove(q, "20"),
q = str_glue("'{q}"),
q = factor(q, ordered=T)) %>%
mutate(Month = strftime(Date, format="%b %Y")) %>%
ggplot(aes(x=Hosp, y=CHG_rate)) +
# geom_boxplot(fill="#BDD7E7", color="grey30") +
# "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5"
geom_boxplot(aes(fill=q)) +
# geom_jitter(aes(text=Month)) +
# coord_flip() +
# facet_wrap("q", scales = "free_y") +
# facet_wrap(hosp_type~q, scales = "free_y") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
theme(legend.position = "bottom") +
scale_fill_brewer(palette = "RdPu") +
labs(title="Compliance by hospital",
subtitle = "Broken down by quarter",
x = "",
y = "CHG compliance",
fill="")
# facet_grid(.~hosp_type, scales = "free_y")Although that’s already a busy figure (at the risk of making it even more busy), the version below adds each hospital’s overall boxplot in blue in the background in blue
matched_data$small_by_hosp %>%
filter(!is.na(CHG_rate)) %>%
filter(CHG_rate!=0) %>%
# mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community",
# hosp_type=="Community" ~ "Academic / Community",
# TRUE ~ "CAH / small")) %>%
mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
q = str_remove(q, "20"),
q = str_glue("'{q}"),
q = factor(q, ordered=T)) %>%
mutate(Month = strftime(Date, format="%b %Y")) %>%
ggplot(aes(x=Hosp, y=CHG_rate)) +
geom_boxplot(fill="#BDD7E7", color="grey30") +
# "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5"
geom_boxplot(aes(fill=q)) +
# geom_jitter(aes(text=Month)) +
# coord_flip() +
# facet_wrap("q", scales = "free_y") +
# facet_wrap(hosp_type~q, scales = "free_y") +
theme_bw() +
scale_y_continuous(labels = scales::percent) +
theme(legend.position = "bottom") +
scale_fill_brewer(palette = "RdPu") +
labs(title="Compliance by hospital",
subtitle = "Broken down by quarter",
x = "",
y = "CHG compliance",
fill="") # facet_grid(.~hosp_type, scales = "free_y")df_glm <- matched_data$small_by_hosp %>% filter(Era!="During")
mods <- list()
# █─mods
# ├─CLABSI
# ├─CAUTI
# ├─MRSA
# └─█─RE
# ├─CLABSI
# ├─CAUTI
# └─MRSAIn this section, we will use a negative binomial regression to examine any influence that CHG bathing has on HAIs This is the same type of model that the NHSN uses to predict the number of expected infections.
Unless otherwise specified, our outcome of interest will be the monthly number of HAIs observed for each hospital. Notably, this is the absolute number, not a rate. To account for confounding variables we will include the number of predicted infections (from the NHSN data).
mods[["CLABSI"]] <- MASS::glm.nb(CLABSI_obs ~ CLABSI_pred + Era, data = df_glm)
summary(mods[["CLABSI"]])
# DHARMa::testDispersion(mods[["CLABSI"]])
# glance(mods[["CLABSI"]])
# unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean) To explain the various models, this section will go into detail using CLABSIs as an example. All of the other HAIs will follow the same format, but aren’t shown for space. The backbone model is a simple negative binomial model, using the formula below for CLABSI
## CLABSI_obs ~ CLABSI_pred + Era
The results of this specific model is shown in the table below:
broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T) %>%
relocate(conf.low, conf.high, .before = p.value) %>%
select(-c(std.error, statistic)) %>%
gt() %>%
cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
# tab_stub_indent(everything()) %>%
cols_label(estimate = md("Incident Rate Ratio (IRR)"),
term = "Term",
conf.low = "95% CI",
p.value = "P value") %>%
fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
fmt_scientific(columns = p.value,
rows = p.value < 0.001) %>%
fmt_scientific(columns = conf.low,
rows = conf.low < 0.001 | conf.low > 1000) %>%
fmt_scientific(columns = conf.high,
rows = conf.high < 0.001 | conf.high > 1000) %>%
tab_header(
title = 'CLABSI: Negative binomial model'
) %>%
opt_stylize(5)| CLABSI: Negative binomial model | |||
| Term | Incident Rate Ratio (IRR) | 95% CI | P value |
|---|---|---|---|
| (Intercept) | 0.381 | 0.284 - 0.505 | 3.52 × 10−11 |
| CLABSI_pred | 1.80 | 1.62 - 2.00 | 1.97 × 10−29 |
| EraAfter | 0.599 | 0.410 - 0.869 | 0.00758 |
This shows that compared to the before era, the “after” era (post CHG implementation) had a 40% reduction in CLABSIs (1 - 0.599) with a P-value of 0.008. It also shows that the predicted infections are significantly associated with increased observed infections (which is to be expected).
The same data is shown graphically below:
sjPlot::plot_model(mods[["CLABSI"]])library(glmmTMB)
mods[["RE"]][["CLABSI"]] <- glmmTMB(CLABSI_obs ~ CLABSI_pred + Era + (1|Hosp) ,
data = df_glm,
family = "nbinom1")
summary(mods[["RE"]][["CLABSI"]])
# broom.mixed::glance(mods[["RE"]][["CLABSI"]])
# DHARMa::testDispersion(mods[["RE"]][["CLABSI"]])In the previous model, we didn’t do anything to account for the various hospitals. There are arguments for and against adjusting for hospital-specific effects, outlined below
I think it’s worthwhile to at least include this model (even if it’s a sensitivity analysis), which is the same as [Model 1] but now with random effects for each hospital. The model formula I use is below:
## CLABSI_obs ~ CLABSI_pred + Era + (1 | Hosp)
The (1|Hosp) term means each hospital has a random intercept, capturing the variation in infection rates across hospitals. This helps to account for unobserved hospital-specific factors that may influence the CLABSI rates, but still assumes that the effect of CHG implementation is similar across hospitals
broom.mixed::tidy(mods[["RE"]][["CLABSI"]], conf.int = T, exponentiate=T) %>%
filter(effect=="fixed") %>%
select(term, estimate, conf.low, conf.high, p.value) %>%
gt() %>%
cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
# tab_stub_indent(everything()) %>%
cols_label(estimate = md("Incident Rate Ratio (IRR)"),
term = "Term",
conf.low = "95% CI",
p.value = "P value") %>%
fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
fmt_scientific(columns = p.value,
rows = p.value < 0.001) %>%
fmt_scientific(columns = conf.low,
rows = conf.low < 0.001 | conf.low > 1000) %>%
fmt_scientific(columns = conf.high,
rows = conf.high < 0.001 | conf.high > 1000) %>%
tab_header(
title = md('CLABSI: **Random effects** negative binomial model'),
subtitle = "Random effects for each hospital"
) %>%
opt_stylize(5, color = "green")| CLABSI: Random effects negative binomial model | |||
| Random effects for each hospital | |||
| Term | Incident Rate Ratio (IRR) | 95% CI | P value |
|---|---|---|---|
| (Intercept) | 0.358 | 0.171 - 0.746 | 0.00616 |
| CLABSI_pred | 1.26 | 0.834 - 1.91 | 0.269 |
| EraAfter | 0.662 | 0.462 - 0.949 | 0.0246 |
mods[["CAUTI"]] <- MASS::glm.nb(CAUTI_obs ~ CAUTI_pred + Era, data = df_glm)
summary(mods[["CAUTI"]])
# DHARMa::testDispersion(mods[["CAUTI"]])
# glance(mods[["CAUTI"]])
# unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean)
mods[["MRSA"]] <- MASS::glm.nb(MRSA_obs ~ MRSA_pred + Era, data = df_glm)
summary(mods[["MRSA"]])
# DHARMa::testDispersion(mods[["MRSA"]])
# glance(mods[["MRSA"]]) This shows all three models (CALBSI, CAUTI, MRSA) in the same table
bind_rows(
mutate(broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T),
Outcome = "CLABSI"),
mutate(broom::tidy(mods[["CAUTI"]], conf.int = T, exponentiate=T),
Outcome = "CAUTI"),
mutate(broom::tidy(mods[["MRSA"]], conf.int = T, exponentiate=T),
Outcome = "MRSA")
) %>%
relocate(Outcome) %>%
relocate(conf.low, conf.high, .before = p.value) %>%
select(-c(std.error, statistic)) %>%
mutate(term = if_else(str_detect(term, "_pred"), "NHSN prediction", term)) %>%
group_by(Outcome) %>%
gt(rowname_col = "term") %>%
tab_stub_indent(rows = everything(), indent = 5) %>%
cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
cols_label(estimate = md("Incident Rate Ratio (IRR)"),
term = "Term",
conf.low = "95% CI",
p.value = "P value") %>%
fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
fmt_scientific(columns = p.value,
rows = p.value < 0.001) %>%
fmt_scientific(columns = conf.low,
rows = conf.low < 0.001 | conf.low > 1000) %>%
fmt_scientific(columns = conf.high,
rows = conf.high < 0.001 | conf.high > 1000) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = c(estimate, p.value),
rows = p.value <= 0.05 & term == "EraAfter"
)
) %>%
tab_header(
title = 'HAIs: Negative binomial models'
) %>%
opt_stylize(5)| HAIs: Negative binomial models | |||
| Incident Rate Ratio (IRR) | 95% CI | P value | |
|---|---|---|---|
| CLABSI | |||
| (Intercept) | 0.381 | 0.284 - 0.505 | 3.52 × 10−11 |
| NHSN prediction | 1.80 | 1.62 - 2.00 | 1.97 × 10−29 |
| EraAfter | 0.599 | 0.410 - 0.869 | 0.00758 |
| CAUTI | |||
| (Intercept) | 0.557 | 0.426 - 0.721 | 1.92 × 10−5 |
| NHSN prediction | 1.54 | 1.40 - 1.70 | 1.33 × 10−19 |
| EraAfter | 0.668 | 0.476 - 0.933 | 0.0180 |
| MRSA | |||
| (Intercept) | 0.143 | 0.0904 - 0.216 | 1.94 × 10−18 |
| NHSN prediction | 4.14 | 2.85 - 5.94 | 2.57 × 10−14 |
| EraAfter | 0.657 | 0.391 - 1.09 | 0.105 |
sjPlot::plot_models(mods$CLABSI, mods$CAUTI, mods$MRSA,
rm.terms = c("CLABSI_pred", "CAUTI_pred", "MRSA_pred"),
m.labels = c("CLABSIs", "CAUTIs", "MRSA"),
axis.labels = c("After CHG"),
vline.color = "grey30",
show.values = T,
show.p = T) Done: Add LabID MRSA
Done: Add to descriptive statistics (will require a bit different of a workflow)
Done: Add the regression
Add interrupted time series (for all models)
Compare compliance to HAIs (bivariable analysis in descriptive statistics)
Look at unit level compliance, similar to the Figure 14 below
CHG_dept %>%
mutate(Rate = Numerator/Denominator) %>%
mutate(ID = str_glue("{Hospital}_{Department}")) %>%
ggplot(aes(x=Month, y=Rate, group=ID)) +
geom_line(aes(alpha=Denominator)) +
facet_wrap("Hospital")For example, March 31st (2024) was on a Sunday, so the week starting on 3/31/24 mostly reflects April 2024↩︎
I was going to do median, but given the number of months with zero HAIs the table was not very informative↩︎
In this section I use the term “CAH” for brevity, but recall that this includes smaller hospitals (namely WTZ) which isn’t technically a critical access hospital↩︎
Meaning that hospitals with greater numbers of eligible patient days (like GRMC) will contribute more to the average than hospitals with fewer oppurtunities (like WTZ or JAX)↩︎
Unlike Table 2, all the small and critical access hospitals are now aggregated into the “small hospitals” group↩︎