NHSN & CHG

This report is run with the following parameters

Show code

str(params, give.attr = F)
## List of 4
##  $ rollout_start: chr "2023-08-01"
##  $ rollout_end  : chr "2023-10-01"
##  $ ignore_before: chr "2022-08-01"
##  $ ignore_after : chr "2024-10-01"

Importing data

You can skip over this section, it’s just where I import the data

Hospital info

This comes from a Google Sheet where I store info for each hospital. There is important data on the sheet that governs the steps below

Show code

library(googlesheets4)
options(gargle_oauth_email = TRUE)
gs4_auth(cache = TRUE)
if(!exists("hospitals")){
  hospitals_raw <- paste0("https://docs.google.com/",
                          "spreadsheets/d/",
                          "1zZ1o1HhzOPrbj2KBdGsaO4rJvLDPMWqN0ogjMCNHfGw/") %>%
    read_sheet(sheet = "CHG Hospitals")
  
  hospitals <- hospitals_raw
  rm(hospitals_raw)
}

# Used to join get the names of the hospitals in the NHSN data
CCNs <- hospitals %>%
  mutate(NHSN_abbv = case_when(!is.na(NHSN_abbv) ~ NHSN_abbv,
                               CMS_ID=="510031" ~ "STF",
                               CMS_ID=="510029" ~ "TMH")) %>%
  distinct(NHSN_abbv, CMS_ID)

NHSN data

Show code

CLABSI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCentralLine_AssociatedBSI_1.2020_10.2024.xlsm", 
                                 col_types = c("text", "numeric", "numeric", 
                                               "numeric", "numeric", "numeric", 
                                               "text", "numeric", "text", "text", 
                                               "text", "text", "text", "text", "text", 
                                               "text", "numeric")) %>%
  filter(!is.na(location)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = str_replace(Month, "M", "-"),
         Month = ymd(str_glue("{Month}-15"))) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
  select(Hosp=NHSN_abbv, everything())

CLABSI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCentralLine_associatedBSI 1.2020_10.2024.xlsm", 
                                 col_types = c("text", "numeric", "numeric", 
                                               "numeric", "numeric", "numeric", 
                                               "text", "numeric", "text", "text", 
                                               "text", "text", "text", "text", "text", 
                                               "text", "numeric")) %>%
  filter(!is.na(location)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = str_replace(Month, "M", "-"),
         Month = ymd(str_glue("{Month}-15"))) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
  select(Hosp=NHSN_abbv, everything())


CLABSI <- list(CLABSI_ACH, CLABSI_CAH) %>%
  # Bind into a single table
  list_rbind() %>%
  
  # Clean up
  mutate(Hosp     = as.factor(Hosp),
         Month    = Month,
         Inf_pred = as.numeric(numPred),
         Inf_obs  = as.numeric(infCount),
         CL_days  = as.numeric(numcldays),
         locationType = as.factor(locationType),
         locCDC       = as.character(locCDC),
         location     = location,
         facType      = as.factor(facType),
         # .keep="none" is the same as transmute, keeps only new columns
         .keep="none") %>%
  
  # Clean up CDC location
  mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>%
  
  relocate(Hosp, Month, Inf_pred, Inf_obs, CL_days, location) %>%
  
  # Ignore data before & after
  filter(Month > ymd(params$ignore_before)) %>%
  filter(Month < ymd(params$ignore_after)) 
  
  
    

rm(CLABSI_ACH, CLABSI_CAH)

# CLABSI %>% rmarkdown::paged_table()

Date formats

Although the data is reported monthly, it’s much easier to work with if it’s in date format. Therefore, by convention I set the date to be the 15th day of each month (e.g. data for September 2023 will appear as “Sept 15th 2023”). This shouldn’t really matter to anyone but me, but if you’re looking at the raw data that’s why!

Show code

CAUTI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020-10.2024.xlsm", 
                                 col_types = c("date", "numeric", "numeric", 
                                               "numeric", "numeric", "numeric", 
                                               "text", "numeric", "text", "text", 
                                               "text", "text", "text", "text", "text", 
                                               "text", "numeric")) %>%
  filter(!is.na(location)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = as.Date(Month) + days(14)) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
  select(Hosp=NHSN_abbv, everything())


CAUTI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020_10.2024.xlsm", 
                                col_types = c("date", "numeric", "numeric", 
                                              "numeric", "numeric", "numeric", 
                                              "text", "numeric", "text", "text", 
                                              "text", "text", "text", "text", "text", 
                                              "text", "numeric")) %>%
  filter(!is.na(location)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = as.Date(Month) + days(14)) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID"))%>%
  select(Hosp=NHSN_abbv, everything())


CAUTI <- list(CAUTI_ACH, CAUTI_CAH) %>%
  # Bind into a single table
  list_rbind() %>%
  
  # Clean up
  mutate(Hosp     = as.factor(Hosp),
         Month    = Month,
         Inf_pred = as.numeric(numPred),
         Inf_obs  = as.numeric(infCount),
         cath_days  = as.numeric(numucathdays),
         locationType = as.factor(locationType),
         locCDC       = as.character(loccdc),
         location     = location,
         facType      = as.factor(facType),
         # .keep="none" is the same as transmute, keeps only new columns
         .keep="none") %>%

  # Clean up CDC location
  mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>%
  
  relocate(Hosp, Month, Inf_pred, Inf_obs, cath_days, location) %>%
  
  # Ignore data before & after
  filter(Month > ymd(params$ignore_before)) %>%
  filter(Month < ymd(params$ignore_after)) 

rm(CAUTI_ACH, CAUTI_CAH)

# CAUTI %>% rmarkdown::paged_table()

Show code

MRSA_ACH <- readxl::read_excel("NHSN data/ACH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx", 
    col_types = c("text", "date", "numeric", 
        "numeric", "numeric", "skip", "skip", 
        "skip", "skip", "text", "text", "text", 
        "text", "text")) %>%
  filter(!is.na(ccn)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = as.Date(Month) + days(14)) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
  select(Hosp=NHSN_abbv, everything())

MRSA_CAH <- readxl::read_excel("NHSN data/CAH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx", 
    col_types = c("text", "date", "numeric", 
        "numeric", "numeric", "skip", "skip", 
        "skip", "skip", "text", "text", "text")) %>%
  filter(!is.na(ccn)) %>%
  rename(Month=summaryYM) %>%
  mutate(Month = as.Date(Month) + days(14)) %>%
  left_join(CCNs, by=c("ccn"="CMS_ID")) %>%
  select(Hosp=NHSN_abbv, everything())

MRSA <- list(MRSA_ACH, MRSA_CAH) %>%
  # Bind into a single table
  list_rbind() %>%
  
  # Clean up
  mutate(Hosp     = as.factor(Hosp),
         Month    = Month,
         Inf_pred = as.numeric(numPred),
         Inf_obs  = as.numeric(MRSA_bldIncCount),
         MRSA_days  = as.numeric(numpatdays),
         facType      = as.factor(facType),
         # .keep="none" is the same as transmute, keeps only new columns
         .keep="none") %>%
  
  relocate(Hosp, Month, Inf_pred, Inf_obs, MRSA_days) %>%
  
  # Ignore data before & after
  filter(Month > ymd(params$ignore_before)) %>%
  filter(Month < ymd(params$ignore_after)) 

rm(MRSA_ACH, MRSA_CAH)

CHG compliance

CHG data was obtained from the CHG compliance dashboard. There are a few ways to split the data, outlined below. You can limit the dashboard by CHG indication (step 1) and expand it to show department level data (step 2)

Tableau screen shot showing how to (1) filter by CHG indication and (2) list by department

I have a few versions of the CHG data downloaded, and because it only shows the last 12 months, the versions aren’t consistent. The files I have available are below

File ID	Date exported	Filters	By department
File A	10/18/2024	Central lines only	No
File B1	12/05/2024	None	Yes
File B2	12/05/2024	Central lines only	Yes
File B3	12/05/2024	Foleys only	Yes

Which CHG to use

For now, I’m using the old version that I did in the prior analysis (File A), just so I can reproduce my prior work. Ideally, I could get access to the raw data (going back to implementation) and avoid this mess

Show code

CHG_raw <- "TableauData/2024-10-18 CHG CL only/All Overall Rate by Hospital and Department_data.csv" %>% 
  read_csv() %>%
  
  select(Hospital, Date=`Week of Census Time`,
         Denominator=`All Lines Denominator`,
         Numerator=`All Lines Numerator`) %>%
  
  mutate(Date = mdy(Date)) %>%
  
  group_by(Hospital, Date) %>%
  summarise(Denominator = sum(Denominator, na.rm = T),
            Numerator   = sum(Numerator, na.rm = T)) %>%
  ungroup() %>%
  
  # Remove most recent week 
  filter(Date != ymd("2024-10-13"))

Because it is reported weekly (and everything else is reported monthly), we need to convert the weekly data into monthly data. Some weeks span the course of multiple months¹. In these cases, we will group them with whichever month that the majority of their days fell under

How are the weeks calculated?

When you extract the data from Tableau, the dates it lists are all Sundays. It doesn’t look like it’s explicitly documented anywhere, but I assume that this means the week starts on Sunday, meaning the date provided is day 1 of 7 for that week

Show code

week_to_month <- function(date, beginning=T) {
  require(lubridate)
  
  # Make sure you are given only one date, not a vector
  stopifnot(length(date)==1)
  
  # Calculate the midpoint of the week, which is 3 days away from the start 
  # (or end) date
  if(beginning) {
    mid_date <- date + days(3)
  } else({
    # Otherwise, treat the given day as the end of the week
    mid_date <- date - days(3)
  })
  
  # Extract the month and the year from this date
  new_month <- month(mid_date)
  new_year  <- year(mid_date)
  
  # Create a new date and return
  new_date <- ymd(paste0(new_year, "-", new_month, "-15"))
  return(new_date)
  
}

CHG_hosp <- CHG_raw %>% 
  #  Needed to only process one date at a time
  rowwise() %>%
  mutate(Month = week_to_month(Date)) %>%
  ungroup() %>%
  
  group_by(Hospital, Month) %>%
  summarise(Denominator =sum(Denominator , na.rm = T),
            Numerator   =sum(Numerator   , na.rm = T))

# rm(CHG_clean)
# CHG_monthly %>% rmarkdown::paged_table()
#   mutate(Hospital = as.factor(Hospital)) %>% 
#   DT::datatable(filter="top", rownames = F)

I’ll also grab the newer export with CHG compliance by department (file 2B in the table above). This doesn’t go back as far as the old CHG data (file A), but is more granular

Show code

CHG_dept <- read_csv("TableauData/2024-12-05/CHG_byDept_CentralLines_2024-12-05.csv") %>%
  select(Hospital, Department,
         Date=`Week of Census Time`,
         Denominator=`All Lines Denominator`,
         Numerator=`All Lines Numerator`) %>%
  mutate(Date = mdy(Date)) %>%
  
  #  Needed to only process one date at a time
  rowwise() %>%
  mutate(Month = week_to_month(Date)) %>%
  ungroup() %>%
  
  group_by(Hospital, Department, Month) %>%
  summarise(Denominator =sum(Denominator , na.rm = T),
            Numerator   =sum(Numerator   , na.rm = T)) %>%
  ungroup()

CHG_dept <- CHG_dept %>%
  filter(Month!=ymd("2024-12-15"))

Summary of available data

The figure below shows the timespans of the various datasets used for this run

Show code


# max(CAUTI$Month)
# max(CLABSI$Month)
# max(CHG_hosp$Month)
# max(CHG_dept$Month)

tibble(dataset = c("CLABSI", "CAUTI", "MRSA", "CHG_hosp", "CHG_dept"),
       min = c(min(CLABSI$Month), min(CAUTI$Month), min(MRSA$Month), 
               min(CHG_hosp$Month), min(CHG_dept$Month)),
       max = c(max(CLABSI$Month), max(CAUTI$Month), max(MRSA$Month), 
               max(CHG_hosp$Month), max(CHG_dept$Month))) %>%
  ggplot(aes(y=dataset)) + 
  geom_linerange(aes(xmin=min, xmax=max)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30")

Pre-processing

This is the section where I aggregate and match the data to prepare it for the final analysis.

Structure of the NHSN data

The NHSN exports reports HAIs down to the “ward/unit” level (called location in the export) and these locations can be further classified by “acuity” (using the CDC’s location code; locCDC). An example for Princeton is shown below

Show code

CLABSI %>%
  filter(Hosp=="PRN") %>%
  # group_by(Hosp, locCDC, locationType, location) %>%
  # summarise(NHSN_n = n(),
  #           NHSN_min = min(Month),
  #           NHSN_max = max(Month))
  filter(year(Month)>=2023) %>%
  mutate(Quarter = str_glue("{year(Month)}_Q{quarter(Month)}")) %>%
  group_by(Hosp, locCDC, locationType, location, Quarter) %>%
  summarise(#n_obs = n(),
            # Predicted = sum(Inf_pred),
            Observed = sum(Inf_obs, na.rm = T),
            .groups = "drop") %>% 
  pivot_wider(names_from = Quarter, values_from = Observed) %>%
  gt() %>%
  fmt_missing() %>%
  tab_spanner_delim("_") %>%
  tab_header(title="Quarterly observed CLABSIs at PRN, by unit") %>%
  opt_stylize(6, color = "gray")

Hosp	locCDC	locationType	location	2023				2024
Quarterly observed CLABSIs at PRN, by unit
Hosp	locCDC	locationType	location	Q1	Q2	Q3	Q4	Q1	Q2	Q3
PRN	CC:MS	CC	CCU	1	2	1	4	1	1	0
PRN	CC:MS	CC	ICU	1	0	1	0	0	0	1
PRN	MIXED:ALL	OTHER	3E	0	0	0	0	0	0	1
PRN	STEP	STEP	2WSD	—	—	—	—	0	0	0
PRN	WARD:LD_PP	WARD	WC	0	0	0	0	0	0	0
PRN	WARD:M	WARD	2W	0	0	0	0	0	0	0
PRN	WARD:M	WARD	3WPCU	2	0	1	0	0	1	0
PRN	WARD:M	WARD	4E	0	0	1	0	1	0	0
PRN	WARD:MS	WARD	3SORTHO	1	0	0	0	0	1	0

Eventually, I want to match the CHG departmental level data to the NHSN location level data, but that’s going to take quite a bit of work. So for now, I’m going to aggregate the observed & predicted HAIs at a hospital level, which I do in the next section

Aggregating NHSN data & matching to CHG

Show code

# This is where aggregated NHSN & CHG data will live temporarily
temp_NSHS <- list()
temp_CHG <- list()

# This is where lookup tables will live
temp_lookup <- list()

# This is where matched data (final products) will go
matched_data <- list()

## Aggregate across departments (by hospital), creating df's 
## like the prior runs
temp_NSHS[["ByHosp"]][["CLABSI"]] <- CLABSI %>%
  group_by(Hosp, Month) %>%
  summarise(CLABSI_pred = sum(Inf_pred),
            CLABSI_obs = sum(Inf_obs), 
            days_CL    = sum(CL_days),
            .groups = "drop")

temp_NSHS[["ByHosp"]][["CAUTI"]] <- CAUTI %>%
  group_by(Hosp, Month) %>%
  summarise(CAUTI_pred = sum(Inf_pred),
            CAUTI_obs = sum(Inf_obs), 
            days_foley    = sum(cath_days),
            .groups = "drop")

temp_NSHS[["ByHosp"]][["MRSA"]] <- MRSA %>%
  group_by(Hosp, Month) %>%
  summarise(MRSA_pred = sum(Inf_pred),
            MRSA_obs = sum(Inf_obs), 
            days_MRSA    = sum(MRSA_days),
            .groups = "drop")
  


## Match the CHG data to the NHSN abbreviations, and aggregate  
## based off the NHSN abbreviation (this collapses CH/FMT into WVUH)
temp_CHG[["ByHosp"]] <- CHG_hosp %>%
  # Join to the GS data to match keys
  left_join(select(hospitals, NHSN_abbv, CHG_abbv), 
            by=c("Hospital"="CHG_abbv")) %>%
  
  # Drop data if the `NHSN_abbv` is NA
  filter(!is.na(NHSN_abbv)) %>%

  # Aggregate data on NHSN abbreviation
  group_by(NHSN_abbv, Month) %>%
  summarise(Denominator =sum(Denominator , na.rm = T),
            Numerator   =sum(Numerator   , na.rm = T),
            .groups = "drop") %>%
  
  rename(Hosp=NHSN_abbv, Date=Month)



## Create a table which will be the backbone of the matched
## dataset. It uses `crossing` to ensure all permutations
## of dates and hospitals are included
temp_lookup[["crossing"]] <- crossing(
  # Vector with all of the hospitals included. Note that this uses 
  # the name format from NHSN (although the data comes from a CHG
  # table, we converted the abbreviations to the NHSN format in the 
  # step above)
  Hosp = temp_CHG[["ByHosp"]]$Hosp, 
  # This is a vector of all of the months included in the analysis
  # across both the CHG dataset and the NHSN dataset. By using the
  # crossing function, in ensures that we have rows for dates that
  # may be missing ing one of the datasets (e.g. early days of the 
  # CHG dataset) 
  Date = c(temp_CHG[["ByHosp"]]$Date, 
            temp_NSHS[["ByHosp"]][["CLABSI"]]$Month, 
            temp_NSHS[["ByHosp"]][["CAUTI"]]$Month,
           temp_NSHS[["ByHosp"]][["MRSA"]]$Month))



## Using the crossing df as a backbone, join the CHG and HAI data together
matched_data[["all_by_hosp"]] <- temp_lookup[["crossing"]] %>%
  left_join(temp_NSHS[["ByHosp"]][["CLABSI"]], by = join_by(Hosp, Date==Month)) %>%
  left_join(temp_NSHS[["ByHosp"]][["CAUTI"]], by = join_by(Hosp, Date==Month)) %>%
  left_join(temp_NSHS[["ByHosp"]][["MRSA"]], by = join_by(Hosp, Date==Month)) %>%
  left_join(temp_CHG[["ByHosp"]], by = join_by(Hosp, Date)) %>%
  
  ## Annotate the era
  mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before",
                         Date > ymd(params$rollout_end)   ~ "After",
                         TRUE                             ~ "During"),
         Era = as_factor(Era)) 

## To annotate the names, we need to make minor adjustments 
## to the data from the Google Sheet, namely recoding Ruby/FMT/CH
temp_lookup[["names_full"]] <- hospitals %>%
  select(NHSN_abbv, name_short, hosp_type) %>%
  
  # Recode the ones that are under WVUH
  mutate(name_short = if_else(NHSN_abbv == "WVUH",
                              "Ruby & Fairmont",name_short)) %>%
  unique()

matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
  left_join(temp_lookup[["names_full"]], join_by(Hosp==NHSN_abbv))

Assumption

There are a few months where there were no central line days or foley days (mainly for St Francis). Since there can be no CLABSI or CAUTI during these time periods, I counted that data as missing (and thus it is ignored)

Show code

# Verify no infections observed when they don't have lines
testthat::expect_equal({matched_data[["all_by_hosp"]] %>% 
    filter(days_CL==0) %>% 
    pull(CLABSI_obs) %>% 
    max()}, 0)

testthat::expect_equal({matched_data[["all_by_hosp"]] %>% 
    filter(days_foley==0) %>% 
    pull(CAUTI_obs) %>% 
    max()}, 0)

matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
  mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs),
         CAUTI_obs  = if_else(days_foley==0, NA, CAUTI_obs))

Aggregating smaller hospitals

The Critical Access Hospitals (CAHs) represent an important segment here, as they serve a distinct patient populations compared to larger hopsitals. From a modeling standpoint, they are also unique in that they have much lower rates of infections, and the predicted number of HAIs are calculated differently by NHSN.

Show code

## First make a list of the CAHs & small hospitals
temp_lookup[["CAHs"]] <- hospitals %>% 
  filter(hosp_type=="Critical access") %>%
  pull(NHSN_abbv)

matched_data[["small_hospitals"]] <- c(temp_lookup[["CAHs"]], "WTZ")



## Pull out the small hospitals and aggregate them
temp_merging <- matched_data[["all_by_hosp"]] %>%
  # Starting with the matched data from the last step, limit to the
  # hospitals identified as small
  filter(Hosp %in% matched_data[["small_hospitals"]]) %>%
  
  # Reaggregate the data for these hospitals
  group_by(Date) %>%
  summarise(CLABSI_pred = sum(CLABSI_pred, na.rm = T),
            CLABSI_obs = sum(CLABSI_obs, na.rm = T),
            days_CL = sum(days_CL, na.rm = T),
            CAUTI_pred = sum(CAUTI_pred, na.rm = T),
            CAUTI_obs = sum(CAUTI_obs, na.rm = T),
            days_foley = sum(days_foley, na.rm = T),
            MRSA_pred = sum(MRSA_pred, na.rm = T),
            MRSA_obs = sum(MRSA_obs, na.rm = T),
            days_MRSA = sum(days_MRSA, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            Numerator = sum(Numerator, na.rm = T),
            .groups = "drop") %>%
  
  # Becuase implicit zeros may have been introduced my 
  # the `summarise` function, handle them here
  mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs),
         CAUTI_obs  = if_else(days_foley==0, NA, CAUTI_obs)) %>%
  
  # If both numerator & denominator are 0, assume that is 
  # from missing data
  mutate(missing_CHG = Denominator==0 & Numerator == 0,
         Denominator = if_else(missing_CHG, NA, Denominator),
         Numerator   = if_else(missing_CHG, NA, Numerator)) %>%
  select(-missing_CHG) %>%
  
  ## Annotate the era
  mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before",
                         Date > ymd(params$rollout_end)   ~ "After",
                         TRUE                             ~ "During"),
         Era = as_factor(Era)) %>%
  
  # Reintroduce the names
  mutate(Hosp  = "Small",
         name_short = "Small hospitals",
         hosp_type  = "CAH / small") %>%
  relocate(Hosp)



## Now get the other (non-small) hospitals and bind to the 
## aggregated data
matched_data[["small_by_hosp"]] <- matched_data[["all_by_hosp"]] %>% 
  filter(!Hosp %in% matched_data[["small_hospitals"]]) %>%
  bind_rows(temp_merging)

To accommodate this unique set of hospitals in the analysis, we will aggregate their monthly numbers together and treat them as one “hospital”. Although WTZ isn’t classified as a critical access hospital, it has a smaller capacity (bed size < 50) so we will include it with the CAHs. Specifically, we will treat BRN, BRX, GRMC, GMH, HRS, JAX, JMC, PVH, STJ, SMR, and WTZ as small hospitals.

Show code

## Clean up enviroment, do some final touch ups
rm(temp_merging, temp_CHG, temp_lookup, temp_NSHS,
   CLABSI, CAUTI, MRSA)


matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>%
  mutate(CHG_rate = Numerator / Denominator) %>%
  relocate(CHG_rate, .before = Denominator) %>%
    
  # Ignore data before & after
  filter(Date > ymd(params$ignore_before)) %>%
  filter(Date < ymd(params$ignore_after)) %>%
  
  # Ignore STF
  filter(Hosp!="STF")

matched_data[["small_by_hosp"]] <- matched_data[["small_by_hosp"]] %>%
  mutate(CHG_rate = Numerator / Denominator) %>%
  relocate(CHG_rate, .before = Denominator) %>%
  
  # Ignore data before & after
  filter(Date > ymd(params$ignore_before)) %>%
  filter(Date < ymd(params$ignore_after)) %>%
  
  # Ignore STF
  filter(Hosp!="STF")

Descriptive statistics

To get a feel for what we are looking at, this shows the time span that is covered by the data I have available. We should have equal numbers before and after the implementation period. The datasets for HAIs (CLABSI, CAUTI, MRSA) come from NHSN and the CHG dataset is extracted from Epic documentation (via Enterprise Analytics’s Tableau dashboard)

Show code

matched_data$small_by_hosp %>%
  
  group_by(Date, Era) %>%
  summarise(CLABSI = sum(!is.na(CLABSI_obs),na.rm = T),
         CAUTI  = sum(!is.na(CAUTI_pred),na.rm = T),
         MRSA   = sum(!is.na(MRSA_pred),na.rm = T),
         CHG    = sum(!is.na(CHG_rate),na.rm = T),
         n = n()) %>%
  
  # group_by(Hosp, Date) %>%
  pivot_longer(cols=c(CLABSI, CAUTI, MRSA, CHG), 
               names_to = "Dataset", 
               values_to = "n_obs") %>%
  mutate(pct_obs = n_obs / n) %>%
  filter(n_obs > 0) %>%
  ggplot(aes(x=Date, y=Dataset)) +
  geom_line() +
  geom_point(aes(color=Era)) +
    geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  theme_bw()

Figure 1: Time span of data in the analysis

Show code

temp_numbers <- list(
  n_hosp = n_groups(group_by(matched_data$small_by_hosp, Hosp)),
  CLABSI = list(
    n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs))),
    
    n_before = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CLABSI_obs), Era=="Before")),
    n_during = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CLABSI_obs), Era=="During")),
    n_after = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CLABSI_obs), Era=="After"))
  ),
  CAUTI = list(
    n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs))),
    
    n_before = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CAUTI_obs), Era=="Before")),
    n_during = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CAUTI_obs), Era=="During")),
    n_after = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CAUTI_obs), Era=="After"))
  ),
  MRSA = list(
    n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs))),
    
    n_before = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(MRSA_obs), Era=="Before")),
    n_during = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(MRSA_obs), Era=="During")),
    n_after = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(MRSA_obs), Era=="After"))
  ),
  CHG_hosp = list(
    n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate))),
    start = {matched_data$small_by_hosp %>% 
        filter(!is.na(CHG_rate)) %>% pull(Date) %>% min() %>%
        strftime(format="%b %Y")},
    end = {matched_data$small_by_hosp %>% 
        filter(!is.na(CHG_rate)) %>% pull(Date) %>% max() %>%
        strftime(format="%b %Y")},
    
    n_before = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CHG_rate), Era=="Before")),
    n_during = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CHG_rate), Era=="During")),
    n_after = nrow(filter(matched_data$small_by_hosp, 
                           !is.na(CHG_rate), Era=="After"))
  ),
  CHG_dept = list(
    n_obs = nrow(CHG_dept),
    start = {CHG_dept %>% pull(Month) %>% min() %>%
        strftime(format="%b %Y")},
    end = {CHG_dept %>% pull(Month) %>% max() %>%
        strftime(format="%b %Y")}
  ),
  Dates = list(
    before = list(
      start = {matched_data$small_by_hosp %>% 
        filter(Era=="Before") %>% pull(Date) %>% min() %>%
        strftime(format="%b %Y")},
      end = {matched_data$small_by_hosp %>% 
        filter(Era=="Before") %>% pull(Date) %>% max() %>%
        strftime(format="%b %Y")},
      len = {matched_data$small_by_hosp %>% 
        filter(Era=="Before") %>% pull(Date) %>% 
          unique() %>% length()}
    ),
    during = list(
      start = {matched_data$small_by_hosp %>% 
        filter(Era=="During") %>% pull(Date) %>% min() %>%
        strftime(format="%b %Y")},
      end = {matched_data$small_by_hosp %>% 
        filter(Era=="During") %>% pull(Date) %>% max() %>%
        strftime(format="%b %Y")},
      len = {matched_data$small_by_hosp %>% 
        filter(Era=="During") %>% pull(Date) %>% 
          unique() %>% length()}
    ),
    after = list(
      start = {matched_data$small_by_hosp %>% 
        filter(Era=="After") %>% pull(Date) %>% min() %>%
        strftime(format="%b %Y")},
      end = {matched_data$small_by_hosp %>% 
        filter(Era=="After") %>% pull(Date) %>% max() %>%
        strftime(format="%b %Y")},
      len = {matched_data$small_by_hosp %>% 
        filter(Era=="After") %>% pull(Date) %>% 
          unique() %>% length()}
    )
  )
)

The data generally spans three era, the before era (Aug 2022 to Jul 2023; 12 months), the implementation era (Aug 2023 to Sep 2023; 2 months), and the after era (Oct 2023 to Sep 2024; 12 months).

Across the 10 hospitals included (keeping in mind that we aggregated 11 of the smaller hospitals into one “small hospital”), there are 260 observations for CLABSIs (120 in the before era, 20 in the implementation period, and 120 in the after era), 260 observations for CAUTIs (120 in the before era, 20 in the implementation period, and 120 in the after era), and 258 observations for LabID MRSA (120 in the before era, 20 in the implementation period, and 118 in the after era).

The CHG data is a little more complicated. I have one dataset that shows CHG compliance at a hospital level. The hospital level CHG data has 114 observations from Oct 2023 to Sep 2024 (0 in the before era, 0 in the implementation period, and 114 in the after era). I also have CHG data at a departmental level that has 1255 observations but this data spans from Jan 2024 observations Nov 2024 (meaning we are missing some of the early data). We will explore both, but keep in mind that they are looking at slightly different time periods.

Infection rates over time

To start, we will look at the number of HAIs that occurred monthly by each hospital. The colored bars represent the observed HAIs (the very small ones at the bottom of the Y-axis are zero infections), and the horizontal black solid line are the predicted number of infections as calculated by the NHSN. There is also a grey dashed vertical line that marks the implementation phase of the system wide CHG roll out.

CLABSI

The next two figures show the rates of CLABSIs (per thousand central line days). The first figure (below) shows the overall rate across the entire system

Show code

matched_data$small_by_hosp %>%
  group_by(Date, Era) %>%
  summarise(obs_rate = 1000 * sum(CLABSI_obs) / sum(days_CL),
            pred_rate = 1000 * sum(CLABSI_pred) / sum(days_CL)) %>%
  
  ggplot(aes(x=Date, y=obs_rate)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
  sjPlot::scale_fill_sjplot(palette = "circus") +
  # sjPlot::scale_color_sjplot(palette = "circus") +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="CLABSI Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="CLABSI Rate\n(per 1,000 CL days)",
       x="Month",
       fill="",
       caption = "CL = central line") +
  theme_bw()

Figure 2: CLABSI rate over time, across entire system

This figure breaks it down by hospital, still showing the rate (per thousand central line days)

Show code

matched_data$small_by_hosp %>%
    mutate(obs_rate = 1000 * CLABSI_obs / days_CL,
           pred_rate = 1000 * CLABSI_pred / days_CL) %>%
  
  ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(color=hosp_type),alpha=0.7) +
  facet_wrap("name_short", ncol=3) +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="CLABSI Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="CLABSI Rate\n(per 1,000 CL days)",
       x="Month",
       fill="Hospital\nType",
       caption = "CL = central line\nCAH = Critical Access Hospital") +
  theme_bw()

Figure 3: CLABSI rate over time, broken down by hospital

CAUTI

This section follows the same format as before, but with CAUTIs. The first two figures show the rates of CAUTIs (per thousand foley days), across the entire system and by each hospital (respectively).

Show code

matched_data$small_by_hosp %>%
  group_by(Date, Era) %>%
  summarise(obs_rate = 1000 * sum(CAUTI_obs) / sum(days_foley),
            pred_rate = 1000 * sum(CAUTI_pred) / sum(days_foley)) %>%
  
  ggplot(aes(x=Date, y=obs_rate)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
  sjPlot::scale_fill_sjplot(palette = "circus") +
  # sjPlot::scale_color_sjplot(palette = "circus") +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="CAUTI Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="CAUTI Rate\n(per 1,000 foley days)",
       x="Month",
       fill="") +
  theme_bw()

Figure 4: CAUTI rate over time, across entire system

Berkeley is quite impressive with their change in the rates of CAUTIs (before and after)

Show code

matched_data$small_by_hosp %>%
    mutate(obs_rate = 1000 * CAUTI_obs / days_foley,
           pred_rate = 1000 * CAUTI_pred / days_foley) %>%
  
  ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(color=hosp_type),alpha=0.7) +
  facet_wrap("name_short", ncol=3) +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="CAUTI Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="CAUTI Rate\n(per 1,000 foley days)",
       x="Month",
       fill="Hospital\nType",
       caption = "CAH = Critical Access Hospital") +
  theme_bw()

Figure 5: CAUTI rate over time, broken down by hospital

MRSA

This section follows the same format as before, but with Lab ID MRSA. The first two figures show the rates of MRSA (per 10,000 patient days), across the entire system and by each hospital (respectively).

Show code

matched_data$small_by_hosp %>%
  group_by(Date, Era) %>%
  summarise(obs_rate = 10000 * sum(MRSA_obs) / sum(days_MRSA),
            pred_rate = 10000 * sum(MRSA_pred) / sum(days_MRSA)) %>%
  
  ggplot(aes(x=Date, y=obs_rate)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(fill=Era), alpha=0.7, color="grey70") +
  sjPlot::scale_fill_sjplot(palette = "circus") +
  # sjPlot::scale_color_sjplot(palette = "circus") +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="MRSA Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="MRSA Rate\n(per 10,000 patient days)",
       x="Month",
       fill="") +
  theme_bw()

Figure 6: MRSA rate over time, across entire system

Again, the rate by each hospital

Show code

matched_data$small_by_hosp %>%
      mutate(obs_rate = 10000 * MRSA_obs / days_MRSA,
           pred_rate = 10000 * MRSA_pred / days_MRSA) %>%
  
  ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) +
  geom_vline(xintercept = ymd(params$rollout_start), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_vline(xintercept = ymd(params$rollout_end), 
             alpha=0.5, linetype = "dashed",
             color="grey30") +
  geom_line(aes(y=pred_rate)) +
  geom_col(aes(color=hosp_type),alpha=0.7) +
  facet_wrap("name_short", ncol=3) +
  guides(color="none") +
  scale_x_date(date_labels = "%b\n'%y") +
  labs(title="MRSA Rates: Observed vs Predicted",
       subtitle="Black line solid is predicted number from NHSN",
       y="MRSA Rate\n(per 10,000 patient days)",
       x="Month",
       fill="Hospital\nType",
       caption = "CAH = Critical Access Hospital") +
  theme_bw()

Figure 7: MRSA rate over time, broken down by hospital

Table

This table shows each hospital’s average² rates of healthcare associated infections broken down before and after CHG bathing was implemented. The column labeled with the percent sign (%) is the relative change after implementing CHG bathing ($\frac{After - Before}{Before}$).

Show code

matched_data$small_by_hosp %>%
  filter(Era!="During") %>%
  group_by(name_short, Era) %>%
  summarise(CAUTI_mean    = mean(10000 * CAUTI_obs / days_foley, na.rm = T),
            CAUTI_sd      = sd(10000 * CAUTI_obs / days_foley, na.rm = T),
            # CAUTI_n.mean    = mean(CAUTI_obs, na.rm = T),
            # CAUTI_n.sd      = sd(CAUTI_obs, na.rm = T),
            # CLABSI_n.mean   = mean(CLABSI_obs, na.rm = T),
            # CLABSI_n.sd     = sd(CLABSI_obs, na.rm = T),
            CLABSI_mean   = mean(10000 * CLABSI_obs / days_CL, na.rm = T),
            CLABSI_sd     = sd(10000 * CLABSI_obs / days_CL, na.rm = T),
            MRSA_mean   = mean(100000 * MRSA_obs / days_MRSA, na.rm = T),
            MRSA_sd     = sd(100000 * MRSA_obs / days_MRSA, na.rm = T),
            .groups = "drop")  %>%
            
 
    
  pivot_longer(cols=-c(name_short, Era),
               names_to = c("HAI", ".value"),
               names_pattern = "(.*)_(.*)") %>%
  pivot_wider(names_from = Era, 
              values_from = c(mean, sd), 
              names_glue = "{Era}_{.value}",
              names_vary = "slowest") %>%
  pivot_wider(names_from = HAI, 
              values_from = -c(HAI, name_short), 
              names_glue = "{HAI}.{.value}",
              names_vary = "slowest") %>%
  
  # Add the percent change
  mutate(CAUTI.pct = (CAUTI.After_mean - CAUTI.Before_mean)/CAUTI.Before_mean) %>%
  relocate(CAUTI.pct, .after = CAUTI.After_sd) %>%
  mutate(CLABSI.pct = (CLABSI.After_mean - CLABSI.Before_mean)/CLABSI.Before_mean) %>%
  relocate(CLABSI.pct, .after = CLABSI.After_sd) %>%
  mutate(MRSA.pct = (MRSA.After_mean - MRSA.Before_mean)/MRSA.Before_mean) %>%
  relocate(MRSA.pct, .after = MRSA.After_sd) %>%
  mutate(CAUTI.pct = if_else(!is.finite(CAUTI.pct), NA, CAUTI.pct),
         CLABSI.pct = if_else(!is.finite(CLABSI.pct), NA, CLABSI.pct),
         MRSA.pct = if_else(!is.finite(MRSA.pct), NA, MRSA.pct)) %>%
  
  gt(rowname_col = "name_short") %>%
  cols_merge_uncert(CLABSI.Before_mean, CLABSI.Before_sd) %>%
  cols_merge_uncert(CLABSI.After_mean, CLABSI.After_sd) %>%
  cols_merge_uncert(CAUTI.Before_mean, CAUTI.Before_sd) %>%
  cols_merge_uncert(CAUTI.After_mean, CAUTI.After_sd) %>%
  cols_merge_uncert(MRSA.Before_mean, MRSA.Before_sd) %>%
  cols_merge_uncert(MRSA.After_mean, MRSA.After_sd) %>%
  tab_spanner_delim(delim = ".") %>%
  gt::cols_label(CAUTI.Before_mean = "Before",
                 CAUTI.After_mean  = "After",
                 CLABSI.Before_mean = "Before",
                 CLABSI.After_mean  = "After",
                 MRSA.Before_mean = "Before",
                 MRSA.After_mean  = "After",
                 CAUTI.pct = "%",
                 CLABSI.pct = "%",
                 MRSA.pct = "%") %>%
  fmt_number(decimals = 1) %>%
  fmt_percent(ends_with("pct"), decimals = 0, force_sign =T) %>%
  fmt_missing() %>%
  #   data_color(
  #   columns = CAUTI.pct,
  #   target_columns = CAUTI.pct,
  #   method = "numeric",
  #   palette = "viridis",
  #   domain = c(-1, 1)
  # ) %>%
  # fmt_number() %>%
  #   data_color(
  #   columns = CLABSI.pct,
  #   target_columns = CLABSI.pct,
  #   method = "numeric",
  #   palette = "viridis",
  #   domain = c(-1, 1)
  # ) %>%
  # cols_hide(CAUTIdiff)
  data_color(
    columns = CAUTI.pct,
    reverse = T,
    na_color = "white",
    domain = c(-1,1),
    palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
  ) %>%
  data_color(
    columns = CLABSI.pct,
    reverse = T,
    na_color = "white",
    domain = c(-1,1),
    palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
  ) %>%
  data_color(
    columns = MRSA.pct,
    reverse = T,
    na_color = "white",
    domain = c(-2.5,2.5),
    palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
  ) %>%
  # gtExtras::gt_hulk_col_numeric(
  #   columns = CAUTI.pct,
  #   reverse = T,
  #   domain = c(-1,1),
  #   palette = "RdYlBu",
  #   # pal_type = "continuous"
  # ) %>%
  # gtExtras::gt_hulk_col_numeric(
  #   columns = CLABSI.pct,
  #   reverse = T,
  #   domain = c(-1,1),
  #   palette = "RdYlBu",
  #   # pal_type = "continuous"
  # ) %>%
  # gtExtras::gt_hulk_col_numeric(
  #   columns = MRSA.pct,
  #   reverse = T,
  #   domain = c(-2.5,2.5),
  #   palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
  # ) %>%
  tab_header(
    title = md('Rates of HAIs before & after CHG bathing'),
    subtitle = "Expressed as mean +/- SD" ) %>%
  opt_stylize(2, color="gray") %>%
  tab_footnote(
    footnote = "Per 10,000 foley/central line days",
    locations = cells_column_spanners(spanners = contains("CLABSI")
  )) %>%
  tab_footnote(
    footnote = "Per 10,000 foley/central line days",
    locations = cells_column_spanners(spanners = contains("CAUTI")
  ))  %>%
  tab_footnote(
    footnote = "Per 100,000 patient days",
    locations = cells_column_spanners(spanners = contains("MRSA")
  ))

Table 1:
HAIs in the pre and post CHG era
	CAUTI¹			CLABSI¹			MRSA²
Rates of HAIs before & after CHG bathing
Expressed as mean +/- SD
	Before	After	%	Before	After	%	Before	After	%
Berkeley	50.0 ± 27.1	7.1 ± 17.3	−86%	1.9 ± 6.7	0.0 ± 0.0	−100%	4.4 ± 10.3	4.5 ± 10.5	+3%
Camden Clark	6.8 ± 8.4	5.0 ± 7.7	−27%	0.0 ± 0.0	6.7 ± 9.2	—	1.8 ± 6.1	2.9 ± 6.7	+64%
Princeton	5.7 ± 7.5	4.6 ± 6.9	−19%	17.9 ± 17.8	15.6 ± 12.6	−12%	11.6 ± 19.0	7.3 ± 13.3	−37%
Reynolds	0.0 ± 0.0	3.9 ± 13.4	—	0.0 ± 0.0	0.0 ± 0.0	—	0.0 ± 0.0	0.0 ± 0.0	—
Ruby & Fairmont	9.2 ± 7.3	13.0 ± 8.2	+41%	10.9 ± 6.0	8.1 ± 5.7	−26%	8.5 ± 8.1	3.6 ± 3.5	−58%
Small hospitals	12.9 ± 14.0	3.5 ± 8.6	−73%	5.0 ± 11.9	5.5 ± 12.8	+9%	1.9 ± 6.7	4.1 ± 9.5	+110%
Thomas	7.8 ± 8.2	9.3 ± 9.2	+20%	15.2 ± 12.1	3.0 ± 5.5	−80%	2.1 ± 7.2	6.6 ± 10.6	+215%
UHC	11.0 ± 8.1	10.0 ± 9.5	−9%	7.0 ± 8.3	3.1 ± 6.0	−56%	2.9 ± 6.9	1.4 ± 5.0	−51%
Uniontown	7.4 ± 8.8	5.6 ± 8.2	−25%	23.8 ± 23.2	7.1 ± 18.0	−70%	12.3 ± 18.2	10.9 ± 16.1	−12%
Wheeling	2.6 ± 5.0	3.7 ± 7.4	+40%	5.1 ± 9.5	3.8 ± 7.3	−25%	4.0 ± 9.4	0.0 ± 0.0	−100%
¹ Per 10,000 foley/central line days
² Per 100,000 patient days

CHG compliance

Now we look at CHG compliance. We will break it apart various ways (by time, by hospital, etc) so there there is some redundancy across these subsections

Over time

The first figure shows compliance trends over time, beginning at the end of the rollout period (Oct 2023). The overall system wide compliance is shown as the bold solid line and the smaller black lines show the trends for each of the hospitals

Show code

# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  group_by(Date) %>%
  summarise(Numerator = sum(Numerator, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            CHG_rate = Numerator / Denominator)

matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
  ggplot(aes(x=Date, y=CHG_rate)) +
  ggrepel::geom_label_repel(aes(label=name_short),
                            nudge_y = -.05,
                            nudge_x = 25, 
                            arrow = arrow(length = unit(0.02, "npc")),
                            data=filter(matched_data$small_by_hosp, 
                                        Date==ymd("2024-04-15") & Hosp=="BMC")) +
  ggrepel::geom_label_repel(aes(label=name_short),
                            nudge_y = +.05,
                            nudge_x = -25, 
                            arrow = arrow(length = unit(0.02, "npc")),
                            data=filter(matched_data$small_by_hosp, 
                                        Date==ymd("2024-03-15") & Hosp=="WHL")) +
  # geom_point(aes(size=cut_grp), alpha=.5) +
  geom_line(aes(group=Hosp, linetype=hosp_type), alpha=0.5) +
  geom_smooth(se = F, span=.5,
              data=df_temp) +
  # stat_smooth(geom="line", alpha=0.75) +
  # geom_line(alpha=0.5) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  scale_linetype_manual(values=c("Academic"="twodash", 
                                 "CAH / small"="dotted",
                                 "Community" = "solid")) +
  theme_bw() +
  labs(title = "System wide CHG compliance vs time",
       y = "CHG compliance",
       linetype = "Hospital\nType")

rm(df_temp)


# matched_data$small_by_hosp %>%
#   filter(!is.na(CHG_rate)) %>%
#   filter(CHG_rate!=0) %>%
#   filter(Hosp %in% c("BMC", "WHL", "UTN")) %>%
#   mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
#   ggplot(aes(x=Date, y=CHG_rate)) +
#   geom_line(aes(group=Hosp, color=Hosp), alpha=0.5)

Figure 8: CHG compliance over time, across entire system. The bolded line is the overall CHG complaince, and the more faint lines are each hospital’s trend

A few patterns emerge at first glance:

Over time, compliance seems to go up but only to a point before it plateaus
- This is true for community hospitals like Wheeling (that reached a high plateau early) and Berkeley (that reached a lower plateau later on)
- It also looks to be the case for CAH and our academic hospital (Ruby)
Different hospitals plateau at different levels
- As mentioned above, Wheeling ends up much higher than Berkley
By one year after roll out, the community and CAH generally ended at or above the system wide goal (80%) but the largest hospital, our academic hospital did not reach it’s goal and brings down the average for the entire system

For further details, below is an interactive figure that shows each hospital’s trend

Show code

# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  group_by(Date) %>%
  summarise(Numerator = sum(Numerator, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            CHG_rate = Numerator / Denominator)

x <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
  ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) +
  geom_line(aes(color=Hosp), alpha=0.5) +
  geom_smooth(se = F, span=.5, color="black",
              data=df_temp) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  theme_bw() +
  labs(title = "Hospital's CHG compliance vs time",
       y = "CHG compliance")

plotly::ggplotly(x)

Figure 9: CHG compliance over time, by hospital

Show code

rm(df_temp, x)

Tip

If you want to dynamically explore the data yourself, in the interactive figure below you can:

Hover over points to view the actual numbers at each point, including the exact compliance rate, date, and denominator (number of patient-days eligible for CHG bathing)
Clicking on each hospital’s name/abbreviation in the figure’s legend will show/hide it from view (double clicking will isolate that particular hospital). This can be helpful for comparing specific hospitals

Show code

# matched_data$all_by_hosp %>%
#   filter(!is.na(CHG_rate)) %>%
#   ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) +
#   geom_line() + facet_wrap("Hosp")
# 
# matched_data$small_by_hosp %>%
#   # filter(!is.na(CHG_rate)) %>%
#   filter(CHG_rate!=0) %>%
#   ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) +
#   # geom_smooth(se = F, span=.5, alpha=0.25) +
#   stat_smooth(geom="line", span=.5, alpha=0.5) +
#   # geom_line(alpha=0.5) +
#   scale_y_continuous(limits = c(NA, 1), labels = scales::percent)



matched_data$small_by_hosp %>%
  # filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=Hosp)) +
  # geom_smooth(se = F, span=.5, alpha=0.25) +
  # geom_point(aes(linetype=hosp_type)) +
  stat_smooth(geom="line", alpha=0.75) +
  # geom_line(alpha=0.5) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  theme_bw()

CAH specifically

Critical access hospitals³ are of particular interest. Recall Figure 8 (slightly modified below) which showed an apparent dip in compliance early after the rollout period, then sustained increased compliance until it plateaus.

Show code

# Pooled aveage
df_temp <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  group_by(Date) %>%
  summarise(Numerator = sum(Numerator, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            CHG_rate = Numerator / Denominator)

matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>%
  # filter(Hosp=="Small") %>%
  mutate(grp = if_else(Hosp=="Small", "CAH / Small", "All others")) %>%
  ggplot(aes(x=Date, y=CHG_rate)) +
  
  geom_line(aes(group=Hosp, color=hosp_type), alpha=0.7) +
  geom_smooth(se = F, span=.5, color="black",
              data=df_temp) +
  # stat_smooth(geom="line", alpha=0.75) +
  # geom_line(alpha=0.5) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  scale_color_manual(values=c("CAH / small"="red", 
                              "Community"="grey80",
                              "Academic"="lightblue")) +
  theme_bw() +
  labs(y = "CHG compliance",
       color = "")

Show code


rm(df_temp)

To dive into this a little more, I’ve de-aggregated the “CAH / Small hospital” group in the figure below. Similar to Figure 9 the thick black line is the system wide average, but to make it (slightly less) busy I’ve omitted the community and academic hospitals. Because the compliance rate for the “CAH / Small hospital” group in the main analysis is a weighted average⁴ of these various hospitals, the transparency of each line is reflective of that’s hospital’s average number of opportunities for CHG bathing

Show code

# Pooled aveages
df_avg_all <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  group_by(Date) %>%
  summarise(Numerator = sum(Numerator, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            CHG_rate = Numerator / Denominator)

df_avg_small <- matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  filter(Hosp %in% matched_data$small_hospitals) %>%
  group_by(Date) %>%
  summarise(Numerator = sum(Numerator, na.rm = T),
            Denominator = sum(Denominator, na.rm = T),
            CHG_rate = Numerator / Denominator)



x <- matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  filter(Hosp %in% matched_data$small_hospitals) %>%
  group_by(Hosp) %>%
  mutate(avg_denom = mean(Denominator, na.rm = T)) %>%
  ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) +
  geom_line(aes(color=Hosp, alpha=avg_denom)) +
  # geom_smooth(se = F, span=.5, color="grey40", linetype="dotted",
  #             data=df_avg_small) +
  geom_smooth(se = F, span=.5, color="black",
              data=df_avg_all) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  theme_bw() +
  guides(alpha=F) +
  labs(title = "CHG compliance (CAH only) vs time",
       y = "CHG compliance",
       color="CAH")

plotly::ggplotly(x)

Figure 10: CHG compliance over time, limited to critical access hospitals (CAH). Each lines transparency is weighted to how many opportunities for CHG bathing the hospital had across the year (this is the ‘denominator’ in the Tableau dashboard). Similar to before, the solid black line shows the system wide average CHG compliance.

Show code

rm(df_temp, df_avg_small, df_avg_all)

The takeaways I have here are:

The three CAH hospitals that contribute the most in terms of CHG opportunities are GRMC, PVH, SMR, and GMH
- Among these four hospitals, they all have improved their compliance over time and are at or above the system wide average
- Notably, Grant (GMH) didn’t report data for 10/2023 (but did start reporting in 11/2023) and started out with the lowest compliance when they did report data
  - The early dip seen in the late fall of 2023 on Figure 8 may be an artifact. When Grant did start reporting, they brought the group’s average down quite a bit, but the CAH group’s rate probably would have been lower in 10/2023 if GMH had reported data
BRX and WTZ struggle a little more than the rest of the group, especially compared to JAX (despite all three having comparable numbers of CHG opportunities)
- STJ struggled at first, but generally ended up around the system wide average
JMC actually has been going down over time, a trend not seen for any other hospital
BRN and HRS have very few opportunities (see avg_denom when you hover over their lines) but generally do a great/excellent job when they do have opportunities (Harrison is actually a straight line at 100%)
- Important to remember that because of how the data is collected in Epic, the compliance percentage may not be very reliable with small denominators. For example, a patient who is admitted at 11pm (and doesn’t get a bath in their first hour) who has a 72h length of stay will get 67% compliance even if they do everything right from a CHG bathing standpoint

By number of opportunities

Next we’ll look at how compliance varies by the number of CHG bathing opportunities (the number of patient days that are eligble for CHG bathing) for each hospital. There are two reasons to explore this relationship

As mentioned above, the rate can be very sensitive to one missed opportunity, if the denominator is small. This is particularly true for the smaller and critical access hospitals
There might be some a priori reasons to expect that the amount opportunities could influence compliance
- One one hand, it’s possible that facilities who see fewer patients who are eligible for CHG bathing might not have CHG bathing ingrained into their workflow (it may not be front of mind for the healthcare providers)
- Conversely, more opportunities for bathing are also more opportunities to miss bathing. Are there issues with scaling?

Note

When I say “CHG bathing opportunities”, I’m refering to the denominator in the Tableau dashboard. So this section is looking at “CHG compliance rate” ($Rate = \frac{Numerator}{Denominator} \times 100$) vs $Denominator$ .

The table below displays the same information from Figure 10 (looking at CAH only). The table is sorted by hospital, starting with those who have the fewest opportunities and ending with those who have the most opportunities. By scanning from left to right (’23 Q4 to ’24 Q3), you can see compliance went up over time (the colors move from purple to green), which is what we saw in Figure 10. Early on (2023 Q4 and 2024 Q1) it appears that hospitals with more opportunities did worse than those with fewer opportunities, but by then end (Q2 & Q3 of 2024) that trend seems to go away.

Show code

# matched_data$all_by_hosp %>%
#   filter(!is.na(CHG_rate)) %>%
#   filter(CHG_rate!=0) %>%
#   filter(Hosp %in% matched_data$small_hospitals) %>%
#   group_by(Hosp) %>%
#   summarise(denom = mean(Denominator, na.rm  = T),
#             denom_avg = mean(Denominator, na.rm  = T),
#             denom_sd = sd(Denominator, na.rm  = T),
#             
#             CHG_avg = mean(CHG_rate, na.rm  = T),
#             CHG_sd = sd(CHG_rate, na.rm  = T),
#             # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
#             # denom_avg = qwraps2::mean_sd(Denominator, na_rm  = T),
#             # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
#             
#             ) %>%
#   arrange(denom) %>%
#   print(n=100) %>%
#   gt() %>%
#   cols_hide(denom) %>%
#   cols_merge_uncert(denom_avg, denom_sd) %>%
#   cols_merge_uncert(CHG_avg, CHG_sd) %>%
#   fmt_number() %>%
#   fmt_percent(starts_with("CHG"), decimals = 1) %>%
#     gtExtras::gt_hulk_col_numeric(
#     columns = CHG_avg,
#     reverse = T,
#     # domain = c(-1,1),
#     # palette = "BuPu",
#     # pal_type = "continuous"
#   ) %>%
#   cols_label(denom_avg="CHG opportunities",
#              CHG_avg="CHG compliance",
#              Hosp="Hospital") %>%
#    tab_header(
#     title = md('Average CHG opportunities & compliance for CAH'),
#     subtitle = "Expressed as mean +/- SD" ) %>%
#   opt_stylize(2, color="gray") 


t1 <- matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  filter(Hosp %in% matched_data$small_hospitals) %>%
  group_by(Hosp, name_short) %>%
  summarise(Overall.denom_avg = mean(Denominator, na.rm  = T),
            Overall.denom_sd = sd(Denominator, na.rm  = T),
            
            Overall.CHG_avg = mean(CHG_rate, na.rm  = T),
            Overall.CHG_sd = sd(CHG_rate, na.rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            # denom_avg = qwraps2::mean_sd(Denominator, na_rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            
            ) %>%
  arrange(Overall.denom_avg) 



t2 <- matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  filter(Hosp %in% matched_data$small_hospitals) %>%
  mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
         q = str_remove(q, "20"),
         q = str_glue("'{q}")) %>%
  # count(Date, q) %>% 
  group_by(Hosp, q) %>%
  summarise(denom_avg = mean(Denominator, na.rm  = T),
            denom_sd = sd(Denominator, na.rm  = T),
            
            CHG_avg = mean(CHG_rate, na.rm  = T),
            CHG_sd = sd(CHG_rate, na.rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            # denom_avg = qwraps2::mean_sd(Denominator, na_rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            ) %>%
  pivot_wider(names_from = q, 
              values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd),
              names_vary = "slowest",
              names_glue = "{q}.{.value}")

inner_join(t1, t2, by="Hosp") %>%
  mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>% 
  select(-name_short) %>% 
  ungroup() %>%
  gt(rowname_col = "Hosp") %>%
  tab_spanner_delim(delim = ".") %>%
  fmt_missing() %>%
  fmt_number(decimals = 1) %>%
  fmt_percent(contains("CHG"), decimals = 0) %>%
  cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>%
  cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>%
  cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>%
  cols_merge_uncert(`'23 Q4.CHG_avg`,   `'23 Q4.CHG_sd`) %>%
  cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>%
  cols_merge_uncert(`'24 Q1.CHG_avg`,   `'24 Q1.CHG_sd`) %>%
  cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>%
  cols_merge_uncert(`'24 Q2.CHG_avg`,   `'24 Q2.CHG_sd`) %>%
  cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>%
  cols_merge_uncert(`'24 Q3.CHG_avg`,   `'24 Q3.CHG_sd`) %>%
  cols_label(Overall.denom_avg ="Eligible Days",
             `'23 Q4.denom_avg`="Eligible Days",
             `'24 Q1.denom_avg`="Eligible Days",
             `'24 Q2.denom_avg`="Eligible Days",
             `'24 Q3.denom_avg`="Eligible Days",
             Overall.CHG_avg ="Compliance",
             `'23 Q4.CHG_avg`="Compliance",
             `'24 Q1.CHG_avg`="Compliance",
             `'24 Q2.CHG_avg`="Compliance",
             `'24 Q3.CHG_avg`="Compliance",
             
             Hosp="Hospital") %>%
  
  # gtExtras::gt_hulk_col_numeric(columns = contains("CHG"),domain = c(0,1)) %>%
  data_color(
    columns = contains("CHG"),
    palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
    na_color = "white",
    domain = c(0,1)) %>%
  data_color(
    columns = Overall.CHG_avg,
    method = "numeric",
    palette = "PuOr",
    # palette = "YlGnBu",
    reverse = T
  ) %>%
  
   tab_header(
    title = md('**Small/CAH:** Average CHG opportunities & compliance'),
    subtitle = "Expressed as mean +/- SD" ) %>%
  opt_stylize(2, color="gray")

Table 2:
Comparison of the number of patient-days eligable for CHG bathing with CHG compliance for critical access and small hospitals
	Overall		'23 Q4		'24 Q1		'24 Q2		'24 Q3
Small/CAH: Average CHG opportunities & compliance
Expressed as mean +/- SD
	Eligible Days	Compliance	Eligible Days	Compliance	Eligible Days	Compliance	Eligible Days	Compliance	Eligible Days	Compliance
BRN (Barnesville)	2.3 ± 1.7	92% ± 14%	3.3 ± 2.1	93% ± 12%	2.0 ± 1.4	83% ± 24%	1.0 ± 0.0	100% ± 0%	—	—
HRS (Harrison)	5.1 ± 6.1	100% ± 0%	1.5 ± 0.7	100% ± 0%	10.0 ± 11.3	100% ± 0%	8.0	100%	2.5 ± 0.7	100% ± 0%
WTZ (Wetzel)	11.0 ± 8.6	84% ± 18%	2.5 ± 2.1	100% ± 0%	14.0 ± 1.7	77% ± 21%	18.0 ± 9.6	86% ± 10%	6.7 ± 9.0	79% ± 26%
BRX (Braxton)	11.5 ± 8.6	71% ± 23%	20.3 ± 13.3	80% ± 10%	6.7 ± 3.5	51% ± 34%	10.5 ± 4.9	71% ± 20%	8.3 ± 1.5	82% ± 16%
JAX (Jackson)	16.2 ± 9.8	95% ± 7%	13.0 ± 7.1	91% ± 5%	19.7 ± 13.6	93% ± 12%	16.3 ± 11.8	100% ± 0%	14.7 ± 10.0	95% ± 8%
JMC (Jefferson)	27.3 ± 17.5	73% ± 13%	42.0 ± 17.1	84% ± 7%	30.0 ± 21.8	77% ± 6%	25.0 ± 12.5	73% ± 17%	12.3 ± 8.7	58% ± 3%
STJ (St Joes)	27.8 ± 14.1	66% ± 18%	24.7 ± 10.3	50% ± 17%	29.0 ± 26.5	60% ± 20%	31.7 ± 15.6	80% ± 12%	26.0 ± 2.6	73% ± 3%
SMR (Summersville)	50.9 ± 28.7	73% ± 14%	58.7 ± 14.5	63% ± 9%	76.7 ± 45.9	60% ± 8%	29.3 ± 2.9	83% ± 14%	39.0 ± 14.0	84% ± 1%
PVH (Potomac Valley)	66.8 ± 18.7	77% ± 10%	73.0 ± 33.8	69% ± 8%	76.7 ± 15.1	71% ± 6%	60.0 ± 11.5	80% ± 3%	57.3 ± 4.0	90% ± 7%
GMH (Grant)	95.6 ± 35.7	53% ± 37%	119.0 ± 29.7	7% ± 1%	128.5 ± 37.5	19% ± 12%	65.3 ± 38.4	77% ± 9%	88.3 ± 5.9	84% ± 17%
GRMC (Garrett)	144.3 ± 39.0	72% ± 22%	128.3 ± 23.2	44% ± 6%	163.3 ± 64.7	66% ± 11%	131.3 ± 24.6	83% ± 15%	154.3 ± 41.2	97% ± 1%

Show code


rm(t1, t2)

Across all hospitals the the system there are maybe hints at decreasing compliance as the number of opportunities increase, shown by the black trend lines below. However, the variation in CHG opportunities are mostly clustered by hospital along the X-axis, so perhaps it’s really the hospital that is accounting for the differences.

Show code

# matched_data$small_by_hosp %>%
#   ggplot(aes(x=Denominator, y=CHG_rate)) +
#   geom_point() + geom_smooth() +
#   facet_wrap("hosp_type", scales = "free")

x <- matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic",
                               hosp_type=="Community" ~ "Community",
                               TRUE ~ "CAH / small")) %>%
  mutate(Month = strftime(Date, format="%b %Y")) %>%
  ggplot(aes(x=Denominator, y=CHG_rate)) +
  geom_smooth(span=.9, color="black", se=F) +
  geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) + 
  facet_wrap("hosp_type", scales = "free_x") +
  guides(shape=F) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  theme_bw() +
  labs(title = "CHG compliance vs CHG opportunities",
       y = "CHG compliance",
       x="CHG opportunities")

plotly::ggplotly(x)

Figure 11: CHG compliance by number of opportunities for CHG bathing. The solid black line shows the trend for each panel.

Show code

rm(x)

Importantly, Figure 11 above doesn’t account for time (though you can hover over each point to view the month). If we stratify by quarter, any trend seems to go away, again implying that if there are differences in compliance by opportunities, it’s those differences are probably occuring at a hospital level (i.e. hospital is a confounding factor for this relationship)

Show code

matched_data$all_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic",
                               hosp_type=="Community" ~ "Community",
                               TRUE ~ "CAH / small")) %>%
  mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
         q = str_remove(q, "20"),
         q = str_glue("'{q}")) %>%
  mutate(Month = strftime(Date, format="%b %Y")) %>%
  ggplot(aes(x=Denominator, y=CHG_rate)) +
  geom_smooth(span=.9, color="black", se=F, alpha=.5) +
  geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) + 
  facet_grid(q ~ hosp_type, scales = "free_x") +
  guides(shape=F, color=F) +
  scale_y_continuous(limits = c(NA, 1), labels = scales::percent) +
  theme_bw() +
  labs(title = "CHG compliance vs CHG opportunities",
       y = "CHG compliance",
       x="CHG opportunities")

Key take aways

It doesn’t really look like the number of opportunities is an important factor, especially compared to trends over time and across hospitals

By Hospital

We will start by looking at a table similar to Table 2, but now for all hospitals⁵. As we saw graphically in Figure 8 (in the Over time section), compliance rates increase over time until they plateau. Most striking is WVUH, with it’s high number of opportunities and plateau at such a low number compared to all of the other facilities.

Show code

t1 <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  group_by(Hosp, name_short) %>%
  summarise(Overall.denom_avg = mean(Denominator, na.rm  = T),
            Overall.denom_sd = sd(Denominator, na.rm  = T),
            
            Overall.CHG_avg = mean(CHG_rate, na.rm  = T),
            Overall.CHG_sd = sd(CHG_rate, na.rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            # denom_avg = qwraps2::mean_sd(Denominator, na_rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            
            ) %>%
  arrange(Overall.denom_avg) 



t2 <- matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
         q = str_remove(q, "20"),
         q = str_glue("'{q}")) %>%
  # count(Date, q) %>% 
  group_by(Hosp, q) %>%
  summarise(denom_avg = mean(Denominator, na.rm  = T),
            denom_sd = sd(Denominator, na.rm  = T),
            
            CHG_avg = mean(CHG_rate, na.rm  = T),
            CHG_sd = sd(CHG_rate, na.rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            # denom_avg = qwraps2::mean_sd(Denominator, na_rm  = T),
            # denom_med = qwraps2::median_iqr(Denominator, na_rm  = T),
            ) %>%
  pivot_wider(names_from = q, 
              values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd),
              names_vary = "slowest",
              names_glue = "{q}.{.value}")

inner_join(t1, t2, by="Hosp") %>%
  mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>% 
  select(-name_short) %>% 
  ungroup() %>%
  select(-contains("denom")) %>%
  gt(rowname_col = "Hosp") %>%
  # tab_spanner_delim(delim = ".") %>%
  fmt_missing() %>%
  fmt_number(decimals = 1) %>%
  fmt_percent(contains("CHG"), decimals = 0) %>%
  # cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>%
  cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>%
  # cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>%
  cols_merge_uncert(`'23 Q4.CHG_avg`,   `'23 Q4.CHG_sd`) %>%
  # cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>%
  cols_merge_uncert(`'24 Q1.CHG_avg`,   `'24 Q1.CHG_sd`) %>%
  # cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>%
  cols_merge_uncert(`'24 Q2.CHG_avg`,   `'24 Q2.CHG_sd`) %>%
  # cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>%
  cols_merge_uncert(`'24 Q3.CHG_avg`,   `'24 Q3.CHG_sd`) %>%
  cols_label(Overall.CHG_avg ="Overall",
             `'23 Q4.CHG_avg`="2023 Q4",
             `'24 Q1.CHG_avg`="2024 Q1",
             `'24 Q2.CHG_avg`="2024 Q2",
             `'24 Q3.CHG_avg`="2024 Q3",
             # Overall.CHG_avg ="Compliance",
             # `'23 Q4.CHG_avg`="Compliance",
             # `'24 Q1.CHG_avg`="Compliance",
             # `'24 Q2.CHG_avg`="Compliance",
             # `'24 Q3.CHG_avg`="Compliance",
             # Overall.denom_avg ="Eligible Days",
             # `'23 Q4.denom_avg`="Eligible Days",
             # `'24 Q1.denom_avg`="Eligible Days",
             # `'24 Q2.denom_avg`="Eligible Days",
             # `'24 Q3.denom_avg`="Eligible Days",
             
             Hosp="Hospital") %>%
  

  # gtExtras::gt_hulk_col_numeric(
  #   columns = contains("CHG"), 
  #   domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0), 
  #                       "CHG_rate"), na.rm = T) ,1)) %>%
   data_color(
    columns = contains("CHG"),
    palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"),
    na_color = "white",
    domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0), 
                        "CHG_rate"), na.rm = T) ,1)) %>%
  
  
  data_color(
    columns = Overall.CHG_avg,
    method = "numeric",
    palette = "PuOr", reverse = F
    # palette = "YlGnBu", reverse = T
    
  ) %>%
  
   tab_header(
    title = md('**CHG compliance** by hospital'),
    subtitle = "Expressed as mean +/- SD" ) %>%
  opt_stylize(2, color="gray")

Table 3:
Comparison of the number of patient-days eligable for CHG bathing with CHG compliance for all hospitals
	Overall	2023 Q4	2024 Q1	2024 Q2	2024 Q3
CHG compliance by hospital
Expressed as mean +/- SD
RMH (Reynolds)	74% ± 9%	65% ± 4%	69% ± 3%	82% ± 5%	82% ± 3%
Small (Small hospitals)	70% ± 17%	52% ± 6%	56% ± 5%	83% ± 3%	88% ± 4%
UTN (Uniontown)	72% ± 8%	62% ± 6%	74% ± 1%	72% ± 4%	81% ± 4%
BMC (Berkeley)	66% ± 6%	60% ± 3%	63% ± 2%	68% ± 7%	71% ± 2%
PRN (Princeton)	80% ± 5%	74% ± 3%	79% ± 4%	81% ± 2%	85% ± 3%
TMH (Thomas)	86% ± 5%	—	—	81% ± 6%	89% ± 2%
WHL (Wheeling)	86% ± 8%	74% ± 6%	89% ± 2%	90% ± 2%	90% ± 2%
UHC (UHC)	80% ± 4%	74% ± 1%	80% ± 1%	81% ± 1%	85% ± 1%
CCMC (Camden Clark)	78% ± 12%	63% ± 3%	72% ± 5%	87% ± 6%	91% ± 0%
WVUH (Ruby & Fairmont)	62% ± 7%	53% ± 1%	62% ± 5%	67% ± 1%	67% ± 3%

Show code


rm(t1, t2)

This can be shown graphically, in the boxplots below

Show code

matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  # mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community",
  #                              hosp_type=="Community" ~ "Academic / Community",
  #                              TRUE ~ "CAH / small")) %>%
  mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
         q = str_remove(q, "20"),
         q = str_glue("'{q}"),
         q = factor(q, ordered=T)) %>%
  mutate(Month = strftime(Date, format="%b %Y")) %>%
  ggplot(aes(x=Hosp, y=CHG_rate)) +
  # geom_boxplot(fill="#BDD7E7", color="grey30") +
  # "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5"
  geom_boxplot(aes(fill=q)) +
  # geom_jitter(aes(text=Month)) +
  # coord_flip() +
  # facet_wrap("q", scales = "free_y") +
  # facet_wrap(hosp_type~q, scales = "free_y") +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position = "bottom") +
  scale_fill_brewer(palette = "RdPu") +
  labs(title="Compliance by hospital", 
       subtitle = "Broken down by quarter",
       x = "",
       y = "CHG compliance",
       fill="")
  # facet_grid(.~hosp_type, scales = "free_y")

Figure 13: CHG compliance for each hospital, broken down by quarter

Although that’s already a busy figure (at the risk of making it even more busy), the version below adds each hospital’s overall boxplot in blue in the background in blue

Show code

matched_data$small_by_hosp %>%
  filter(!is.na(CHG_rate)) %>%
  filter(CHG_rate!=0) %>%
  # mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community",
  #                              hosp_type=="Community" ~ "Academic / Community",
  #                              TRUE ~ "CAH / small")) %>%
  mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"),
         q = str_remove(q, "20"),
         q = str_glue("'{q}"),
         q = factor(q, ordered=T)) %>%
  mutate(Month = strftime(Date, format="%b %Y")) %>%
  ggplot(aes(x=Hosp, y=CHG_rate)) +
  geom_boxplot(fill="#BDD7E7", color="grey30") +
  # "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5"
  geom_boxplot(aes(fill=q)) +
  # geom_jitter(aes(text=Month)) +
  # coord_flip() +
  # facet_wrap("q", scales = "free_y") +
  # facet_wrap(hosp_type~q, scales = "free_y") +
  theme_bw() +
  scale_y_continuous(labels = scales::percent) +
  theme(legend.position = "bottom") +
  scale_fill_brewer(palette = "RdPu") +
  labs(title="Compliance by hospital", 
       subtitle = "Broken down by quarter",
       x = "",
       y = "CHG compliance",
       fill="")

Show code

  # facet_grid(.~hosp_type, scales = "free_y")

Regressions

Show code

df_glm <- matched_data$small_by_hosp %>% filter(Era!="During")
mods <- list()
# █─mods
# ├─CLABSI 
# ├─CAUTI
# ├─MRSA
# └─█─RE
#   ├─CLABSI 
#   ├─CAUTI
#   └─MRSA

In this section, we will use a negative binomial regression to examine any influence that CHG bathing has on HAIs This is the same type of model that the NHSN uses to predict the number of expected infections.

Unless otherwise specified, our outcome of interest will be the monthly number of HAIs observed for each hospital. Notably, this is the absolute number, not a rate. To account for confounding variables we will include the number of predicted infections (from the NHSN data).

Example: CLABSI

Show code

mods[["CLABSI"]] <- MASS::glm.nb(CLABSI_obs ~ CLABSI_pred + Era, data = df_glm)  
summary(mods[["CLABSI"]]) 
# DHARMa::testDispersion(mods[["CLABSI"]]) 
# glance(mods[["CLABSI"]]) 
# unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean)

To explain the various models, this section will go into detail using CLABSIs as an example. All of the other HAIs will follow the same format, but aren’t shown for space. The backbone model is a simple negative binomial model, using the formula below for CLABSI

## CLABSI_obs ~ CLABSI_pred + Era

The results of this specific model is shown in the table below:

Show code

broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T) %>%
  relocate(conf.low, conf.high, .before = p.value) %>%
  select(-c(std.error, statistic)) %>%
  
  gt() %>%
  cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
  # tab_stub_indent(everything()) %>%
  cols_label(estimate = md("Incident Rate Ratio (IRR)"),
             term     = "Term",
             conf.low = "95% CI",
             p.value  = "P value") %>%
  fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
  fmt_scientific(columns = p.value, 
             rows = p.value < 0.001) %>%
  fmt_scientific(columns = conf.low, 
             rows = conf.low < 0.001 | conf.low > 1000) %>%
  fmt_scientific(columns = conf.high, 
             rows = conf.high < 0.001 | conf.high > 1000) %>%
  tab_header(
    title = 'CLABSI: Negative binomial model'
  ) %>%
  opt_stylize(5)

Term	Incident Rate Ratio (IRR)	95% CI	P value
CLABSI: Negative binomial model
(Intercept)	0.381	0.284 - 0.505	3.52 × 10⁻¹¹
CLABSI_pred	1.80	1.62 - 2.00	1.97 × 10⁻²⁹
EraAfter	0.599	0.410 - 0.869	0.00758

This shows that compared to the before era, the “after” era (post CHG implementation) had a 40% reduction in CLABSIs (1 - 0.599) with a P-value of 0.008. It also shows that the predicted infections are significantly associated with increased observed infections (which is to be expected).

The same data is shown graphically below:

Show code

sjPlot::plot_model(mods[["CLABSI"]])

Random effects CLABSI

Show code

library(glmmTMB)
mods[["RE"]][["CLABSI"]] <- glmmTMB(CLABSI_obs ~ CLABSI_pred + Era + (1|Hosp) ,
        data = df_glm, 
        family = "nbinom1") 
summary(mods[["RE"]][["CLABSI"]])
# broom.mixed::glance(mods[["RE"]][["CLABSI"]])
# DHARMa::testDispersion(mods[["RE"]][["CLABSI"]])

In the previous model, we didn’t do anything to account for the various hospitals. There are arguments for and against adjusting for hospital-specific effects, outlined below

Reason for adjusting for each hospital

The biggest reason for doing this would be because we have repeated measures, meaning that we have within-group correlation (e.g. the likelihood of CLABSIs at Ruby in July are not independent of the likelihood at Ruby in November). I also feel like doing a random effects model would be helpful for getting a true population level estimate, but I don’t have a clear reason for this belief

Reasons not to adjust for each hospital

The beauty (and downfall) of using NHSN’s predicted number of infections is that it already accounts for many variables that we would want to include. Among these, NHSN already accounts for hospital size & teaching hospital, so introducing “additional adjustment” for hospital size is redundant. This might cause issues with multicollinearity as well as difficulty in interpruting the results. Additionally, the only factors NHSN uses for calculating the predicted CLABSIs at CAH is the number of central line days (see Table 3 and page 42 of the NHSN documents), so I’m really not sure about this

I think it’s worthwhile to at least include this model (even if it’s a sensitivity analysis), which is the same as [Model 1] but now with random effects for each hospital. The model formula I use is below:

## CLABSI_obs ~ CLABSI_pred + Era + (1 | Hosp)

The (1|Hosp) term means each hospital has a random intercept, capturing the variation in infection rates across hospitals. This helps to account for unobserved hospital-specific factors that may influence the CLABSI rates, but still assumes that the effect of CHG implementation is similar across hospitals

Show code

broom.mixed::tidy(mods[["RE"]][["CLABSI"]], conf.int = T, exponentiate=T) %>%
  filter(effect=="fixed") %>%
  select(term, estimate, conf.low, conf.high, p.value) %>%
  
  gt() %>%
  cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
  # tab_stub_indent(everything()) %>%
  cols_label(estimate = md("Incident Rate Ratio (IRR)"),
             term     = "Term",
             conf.low = "95% CI",
             p.value  = "P value") %>%
  fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
  fmt_scientific(columns = p.value, 
             rows = p.value < 0.001) %>%
  fmt_scientific(columns = conf.low, 
             rows = conf.low < 0.001 | conf.low > 1000) %>%
  fmt_scientific(columns = conf.high, 
             rows = conf.high < 0.001 | conf.high > 1000) %>%
  tab_header(
    title = md('CLABSI: **Random effects** negative binomial model'),
    subtitle = "Random effects for each hospital"
  ) %>%
  opt_stylize(5, color = "green")

Term	Incident Rate Ratio (IRR)	95% CI	P value
CLABSI: Random effects negative binomial model
Random effects for each hospital
(Intercept)	0.358	0.171 - 0.746	0.00616
CLABSI_pred	1.26	0.834 - 1.91	0.269
EraAfter	0.662	0.462 - 0.949	0.0246

All HAIs, negative binomial

Show code

mods[["CAUTI"]] <- MASS::glm.nb(CAUTI_obs ~ CAUTI_pred + Era, data = df_glm)  
summary(mods[["CAUTI"]]) 
# DHARMa::testDispersion(mods[["CAUTI"]]) 
# glance(mods[["CAUTI"]]) 
# unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean) 

mods[["MRSA"]] <- MASS::glm.nb(MRSA_obs ~ MRSA_pred + Era, data = df_glm)  
summary(mods[["MRSA"]]) 
# DHARMa::testDispersion(mods[["MRSA"]]) 
# glance(mods[["MRSA"]])

This shows all three models (CALBSI, CAUTI, MRSA) in the same table

Show code

bind_rows(
  mutate(broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T),
         Outcome = "CLABSI"),
  mutate(broom::tidy(mods[["CAUTI"]], conf.int = T, exponentiate=T),
         Outcome = "CAUTI"),
  mutate(broom::tidy(mods[["MRSA"]], conf.int = T, exponentiate=T),
         Outcome = "MRSA")
) %>%
  relocate(Outcome) %>%
  relocate(conf.low, conf.high, .before = p.value) %>%
  select(-c(std.error, statistic)) %>%
  mutate(term = if_else(str_detect(term, "_pred"), "NHSN prediction", term)) %>%
  group_by(Outcome) %>%
  gt(rowname_col = "term") %>%
   tab_stub_indent(rows = everything(), indent = 5) %>%
  cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>%
  cols_label(estimate = md("Incident Rate Ratio (IRR)"),
             term     = "Term",
             conf.low = "95% CI",
             p.value  = "P value") %>%
  fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>%
  fmt_scientific(columns = p.value, 
             rows = p.value < 0.001) %>%
  fmt_scientific(columns = conf.low, 
             rows = conf.low < 0.001 | conf.low > 1000) %>%
  fmt_scientific(columns = conf.high, 
             rows = conf.high < 0.001 | conf.high > 1000) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      columns = c(estimate, p.value),
      rows = p.value <= 0.05 & term == "EraAfter"
    )
  ) %>%
  tab_header(
    title = 'HAIs: Negative binomial models'
  ) %>%
  opt_stylize(5)

	Incident Rate Ratio (IRR)	95% CI	P value
HAIs: Negative binomial models
CLABSI
(Intercept)	0.381	0.284 - 0.505	3.52 × 10⁻¹¹
NHSN prediction	1.80	1.62 - 2.00	1.97 × 10⁻²⁹
EraAfter	0.599	0.410 - 0.869	0.00758
CAUTI
(Intercept)	0.557	0.426 - 0.721	1.92 × 10⁻⁵
NHSN prediction	1.54	1.40 - 1.70	1.33 × 10⁻¹⁹
EraAfter	0.668	0.476 - 0.933	0.0180
MRSA
(Intercept)	0.143	0.0904 - 0.216	1.94 × 10⁻¹⁸
NHSN prediction	4.14	2.85 - 5.94	2.57 × 10⁻¹⁴
EraAfter	0.657	0.391 - 1.09	0.105

Show code

sjPlot::plot_models(mods$CLABSI, mods$CAUTI, mods$MRSA,
                    rm.terms = c("CLABSI_pred", "CAUTI_pred", "MRSA_pred"), 
                    m.labels = c("CLABSIs", "CAUTIs", "MRSA"),
                    axis.labels = c("After CHG"),
                    vline.color = "grey30",
                    show.values = T, 
                    show.p = T)

To do

Done: Add LabID MRSA
- Done: Add to descriptive statistics (will require a bit different of a workflow)
- Done: Add the regression
Add interrupted time series (for all models)
Compare compliance to HAIs (bivariable analysis in descriptive statistics)
Look at unit level compliance, similar to the Figure 14 below
- This is turning out to be alot more work than I expected… It might need to be done after the abstract (I think we have plenty here as is)

Show code

CHG_dept %>%   
  mutate(Rate = Numerator/Denominator) %>%   
  mutate(ID = str_glue("{Hospital}_{Department}")) %>%      
  ggplot(aes(x=Month, y=Rate, group=ID)) +   
  geom_line(aes(alpha=Denominator)) + 
  facet_wrap("Hospital")

Footnotes

For example, March 31st (2024) was on a Sunday, so the week starting on 3/31/24 mostly reflects April 2024↩︎
I was going to do median, but given the number of months with zero HAIs the table was not very informative↩︎
In this section I use the term “CAH” for brevity, but recall that this includes smaller hospitals (namely WTZ) which isn’t technically a critical access hospital↩︎
Meaning that hospitals with greater numbers of eligible patient days (like GRMC) will contribute more to the average than hospitals with fewer oppurtunities (like WTZ or JAX)↩︎
Unlike Table 2, all the small and critical access hospitals are now aggregated into the “small hospitals” group↩︎

--- title: "NHSN & CHG" params: rollout_start: label: "Beginning of rollout" value: "2023-08-01" input: dateInput rollout_end: label: "end of rollout" value: "2023-10-01" input: dateInput ignore_before: label: "Date to start the before-era (default: 08/2022)" value: "2022-08-01" input: dateInput ignore_after: label: "Date to end the after-era (default: 10/2024)" value: "2024-10-01" input: dateInput format: html: toc: true code-fold: true code-summary: "Show code" code-tools: true lightbox: true editor: visual tbl-cap-location: bottom --- ```{r setup, include=FALSE, echo=F} knitr::opts_chunk$set(warning=F, message=F, echo=T, collapse = T) library(MASS); library(DHARMa) library(tidyverse) library(lubridate) library(googlesheets4) library(gt); library(gtExtras) library(broom) library(ggeffects) # for extra ggplot help # library(ggpp) # help with dodging positions library(ggridges) # ridge plots ``` This report is run with the following parameters ```{r listParams} #| code-fold: show str(params, give.attr = F) ``` # Importing data You can skip over this section, it's just where I import the data ## Hospital info This comes from a [Google Sheet](https://docs.google.com/spreadsheets/d/1zZ1o1HhzOPrbj2KBdGsaO4rJvLDPMWqN0ogjMCNHfGw/edit?usp=sharing) where I store info for each hospital. There is important data on the sheet that governs the steps below ```{r read_CSV_Hosp} library(googlesheets4) options(gargle_oauth_email = TRUE) gs4_auth(cache = TRUE) if(!exists("hospitals")){ hospitals_raw <- paste0("https://docs.google.com/", "spreadsheets/d/", "1zZ1o1HhzOPrbj2KBdGsaO4rJvLDPMWqN0ogjMCNHfGw/") %>% read_sheet(sheet = "CHG Hospitals") hospitals <- hospitals_raw rm(hospitals_raw) } # Used to join get the names of the hospitals in the NHSN data CCNs <- hospitals %>% mutate(NHSN_abbv = case_when(!is.na(NHSN_abbv) ~ NHSN_abbv, CMS_ID=="510031" ~ "STF", CMS_ID=="510029" ~ "TMH")) %>% distinct(NHSN_abbv, CMS_ID) ``` ## NHSN data ```{r read_CSV_CLABSI} #| code-fold: true CLABSI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCentralLine_AssociatedBSI_1.2020_10.2024.xlsm", col_types = c("text", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "numeric")) %>% filter(!is.na(location)) %>% rename(Month=summaryYM) %>% mutate(Month = str_replace(Month, "M", "-"), Month = ymd(str_glue("{Month}-15"))) %>% left_join(CCNs, by=c("ccn"="CMS_ID")) %>% select(Hosp=NHSN_abbv, everything()) CLABSI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCentralLine_associatedBSI 1.2020_10.2024.xlsm", col_types = c("text", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "numeric")) %>% filter(!is.na(location)) %>% rename(Month=summaryYM) %>% mutate(Month = str_replace(Month, "M", "-"), Month = ymd(str_glue("{Month}-15"))) %>% left_join(CCNs, by=c("ccn"="CMS_ID")) %>% select(Hosp=NHSN_abbv, everything()) CLABSI <- list(CLABSI_ACH, CLABSI_CAH) %>% # Bind into a single table list_rbind() %>% # Clean up mutate(Hosp = as.factor(Hosp), Month = Month, Inf_pred = as.numeric(numPred), Inf_obs = as.numeric(infCount), CL_days = as.numeric(numcldays), locationType = as.factor(locationType), locCDC = as.character(locCDC), location = location, facType = as.factor(facType), # .keep="none" is the same as transmute, keeps only new columns .keep="none") %>% # Clean up CDC location mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>% relocate(Hosp, Month, Inf_pred, Inf_obs, CL_days, location) %>% # Ignore data before & after filter(Month > ymd(params$ignore_before)) %>% filter(Month < ymd(params$ignore_after)) rm(CLABSI_ACH, CLABSI_CAH) # CLABSI %>% rmarkdown::paged_table() ``` ::: {.callout-tip icon="false"} ## Date formats Although the data is reported monthly, it's much easier to work with if it's in date format. Therefore, by convention I set the date to be the 15th day of each month (e.g. data for September 2023 will appear as "Sept 15th 2023"). This shouldn't really matter to anyone but me, but if you're looking at the raw data that's why! ::: ```{r read_CSV_CAUTI} #| code-fold: true CAUTI_ACH <- readxl::read_excel("NHSN data/ACH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020-10.2024.xlsm", col_types = c("date", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "numeric")) %>% filter(!is.na(location)) %>% rename(Month=summaryYM) %>% mutate(Month = as.Date(Month) + days(14)) %>% left_join(CCNs, by=c("ccn"="CMS_ID")) %>% select(Hosp=NHSN_abbv, everything()) CAUTI_CAH <- readxl::read_excel("NHSN data/CAH (2015 baseline) SIRforCatheter_AssociatedUTIDat 1.2020_10.2024.xlsm", col_types = c("date", "numeric", "numeric", "numeric", "numeric", "numeric", "text", "numeric", "text", "text", "text", "text", "text", "text", "text", "text", "numeric")) %>% filter(!is.na(location)) %>% rename(Month=summaryYM) %>% mutate(Month = as.Date(Month) + days(14)) %>% left_join(CCNs, by=c("ccn"="CMS_ID"))%>% select(Hosp=NHSN_abbv, everything()) CAUTI <- list(CAUTI_ACH, CAUTI_CAH) %>% # Bind into a single table list_rbind() %>% # Clean up mutate(Hosp = as.factor(Hosp), Month = Month, Inf_pred = as.numeric(numPred), Inf_obs = as.numeric(infCount), cath_days = as.numeric(numucathdays), locationType = as.factor(locationType), locCDC = as.character(loccdc), location = location, facType = as.factor(facType), # .keep="none" is the same as transmute, keeps only new columns .keep="none") %>% # Clean up CDC location mutate(locCDC = str_remove(locCDC, "IN:ACUTE:")) %>% relocate(Hosp, Month, Inf_pred, Inf_obs, cath_days, location) %>% # Ignore data before & after filter(Month > ymd(params$ignore_before)) %>% filter(Month < ymd(params$ignore_after)) rm(CAUTI_ACH, CAUTI_CAH) # CAUTI %>% rmarkdown::paged_table() ``` ```{r read_CSV_MRSA} #| code-fold: true MRSA_ACH <- readxl::read_excel("NHSN data/ACH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx", col_types = c("text", "date", "numeric", "numeric", "numeric", "skip", "skip", "skip", "skip", "text", "text", "text", "text", "text")) %>% filter(!is.na(ccn)) %>% rename(Month=summaryYM) %>% mutate(Month = as.Date(Month) + days(14)) %>% left_join(CCNs, by=c("ccn"="CMS_ID")) %>% select(Hosp=NHSN_abbv, everything()) MRSA_CAH <- readxl::read_excel("NHSN data/CAH (2022 baseline) SIRforMRSABloodFacwideINLabID.xlsx", col_types = c("text", "date", "numeric", "numeric", "numeric", "skip", "skip", "skip", "skip", "text", "text", "text")) %>% filter(!is.na(ccn)) %>% rename(Month=summaryYM) %>% mutate(Month = as.Date(Month) + days(14)) %>% left_join(CCNs, by=c("ccn"="CMS_ID")) %>% select(Hosp=NHSN_abbv, everything()) MRSA <- list(MRSA_ACH, MRSA_CAH) %>% # Bind into a single table list_rbind() %>% # Clean up mutate(Hosp = as.factor(Hosp), Month = Month, Inf_pred = as.numeric(numPred), Inf_obs = as.numeric(MRSA_bldIncCount), MRSA_days = as.numeric(numpatdays), facType = as.factor(facType), # .keep="none" is the same as transmute, keeps only new columns .keep="none") %>% relocate(Hosp, Month, Inf_pred, Inf_obs, MRSA_days) %>% # Ignore data before & after filter(Month > ymd(params$ignore_before)) %>% filter(Month < ymd(params$ignore_after)) rm(MRSA_ACH, MRSA_CAH) ``` ## CHG compliance CHG data was obtained from the [CHG compliance dashboard](https://tableau.wvumedicine.org/#/views/CHGTreatmentCompliance/AllCHGTreatmentCompliance?:iid=1). There are a few ways to split the data, outlined below. You can limit the dashboard by CHG indication (step 1) and expand it to show department level data (step 2) ![Tableau screen shot showing how to (1) filter by CHG indication and (2) list by department](images/Screenshot%202024-12-05%20094951.png) I have a few versions of the CHG data downloaded, and because it only shows the last 12 months, the versions aren't consistent. The files I have available are below | File ID | Date exported | Filters | By department | |---------|---------------|--------------------|---------------| | File A | 10/18/2024 | Central lines only | No | | File B1 | 12/05/2024 | None | Yes | | File B2 | 12/05/2024 | Central lines only | Yes | | File B3 | 12/05/2024 | Foleys only | Yes | ::: callout-important ## Which CHG to use For now, I'm using the old version that I did in the prior analysis (File A), just so I can reproduce my prior work. Ideally, I could get access to the raw data (going back to implementation) and avoid this mess ::: ```{r read_CSV_CHG} CHG_raw <- "TableauData/2024-10-18 CHG CL only/All Overall Rate by Hospital and Department_data.csv" %>% read_csv() %>% select(Hospital, Date=`Week of Census Time`, Denominator=`All Lines Denominator`, Numerator=`All Lines Numerator`) %>% mutate(Date = mdy(Date)) %>% group_by(Hospital, Date) %>% summarise(Denominator = sum(Denominator, na.rm = T), Numerator = sum(Numerator, na.rm = T)) %>% ungroup() %>% # Remove most recent week filter(Date != ymd("2024-10-13")) ``` Because it is reported weekly (and everything else is reported monthly), we need to convert the weekly data into monthly data. Some weeks span the course of multiple months[^1]. In these cases, we will group them with whichever month that the majority of their days fell under [^1]: For example, March 31st (2024) was on a Sunday, so the week starting on 3/31/24 mostly reflects April 2024 ::: callout-note ## How are the weeks calculated? When you extract the data from Tableau, the dates it lists are all Sundays. It doesn't look like it's explicitly documented anywhere, but I assume that this means the **week starts on Sunday**, meaning the date provided is day 1 of 7 for that week ::: ```{r CHG_to_month} week_to_month <- function(date, beginning=T) { require(lubridate) # Make sure you are given only one date, not a vector stopifnot(length(date)==1) # Calculate the midpoint of the week, which is 3 days away from the start # (or end) date if(beginning) { mid_date <- date + days(3) } else({ # Otherwise, treat the given day as the end of the week mid_date <- date - days(3) }) # Extract the month and the year from this date new_month <- month(mid_date) new_year <- year(mid_date) # Create a new date and return new_date <- ymd(paste0(new_year, "-", new_month, "-15")) return(new_date) } CHG_hosp <- CHG_raw %>% # Needed to only process one date at a time rowwise() %>% mutate(Month = week_to_month(Date)) %>% ungroup() %>% group_by(Hospital, Month) %>% summarise(Denominator =sum(Denominator , na.rm = T), Numerator =sum(Numerator , na.rm = T)) # rm(CHG_clean) # CHG_monthly %>% rmarkdown::paged_table() # mutate(Hospital = as.factor(Hospital)) %>% # DT::datatable(filter="top", rownames = F) ``` I'll also grab the newer export with CHG compliance by department (file 2B in the table above). This doesn't go back as far as the old CHG data (file A), but is more granular ```{r} CHG_dept <- read_csv("TableauData/2024-12-05/CHG_byDept_CentralLines_2024-12-05.csv") %>% select(Hospital, Department, Date=`Week of Census Time`, Denominator=`All Lines Denominator`, Numerator=`All Lines Numerator`) %>% mutate(Date = mdy(Date)) %>% # Needed to only process one date at a time rowwise() %>% mutate(Month = week_to_month(Date)) %>% ungroup() %>% group_by(Hospital, Department, Month) %>% summarise(Denominator =sum(Denominator , na.rm = T), Numerator =sum(Numerator , na.rm = T)) %>% ungroup() CHG_dept <- CHG_dept %>% filter(Month!=ymd("2024-12-15")) ``` ## Summary of available data The figure below shows the timespans of the various datasets used for this run ```{r} # max(CAUTI$Month) # max(CLABSI$Month) # max(CHG_hosp$Month) # max(CHG_dept$Month) tibble(dataset = c("CLABSI", "CAUTI", "MRSA", "CHG_hosp", "CHG_dept"), min = c(min(CLABSI$Month), min(CAUTI$Month), min(MRSA$Month), min(CHG_hosp$Month), min(CHG_dept$Month)), max = c(max(CLABSI$Month), max(CAUTI$Month), max(MRSA$Month), max(CHG_hosp$Month), max(CHG_dept$Month))) %>% ggplot(aes(y=dataset)) + geom_linerange(aes(xmin=min, xmax=max)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") ``` # Pre-processing This is the section where I aggregate and match the data to prepare it for the final analysis. ## Structure of the NHSN data The NHSN exports reports HAIs down to the "ward/unit" level (called `location` in the export) and these locations can be further classified by "acuity" (using the [CDC's location code](https://www.cdc.gov/nhsn/pdfs/pscmanual/15locationsdescriptions_current.pdf#page=9); `locCDC`). An example for Princeton is shown below ```{r} CLABSI %>% filter(Hosp=="PRN") %>% # group_by(Hosp, locCDC, locationType, location) %>% # summarise(NHSN_n = n(), # NHSN_min = min(Month), # NHSN_max = max(Month)) filter(year(Month)>=2023) %>% mutate(Quarter = str_glue("{year(Month)}_Q{quarter(Month)}")) %>% group_by(Hosp, locCDC, locationType, location, Quarter) %>% summarise(#n_obs = n(), # Predicted = sum(Inf_pred), Observed = sum(Inf_obs, na.rm = T), .groups = "drop") %>% pivot_wider(names_from = Quarter, values_from = Observed) %>% gt() %>% fmt_missing() %>% tab_spanner_delim("_") %>% tab_header(title="Quarterly observed CLABSIs at PRN, by unit") %>% opt_stylize(6, color = "gray") ``` Eventually, I want to match the CHG departmental level data to the NHSN location level data, but that's going to take quite a bit of work. So for now, I'm going to aggregate the observed & predicted HAIs at a hospital level, which I do in the next section ## Aggregating NHSN data & matching to CHG ```{r aggregateNHSN} # This is where aggregated NHSN & CHG data will live temporarily temp_NSHS <- list() temp_CHG <- list() # This is where lookup tables will live temp_lookup <- list() # This is where matched data (final products) will go matched_data <- list() ## Aggregate across departments (by hospital), creating df's ## like the prior runs temp_NSHS[["ByHosp"]][["CLABSI"]] <- CLABSI %>% group_by(Hosp, Month) %>% summarise(CLABSI_pred = sum(Inf_pred), CLABSI_obs = sum(Inf_obs), days_CL = sum(CL_days), .groups = "drop") temp_NSHS[["ByHosp"]][["CAUTI"]] <- CAUTI %>% group_by(Hosp, Month) %>% summarise(CAUTI_pred = sum(Inf_pred), CAUTI_obs = sum(Inf_obs), days_foley = sum(cath_days), .groups = "drop") temp_NSHS[["ByHosp"]][["MRSA"]] <- MRSA %>% group_by(Hosp, Month) %>% summarise(MRSA_pred = sum(Inf_pred), MRSA_obs = sum(Inf_obs), days_MRSA = sum(MRSA_days), .groups = "drop") ## Match the CHG data to the NHSN abbreviations, and aggregate ## based off the NHSN abbreviation (this collapses CH/FMT into WVUH) temp_CHG[["ByHosp"]] <- CHG_hosp %>% # Join to the GS data to match keys left_join(select(hospitals, NHSN_abbv, CHG_abbv), by=c("Hospital"="CHG_abbv")) %>% # Drop data if the `NHSN_abbv` is NA filter(!is.na(NHSN_abbv)) %>% # Aggregate data on NHSN abbreviation group_by(NHSN_abbv, Month) %>% summarise(Denominator =sum(Denominator , na.rm = T), Numerator =sum(Numerator , na.rm = T), .groups = "drop") %>% rename(Hosp=NHSN_abbv, Date=Month) ## Create a table which will be the backbone of the matched ## dataset. It uses `crossing` to ensure all permutations ## of dates and hospitals are included temp_lookup[["crossing"]] <- crossing( # Vector with all of the hospitals included. Note that this uses # the name format from NHSN (although the data comes from a CHG # table, we converted the abbreviations to the NHSN format in the # step above) Hosp = temp_CHG[["ByHosp"]]$Hosp, # This is a vector of all of the months included in the analysis # across both the CHG dataset and the NHSN dataset. By using the # crossing function, in ensures that we have rows for dates that # may be missing ing one of the datasets (e.g. early days of the # CHG dataset) Date = c(temp_CHG[["ByHosp"]]$Date, temp_NSHS[["ByHosp"]][["CLABSI"]]$Month, temp_NSHS[["ByHosp"]][["CAUTI"]]$Month, temp_NSHS[["ByHosp"]][["MRSA"]]$Month)) ## Using the crossing df as a backbone, join the CHG and HAI data together matched_data[["all_by_hosp"]] <- temp_lookup[["crossing"]] %>% left_join(temp_NSHS[["ByHosp"]][["CLABSI"]], by = join_by(Hosp, Date==Month)) %>% left_join(temp_NSHS[["ByHosp"]][["CAUTI"]], by = join_by(Hosp, Date==Month)) %>% left_join(temp_NSHS[["ByHosp"]][["MRSA"]], by = join_by(Hosp, Date==Month)) %>% left_join(temp_CHG[["ByHosp"]], by = join_by(Hosp, Date)) %>% ## Annotate the era mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before", Date > ymd(params$rollout_end) ~ "After", TRUE ~ "During"), Era = as_factor(Era)) ## To annotate the names, we need to make minor adjustments ## to the data from the Google Sheet, namely recoding Ruby/FMT/CH temp_lookup[["names_full"]] <- hospitals %>% select(NHSN_abbv, name_short, hosp_type) %>% # Recode the ones that are under WVUH mutate(name_short = if_else(NHSN_abbv == "WVUH", "Ruby & Fairmont",name_short)) %>% unique() matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>% left_join(temp_lookup[["names_full"]], join_by(Hosp==NHSN_abbv)) ``` ::: callout-important ## Assumption There are a few months where there were no central line days or foley days (mainly for St Francis). Since there can be no CLABSI or CAUTI during these time periods, I counted that data as missing (and thus it is ignored) ::: ```{r} # Verify no infections observed when they don't have lines testthat::expect_equal({matched_data[["all_by_hosp"]] %>% filter(days_CL==0) %>% pull(CLABSI_obs) %>% max()}, 0) testthat::expect_equal({matched_data[["all_by_hosp"]] %>% filter(days_foley==0) %>% pull(CAUTI_obs) %>% max()}, 0) matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>% mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs), CAUTI_obs = if_else(days_foley==0, NA, CAUTI_obs)) ``` ## Aggregating smaller hospitals The Critical Access Hospitals (CAHs) represent an important segment here, as they serve a distinct patient populations compared to larger hopsitals. From a modeling standpoint, they are also unique in that they have much lower rates of infections, and the predicted number of HAIs are calculated differently by NHSN. ```{r list_small} ## First make a list of the CAHs & small hospitals temp_lookup[["CAHs"]] <- hospitals %>% filter(hosp_type=="Critical access") %>% pull(NHSN_abbv) matched_data[["small_hospitals"]] <- c(temp_lookup[["CAHs"]], "WTZ") ## Pull out the small hospitals and aggregate them temp_merging <- matched_data[["all_by_hosp"]] %>% # Starting with the matched data from the last step, limit to the # hospitals identified as small filter(Hosp %in% matched_data[["small_hospitals"]]) %>% # Reaggregate the data for these hospitals group_by(Date) %>% summarise(CLABSI_pred = sum(CLABSI_pred, na.rm = T), CLABSI_obs = sum(CLABSI_obs, na.rm = T), days_CL = sum(days_CL, na.rm = T), CAUTI_pred = sum(CAUTI_pred, na.rm = T), CAUTI_obs = sum(CAUTI_obs, na.rm = T), days_foley = sum(days_foley, na.rm = T), MRSA_pred = sum(MRSA_pred, na.rm = T), MRSA_obs = sum(MRSA_obs, na.rm = T), days_MRSA = sum(days_MRSA, na.rm = T), Denominator = sum(Denominator, na.rm = T), Numerator = sum(Numerator, na.rm = T), .groups = "drop") %>% # Becuase implicit zeros may have been introduced my # the `summarise` function, handle them here mutate(CLABSI_obs = if_else(days_CL==0, NA, CLABSI_obs), CAUTI_obs = if_else(days_foley==0, NA, CAUTI_obs)) %>% # If both numerator & denominator are 0, assume that is # from missing data mutate(missing_CHG = Denominator==0 & Numerator == 0, Denominator = if_else(missing_CHG, NA, Denominator), Numerator = if_else(missing_CHG, NA, Numerator)) %>% select(-missing_CHG) %>% ## Annotate the era mutate(Era = case_when(Date < ymd(params$rollout_start) ~ "Before", Date > ymd(params$rollout_end) ~ "After", TRUE ~ "During"), Era = as_factor(Era)) %>% # Reintroduce the names mutate(Hosp = "Small", name_short = "Small hospitals", hosp_type = "CAH / small") %>% relocate(Hosp) ## Now get the other (non-small) hospitals and bind to the ## aggregated data matched_data[["small_by_hosp"]] <- matched_data[["all_by_hosp"]] %>% filter(!Hosp %in% matched_data[["small_hospitals"]]) %>% bind_rows(temp_merging) ``` To accommodate this unique set of hospitals in the analysis, we will aggregate their monthly numbers together and treat them as one "hospital". Although **WTZ** isn't classified as a critical access hospital, it has a smaller capacity (bed size \< 50) so we will include it with the CAHs. Specifically, we will treat `r str_flatten_comma(matched_data[["small_hospitals"]], last = ", and ")` as **small hospitals**. ```{r} ## Clean up enviroment, do some final touch ups rm(temp_merging, temp_CHG, temp_lookup, temp_NSHS, CLABSI, CAUTI, MRSA) matched_data[["all_by_hosp"]] <- matched_data[["all_by_hosp"]] %>% mutate(CHG_rate = Numerator / Denominator) %>% relocate(CHG_rate, .before = Denominator) %>% # Ignore data before & after filter(Date > ymd(params$ignore_before)) %>% filter(Date < ymd(params$ignore_after)) %>% # Ignore STF filter(Hosp!="STF") matched_data[["small_by_hosp"]] <- matched_data[["small_by_hosp"]] %>% mutate(CHG_rate = Numerator / Denominator) %>% relocate(CHG_rate, .before = Denominator) %>% # Ignore data before & after filter(Date > ymd(params$ignore_before)) %>% filter(Date < ymd(params$ignore_after)) %>% # Ignore STF filter(Hosp!="STF") ``` # Descriptive statistics To get a feel for what we are looking at, this shows the time span that is covered by the data I have available. We should have equal numbers before and after the implementation period. The datasets for HAIs (`CLABSI`, `CAUTI`, `MRSA`) come from NHSN and the `CHG` dataset is extracted from Epic documentation (via Enterprise Analytics's Tableau dashboard) ```{r fig-timePeriods} #| fig-cap: "Time span of data in the analysis" #| fig-scap: "Timespan" matched_data$small_by_hosp %>% group_by(Date, Era) %>% summarise(CLABSI = sum(!is.na(CLABSI_obs),na.rm = T), CAUTI = sum(!is.na(CAUTI_pred),na.rm = T), MRSA = sum(!is.na(MRSA_pred),na.rm = T), CHG = sum(!is.na(CHG_rate),na.rm = T), n = n()) %>% # group_by(Hosp, Date) %>% pivot_longer(cols=c(CLABSI, CAUTI, MRSA, CHG), names_to = "Dataset", values_to = "n_obs") %>% mutate(pct_obs = n_obs / n) %>% filter(n_obs > 0) %>% ggplot(aes(x=Date, y=Dataset)) + geom_line() + geom_point(aes(color=Era)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + theme_bw() ``` ```{r descriptive_nums} temp_numbers <- list( n_hosp = n_groups(group_by(matched_data$small_by_hosp, Hosp)), CLABSI = list( n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs))), n_before = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs), Era=="Before")), n_during = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs), Era=="During")), n_after = nrow(filter(matched_data$small_by_hosp, !is.na(CLABSI_obs), Era=="After")) ), CAUTI = list( n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs))), n_before = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs), Era=="Before")), n_during = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs), Era=="During")), n_after = nrow(filter(matched_data$small_by_hosp, !is.na(CAUTI_obs), Era=="After")) ), MRSA = list( n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs))), n_before = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs), Era=="Before")), n_during = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs), Era=="During")), n_after = nrow(filter(matched_data$small_by_hosp, !is.na(MRSA_obs), Era=="After")) ), CHG_hosp = list( n_obs = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate))), start = {matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% pull(Date) %>% min() %>% strftime(format="%b %Y")}, end = {matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% pull(Date) %>% max() %>% strftime(format="%b %Y")}, n_before = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate), Era=="Before")), n_during = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate), Era=="During")), n_after = nrow(filter(matched_data$small_by_hosp, !is.na(CHG_rate), Era=="After")) ), CHG_dept = list( n_obs = nrow(CHG_dept), start = {CHG_dept %>% pull(Month) %>% min() %>% strftime(format="%b %Y")}, end = {CHG_dept %>% pull(Month) %>% max() %>% strftime(format="%b %Y")} ), Dates = list( before = list( start = {matched_data$small_by_hosp %>% filter(Era=="Before") %>% pull(Date) %>% min() %>% strftime(format="%b %Y")}, end = {matched_data$small_by_hosp %>% filter(Era=="Before") %>% pull(Date) %>% max() %>% strftime(format="%b %Y")}, len = {matched_data$small_by_hosp %>% filter(Era=="Before") %>% pull(Date) %>% unique() %>% length()} ), during = list( start = {matched_data$small_by_hosp %>% filter(Era=="During") %>% pull(Date) %>% min() %>% strftime(format="%b %Y")}, end = {matched_data$small_by_hosp %>% filter(Era=="During") %>% pull(Date) %>% max() %>% strftime(format="%b %Y")}, len = {matched_data$small_by_hosp %>% filter(Era=="During") %>% pull(Date) %>% unique() %>% length()} ), after = list( start = {matched_data$small_by_hosp %>% filter(Era=="After") %>% pull(Date) %>% min() %>% strftime(format="%b %Y")}, end = {matched_data$small_by_hosp %>% filter(Era=="After") %>% pull(Date) %>% max() %>% strftime(format="%b %Y")}, len = {matched_data$small_by_hosp %>% filter(Era=="After") %>% pull(Date) %>% unique() %>% length()} ) ) ) ``` The data generally spans three era, the **before era** (`r temp_numbers$Dates$before$start` to `r temp_numbers$Dates$before$end`; `r temp_numbers$Dates$before$len` months), the **implementation era** (`r temp_numbers$Dates$during$start` to `r temp_numbers$Dates$during$end`; `r temp_numbers$Dates$during$len` months), and the **after era** (`r temp_numbers$Dates$after$start` to `r temp_numbers$Dates$after$end`; `r temp_numbers$Dates$after$len` months). Across the `r temp_numbers$n_hosp` hospitals included (keeping in mind that we aggregated `r length(matched_data$small_hospitals)` of the smaller hospitals into one "small hospital"), there are **`r temp_numbers$CLABSI$n_obs` observations for CLABSIs** (`r temp_numbers$CLABSI$n_before` in the before era, `r temp_numbers$CLABSI$n_during` in the implementation period, and `r temp_numbers$CLABSI$n_after` in the after era), **`r temp_numbers$CLABSI$n_obs` observations for CAUTIs** (`r temp_numbers$CAUTI$n_before` in the before era, `r temp_numbers$CAUTI$n_during` in the implementation period, and `r temp_numbers$CAUTI$n_after` in the after era), and **`r temp_numbers$MRSA$n_obs` observations for LabID MRSA** (`r temp_numbers$MRSA$n_before` in the before era, `r temp_numbers$MRSA$n_during` in the implementation period, and `r temp_numbers$MRSA$n_after` in the after era). The CHG data is a little more complicated. I have one dataset that shows CHG compliance at a hospital level. The **hospital level CHG data has `r temp_numbers$CHG_hosp$n_obs` observations** from `r temp_numbers$CHG_hosp$start` to `r temp_numbers$CHG_hosp$end` (`r temp_numbers$CHG_hosp$n_before` in the before era, `r temp_numbers$CHG_hosp$n_during` in the implementation period, and `r temp_numbers$CHG_hosp$n_after` in the after era). I also have CHG data **at a departmental level that has `r temp_numbers$CHG_dept$n_obs` observations** but this data spans from `r temp_numbers$CHG_dept$start` observations `r temp_numbers$CHG_dept$end` (meaning we are missing some of the early data). We will explore both, but keep in mind that they are looking at [slightly different time periods]{style="color: red"}. ## Infection rates over time To start, we will look at the number of HAIs that occurred monthly by each hospital. The **colored bars represent the observed HAIs** (the very small ones at the bottom of the Y-axis are zero infections), and the **horizontal black solid line are the predicted number of infections** as calculated by the NHSN. There is also a grey dashed vertical line that marks the implementation phase of the system wide CHG roll out. ### CLABSI The next two figures show the [**rates**]{.underline} **of CLABSIs** (per thousand central line days). The first figure (below) shows the overall rate across the entire system ```{r fig-CLABSI_rate_vs_time_graph} #| fig-width: 8 #| fig-height: 5 #| column: screen-inset-shaded #| fig-cap-location: margin #| fig-cap: "CLABSI rate over time, across entire system" #| fig-scap: "CLABSI rate overall" matched_data$small_by_hosp %>% group_by(Date, Era) %>% summarise(obs_rate = 1000 * sum(CLABSI_obs) / sum(days_CL), pred_rate = 1000 * sum(CLABSI_pred) / sum(days_CL)) %>% ggplot(aes(x=Date, y=obs_rate)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(fill=Era), alpha=0.7, color="grey70") + sjPlot::scale_fill_sjplot(palette = "circus") + # sjPlot::scale_color_sjplot(palette = "circus") + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="CLABSI Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="CLABSI Rate\n(per 1,000 CL days)", x="Month", fill="", caption = "CL = central line") + theme_bw() ``` This figure breaks it down by hospital, still showing the rate (per thousand central line days) ```{r fig-CLABSI_rate_vs_time_graph_detailed} #| fig-width: 8 #| fig-height: 6 #| fig-cap-location: bottom #| fig-cap: "CLABSI rate over time, broken down by hospital" #| fig-scap: "CLABSI rate by hospital" matched_data$small_by_hosp %>% mutate(obs_rate = 1000 * CLABSI_obs / days_CL, pred_rate = 1000 * CLABSI_pred / days_CL) %>% ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(color=hosp_type),alpha=0.7) + facet_wrap("name_short", ncol=3) + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="CLABSI Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="CLABSI Rate\n(per 1,000 CL days)", x="Month", fill="Hospital\nType", caption = "CL = central line\nCAH = Critical Access Hospital") + theme_bw() ``` ### CAUTI This section follows the same format as before, but with CAUTIs. The first two figures show the [**rates**]{.underline} **of CAUTIs** (per thousand foley days), across the entire system and by each hospital (respectively). ```{r fig-CAUTI_rate_vs_time_graph} #| fig-width: 8 #| fig-height: 5 #| column: screen-inset-shaded #| fig-cap-location: margin #| fig-cap: "CAUTI rate over time, across entire system" #| fig-scap: "CAUTI rate overall" matched_data$small_by_hosp %>% group_by(Date, Era) %>% summarise(obs_rate = 1000 * sum(CAUTI_obs) / sum(days_foley), pred_rate = 1000 * sum(CAUTI_pred) / sum(days_foley)) %>% ggplot(aes(x=Date, y=obs_rate)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(fill=Era), alpha=0.7, color="grey70") + sjPlot::scale_fill_sjplot(palette = "circus") + # sjPlot::scale_color_sjplot(palette = "circus") + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="CAUTI Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="CAUTI Rate\n(per 1,000 foley days)", x="Month", fill="") + theme_bw() ``` Berkeley is quite impressive with their change in the rates of CAUTIs (before and after) ```{r fig-CAUTI_rate_vs_time_graph_detailed} #| fig-width: 8 #| fig-height: 6 #| fig-cap: "CAUTI rate over time, broken down by hospital" #| fig-scap: "CAUTI rate by hospital" matched_data$small_by_hosp %>% mutate(obs_rate = 1000 * CAUTI_obs / days_foley, pred_rate = 1000 * CAUTI_pred / days_foley) %>% ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(color=hosp_type),alpha=0.7) + facet_wrap("name_short", ncol=3) + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="CAUTI Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="CAUTI Rate\n(per 1,000 foley days)", x="Month", fill="Hospital\nType", caption = "CAH = Critical Access Hospital") + theme_bw() ``` ### MRSA This section follows the same format as before, but with Lab ID MRSA. The first two figures show the [**rates**]{.underline} **of MRSA** (per 10,000 patient days), across the entire system and by each hospital (respectively). ```{r fig-MRSA_rate_vs_time_graph} #| fig-width: 8 #| fig-height: 5 #| column: screen-inset-shaded #| fig-cap-location: margin #| fig-cap: "MRSA rate over time, across entire system" #| fig-scap: "MRSA rate overall" matched_data$small_by_hosp %>% group_by(Date, Era) %>% summarise(obs_rate = 10000 * sum(MRSA_obs) / sum(days_MRSA), pred_rate = 10000 * sum(MRSA_pred) / sum(days_MRSA)) %>% ggplot(aes(x=Date, y=obs_rate)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(fill=Era), alpha=0.7, color="grey70") + sjPlot::scale_fill_sjplot(palette = "circus") + # sjPlot::scale_color_sjplot(palette = "circus") + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="MRSA Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="MRSA Rate\n(per 10,000 patient days)", x="Month", fill="") + theme_bw() ``` Again, the rate by each hospital ```{r fig-MRSA_rate_vs_time_graph_detailed} #| fig-width: 8 #| fig-height: 6 #| fig-cap: "MRSA rate over time, broken down by hospital" #| fig-scap: "MRSA rate by hospital" matched_data$small_by_hosp %>% mutate(obs_rate = 10000 * MRSA_obs / days_MRSA, pred_rate = 10000 * MRSA_pred / days_MRSA) %>% ggplot(aes(x=Date, y=obs_rate, fill=hosp_type)) + geom_vline(xintercept = ymd(params$rollout_start), alpha=0.5, linetype = "dashed", color="grey30") + geom_vline(xintercept = ymd(params$rollout_end), alpha=0.5, linetype = "dashed", color="grey30") + geom_line(aes(y=pred_rate)) + geom_col(aes(color=hosp_type),alpha=0.7) + facet_wrap("name_short", ncol=3) + guides(color="none") + scale_x_date(date_labels = "%b\n'%y") + labs(title="MRSA Rates: Observed vs Predicted", subtitle="Black line solid is predicted number from NHSN", y="MRSA Rate\n(per 10,000 patient days)", x="Month", fill="Hospital\nType", caption = "CAH = Critical Access Hospital") + theme_bw() ``` ### Table This table shows each **hospital's average**[^2] **rates of healthcare associated infections** broken down before and after CHG bathing was implemented. The column labeled with the percent sign (%) is the relative change after implementing CHG bathing ($\frac{After - Before}{Before}$). [^2]: I was going to do median, but given the number of months with zero HAIs the table was not very informative ```{r tbl-HAI_over_time} #| tbl-cap: HAIs in the pre and post CHG era matched_data$small_by_hosp %>% filter(Era!="During") %>% group_by(name_short, Era) %>% summarise(CAUTI_mean = mean(10000 * CAUTI_obs / days_foley, na.rm = T), CAUTI_sd = sd(10000 * CAUTI_obs / days_foley, na.rm = T), # CAUTI_n.mean = mean(CAUTI_obs, na.rm = T), # CAUTI_n.sd = sd(CAUTI_obs, na.rm = T), # CLABSI_n.mean = mean(CLABSI_obs, na.rm = T), # CLABSI_n.sd = sd(CLABSI_obs, na.rm = T), CLABSI_mean = mean(10000 * CLABSI_obs / days_CL, na.rm = T), CLABSI_sd = sd(10000 * CLABSI_obs / days_CL, na.rm = T), MRSA_mean = mean(100000 * MRSA_obs / days_MRSA, na.rm = T), MRSA_sd = sd(100000 * MRSA_obs / days_MRSA, na.rm = T), .groups = "drop") %>% pivot_longer(cols=-c(name_short, Era), names_to = c("HAI", ".value"), names_pattern = "(.*)_(.*)") %>% pivot_wider(names_from = Era, values_from = c(mean, sd), names_glue = "{Era}_{.value}", names_vary = "slowest") %>% pivot_wider(names_from = HAI, values_from = -c(HAI, name_short), names_glue = "{HAI}.{.value}", names_vary = "slowest") %>% # Add the percent change mutate(CAUTI.pct = (CAUTI.After_mean - CAUTI.Before_mean)/CAUTI.Before_mean) %>% relocate(CAUTI.pct, .after = CAUTI.After_sd) %>% mutate(CLABSI.pct = (CLABSI.After_mean - CLABSI.Before_mean)/CLABSI.Before_mean) %>% relocate(CLABSI.pct, .after = CLABSI.After_sd) %>% mutate(MRSA.pct = (MRSA.After_mean - MRSA.Before_mean)/MRSA.Before_mean) %>% relocate(MRSA.pct, .after = MRSA.After_sd) %>% mutate(CAUTI.pct = if_else(!is.finite(CAUTI.pct), NA, CAUTI.pct), CLABSI.pct = if_else(!is.finite(CLABSI.pct), NA, CLABSI.pct), MRSA.pct = if_else(!is.finite(MRSA.pct), NA, MRSA.pct)) %>% gt(rowname_col = "name_short") %>% cols_merge_uncert(CLABSI.Before_mean, CLABSI.Before_sd) %>% cols_merge_uncert(CLABSI.After_mean, CLABSI.After_sd) %>% cols_merge_uncert(CAUTI.Before_mean, CAUTI.Before_sd) %>% cols_merge_uncert(CAUTI.After_mean, CAUTI.After_sd) %>% cols_merge_uncert(MRSA.Before_mean, MRSA.Before_sd) %>% cols_merge_uncert(MRSA.After_mean, MRSA.After_sd) %>% tab_spanner_delim(delim = ".") %>% gt::cols_label(CAUTI.Before_mean = "Before", CAUTI.After_mean = "After", CLABSI.Before_mean = "Before", CLABSI.After_mean = "After", MRSA.Before_mean = "Before", MRSA.After_mean = "After", CAUTI.pct = "%", CLABSI.pct = "%", MRSA.pct = "%") %>% fmt_number(decimals = 1) %>% fmt_percent(ends_with("pct"), decimals = 0, force_sign =T) %>% fmt_missing() %>% # data_color( # columns = CAUTI.pct, # target_columns = CAUTI.pct, # method = "numeric", # palette = "viridis", # domain = c(-1, 1) # ) %>% # fmt_number() %>% # data_color( # columns = CLABSI.pct, # target_columns = CLABSI.pct, # method = "numeric", # palette = "viridis", # domain = c(-1, 1) # ) %>% # cols_hide(CAUTIdiff) data_color( columns = CAUTI.pct, reverse = T, na_color = "white", domain = c(-1,1), palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), ) %>% data_color( columns = CLABSI.pct, reverse = T, na_color = "white", domain = c(-1,1), palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), ) %>% data_color( columns = MRSA.pct, reverse = T, na_color = "white", domain = c(-2.5,2.5), palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), ) %>% # gtExtras::gt_hulk_col_numeric( # columns = CAUTI.pct, # reverse = T, # domain = c(-1,1), # palette = "RdYlBu", # # pal_type = "continuous" # ) %>% # gtExtras::gt_hulk_col_numeric( # columns = CLABSI.pct, # reverse = T, # domain = c(-1,1), # palette = "RdYlBu", # # pal_type = "continuous" # ) %>% # gtExtras::gt_hulk_col_numeric( # columns = MRSA.pct, # reverse = T, # domain = c(-2.5,2.5), # palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), # ) %>% tab_header( title = md('Rates of HAIs before & after CHG bathing'), subtitle = "Expressed as mean +/- SD" ) %>% opt_stylize(2, color="gray") %>% tab_footnote( footnote = "Per 10,000 foley/central line days", locations = cells_column_spanners(spanners = contains("CLABSI") )) %>% tab_footnote( footnote = "Per 10,000 foley/central line days", locations = cells_column_spanners(spanners = contains("CAUTI") )) %>% tab_footnote( footnote = "Per 100,000 patient days", locations = cells_column_spanners(spanners = contains("MRSA") )) ``` ## CHG compliance Now we look at CHG compliance. We will break it apart various ways (by time, by hospital, etc) so there there is some redundancy across these subsections ### Over time The first figure shows **compliance trends over time**, beginning at the end of the rollout period (`r strftime(ymd(params$rollout_end), format="%b %Y")`). The overall system wide compliance is shown as the bold solid line and the smaller black lines show the trends for each of the hospitals ```{r fig-CHG-time-overall} #| fig-width: 8 #| fig-height: 5 #| column: screen-inset-shaded #| fig-cap-location: margin #| fig-cap: "CHG compliance over time, across entire system. The bolded line is the overall CHG complaince, and the more faint lines are each hospital's trend" #| fig-scap: "Overall system wide CHG compliance" # Pooled aveage df_temp <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% group_by(Date) %>% summarise(Numerator = sum(Numerator, na.rm = T), Denominator = sum(Denominator, na.rm = T), CHG_rate = Numerator / Denominator) matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>% ggplot(aes(x=Date, y=CHG_rate)) + ggrepel::geom_label_repel(aes(label=name_short), nudge_y = -.05, nudge_x = 25, arrow = arrow(length = unit(0.02, "npc")), data=filter(matched_data$small_by_hosp, Date==ymd("2024-04-15") & Hosp=="BMC")) + ggrepel::geom_label_repel(aes(label=name_short), nudge_y = +.05, nudge_x = -25, arrow = arrow(length = unit(0.02, "npc")), data=filter(matched_data$small_by_hosp, Date==ymd("2024-03-15") & Hosp=="WHL")) + # geom_point(aes(size=cut_grp), alpha=.5) + geom_line(aes(group=Hosp, linetype=hosp_type), alpha=0.5) + geom_smooth(se = F, span=.5, data=df_temp) + # stat_smooth(geom="line", alpha=0.75) + # geom_line(alpha=0.5) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + scale_linetype_manual(values=c("Academic"="twodash", "CAH / small"="dotted", "Community" = "solid")) + theme_bw() + labs(title = "System wide CHG compliance vs time", y = "CHG compliance", linetype = "Hospital\nType") rm(df_temp) # matched_data$small_by_hosp %>% # filter(!is.na(CHG_rate)) %>% # filter(CHG_rate!=0) %>% # filter(Hosp %in% c("BMC", "WHL", "UTN")) %>% # mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>% # ggplot(aes(x=Date, y=CHG_rate)) + # geom_line(aes(group=Hosp, color=Hosp), alpha=0.5) ``` A few patterns emerge at first glance: - Over time, **compliance seems to go up** but only **to a point** *before it plateaus* - This is true for community hospitals like Wheeling (that reached a high plateau early) and Berkeley (that reached a lower plateau later on) - It also looks to be the case for CAH and our academic hospital (Ruby) - Different **hospitals plateau at different levels** - As mentioned above, Wheeling ends up much higher than Berkley - By one year after roll out, the community and CAH generally ended at or above the system wide goal (80%) but the largest hospital, our **academic hospital did not reach it's goal** and **brings down the average** for the entire system For further details, below is an interactive figure that shows each hospital's trend ```{r fig-CHG-time-hosp-interactive} #| fig-cap: "CHG compliance over time, by hospital" #| fig-scap: "CHG vs time, by hospital (interactive)" # Pooled aveage df_temp <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% group_by(Date) %>% summarise(Numerator = sum(Numerator, na.rm = T), Denominator = sum(Denominator, na.rm = T), CHG_rate = Numerator / Denominator) x <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>% ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) + geom_line(aes(color=Hosp), alpha=0.5) + geom_smooth(se = F, span=.5, color="black", data=df_temp) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + theme_bw() + labs(title = "Hospital's CHG compliance vs time", y = "CHG compliance") plotly::ggplotly(x) rm(df_temp, x) ``` ::: callout-tip If you want to dynamically explore the data yourself, in the interactive figure below you can: - Hover over points to view the actual numbers at each point, including the exact compliance rate, date, and denominator (number of patient-days eligible for CHG bathing) - Clicking on each hospital's name/abbreviation in the figure's legend will show/hide it from view (double clicking will isolate that particular hospital). This can be helpful for comparing specific hospitals ::: ```{r fig-test} #| fig-cap-location: bottom #| fig-cap: "testing" #| eval: false # matched_data$all_by_hosp %>% # filter(!is.na(CHG_rate)) %>% # ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) + # geom_line() + facet_wrap("Hosp") # # matched_data$small_by_hosp %>% # # filter(!is.na(CHG_rate)) %>% # filter(CHG_rate!=0) %>% # ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=hosp_type)) + # # geom_smooth(se = F, span=.5, alpha=0.25) + # stat_smooth(geom="line", span=.5, alpha=0.5) + # # geom_line(alpha=0.5) + # scale_y_continuous(limits = c(NA, 1), labels = scales::percent) matched_data$small_by_hosp %>% # filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% ggplot(aes(x=Date, y=CHG_rate, group=Hosp, color=Hosp)) + # geom_smooth(se = F, span=.5, alpha=0.25) + # geom_point(aes(linetype=hosp_type)) + stat_smooth(geom="line", alpha=0.75) + # geom_line(alpha=0.5) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + theme_bw() ``` #### CAH specifically Critical access hospitals[^3] are of particular interest. Recall @fig-CHG-time-overall (slightly modified below) which showed an apparent dip in compliance early after the rollout period, then sustained increased compliance until it plateaus. [^3]: In this section I use the term "CAH" for brevity, but recall that this includes smaller hospitals (namely WTZ) which isn't *technically* a critical access hospital ```{r} #| fig-width: 6.5 #| fig-height: 3.5 # Pooled aveage df_temp <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% group_by(Date) %>% summarise(Numerator = sum(Numerator, na.rm = T), Denominator = sum(Denominator, na.rm = T), CHG_rate = Numerator / Denominator) matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(cut_grp = Hmisc::cut2(Denominator, g=3)) %>% # filter(Hosp=="Small") %>% mutate(grp = if_else(Hosp=="Small", "CAH / Small", "All others")) %>% ggplot(aes(x=Date, y=CHG_rate)) + geom_line(aes(group=Hosp, color=hosp_type), alpha=0.7) + geom_smooth(se = F, span=.5, color="black", data=df_temp) + # stat_smooth(geom="line", alpha=0.75) + # geom_line(alpha=0.5) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + scale_color_manual(values=c("CAH / small"="red", "Community"="grey80", "Academic"="lightblue")) + theme_bw() + labs(y = "CHG compliance", color = "") rm(df_temp) ``` To dive into this a little more, I've de-aggregated the "CAH / Small hospital" group in the figure below. Similar to @fig-CHG-time-hosp-interactive the thick black line is the system wide average, but to make it (slightly less) busy I've omitted the community and academic hospitals. Because the compliance rate for the "CAH / Small hospital" group in the main analysis is a weighted average[^4] of these various hospitals, the transparency of each line is reflective of that's hospital's average number of opportunities for CHG bathing [^4]: Meaning that hospitals with greater numbers of eligible patient days (like GRMC) will contribute more to the average than hospitals with fewer oppurtunities (like WTZ or JAX) ```{r fig-CHG-time-CAH-interactive} #| fig-cap: "CHG compliance over time, limited to critical access hospitals (CAH). Each lines transparency is weighted to how many opportunities for CHG bathing the hospital had across the year (this is the 'denominator' in the Tableau dashboard). Similar to before, the solid black line shows the system wide average CHG compliance. " #| fig-scap: "CHG vs time, by CAH (interactive)" # Pooled aveages df_avg_all <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% group_by(Date) %>% summarise(Numerator = sum(Numerator, na.rm = T), Denominator = sum(Denominator, na.rm = T), CHG_rate = Numerator / Denominator) df_avg_small <- matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% filter(Hosp %in% matched_data$small_hospitals) %>% group_by(Date) %>% summarise(Numerator = sum(Numerator, na.rm = T), Denominator = sum(Denominator, na.rm = T), CHG_rate = Numerator / Denominator) x <- matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% filter(Hosp %in% matched_data$small_hospitals) %>% group_by(Hosp) %>% mutate(avg_denom = mean(Denominator, na.rm = T)) %>% ggplot(aes(x=Date, y=CHG_rate, label=Denominator)) + geom_line(aes(color=Hosp, alpha=avg_denom)) + # geom_smooth(se = F, span=.5, color="grey40", linetype="dotted", # data=df_avg_small) + geom_smooth(se = F, span=.5, color="black", data=df_avg_all) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + theme_bw() + guides(alpha=F) + labs(title = "CHG compliance (CAH only) vs time", y = "CHG compliance", color="CAH") plotly::ggplotly(x) rm(df_temp, df_avg_small, df_avg_all) ``` The takeaways I have here are: - The three CAH hospitals that contribute the most in terms of CHG opportunities are GRMC, PVH, SMR, and GMH - Among these four hospitals, they all have improved their compliance over time and are at or above the system wide average - Notably, Grant (GMH) didn't report data for 10/2023 (but did start reporting in 11/2023) and started out with the lowest compliance when they did report data - The early dip seen in the late fall of 2023 on @fig-CHG-time-overall may be an artifact. When Grant did start reporting, they brought the group's average down quite a bit, but the CAH group's rate probably would have been lower in 10/2023 if GMH had reported data - BRX and WTZ struggle a little more than the rest of the group, especially compared to JAX (despite all three having comparable numbers of CHG opportunities) - STJ struggled at first, but generally ended up around the system wide average - JMC actually has been going down over time, a trend not seen for any other hospital - BRN and HRS have very few opportunities (see `avg_denom` when you hover over their lines) but generally do a great/excellent job when they do have opportunities (Harrison is actually a straight line at 100%) - Important to remember that because of how the data is collected in Epic, the compliance percentage may not be very reliable with small denominators. For example, a patient who is admitted at 11pm (and doesn't get a bath in their first hour) who has a 72h length of stay will get 67% compliance *even if they do everything right* from a CHG bathing standpoint ### By number of opportunities Next we'll look at how compliance varies by the number of CHG bathing opportunities (the number of patient days that are eligble for CHG bathing) for each hospital. There are two reasons to explore this relationship 1. As mentioned above, the rate can be very sensitive to one missed opportunity, if the denominator is small. This is particularly true for the smaller and critical access hospitals 2. There might be some *a priori* reasons to expect that the amount opportunities could influence compliance - One one hand, it's possible that facilities who see fewer patients who are eligible for CHG bathing might not have CHG bathing ingrained into their workflow (it may not be front of mind for the healthcare providers) - Conversely, more opportunities for bathing are also more opportunities to miss bathing. Are there issues with scaling? ::: callout-note When I say "CHG bathing opportunities", I'm refering to the denominator in the Tableau dashboard. So this section is looking at "CHG compliance rate" ($Rate = \frac{Numerator}{Denominator} \times 100$) vs $Denominator$ . ::: The table below displays the same information from @fig-CHG-time-CAH-interactive (looking at CAH only). The table is sorted by hospital, starting with those who have the fewest opportunities and ending with those who have the most opportunities. By scanning from left to right ('23 Q4 to '24 Q3), you can see compliance went up over time (the colors move from purple to green), which is what we saw in @fig-CHG-time-CAH-interactive. Early on (2023 Q4 and 2024 Q1) it appears that hospitals with more opportunities did worse than those with fewer opportunities, but by then end (Q2 & Q3 of 2024) that trend seems to go away. ```{r tbl-CAH_denom_vs_rate} #| tbl-cap: Comparison of the number of patient-days eligable for CHG bathing with CHG compliance for critical access and small hospitals #| column: screen-inset-shaded #| collapse: true # matched_data$all_by_hosp %>% # filter(!is.na(CHG_rate)) %>% # filter(CHG_rate!=0) %>% # filter(Hosp %in% matched_data$small_hospitals) %>% # group_by(Hosp) %>% # summarise(denom = mean(Denominator, na.rm = T), # denom_avg = mean(Denominator, na.rm = T), # denom_sd = sd(Denominator, na.rm = T), # # CHG_avg = mean(CHG_rate, na.rm = T), # CHG_sd = sd(CHG_rate, na.rm = T), # # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T), # # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # # ) %>% # arrange(denom) %>% # print(n=100) %>% # gt() %>% # cols_hide(denom) %>% # cols_merge_uncert(denom_avg, denom_sd) %>% # cols_merge_uncert(CHG_avg, CHG_sd) %>% # fmt_number() %>% # fmt_percent(starts_with("CHG"), decimals = 1) %>% # gtExtras::gt_hulk_col_numeric( # columns = CHG_avg, # reverse = T, # # domain = c(-1,1), # # palette = "BuPu", # # pal_type = "continuous" # ) %>% # cols_label(denom_avg="CHG opportunities", # CHG_avg="CHG compliance", # Hosp="Hospital") %>% # tab_header( # title = md('Average CHG opportunities & compliance for CAH'), # subtitle = "Expressed as mean +/- SD" ) %>% # opt_stylize(2, color="gray") t1 <- matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% filter(Hosp %in% matched_data$small_hospitals) %>% group_by(Hosp, name_short) %>% summarise(Overall.denom_avg = mean(Denominator, na.rm = T), Overall.denom_sd = sd(Denominator, na.rm = T), Overall.CHG_avg = mean(CHG_rate, na.rm = T), Overall.CHG_sd = sd(CHG_rate, na.rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), ) %>% arrange(Overall.denom_avg) t2 <- matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% filter(Hosp %in% matched_data$small_hospitals) %>% mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"), q = str_remove(q, "20"), q = str_glue("'{q}")) %>% # count(Date, q) %>% group_by(Hosp, q) %>% summarise(denom_avg = mean(Denominator, na.rm = T), denom_sd = sd(Denominator, na.rm = T), CHG_avg = mean(CHG_rate, na.rm = T), CHG_sd = sd(CHG_rate, na.rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), ) %>% pivot_wider(names_from = q, values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd), names_vary = "slowest", names_glue = "{q}.{.value}") inner_join(t1, t2, by="Hosp") %>% mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>% select(-name_short) %>% ungroup() %>% gt(rowname_col = "Hosp") %>% tab_spanner_delim(delim = ".") %>% fmt_missing() %>% fmt_number(decimals = 1) %>% fmt_percent(contains("CHG"), decimals = 0) %>% cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>% cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>% cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>% cols_merge_uncert(`'23 Q4.CHG_avg`, `'23 Q4.CHG_sd`) %>% cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>% cols_merge_uncert(`'24 Q1.CHG_avg`, `'24 Q1.CHG_sd`) %>% cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>% cols_merge_uncert(`'24 Q2.CHG_avg`, `'24 Q2.CHG_sd`) %>% cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>% cols_merge_uncert(`'24 Q3.CHG_avg`, `'24 Q3.CHG_sd`) %>% cols_label(Overall.denom_avg ="Eligible Days", `'23 Q4.denom_avg`="Eligible Days", `'24 Q1.denom_avg`="Eligible Days", `'24 Q2.denom_avg`="Eligible Days", `'24 Q3.denom_avg`="Eligible Days", Overall.CHG_avg ="Compliance", `'23 Q4.CHG_avg`="Compliance", `'24 Q1.CHG_avg`="Compliance", `'24 Q2.CHG_avg`="Compliance", `'24 Q3.CHG_avg`="Compliance", Hosp="Hospital") %>% # gtExtras::gt_hulk_col_numeric(columns = contains("CHG"),domain = c(0,1)) %>% data_color( columns = contains("CHG"), palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), na_color = "white", domain = c(0,1)) %>% data_color( columns = Overall.CHG_avg, method = "numeric", palette = "PuOr", # palette = "YlGnBu", reverse = T ) %>% tab_header( title = md('**Small/CAH:** Average CHG opportunities & compliance'), subtitle = "Expressed as mean +/- SD" ) %>% opt_stylize(2, color="gray") rm(t1, t2) ``` Across all hospitals the the system there are maybe hints at decreasing compliance as the number of opportunities increase, shown by the black trend lines below. However, the variation in CHG opportunities are mostly clustered by hospital along the X-axis, so perhaps it's really the hospital that is accounting for the differences. ```{r fig-CHG-by-denominator-interactive} #| fig-cap: "CHG compliance by number of opportunities for CHG bathing. The solid black line shows the trend for each panel." #| fig-scap: "CHG vs denominator (interactive)" # matched_data$small_by_hosp %>% # ggplot(aes(x=Denominator, y=CHG_rate)) + # geom_point() + geom_smooth() + # facet_wrap("hosp_type", scales = "free") x <- matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic", hosp_type=="Community" ~ "Community", TRUE ~ "CAH / small")) %>% mutate(Month = strftime(Date, format="%b %Y")) %>% ggplot(aes(x=Denominator, y=CHG_rate)) + geom_smooth(span=.9, color="black", se=F) + geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) + facet_wrap("hosp_type", scales = "free_x") + guides(shape=F) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + theme_bw() + labs(title = "CHG compliance vs CHG opportunities", y = "CHG compliance", x="CHG opportunities") plotly::ggplotly(x) rm(x) ``` Importantly, @fig-CHG-by-denominator-interactive above doesn't account for time (though you can hover over each point to view the month). If we stratify by quarter, any trend seems to go away, again implying that if there are differences in compliance by opportunities, it's those differences are probably occuring at a hospital level (i.e. hospital is a confounding factor for this relationship) ```{r fig-CHG-by-denominator-facet} #| fig-cap: "CHG compliance by number of opportunities for CHG bathing, broken down by quarter. The solid black line shows the trend for each panel." #| fig-scap: "CHG vs denominator (by quarter)" matched_data$all_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic", hosp_type=="Community" ~ "Community", TRUE ~ "CAH / small")) %>% mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"), q = str_remove(q, "20"), q = str_glue("'{q}")) %>% mutate(Month = strftime(Date, format="%b %Y")) %>% ggplot(aes(x=Denominator, y=CHG_rate)) + geom_smooth(span=.9, color="black", se=F, alpha=.5) + geom_point(aes(color=Hosp, shape=hosp_type, text=Month)) + facet_grid(q ~ hosp_type, scales = "free_x") + guides(shape=F, color=F) + scale_y_continuous(limits = c(NA, 1), labels = scales::percent) + theme_bw() + labs(title = "CHG compliance vs CHG opportunities", y = "CHG compliance", x="CHG opportunities") ``` ::: {.callout-warning icon="false"} ## Key take aways It doesn't really look like the number of opportunities is an important factor, especially compared to trends over time and across hospitals ::: ### By Hospital We will start by looking at a table similar to @tbl-CAH_denom_vs_rate, but now for all hospitals[^5]. As we saw graphically in @fig-CHG-time-overall (in the [Over time] section), compliance rates increase over time until they plateau. Most striking is WVUH, with it's high number of opportunities and plateau at such a low number compared to all of the other facilities. [^5]: Unlike @tbl-CAH_denom_vs_rate, all the small and critical access hospitals are now aggregated into the "small hospitals" group ```{r tbl-hosp_denom_vs_rate} #| tbl-cap: Comparison of the number of patient-days eligable for CHG bathing with CHG compliance for all hospitals #| column: screen-inset-shaded #| collapse: true t1 <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% group_by(Hosp, name_short) %>% summarise(Overall.denom_avg = mean(Denominator, na.rm = T), Overall.denom_sd = sd(Denominator, na.rm = T), Overall.CHG_avg = mean(CHG_rate, na.rm = T), Overall.CHG_sd = sd(CHG_rate, na.rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), ) %>% arrange(Overall.denom_avg) t2 <- matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"), q = str_remove(q, "20"), q = str_glue("'{q}")) %>% # count(Date, q) %>% group_by(Hosp, q) %>% summarise(denom_avg = mean(Denominator, na.rm = T), denom_sd = sd(Denominator, na.rm = T), CHG_avg = mean(CHG_rate, na.rm = T), CHG_sd = sd(CHG_rate, na.rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), # denom_avg = qwraps2::mean_sd(Denominator, na_rm = T), # denom_med = qwraps2::median_iqr(Denominator, na_rm = T), ) %>% pivot_wider(names_from = q, values_from = c(denom_avg, denom_sd, CHG_avg, CHG_sd), names_vary = "slowest", names_glue = "{q}.{.value}") inner_join(t1, t2, by="Hosp") %>% mutate(Hosp = str_glue("{Hosp} ({name_short})")) %>% select(-name_short) %>% ungroup() %>% select(-contains("denom")) %>% gt(rowname_col = "Hosp") %>% # tab_spanner_delim(delim = ".") %>% fmt_missing() %>% fmt_number(decimals = 1) %>% fmt_percent(contains("CHG"), decimals = 0) %>% # cols_merge_uncert(Overall.denom_avg, Overall.denom_sd) %>% cols_merge_uncert(Overall.CHG_avg, Overall.CHG_sd) %>% # cols_merge_uncert(`'23 Q4.denom_avg`, `'23 Q4.denom_sd`) %>% cols_merge_uncert(`'23 Q4.CHG_avg`, `'23 Q4.CHG_sd`) %>% # cols_merge_uncert(`'24 Q1.denom_avg`, `'24 Q1.denom_sd`) %>% cols_merge_uncert(`'24 Q1.CHG_avg`, `'24 Q1.CHG_sd`) %>% # cols_merge_uncert(`'24 Q2.denom_avg`, `'24 Q2.denom_sd`) %>% cols_merge_uncert(`'24 Q2.CHG_avg`, `'24 Q2.CHG_sd`) %>% # cols_merge_uncert(`'24 Q3.denom_avg`, `'24 Q3.denom_sd`) %>% cols_merge_uncert(`'24 Q3.CHG_avg`, `'24 Q3.CHG_sd`) %>% cols_label(Overall.CHG_avg ="Overall", `'23 Q4.CHG_avg`="2023 Q4", `'24 Q1.CHG_avg`="2024 Q1", `'24 Q2.CHG_avg`="2024 Q2", `'24 Q3.CHG_avg`="2024 Q3", # Overall.CHG_avg ="Compliance", # `'23 Q4.CHG_avg`="Compliance", # `'24 Q1.CHG_avg`="Compliance", # `'24 Q2.CHG_avg`="Compliance", # `'24 Q3.CHG_avg`="Compliance", # Overall.denom_avg ="Eligible Days", # `'23 Q4.denom_avg`="Eligible Days", # `'24 Q1.denom_avg`="Eligible Days", # `'24 Q2.denom_avg`="Eligible Days", # `'24 Q3.denom_avg`="Eligible Days", Hosp="Hospital") %>% # gtExtras::gt_hulk_col_numeric( # columns = contains("CHG"), # domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0), # "CHG_rate"), na.rm = T) ,1)) %>% data_color( columns = contains("CHG"), palette = c("#86102f", "#f7b8ad", "#b9ddf1","#2c5782"), na_color = "white", domain = c(min(pull(filter(matched_data$small_by_hosp, CHG_rate!=0), "CHG_rate"), na.rm = T) ,1)) %>% data_color( columns = Overall.CHG_avg, method = "numeric", palette = "PuOr", reverse = F # palette = "YlGnBu", reverse = T ) %>% tab_header( title = md('**CHG compliance** by hospital'), subtitle = "Expressed as mean +/- SD" ) %>% opt_stylize(2, color="gray") rm(t1, t2) ``` This can be shown graphically, in the boxplots below ```{r fig-hospital-boxplot} #| fig-cap: "CHG compliance for each hospital, broken down by quarter" #| fig-scap: "CHG by hospital" #| fig-width: 8 #| fig-height: 6 matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% # mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community", # hosp_type=="Community" ~ "Academic / Community", # TRUE ~ "CAH / small")) %>% mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"), q = str_remove(q, "20"), q = str_glue("'{q}"), q = factor(q, ordered=T)) %>% mutate(Month = strftime(Date, format="%b %Y")) %>% ggplot(aes(x=Hosp, y=CHG_rate)) + # geom_boxplot(fill="#BDD7E7", color="grey30") + # "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5" geom_boxplot(aes(fill=q)) + # geom_jitter(aes(text=Month)) + # coord_flip() + # facet_wrap("q", scales = "free_y") + # facet_wrap(hosp_type~q, scales = "free_y") + theme_bw() + scale_y_continuous(labels = scales::percent) + theme(legend.position = "bottom") + scale_fill_brewer(palette = "RdPu") + labs(title="Compliance by hospital", subtitle = "Broken down by quarter", x = "", y = "CHG compliance", fill="") # facet_grid(.~hosp_type, scales = "free_y") ``` Although that's already a busy figure (at the risk of making it even more busy), the version below adds each hospital's overall boxplot in blue in the background in blue ```{r} #| fig-width: 8 #| fig-height: 6 matched_data$small_by_hosp %>% filter(!is.na(CHG_rate)) %>% filter(CHG_rate!=0) %>% # mutate(hosp_type = case_when(hosp_type=="Academic" ~ "Academic / Community", # hosp_type=="Community" ~ "Academic / Community", # TRUE ~ "CAH / small")) %>% mutate(q = str_glue("{year(Date)} Q{quarter(Date)}"), q = str_remove(q, "20"), q = str_glue("'{q}"), q = factor(q, ordered=T)) %>% mutate(Month = strftime(Date, format="%b %Y")) %>% ggplot(aes(x=Hosp, y=CHG_rate)) + geom_boxplot(fill="#BDD7E7", color="grey30") + # "#EFF3FF" "#BDD7E7" "#6BAED6" "#2171B5" geom_boxplot(aes(fill=q)) + # geom_jitter(aes(text=Month)) + # coord_flip() + # facet_wrap("q", scales = "free_y") + # facet_wrap(hosp_type~q, scales = "free_y") + theme_bw() + scale_y_continuous(labels = scales::percent) + theme(legend.position = "bottom") + scale_fill_brewer(palette = "RdPu") + labs(title="Compliance by hospital", subtitle = "Broken down by quarter", x = "", y = "CHG compliance", fill="") # facet_grid(.~hosp_type, scales = "free_y") ``` # Regressions ```{r} df_glm <- matched_data$small_by_hosp %>% filter(Era!="During") mods <- list() # █─mods # ├─CLABSI # ├─CAUTI # ├─MRSA # └─█─RE # ├─CLABSI # ├─CAUTI # └─MRSA ``` In this section, we will use a negative binomial regression to examine any influence that CHG bathing has on HAIs This is the same type of model that the NHSN uses to predict the number of expected infections. Unless otherwise specified, our outcome of interest will be the **monthly number of HAIs** observed for each hospital. Notably, this is the absolute number, not a rate. To account for confounding variables we will include the number of predicted infections (from the NHSN data). ## Example: CLABSI ```{r model_CLABSI_simple} #| results: 'hide' mods[["CLABSI"]] <- MASS::glm.nb(CLABSI_obs ~ CLABSI_pred + Era, data = df_glm) summary(mods[["CLABSI"]]) # DHARMa::testDispersion(mods[["CLABSI"]]) # glance(mods[["CLABSI"]]) # unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean) ``` To explain the various models, this section will go into detail using CLABSIs as an example. All of the other HAIs will follow the same format, but aren't shown for space. The backbone model is a simple negative binomial model, using the formula below for CLABSI ```{r model_CLABSI_simple_formula} #| echo: false if(!interactive()) formula(mods[["CLABSI"]]$terms) ``` The results of this specific model is shown in the table below: ```{r model_CLABSI_simple_table} broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T) %>% relocate(conf.low, conf.high, .before = p.value) %>% select(-c(std.error, statistic)) %>% gt() %>% cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>% # tab_stub_indent(everything()) %>% cols_label(estimate = md("Incident Rate Ratio (IRR)"), term = "Term", conf.low = "95% CI", p.value = "P value") %>% fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>% fmt_scientific(columns = p.value, rows = p.value < 0.001) %>% fmt_scientific(columns = conf.low, rows = conf.low < 0.001 | conf.low > 1000) %>% fmt_scientific(columns = conf.high, rows = conf.high < 0.001 | conf.high > 1000) %>% tab_header( title = 'CLABSI: Negative binomial model' ) %>% opt_stylize(5) ``` This shows that compared to the before era, **the "after" era** (post CHG implementation) had a **`r scales::percent(1-filter(tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T), term=="EraAfter")[['estimate']])` reduction** in CLABSIs (1 - `r round(filter(tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T), term=="EraAfter")[['estimate']],3)`) with a P-value of `r round(filter(tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T), term=="EraAfter")[['p.value']], 3)`. It also shows that the predicted infections are significantly associated with increased observed infections (which is to be expected). The same data is shown graphically below: ```{r model_CLABSI_simple_figure} sjPlot::plot_model(mods[["CLABSI"]]) ``` ### Random effects CLABSI ```{r model_CLABSI_RE} #| results: 'hide' library(glmmTMB) mods[["RE"]][["CLABSI"]] <- glmmTMB(CLABSI_obs ~ CLABSI_pred + Era + (1|Hosp) , data = df_glm, family = "nbinom1") summary(mods[["RE"]][["CLABSI"]]) # broom.mixed::glance(mods[["RE"]][["CLABSI"]]) # DHARMa::testDispersion(mods[["RE"]][["CLABSI"]]) ``` In the previous model, we didn't do anything to account for the various hospitals. There are arguments for and against adjusting for hospital-specific effects, outlined below ::: {.callout-tip icon="false"} ## Reason for adjusting for each hospital The biggest reason for doing this would be because we have **repeated measures**, meaning that we have within-group correlation (e.g. the likelihood of CLABSIs at Ruby in July are not independent of the likelihood at Ruby in November). I also feel like doing a random effects model would be helpful for getting a true population level estimate, but I don't have a clear reason for this belief ::: ::: {.callout-warning icon="false"} ## Reasons not to adjust for each hospital The beauty (and downfall) of using NHSN's predicted number of infections is that it already accounts for many variables that we would want to include. Among these, NHSN already accounts for hospital size & teaching hospital, so introducing "additional adjustment" for hospital size is **redundant**. This might cause issues with multicollinearity as well as difficulty in interpruting the results. Additionally, the only factors NHSN uses for calculating the predicted CLABSIs at CAH is the number of central line days (see [Table 3](https://www.cdc.gov/nhsn/pdfs/ps-analysis-resources/nhsn-sir-guide.pdf#page=18) and [page 42](https://www.cdc.gov/nhsn/pdfs/ps-analysis-resources/nhsn-sir-guide.pdf#page=42) of the NHSN documents), so I'm really not sure about this ::: I think it's worthwhile to at least include this model (even if it's a sensitivity analysis), which is the same as \[Model 1\] but now with **random effects for each hospital**. The model formula I use is below: ```{r model_CLABSI_RE_formula} #| echo: false if(!interactive()) mods$RE$CLABSI[["call"]][["formula"]] ``` The `(1|Hosp)` term means each hospital has a random intercept, capturing the variation in infection rates across hospitals. This helps to account for unobserved hospital-specific factors that may influence the CLABSI rates, but still assumes that the effect of CHG implementation is similar across hospitals ```{r model_CLABSI_RE_table} broom.mixed::tidy(mods[["RE"]][["CLABSI"]], conf.int = T, exponentiate=T) %>% filter(effect=="fixed") %>% select(term, estimate, conf.low, conf.high, p.value) %>% gt() %>% cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>% # tab_stub_indent(everything()) %>% cols_label(estimate = md("Incident Rate Ratio (IRR)"), term = "Term", conf.low = "95% CI", p.value = "P value") %>% fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>% fmt_scientific(columns = p.value, rows = p.value < 0.001) %>% fmt_scientific(columns = conf.low, rows = conf.low < 0.001 | conf.low > 1000) %>% fmt_scientific(columns = conf.high, rows = conf.high < 0.001 | conf.high > 1000) %>% tab_header( title = md('CLABSI: **Random effects** negative binomial model'), subtitle = "Random effects for each hospital" ) %>% opt_stylize(5, color = "green") ``` ## All HAIs, negative binomial ```{r model_all_simple} #| results: 'hide' mods[["CAUTI"]] <- MASS::glm.nb(CAUTI_obs ~ CAUTI_pred + Era, data = df_glm) summary(mods[["CAUTI"]]) # DHARMa::testDispersion(mods[["CAUTI"]]) # glance(mods[["CAUTI"]]) # unique(select(time_limited, Date, Era)) %>% left_join(NHSN_clean) mods[["MRSA"]] <- MASS::glm.nb(MRSA_obs ~ MRSA_pred + Era, data = df_glm) summary(mods[["MRSA"]]) # DHARMa::testDispersion(mods[["MRSA"]]) # glance(mods[["MRSA"]]) ``` This shows all three models (CALBSI, CAUTI, MRSA) in the same table ```{r model_all_simple_table} bind_rows( mutate(broom::tidy(mods[["CLABSI"]], conf.int = T, exponentiate=T), Outcome = "CLABSI"), mutate(broom::tidy(mods[["CAUTI"]], conf.int = T, exponentiate=T), Outcome = "CAUTI"), mutate(broom::tidy(mods[["MRSA"]], conf.int = T, exponentiate=T), Outcome = "MRSA") ) %>% relocate(Outcome) %>% relocate(conf.low, conf.high, .before = p.value) %>% select(-c(std.error, statistic)) %>% mutate(term = if_else(str_detect(term, "_pred"), "NHSN prediction", term)) %>% group_by(Outcome) %>% gt(rowname_col = "term") %>% tab_stub_indent(rows = everything(), indent = 5) %>% cols_merge_range(col_begin = conf.low, col_end = conf.high, sep = " - ") %>% cols_label(estimate = md("Incident Rate Ratio (IRR)"), term = "Term", conf.low = "95% CI", p.value = "P value") %>% fmt_number(columns = c(estimate, conf.low, conf.high, p.value), n_sigfig = 3) %>% fmt_scientific(columns = p.value, rows = p.value < 0.001) %>% fmt_scientific(columns = conf.low, rows = conf.low < 0.001 | conf.low > 1000) %>% fmt_scientific(columns = conf.high, rows = conf.high < 0.001 | conf.high > 1000) %>% tab_style( style = cell_text(weight = "bold"), locations = cells_body( columns = c(estimate, p.value), rows = p.value <= 0.05 & term == "EraAfter" ) ) %>% tab_header( title = 'HAIs: Negative binomial models' ) %>% opt_stylize(5) ``` ```{r} sjPlot::plot_models(mods$CLABSI, mods$CAUTI, mods$MRSA, rm.terms = c("CLABSI_pred", "CAUTI_pred", "MRSA_pred"), m.labels = c("CLABSIs", "CAUTIs", "MRSA"), axis.labels = c("After CHG"), vline.color = "grey30", show.values = T, show.p = T) ``` # To do 1. Done: Add LabID MRSA - Done: Add to descriptive statistics (will require a bit different of a workflow) - Done: Add the regression 2. Add interrupted time series (for all models) 3. Compare compliance to HAIs (bivariable analysis in descriptive statistics) 4. Look at unit level compliance, similar to the @fig-todo1 below - This is turning out to be alot more work than I expected... It might need to be done after the abstract (I think we have plenty here as is) ```{r fig-todo1} CHG_dept %>% mutate(Rate = Numerator/Denominator) %>% mutate(ID = str_glue("{Hospital}_{Department}")) %>% ggplot(aes(x=Month, y=Rate, group=ID)) + geom_line(aes(alpha=Denominator)) + facet_wrap("Hospital") ```