# Load in packages
library(tidyverse)
library(table1)
library(lubridate)
library(e1071)
library(labelled)
library(janitor)
library(readxl)
library(survival)
library(survminer)
library(officer)
library(ggthemes)
library(gt)
TDM-TIME
A prospective cohort feasibility study of real-time beta-lactam antimicrobial therapeutic drug monitoring in critically ill patients with lower respiratory infection and development of an automated research notification tool: The TDM-TIME Study
Jan Hansel✉, Anthony Wilson, Jake Lain, Emmanuel Erhieyovwe, James Orr, Brian G. Keevil, Kayode Ogungbenro, Paul M. Dark, Timothy W. Felton
Division of Immunology, Immunity to Infection and Respiratory Medicine, The University of Manchester, Manchester, UK
Abstract
Background: Timely interventions are crucial for ICU patients with severe respiratory infections, particularly those with sepsis. Prompt initiation of antimicrobials is recommended to reduce mortality, but balancing this urgency with the risk of antimicrobial resistance is vital. Beta-lactam therapeutic drug monitoring (TDM) can be used to optimise dosing, but is limited by delays in delivering actionable results. In parallel, electronic health records (EHR) offer real-time operational support and can be used to identify patients eligible for trials. This study assessed the feasibility of improving turnaround times for beta-lactam TDM and enhancing recruitment into time-sensitive trials using novel EHR-based tools.
Methods: We conducted a prospective feasibility study in two ICUs in Manchester, UK, between December 2023 and June 2024. Adults receiving piperacillin/tazobactam or meropenem for respiratory infection were enrolled. An automated research notification (ARN) tool embedded in the EHR was designed to identify eligible participants in real time with simulations used to test efficacy. The primary outcome was availability of TDM results within two dose intervals. Secondary outcomes evaluated process times, survival and ICU/hospital length of stay.
Results: Of 276 potential participants identified via ARNs, 30 were enrolled, with TDM results available within two dose intervals for 67%. Recruitment prior to the first antimicrobial dose was achieved for 40%. Median (IQR) time from ARN to eligibility assessment was 0.5 (0.2-1.2) hours, significantly faster than simulated intermittent screening at 4.6 (3.1-14.9) hours (P < 0.001).
Discussion: While TDM results within desired timeframes were feasible, logistical constraints highlight the need for improved resource availability. ARNs enabled efficient participant identification and recruitment into time-sensitive research. Future work should explore broader applications of ARNs to clinical trials and precision interventions.
Registration: NCT05971979
Aims & Objectives
This study aimed to investigate turnaround times for delivering actionable results from TDM of beta-lactam antimicrobials in patients admitted to the intensive care unit. Furthermore, we sought to identify technology-enabled approaches to improve participant recruitment into future time-sensitive TDM trials.
Analyses
# Set working directories - if static
# if (interactive() && requireNamespace("rstudioapi", quietly = TRUE)) {
# script_path <- rstudioapi::getActiveDocumentContext()$path
# script_dir <- dirname(script_path)
# setwd(script_dir)
#}
#data_dir <- file.path(dirname(getwd()), "Data")
#getwd()
# Identify the latest TDM-TIME dataset in the current folder
<- list.files(pattern = "^TDMTIME_DATA_.*\\.csv$")
files <- str_extract(files, "(?<=TDMTIME_DATA_)[0-9]{4}-[0-9]{2}-[0-9]{2}")
dates <- as.Date(dates, format = "%Y-%m-%d")
date_formats <- which.max(date_formats)
latest_date_index <- files[latest_date_index]
latest_file
# Read in the latest TDM-TIME dataset
<- read_csv(latest_file)
df
# Remove incomplete cases
<- df %>% filter(!is.na(baseline_sample_collected))
df
# Remove invalid data - PID 11 had subsequent dose administered prior to fourth sample
$record_id == 11, "tdm4_pip_conc"] <- NA
df[df$record_id == 11, "tdm4_taz_conc"] <- NA df[df
# Apply labels
# Read in dictionary and clean names
<- read_csv("TDMTIME_DataDictionary_2024-12-16.csv")
dictionary <- clean_names(dictionary)
dictionary
# Create a coded variables dataframe
<- dictionary %>%
coded_vars filter(field_type %in% c("yesno", "radio", "dropdown")) %>%
select(variable_field_name, choices_calculations_or_slider_labels) %>%
filter(if_all(everything(), ~ !is.na(.)))
# Parse the variables using a for loop
for (i in seq_len(nrow(coded_vars))) {
<- coded_vars$variable_field_name[i]
var <- coded_vars$choices_calculations_or_slider_labels[i]
codes
if (var %in% names(df)) {
<- str_split(codes, "\\|")[[1]]
parsed_codes <- str_split(parsed_codes, ",")
parsed_levels_labels
<- sapply(parsed_levels_labels, function(x) str_trim(x[1]))
levels <- sapply(parsed_levels_labels, function(x) str_trim(x[2]))
labels
<- factor(df[[var]], levels = levels, labels = labels)
df[[var]] else {
} warning(paste("Variable", var, "not found in the dataframe."))
} }
# Calculate the SOFA score and its components
<- df %>%
df mutate(pf_ratio = (pao2*7.50062)/(fio2*0.01)) %>%
mutate(
resp_score = case_when(
<= 100 ~ 4,
pf_ratio <= 200 ~ 3,
pf_ratio <= 300 ~ 2,
pf_ratio <= 400 ~ 1,
pf_ratio TRUE ~ 0
),coag_score = case_when(
>= 150 ~ 0,
platelets <= 149 ~ 1,
platelets <= 99 ~ 2,
platelets <= 49 ~ 3,
platelets <= 19 ~ 4,
platelets TRUE ~ 0
),liver_score = case_when(
>= 205 ~ 4,
bilirubin >= 102 ~ 3,
bilirubin >= 33 ~ 2,
bilirubin >= 20 ~ 1,
bilirubin TRUE ~ 0
),cardio_score = case_when(
> 70 & vasopressor == 0 ~ 0,
map < 70 ~ 1,
map == 1 & vasopressor_dose <= 0.1 ~ 3,
vasopressor == 1 & vasopressor_dose > 0.1 ~ 4,
vasopressor TRUE ~ 0
),cns_score = case_when(
<= 5 ~ 4,
gcs <= 9 ~ 3,
gcs <= 12 ~ 2,
gcs <= 14 ~ 1,
gcs TRUE ~ 0
),renal_score = case_when(
<= 110 ~ 0,
creatinine <= 170 ~ 1,
creatinine <= 299 ~ 2,
creatinine <= 440 ~ 3,
creatinine > 440 ~ 4,
creatinine TRUE ~ 0
),sofa = resp_score + coag_score + liver_score + cardio_score + cns_score + renal_score
)
Participant baseline characteristics
# Baseline characteristics table
<- df %>%
df mutate(
sex = fct_infreq(as.factor(sex)), # Reorder 'sex'
postop = factor(ifelse(postop == 1, "Yes", "No"), levels = c("Yes", "No")), # Convert 1/0 to Yes/No
abx_indication = fct_infreq(as.factor(abx_indication)), # Reorder 'abx_indication'
mechanical_ventilation = factor(ifelse(mechanical_ventilation == 1, "Yes", "No"), levels = c("Yes", "No"))
)
label(df$sex) <- "Sex"
label(df$pt_age) <- "Age (years)"
label(df$bmi) <- "Body mass index (BMI)"
label(df$postop) <- "Surgery during admission"
label(df$abx_indication) <- "Antimicrobial indication"
label(df$sofa) <- "SOFA score"
label(df$crrt) <- "CRRT"
label(df$mechanical_ventilation) <- "Mechanical ventilation"
label(df$vasopressor) <- "Vasopressor requirement"
label(df$crp_day_0) <- "C-reactive protein"
label(df$pct_day_0) <- "Procalcitonin"
label(df$bicarbonate) <- "Bicarbonate"
label(df$pf_ratio) <- "P/F Ratio"
label(df$wcc_day_0) <- "WBC Count"
# Create the table
table1(~ sex + pt_age + bmi + postop + abx_indication + sofa + crrt + mechanical_ventilation + vasopressor + crp_day_0 + bicarbonate + pct_day_0 + wcc_day_0 + pf_ratio | abx_name, data = df, render.continuous=c(.="Median [Q1, Q3]"))
Piperacillin/tazobactam (N=26) |
Meropenem (N=4) |
Overall (N=30) |
|
---|---|---|---|
Sex | |||
Male | 14 (53.8%) | 2 (50.0%) | 16 (53.3%) |
Female | 12 (46.2%) | 2 (50.0%) | 14 (46.7%) |
Other | 0 (0%) | 0 (0%) | 0 (0%) |
Age (years) | |||
Median [Q1, Q3] | 63.0 [55.3, 75.8] | 61.5 [55.8, 68.8] | 63.0 [55.3, 74.8] |
Body mass index (BMI) | |||
Median [Q1, Q3] | 29.8 [24.0, 35.8] | 30.1 [27.7, 33.4] | 29.8 [24.7, 35.8] |
Surgery during admission | |||
Yes | 17 (65.4%) | 1 (25.0%) | 18 (60.0%) |
No | 9 (34.6%) | 3 (75.0%) | 12 (40.0%) |
Antimicrobial indication | |||
Hospital-acquired pneumonia (HAP) | 20 (76.9%) | 2 (50.0%) | 22 (73.3%) |
Other | 2 (7.7%) | 1 (25.0%) | 3 (10.0%) |
Sepsis (likely respiratory origin) | 2 (7.7%) | 0 (0%) | 2 (6.7%) |
Ventilator-associated pneumonia (VAP) | 1 (3.8%) | 1 (25.0%) | 2 (6.7%) |
Community-acquired pneumonia (CAP) | 1 (3.8%) | 0 (0%) | 1 (3.3%) |
SOFA score | |||
Median [Q1, Q3] | 6.00 [3.00, 8.00] | 9.00 [8.50, 10.0] | 6.50 [3.00, 9.00] |
CRRT | |||
Median [Q1, Q3] | 0 [0, 0] | 0.500 [0, 1.00] | 0 [0, 0] |
Mechanical ventilation | |||
Yes | 15 (57.7%) | 4 (100%) | 19 (63.3%) |
No | 11 (42.3%) | 0 (0%) | 11 (36.7%) |
Vasopressor requirement | |||
Median [Q1, Q3] | 0 [0, 0.750] | 0.500 [0, 1.00] | 0 [0, 1.00] |
C-reactive protein | |||
Median [Q1, Q3] | 218 [141, 281] | 143 [63.0, 221] | 211 [133, 272] |
Bicarbonate | |||
Median [Q1, Q3] | 25.4 [23.9, 26.9] | 25.1 [24.1, 26.6] | 25.4 [23.9, 26.9] |
Procalcitonin | |||
Median [Q1, Q3] | 0.150 [0, 0.683] | 0 [0, 3.80] | 0.0950 [0, 0.683] |
WBC Count | |||
Median [Q1, Q3] | 12.6 [9.98, 15.8] | 21.4 [19.5, 22.3] | 13.6 [10.3, 16.2] |
P/F Ratio | |||
Median [Q1, Q3] | 209 [134, 307] | 254 [205, 291] | 216 [134, 307] |
# Create a dataframe with the times based on intervals from REDCap
<- df %>%
times_df mutate(ref_time = case_when(
== 'Piperacillin/tazobactam' & tazocin_interval == 'Three times a day' ~ 16,
abx_name == 'Piperacillin/tazobactam' & tazocin_interval == 'Four times a day' ~ 12,
abx_name == 'Piperacillin/tazobactam' & tazocin_interval == 'Twice daily' ~ 24,
abx_name == 'Piperacillin/tazobactam' & tazocin_interval == 'Other' ~ as.integer(tazocin_interval_other)*2,
abx_name == 'Meropenem' & meropenem_interval == 'Three times a day' ~ 48,
abx_name == 'Meropenem' & meropenem_interval == 'Four times a day' ~ 24,
abx_name == 'Meropenem' & meropenem_interval == 'Twice daily' ~ 16,
abx_name == 'Meropenem' & meropenem_interval == 'Other' ~ as.integer(meropenem_interval_other)*2,
abx_name TRUE ~ 0
))
<- df %>%
df mutate(tdm1_diff = difftime(df$tdm1_time_collected, df$datetime_current_abx_dose, units = 'hours'),
tdm2_diff = difftime(df$tdm2_time_collected, df$datetime_current_abx_dose, units = 'hours'),
tdm3_diff = difftime(df$tdm3_time_collected, df$datetime_current_abx_dose, units = 'hours'),
tdm4_diff = difftime(df$tdm4_time_collected, df$datetime_current_abx_dose, units = 'hours')
)
<- df %>%
df mutate(
pre_analytical = difftime(time_thawed, datetime_current_abx_dose, units = "mins"),
analytical = difftime(time_lcmsms_finish, time_thawed, units = "mins")
)
Primary outcome
LC-MS/MS available within two dosing intervals from sampling
# Calculate the primary outcome (LC-MS/MS available within two dosing intervals from sampling)
<- times_df %>%
times_df mutate(primary_outcome = ifelse(time_tdm_final - datetime_current_abx_dose > ref_time, 0, 1)) %>%
mutate(secondary_outcome_time_from_abx_start = ifelse(time_tdm_final - abx_start_time > ref_time, 0, 1)) %>%
mutate(time_sampling = time_tdm_final - datetime_current_abx_dose) %>%
mutate(time_start = time_tdm_final - abx_start_time) %>%
select(primary_outcome, secondary_outcome_time_from_abx_start, time_sampling, time_start, abx_name)
# Output the primary outcome as a percentage
%>%
times_df summarise(
total = n(),
success = sum(primary_outcome, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 21 70
# Group by antimicrobial
%>%
times_df group_by(abx_name) %>%
summarise(
total = n(),
success = sum(primary_outcome, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 2 × 4
abx_name total success percentage
<fct> <int> <dbl> <dbl>
1 Piperacillin/tazobactam 26 18 69.2
2 Meropenem 4 3 75
Secondary outcomes
# Output the secondary outcome as a percentage
%>%
times_df summarise(
total = n(),
success = sum(secondary_outcome_time_from_abx_start, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 11 36.7
# Group by antimicrobial
%>%
times_df group_by(abx_name) %>%
summarise(
total = n(),
success = sum(secondary_outcome_time_from_abx_start, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 2 × 4
abx_name total success percentage
<fct> <int> <dbl> <dbl>
1 Piperacillin/tazobactam 26 8 30.8
2 Meropenem 4 3 75
# Output the time from sampling to LCMS available
%>%
times_df summarise(
median = median(time_sampling),
iqr_25 = quantile(time_sampling, 0.25, na.rm = TRUE),
iqr_75 = quantile(time_sampling, 0.75, na.rm = TRUE)
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 1 × 4
median iqr_25 iqr_75 iqr_range
<drtn> <drtn> <drtn> <chr>
1 10.95 hours 9.5875 hours 21.35833 hours 9.5875 - 21.3583333333333
# Group by antimicrobial
%>%
times_df group_by(abx_name) %>%
summarise(
median = median(time_sampling),
iqr_25 = quantile(time_sampling, 0.25, na.rm = TRUE),
iqr_75 = quantile(time_sampling, 0.75, na.rm = TRUE)
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 2 × 5
abx_name median iqr_25 iqr_75 iqr_range
<fct> <drtn> <drtn> <drtn> <chr>
1 Piperacillin/tazobactam 10.64167 hours 9.58750 hours 20.20833 hours 9.5875 -…
2 Meropenem 21.40000 hours 17.95833 hours 22.45417 hours 17.95833…
# Output the time from abx start to LCMS available
%>%
times_df summarise(
median = median(time_start),
iqr_25 = quantile(time_start, 0.25, na.rm = TRUE),
iqr_75 = quantile(time_start, 0.75, na.rm = TRUE)
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 1 × 4
median iqr_25 iqr_75 iqr_range
<drtn> <drtn> <drtn> <chr>
1 25.35 hours 12.3 hours 30.87917 hours 12.3 - 30.8791666666667
# Group by antimicrobial
%>%
times_df group_by(abx_name) %>%
summarise(
median = median(time_start),
iqr_25 = quantile(time_start, 0.25, na.rm = TRUE),
iqr_75 = quantile(time_start, 0.75, na.rm = TRUE)
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 2 × 5
abx_name median iqr_25 iqr_75 iqr_range
<fct> <drtn> <drtn> <drtn> <chr>
1 Piperacillin/tazobactam 25.55833 hours 10.64583 hours 30.87917 hours 10.64583…
2 Meropenem 24.63333 hours 22.02500 hours 29.82917 hours 22.025 -…
# Pre- and analytical phases
%>%
df summarise(
pre_ana_median = median(pre_analytical, na.rm = TRUE),
pre_ana_iqr_25 = quantile(pre_analytical, 0.25, na.rm = TRUE),
pre_ana_iqr_75 = quantile(pre_analytical, 0.75, na.rm = TRUE),
ana_median = median(analytical, na.rm = TRUE),
ana_iqr_25 = quantile(analytical, 0.25, na.rm = TRUE),
ana_iqr_75 = quantile(analytical, 0.75, na.rm = TRUE)
)
# A tibble: 1 × 6
pre_ana_median pre_ana_iqr_25 pre_ana_iqr_75 ana_median ana_iqr_25 ana_iqr_75
<drtn> <drtn> <drtn> <drtn> <drtn> <drtn>
1 497 mins 460.75 mins 1127.75 mins 138 mins 122.5 mins 163.5 mins
# Group by antimicrobial
%>%
df group_by(abx_name) %>%
summarise(
pre_ana_median = median(pre_analytical, na.rm = TRUE) / 60,
pre_ana_iqr_25 = quantile(pre_analytical, 0.25, na.rm = TRUE) / 60,
pre_ana_iqr_75 = quantile(pre_analytical, 0.75, na.rm = TRUE) / 60,
ana_median = median(analytical, na.rm = TRUE),
ana_iqr_25 = quantile(analytical, 0.25, na.rm = TRUE),
ana_iqr_75 = quantile(analytical, 0.75, na.rm = TRUE)
)
# A tibble: 2 × 7
abx_name pre_ana_median pre_ana_iqr_25 pre_ana_iqr_75 ana_median ana_iqr_25
<fct> <drtn> <drtn> <drtn> <drtn> <drtn>
1 Piperacill… 8.116667 mins 7.679167 mins 17.98333 mins 138.0 mins 115.25 mi…
2 Meropenem 18.991667 mins 15.258333 mins 20.24583 mins 144.5 mins 129.50 mi…
# ℹ 1 more variable: ana_iqr_75 <drtn>
# 28-day survival
%>%
df summarise(
total = n(),
success = sum(alive_28d, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 21 70
# Group by antimicrobial
%>%
df group_by(abx_name) %>%
summarise(
total = n(),
success = sum(alive_28d, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 2 × 4
abx_name total success percentage
<fct> <int> <dbl> <dbl>
1 Piperacillin/tazobactam 26 20 76.9
2 Meropenem 4 1 25
# 28-day ICU
%>%
df summarise(
total = n(),
success = sum(discharged_icu_28d, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 15 50
# Group by antimicrobial
%>%
df group_by(abx_name) %>%
summarise(
total = n(),
success = sum(discharged_icu_28d, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 2 × 4
abx_name total success percentage
<fct> <int> <dbl> <dbl>
1 Piperacillin/tazobactam 26 14 53.8
2 Meropenem 4 1 25
# ICU length of stay
%>%
df mutate(icu_los = as.numeric(discharged_icu_date - icu_admitted_date)) %>%
summarise(
median = median(icu_los, na.rm = TRUE),
iqr_25 = quantile(icu_los, 0.25, na.rm = TRUE),
iqr_75 = quantile(icu_los, 0.75, na.rm = TRUE)
)
# A tibble: 1 × 3
median iqr_25 iqr_75
<dbl> <dbl> <dbl>
1 7 5 25
# Group by antimicrobial
%>%
df group_by(abx_name) %>%
mutate(icu_los = as.numeric(discharged_icu_date - icu_admitted_date)) %>%
summarise(
median = median(icu_los, na.rm = TRUE),
iqr_25 = quantile(icu_los, 0.25, na.rm = TRUE),
iqr_75 = quantile(icu_los, 0.75, na.rm = TRUE)
)
# A tibble: 2 × 4
abx_name median iqr_25 iqr_75
<fct> <dbl> <dbl> <dbl>
1 Piperacillin/tazobactam 6.5 5 23
2 Meropenem 23 23 23
# 28-day hospital discharge
%>%
df summarise(
total = n(),
success = sum(discharged_28d, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 9 30
# Hospital length of stay
%>%
df mutate(hosp_los = as.numeric(discharged_date - admitted_date)) %>%
summarise(
median = median(hosp_los, na.rm = TRUE),
iqr_25 = quantile(hosp_los, 0.25, na.rm = TRUE),
iqr_75 = quantile(hosp_los, 0.75, na.rm = TRUE)
)
# A tibble: 1 × 3
median iqr_25 iqr_75
<dbl> <dbl> <dbl>
1 17 10 27
# Group by antimicrobial
%>%
df mutate(hosp_los = as.numeric(discharged_date - admitted_date)) %>%
group_by(abx_name) %>%
summarise(
median = median(hosp_los, na.rm = TRUE),
iqr_25 = quantile(hosp_los, 0.25, na.rm = TRUE),
iqr_75 = quantile(hosp_los, 0.75, na.rm = TRUE)
)
# A tibble: 2 × 4
abx_name median iqr_25 iqr_75
<fct> <dbl> <dbl> <dbl>
1 Piperacillin/tazobactam 17 10 27
2 Meropenem NA NA NA
<- df %>%
df mutate(first_dose = ifelse(abs(difftime(datetime_current_abx_dose, abx_start_time, units = "hours")) <= 4, 1, 0))
# Proportion of participants recruited at time of first dose
%>%
df select(first_dose) %>%
summarise(
total = n(),
success = sum(first_dose, na.rm = TRUE)
%>%
) mutate(
percentage = (success/total) * 100
)
# A tibble: 1 × 3
total success percentage
<int> <dbl> <dbl>
1 30 12 40
Antimicrobial doses
# Antimicrobial concentration analyses
# Gather the concentrations
<- df %>%
df2 select(record_id, tdm1_diff, tdm2_diff, tdm3_diff, tdm4_diff,
tdm1_pip_conc, tdm2_pip_conc, tdm3_pip_conc, tdm4_pip_conc,
tdm1_taz_conc, tdm2_taz_conc, tdm3_taz_conc, tdm4_taz_conc, %>%
tdm1_mer_conc, tdm2_mer_conc, tdm3_mer_conc, tdm4_mer_conc) mutate(tdm1_time = as.numeric(tdm1_diff, units = 'hours'),
tdm2_time = as.numeric(tdm2_diff, units = 'hours'),
tdm3_time = as.numeric(tdm3_diff, units = 'hours'),
tdm4_time = as.numeric(tdm4_diff, units = 'hours'))
# Pivot the dataframe to a long dataset
<- df2 %>%
long_df pivot_longer(
cols = -record_id,
names_to = c("timepoint", "antibiotic", ".value"),
names_pattern = "(tdm\\d+)_?(pip|taz|mer)?_(time|conc)"
)
# Select time data
<- long_df %>%
time_data filter(!is.na(time)) %>% # Select only time rows
select(-conc, -antibiotic) # Drop the concentration column
# Select concentration data
<- long_df %>%
conc_data filter(!is.na(antibiotic)) %>% # Select only concentration rows
select(-time) # Drop the time column
# Join time and concentration datasets
<- conc_data %>%
clean_df left_join(time_data, by = c("record_id", "timepoint")) %>%
filter(!is.na(conc)) %>%
filter(antibiotic != 'taz')
# Define MIC breakpoints
<- 16
pip_mic <- 4
taz_mic <- 8
mer_mic
# Calculate target attainment
%>%
clean_df filter(antibiotic == "pip" & timepoint == 'tdm4') %>%
mutate(below = ifelse((conc * 0.7) < pip_mic, 1, 0)) %>%
summarise(count = n(), sum = sum(below))
# A tibble: 1 × 2
count sum
<int> <dbl>
1 24 11
%>%
clean_df filter(antibiotic == "taz" & timepoint == 'tdm4') %>%
mutate(below = ifelse(conc < taz_mic, 1, 0)) %>%
summarise(count = n(), sum = sum(below))
# A tibble: 1 × 2
count sum
<int> <int>
1 0 0
%>%
clean_df filter(antibiotic == "mer" & timepoint == 'tdm4') %>%
mutate(below = ifelse(conc < mer_mic, 1, 0)) %>%
summarise(count = n(), sum = sum(below))
# A tibble: 1 × 2
count sum
<int> <dbl>
1 3 0
ggplot(clean_df, aes(x = time, y = conc, group = interaction(record_id, antibiotic), color = antibiotic)) +
geom_point() +
geom_line() +
labs(x = "Time (hours)", y = "Concentration (mg/L)", title = "Antimicrobial concentrations over dosing interval") +
theme_minimal() +
xlim(0,8.3) +
scale_color_discrete(
name = "Antimicrobial",
labels = c("Meropenem", "Piperacillin", "Tazobactam")
)
<- clean_df %>%
below_pip filter(antibiotic == "pip" & timepoint == 'tdm4') %>%
mutate(below = ifelse((conc * 0.7) < pip_mic, 1, 0)) %>%
left_join(df, by = c("record_id")) %>%
mutate(vd = 4500/tdm1_pip_conc)
<- below_pip %>%
below_pip_2 select(record_id, below) %>%
right_join(clean_df, by = 'record_id') %>%
filter(!is.na(below))
ggplot(below_pip_2, aes(x = time, y = conc, group = interaction(record_id, as.factor(below)), color = as.factor(below))) +
geom_point() +
geom_line() +
labs(x = "Time (hours)", y = "Concentration (mg/L)", title = "Antimicrobial concentrations over dosing interval") +
theme_minimal() +
xlim(0,8.3)
Culture data
### Culture data
# Proportion with positive culture isolates
%>%
df summarise(
total = n(),
positive = sum(cultures, na.rm = TRUE)
%>%
) mutate(
percentage = (positive/total) * 100
)
# A tibble: 1 × 3
total positive percentage
<int> <dbl> <dbl>
1 30 21 70
# Count of culture sources
%>%
df count(culture_source)
# A tibble: 5 × 2
culture_source n
<fct> <int>
1 Blood 4
2 Sputum 8
3 Bronchial washing/BAL 8
4 Other 1
5 <NA> 9
%>%
df group_by(culture_source) %>%
count(pathogen) %>%
print(n = Inf)
# A tibble: 22 × 3
# Groups: culture_source [5]
culture_source pathogen n
<fct> <chr> <int>
1 Blood Escherichia coli 1
2 Blood Serratia marcescens 1
3 Blood Staphylococcus capitis 1
4 Blood Staphylococcus epidermidis 1
5 Sputum Acinetobacter baumannii; Aspergillus fumigatus s… 1
6 Sputum Candida albicans 1
7 Sputum Candida sp, Corynebacterium, adenovirus 1
8 Sputum Enterobacter cloacae complex 1
9 Sputum Pseudomonas aeruginosa 1
10 Sputum Staphylococcus aureus, Streptococcus pneumoniae,… 1
11 Sputum Yeast 3+ 1
12 Sputum rhinovirus PCR 1
13 Bronchial washing/BAL E.coli 1
14 Bronchial washing/BAL Escherichia coli, Haemophilus influenzae, Stenot… 1
15 Bronchial washing/BAL Escherichia coli, candida albicans 1
16 Bronchial washing/BAL Escherichia coli, candida tropicalis, staphyloco… 1
17 Bronchial washing/BAL Haemophilus influenzae 1
18 Bronchial washing/BAL Klebsiella pneumoniae 1
19 Bronchial washing/BAL Serratia marcescens and Klebsiella oxytoca 1
20 Bronchial washing/BAL Staphylococcus aureus 1
21 Other Parainfluenza-3 1
22 <NA> <NA> 9
# Clean up free text data
<- df %>%
pathogens filter(!is.na(pathogen)) %>% # Remove NAs
separate_rows(pathogen, sep = "[,;]") %>% # Split by comma or semicolon
separate_rows(pathogen, sep = "\\band\\b") %>% # Split by 'and'
mutate(pathogen = str_trim(pathogen)) %>% # Trim white spaces
mutate(pathogen = str_to_title(pathogen)) %>% # Standardize capitalization
#distinct(pathogen, .keep_all = TRUE) %>% # Remove duplicates
arrange(pathogen) %>% # Sort for readability
select(pathogen, culture_source)
# View cleaned data
print(pathogens, n = Inf)
# A tibble: 33 × 2
pathogen culture_source
<chr> <fct>
1 Acinetobacter Baumannii Sputum
2 Adenovirus Sputum
3 Aspergillus Fumigatus Species Complex Sputum
4 Aspergillus Pcr Sputum
5 Candida Albicans Bronchial washing/BAL
6 Candida Albicans Sputum
7 Candida Sp Sputum
8 Candida Tropicalis Bronchial washing/BAL
9 Corynebacterium Sputum
10 E.coli Bronchial washing/BAL
11 Enterobacter Cloacae Complex Sputum
12 Escherichia Coli Blood
13 Escherichia Coli Bronchial washing/BAL
14 Escherichia Coli Bronchial washing/BAL
15 Escherichia Coli Bronchial washing/BAL
16 Haemophilus Influenzae Bronchial washing/BAL
17 Haemophilus Influenzae Bronchial washing/BAL
18 Human Metapneumovirus Pcr Sputum
19 Klebsiella Oxytoca Bronchial washing/BAL
20 Klebsiella Pneumoniae Bronchial washing/BAL
21 Parainfluenza-3 Other
22 Pseudomonas Aeruginosa Sputum
23 Rhinovirus Pcr Sputum
24 Serratia Marcescens Blood
25 Serratia Marcescens Bronchial washing/BAL
26 Staphylococcus Aureus Sputum
27 Staphylococcus Aureus Bronchial washing/BAL
28 Staphylococcus Capitis Blood
29 Staphylococcus Epidermidis Blood
30 Staphylococcus Epidermidis Bronchial washing/BAL
31 Stenotrophomonas Maltophilia Bronchial washing/BAL
32 Streptococcus Pneumoniae Sputum
33 Yeast 3+ Sputum
# Susceptible proportion
%>%
df count(susceptible_to_abx)
# A tibble: 4 × 2
susceptible_to_abx n
<fct> <int>
1 Yes 9
2 No 4
3 Susceptibilities not available 8
4 <NA> 9
# Resistant bugs
%>%
df filter(susceptible_to_abx == 'No') %>%
select(pathogen)
# A tibble: 4 × 1
pathogen
<chr>
1 Acinetobacter baumannii; Aspergillus fumigatus species complex
2 E.coli
3 Escherichia coli
4 Enterobacter cloacae complex
Assessment of Automated Research Notifications (ARN)
#### ARN assesssment
# Read in ARN file
<- read_excel('TDMTIME_BPA_DATA_2024-12-12.xlsx', sheet = 'Anonymised')
df_bpa
# Create IDs and keep only BPA timings
<- df_bpa %>%
df_bpa mutate(pid = row_number()) %>%
select(pid, everything()) %>%
filter(method_screen == 'BPA')
$date_screen <- as.Date(df_bpa$date_screen)
df_bpa$time_text <- as.POSIXct(paste(df_bpa$date_screen, df_bpa$time_text))
df_bpa$time_rx_screen <- as.numeric(df_bpa$time_rx_screen)
df_bpa
<- df_bpa %>%
df_alt mutate(
# Ensure datetime is in POSIXct format
datetime = ymd_hms(time_text),
day_of_week = wday(datetime, week_start = 1), # Ensure week starts on Monday (1 = Monday, 7 = Sunday)
# Calculate the next target time based on the provided rules
next_target_time = case_when(
# Weekdays before 09:00
hour(datetime) < 9 & day_of_week %in% 1:5 ~ update(datetime, hour = 9, minute = 0, second = 0),
# Weekdays between 09:00 and 14:59
hour(datetime) >= 9 & hour(datetime) < 14 & day_of_week %in% 1:5 ~ update(datetime, hour = 14, minute = 0, second = 0),
# Weekdays after 15:00
hour(datetime) >= 14 & day_of_week %in% 1:4 ~ update(datetime + days(1), hour = 9, minute = 0, second = 0),
# Friday after 15:00 through the weekend
hour(datetime) >= 14 & day_of_week == 5) | day_of_week %in% 6:7 ~ update(datetime + days(8 - day_of_week), hour = 9, minute = 0, second = 0),
(# Default case to handle unexpected scenarios
TRUE ~ datetime
), BPA_daytime = case_when(
# Within the weekday window
%in% 1:5 & hour(datetime) >= 8 & hour(datetime) < 16 ~ time_rx_screen,
day_of_week # Before the window on weekdays
%in% 1:5 & hour(datetime) < 8 ~ as.numeric(difftime(update(datetime, hour = 8), datetime, units = "mins")),
day_of_week # After the window on weekdays except Friday
%in% 1:4 & hour(datetime) >= 16 ~ as.numeric(difftime(update(datetime + days(1), hour = 8), datetime, units = "mins")),
day_of_week # After the window on Friday or weekends
== 5 & hour(datetime) >= 16) | day_of_week %in% 6:7 ~ as.numeric(difftime(update(datetime + days(8 - day_of_week), hour = 8), datetime, units = "mins")),
(day_of_week # Default to 0 if none of the conditions match
TRUE ~ 0
)%>%
) mutate(
# Additional calculations based on next_target_time
minutes_to_next_target = as.numeric(difftime(next_target_time, datetime, units = "mins")),
less_than_24hrs = minutes_to_next_target < 1440,
less_than_2hrs = minutes_to_next_target < 120,
less_than_1hr = minutes_to_next_target < 60,
BPA_less_than_1hr = time_rx_screen < 60, # Ensure time_rx_screen is defined or convert it appropriately
weekend = if_else((day_of_week %in% c(6, 7) | (hour(datetime) >= 16 & day_of_week %in% c(5))), TRUE, FALSE),
daytime = hour(datetime) >= 8 & hour(datetime) < 16
)
<- df_alt %>%
df3 filter(!is.na(time_rx_screen)) %>%
filter(!weekend)
<- df3 %>%
df_long3 pivot_longer(cols = c(time_rx_screen, minutes_to_next_target, BPA_daytime),
names_to = "measurement_type",
values_to = "time_in_minutes") %>%
mutate(time_in_hours = time_in_minutes / 60 + 0.01) %>%
mutate(colour_group = ifelse(time_in_hours > 24, "More than 24", "Less than 24"))
<- df_long3 %>%
df_long3 filter(measurement_type != 'BPA_daytime')
$measurement_type <- factor(df_long3$measurement_type,
df_long3levels = c("minutes_to_next_target", "time_rx_screen"),
labels = c("Intermittent screening", "ARN"))
$measurement_type <- relevel(factor(df_long3$measurement_type), ref = "ARN")
df_long3
# Summary stats for BPA comparison
%>%
df_long3 group_by(measurement_type) %>%
summarise(
median = median(time_in_minutes) / 60,
iqr_25 = quantile(time_in_minutes, 0.25, na.rm = TRUE) / 60,
iqr_75 = quantile(time_in_minutes, 0.75, na.rm = TRUE) / 60
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 2 × 5
measurement_type median iqr_25 iqr_75 iqr_range
<fct> <dbl> <dbl> <dbl> <chr>
1 ARN 0.375 0.2 1.18 0.2 - 1.175
2 Intermittent screening 4.63 3.14 14.9 3.14166666666667 - 14.87083333333…
# Time to event analysis
<- Surv(time = df_long3$time_in_hours, event = df_long3$less_than_24hrs)
surv_obj <- survfit(surv_obj ~ measurement_type, data = df_long3)
fit <- coxph(surv_obj ~ measurement_type, data = df_long3)
cox_model cox_model
Call:
coxph(formula = surv_obj ~ measurement_type, data = df_long3)
coef exp(coef) se(coef) z p
measurement_typeIntermittent screening -1.6745 0.1874 0.1332 -12.57 <2e-16
Likelihood ratio test=160.4 on 1 df, p=< 2.2e-16
n= 332, number of events= 322
summary(cox_model) #output provides HR CIs
Call:
coxph(formula = surv_obj ~ measurement_type, data = df_long3)
n= 332, number of events= 322
coef exp(coef) se(coef) z
measurement_typeIntermittent screening -1.6745 0.1874 0.1332 -12.57
Pr(>|z|)
measurement_typeIntermittent screening <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
measurement_typeIntermittent screening 0.1874 5.336 0.1443 0.2433
Concordance= 0.701 (se = 0.008 )
Likelihood ratio test= 160.4 on 1 df, p=<2e-16
Wald test = 158 on 1 df, p=<2e-16
Score (logrank) test = 186.4 on 1 df, p=<2e-16
#####
## The KM plot
#####
<- ggsurvplot(
ggsurv_plot data = df_long3, pval = FALSE, risk.table = TRUE, risk.table.height = 0.25, conf.int = FALSE,
fit, palette = c("#0073C2FF", "#EFC000FF", "#E65C5C", '#37C78E'),
ggtheme = theme_minimal(),
xlab = "Time in Hours", ylab = "Proportion identified",
legend.title="", surv.scale="percent",
tables.col="strata",
risk.table.col = "strata",
risk.table.y.text = FALSE,
break.x.by = 1,
xlim = c(0,16)
)
<- ggsurv_plot$plot +
main_plot scale_y_reverse() + # Reverse the y-axis
theme(axis.title.y = element_text(hjust = 0.5))
# Combine the reversed main plot with the risk table
<- cowplot::plot_grid(
combined_plot
main_plot,$table, # Keep the risk table unchanged
ggsurv_plotncol = 1,
rel_heights = c(3, 1) # Adjust heights for better balance
)
print(combined_plot)
# SENSITIVIY ANALYSIS BPA COMPARISON
<- df_alt %>%
df3 filter(!is.na(time_rx_screen)) %>%
filter(!weekend)
<- df3 %>%
df_long3 pivot_longer(cols = c(time_rx_screen, minutes_to_next_target, BPA_daytime),
names_to = "measurement_type",
values_to = "time_in_minutes") %>%
mutate(time_in_hours = time_in_minutes / 60 + 0.01) %>%
mutate(colour_group = ifelse(time_in_hours > 24, "More than 24", "Less than 24"))
$measurement_type <- factor(df_long3$measurement_type,
df_long3levels = c("minutes_to_next_target", "time_rx_screen", "BPA_daytime"),
labels = c("Intermittent screening", "ARN", "ARN daytime"))
# Summary stats for BPA comparison
%>%
df_long3 group_by(measurement_type) %>%
summarise(
median = median(time_in_minutes) / 60,
iqr_25 = quantile(time_in_minutes, 0.25, na.rm = TRUE) / 60,
iqr_75 = quantile(time_in_minutes, 0.75, na.rm = TRUE) / 60
%>%
) mutate(
iqr_range = paste0(iqr_25, " - ", iqr_75)
)
# A tibble: 3 × 5
measurement_type median iqr_25 iqr_75 iqr_range
<fct> <dbl> <dbl> <dbl> <chr>
1 Intermittent screening 4.63 3.14 14.9 3.14166666666667 - 14.87083333333…
2 ARN 0.375 0.2 1.18 0.2 - 1.175
3 ARN daytime 0.617 0.2 7 0.2 - 7
$measurement_type <- relevel(factor(df_long3$measurement_type), ref = "ARN")
df_long3
# Time to event analysis
<- Surv(time = df_long3$time_in_hours)
surv_obj <- survfit(surv_obj ~ measurement_type, data = df_long3)
fit <- coxph(surv_obj ~ measurement_type, data = df_long3)
cox_model cox_model
Call:
coxph(formula = surv_obj ~ measurement_type, data = df_long3)
coef exp(coef) se(coef) z
measurement_typeIntermittent screening -1.5767 0.2066 0.1269 -12.422
measurement_typeARN daytime -0.6766 0.5083 0.1158 -5.844
p
measurement_typeIntermittent screening < 2e-16
measurement_typeARN daytime 5.09e-09
Likelihood ratio test=163.5 on 2 df, p=< 2.2e-16
n= 498, number of events= 498
<- survdiff(surv_obj ~ measurement_type, data = df_long3)
log_rank log_rank
Call:
survdiff(formula = surv_obj ~ measurement_type, data = df_long3)
N Observed Expected (O-E)^2/E
measurement_type=ARN 166 166 79.5 94.05
measurement_type=Intermittent screening 166 166 280.9 47.00
measurement_type=ARN daytime 166 166 137.6 5.87
(O-E)^2/V
measurement_type=ARN 125.89
measurement_type=Intermittent screening 123.49
measurement_type=ARN daytime 8.56
Chisq= 172 on 2 degrees of freedom, p= <2e-16
summary(cox_model) #output provides HR CIs
Call:
coxph(formula = surv_obj ~ measurement_type, data = df_long3)
n= 498, number of events= 498
coef exp(coef) se(coef) z
measurement_typeIntermittent screening -1.5767 0.2066 0.1269 -12.422
measurement_typeARN daytime -0.6766 0.5083 0.1158 -5.844
Pr(>|z|)
measurement_typeIntermittent screening < 2e-16 ***
measurement_typeARN daytime 5.09e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
measurement_typeIntermittent screening 0.2066 4.839 0.1611 0.2650
measurement_typeARN daytime 0.5083 1.967 0.4051 0.6378
Concordance= 0.671 (se = 0.009 )
Likelihood ratio test= 163.5 on 2 df, p=<2e-16
Wald test = 155 on 2 df, p=<2e-16
Score (logrank) test = 174 on 2 df, p=<2e-16
#####
## The KM plot
#####
<- ggsurvplot(
ggsurv_plot data = df_long3, pval = FALSE, risk.table = TRUE, risk.table.height = 0.25, conf.int = FALSE,
fit, palette = c("#0073C2FF", "#EFC000FF", "#E65C5C", '#37C78E'),
ggtheme = theme_minimal(),
xlab = "Time in Hours", ylab = "Proportion identified",
legend.title="", surv.scale="percent",
tables.col="strata",
risk.table.col = "strata",
risk.table.y.text = FALSE,
break.x.by = 1,
xlim = c(0,16)
)
<- ggsurv_plot$plot +
main_plot scale_y_reverse() + # Reverse the y-axis
theme(axis.title.y = element_text(hjust = 0.5))
# Combine the reversed main plot with the risk table
<- cowplot::plot_grid(
combined_plot
main_plot,$table, # Keep the risk table unchanged
ggsurv_plotncol = 1,
rel_heights = c(3, 1) # Adjust heights for better balance
)
print(combined_plot)
Supplementary analyses
# Summary of reasons for exclusion
<- df_bpa %>%
summary_table filter(!is.na(ineligible_reason)) %>% # Remove NA values
count(ineligible_reason, name = "Count") %>% # Count occurrences
mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>% # Calculate percentages
arrange(desc(Count)) # Sort by count
# Create a gt table
<- summary_table %>%
gt_table gt() %>%
tab_header(
title = "Reasons for ineligibility"
%>%
) cols_label(
ineligible_reason = "Ineligible reason",
Count = "Count",
Percentage = "Percentage (%)"
%>%
) fmt_number(
columns = vars(Percentage),
decimals = 1
%>%
) tab_style(
style = list(cell_text(weight = "bold")),
locations = cells_column_labels(everything())
)
# Print the table
gt_table
Reasons for ineligibility | ||
---|---|---|
Ineligible reason | Count | Percentage (%) |
No capacity | 173 | 70.3 |
Non-respiratory infection | 45 | 18.3 |
Antimicrobial started > 24 hrs | 10 | 4.1 |
Previously enrolled | 6 | 2.4 |
Unlikely to survive 24 hours | 4 | 1.6 |
Re-prescription | 3 | 1.2 |
Non-English speaker | 2 | 0.8 |
Screening failure | 1 | 0.4 |
Stepdown to ward imminent | 1 | 0.4 |
Withdrawn from study | 1 | 0.4 |
# Day of week analyses
<- df_bpa %>%
df_bpa mutate(
# Ensure datetime is in POSIXct format
datetime = ymd_hms(time_text),
day_of_week = wday(datetime, week_start = 1), # Ensure week starts on Monday (1 = Monday, 7 = Sunday)
# Calculate the next target time based on the provided rules
next_target_time = case_when(
# Weekdays before 09:00
hour(datetime) < 9 & day_of_week %in% 1:5 ~ update(datetime, hour = 9, minute = 0, second = 0),
# Weekdays between 09:00 and 14:59
hour(datetime) >= 9 & hour(datetime) < 14 & day_of_week %in% 1:5 ~ update(datetime, hour = 14, minute = 0, second = 0),
# Weekdays after 15:00
hour(datetime) >= 14 & day_of_week %in% 1:4 ~ update(datetime + days(1), hour = 9, minute = 0, second = 0),
# Friday after 15:00 through the weekend
hour(datetime) >= 14 & day_of_week == 5) | day_of_week %in% 6:7 ~ update(datetime + days(8 - day_of_week), hour = 9, minute = 0, second = 0),
(# Default case to handle unexpected scenarios
TRUE ~ datetime
), BPA_daytime = case_when(
# Within the weekday window
%in% 1:5 & hour(datetime) >= 8 & hour(datetime) < 16 ~ time_rx_screen,
day_of_week # Before the window on weekdays
%in% 1:5 & hour(datetime) < 8 ~ as.numeric(difftime(update(datetime, hour = 8), datetime, units = "mins")),
day_of_week # After the window on weekdays except Friday
%in% 1:4 & hour(datetime) >= 16 ~ as.numeric(difftime(update(datetime + days(1), hour = 8), datetime, units = "mins")),
day_of_week # After the window on Friday or weekends
== 5 & hour(datetime) >= 16) | day_of_week %in% 6:7 ~ as.numeric(difftime(update(datetime + days(8 - day_of_week), hour = 8), datetime, units = "mins")),
(day_of_week # Default to 0 if none of the conditions match
TRUE ~ 0
)%>%
) mutate(
# Additional calculations based on next_target_time
minutes_to_next_target = as.numeric(difftime(next_target_time, datetime, units = "mins")),
less_than_24hrs = minutes_to_next_target < 1440,
less_than_2hrs = minutes_to_next_target < 120,
less_than_1hr = minutes_to_next_target < 60,
BPA_less_than_1hr = time_rx_screen < 60, # Ensure time_rx_screen is defined or convert it appropriately
weekend = if_else((day_of_week %in% c(6, 7) | (hour(datetime) >= 16 & day_of_week %in% c(5))), TRUE, FALSE),
daytime = hour(datetime) >= 8 & hour(datetime) < 16
)
<- df_bpa %>%
df_bpa mutate(
hour = as.numeric(format(time_abx_prescribed, "%H")),
day_of_week = factor(day_of_week,
levels = c(1, 2, 3, 4, 5, 6, 7),
labels = c("Monday", "Tuesday", "Wednesday",
"Thursday", "Friday", "Saturday", "Sunday"))
)
ggplot(df_bpa, aes(x = hour, fill = day_of_week)) +
geom_histogram(binwidth = 1, position = "stack", color = "black") +
labs(
title = "Frequency of Prescriptions by Time of Day and Day of Week",
x = "Hour of Day",
y = "Frequency",
fill = "Day of Week"
+
) scale_x_continuous(breaks = 0:23) +
theme_minimal()
ggplot(df_bpa, aes(x = hour)) +
geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
facet_wrap(~ day_of_week, ncol = 2) +
labs(
title = "Frequency of Prescriptions by Time of Day",
x = "Hour of Day",
y = "Frequency"
+
) scale_x_continuous(breaks = 0:23) +
theme_minimal() +
theme(
strip.text = element_text(size = 10, face = "bold")
)
ggplot(df_bpa, aes(x = hour)) +
geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
#facet_wrap(~ day_of_week, ncol = 2) +
labs(
title = "Frequency of Prescriptions by Time of Day",
x = "Hour of Day",
y = "Frequency"
+
) scale_x_continuous(breaks = 0:23) +
theme_minimal() +
theme(
strip.text = element_text(size = 10, face = "bold")
)
# Summarize counts by day of week and hour
<- df_bpa %>%
heatmap_data count(day_of_week, hour)
# Plot heatmap
ggplot(heatmap_data, aes(x = hour, y = day_of_week, fill = n)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightgreen", high = "lightcoral") +
scale_x_continuous(breaks = seq(0,23, by =1)) +
labs(title = "Heatmap of Prescription Frequencies",
x = "Hour of Day", y = "Day of Week", fill = "Count") +
theme_minimal()
# Create a table of counts by hour
<- table(df_bpa$hour)
hour_counts
# Perform Chi-squared test
<- chisq.test(hour_counts)
chisq_test
print(chisq_test)
Chi-squared test for given probabilities
data: hour_counts
X-squared = 120.87, df = 23, p-value = 2.902e-15
ggplot(df_bpa, aes(x = hour)) +
geom_density(fill = "skyblue", alpha = 0.5) +
labs(title = "Density Plot of Prescription Hours", x = "Hour of Day", y = "Density") +
theme_minimal()
<- df_bpa %>%
summary_time_rx # group_by(enrolled) %>%
summarise(
mean_time = mean(time_rx_screen, na.rm = TRUE),
median_time = median(time_rx_screen, na.rm = TRUE),
sd_time = sd(time_rx_screen, na.rm = TRUE),
iqr_25 = quantile(time_rx_screen, 0.25, na.rm = TRUE),
iqr_75 = quantile(time_rx_screen, 0.75, na.rm = TRUE),
n = n()
)
print(summary_time_rx)
# A tibble: 1 × 6
mean_time median_time sd_time iqr_25 iqr_75 n
<dbl> <dbl> <dbl> <dbl> <dbl> <int>
1 83.2 30 138. 12 69.5 276
ggplot(df_bpa, aes(x = enrolled, y = time_rx_screen, fill = enrolled)) +
geom_boxplot(outlier.shape = 16, outlier.colour = "red", alpha = 0.6) +
labs(
title = "Comparison of time_rx_screen by Enrollment Status",
x = "Enrollment Status",
y = "Time Rx Screen"
+
) theme_minimal() +
scale_fill_manual(values = c("Yes" = "steelblue", "No" = "salmon"))
ggplot(df_bpa, aes(x = enrolled, y = time_rx_screen)) +
geom_boxplot() +
labs(
title = "Time from prescription to eligibility screening according to enrolment status",
x = 'Enrolled', y = 'Time (minutes)'
)
# Density plot for time_rx_screen
ggplot(df_bpa, aes(x = time_rx_screen, fill = enrolled)) +
geom_density(alpha = 0.5) +
labs(
title = "Density Plot of time_rx_screen by Enrollment Status",
x = "Time Rx Screen",
y = "Density"
+
) theme_minimal() +
scale_fill_manual(values = c("Yes" = "steelblue", "No" = "salmon"))
# Filter rows without missing time_rx_screen values
<- df_bpa %>% filter(!is.na(time_rx_screen))
df_clean
# Summarize medians and IQR
<- df_clean %>%
summary_median group_by(enrolled) %>%
summarise(
median_time = median(time_rx_screen, na.rm = TRUE),
IQR_time = IQR(time_rx_screen, na.rm = TRUE),
n = n()
)
print(summary_median)
# A tibble: 2 × 4
enrolled median_time IQR_time n
<chr> <dbl> <dbl> <int>
1 No 31.5 58.2 244
2 Yes 18.5 39.2 32
<- df_bpa %>%
df_bpa mutate(hour = as.numeric(format(time_abx_prescribed, "%H")),
minute = as.numeric(format(time_abx_prescribed, "%M")),
time_of_day = hour + minute / 60, # Convert to decimal hours
color_group = case_when(
> 9 & time_of_day < 13) ~ "Light Red", # 09:01 - 12:59
(time_of_day > 15 | time_of_day <= 7) ~ "Light Red", # 15:01 - 06:59
(time_of_day TRUE ~ "Light Green" # Rest of the time
))
# Scatterplot with conditional colors
ggplot(df_bpa, aes(x = time_of_day, y = 1, color = color_group)) +
geom_jitter(width = 0, height = 0.2, alpha = 0.7, size = 2) +
scale_color_manual(values = c("Light Red" = "lightcoral",
"Light Green" = "lightgreen")) +
labs(
title = "Scatter Plot of Prescription Times with Conditional Coloring",
x = "Time of Day (24-Hour Format)",
y = "Distribution (Jittered)",
color = "Time Group"
+
) scale_x_continuous(breaks = seq(0, 24, by = 1)) +
theme_minimal() +
theme(axis.text.y = element_blank(), # Remove Y-axis labels
axis.ticks.y = element_blank()) # Remove Y-axis ticks
<- df_bpa %>%
color_proportions group_by(color_group) %>%
summarise(count = n()) %>%
mutate(proportion = count / sum(count) * 100)
print(color_proportions)
# A tibble: 2 × 3
color_group count proportion
<chr> <int> <dbl>
1 Light Green 42 15.2
2 Light Red 234 84.8
# Extract hour and minute from time_abx_prescribed and calculate decimal hours
<- df_bpa %>%
df_bpa mutate(hour = as.numeric(format(time_abx_prescribed, "%H")),
minute = as.numeric(format(time_abx_prescribed, "%M")),
time_of_day = hour + minute / 60) # Convert to decimal hours
# Scatter plot with enrolled vs non-enrolled coloring
ggplot(df_bpa, aes(x = time_of_day, y = 1, color = enrolled)) +
geom_jitter(width = 0, height = 0.2, alpha = 0.7, size = 2) +
scale_color_manual(values = c("Yes" = "lightgreen", "No" = 'white')) +
labs(
title = "Scatter Plot of Prescription Times by Enrollment Status",
x = "Time of Day (24-Hour Format)",
y = "Distribution (Jittered)",
color = "Enrollment Status"
+
) scale_x_continuous(breaks = seq(0, 24, by = 1)) +
theme_minimal() +
theme(axis.text.y = element_blank(), # Remove Y-axis labels
axis.ticks.y = element_blank()) # Remove Y-axis ticks
Some fringe simulations
##### Experimental - simulations
$time_abx_prescribed_time <- format(df_bpa$time_abx_prescribed, format = "%H:%M:%S")
df_bpa
<- df_bpa %>%
df_bpa1 mutate(event_datetime = as.POSIXct(paste(date_abx_prescribed, time_abx_prescribed_time), format = "%Y-%m-%d %H:%M:%S"))
# Filter for events within working hours (Monday-Friday, 09:00 to Friday 16:00)
<- df_bpa1 %>%
df_bpa1 filter(!(wday(event_datetime) %in% c(1, 7)), # Exclude Saturday (7) and Sunday (1)
!(wday(event_datetime) == 6 & hour(event_datetime) > 16), # Exclude Friday after 16:00
!(wday(event_datetime) == 2 & hour(event_datetime) < 8)) # Exclude Monday before 08:00
<- df_bpa1 %>%
df_bpa1 mutate(
event_date = as.Date(event_datetime),
# Add the two cross-sectional assessment times for each event's date
cross_sectional_09 = as.POSIXct(paste(event_date, "09:00:00"), format = "%Y-%m-%d %H:%M:%S"),
cross_sectional_14 = as.POSIXct(paste(event_date, "14:00:00"), format = "%Y-%m-%d %H:%M:%S"),
# Handle edge cases: If the event occurred after 14:00, the next cross-sectional assessment will be the next day's 09:00
cross_sectional_09_next_day = cross_sectional_09 + days(1),
cross_sectional_14_next_day = cross_sectional_14 + days(1),
# Assign the nearest cross-sectional time to each event
nearest_cross_sectional = case_when(
<= cross_sectional_09 ~ cross_sectional_09,
event_datetime > cross_sectional_09 & event_datetime <= cross_sectional_14 ~ cross_sectional_14,
event_datetime TRUE ~ cross_sectional_09_next_day
)
)
<- df_bpa1 %>%
df_bpa1 mutate(
# Time difference for real-time assessment
real_time_diff = time_rx_screen,
# Time difference for cross-sectional assessments
cross_sectional_diff = as.numeric(difftime(nearest_cross_sectional, event_datetime, units = "mins"))
)
library(dplyr)
<- df_bpa1 %>%
df_bpa1 mutate(
detected_real_time = real_time_diff <= 60, # Real-time detection within 1 hour
detected_cross_sectional = cross_sectional_diff <= 60 # Cross-sectional detection within 1 hour
)
<- df_bpa1 %>%
results summarise(
prop_detected_real_time = mean(detected_real_time, na.rm = TRUE),
prop_detected_cross_sectional = mean(detected_cross_sectional, na.rm = TRUE)
)
print(results)
# A tibble: 1 × 2
prop_detected_real_time prop_detected_cross_sectional
<dbl> <dbl>
1 0.748 0.0675
<- results %>%
results_long pivot_longer(cols = everything(), names_to = "method", values_to = "proportion")
ggplot(results_long, aes(x = method, y = proportion, fill = method)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Event Detection Proportions",
x = "Method",
y = "Proportion of Events Detected Within 1 Hour") +
theme_minimal()
<- df_bpa1[, c("real_time_diff", "cross_sectional_diff")]
df_times
library(mice)
# Perform multiple imputations
<- mice(df_times, m = 5, method = "pmm", maxit = 50, seed = 123) imputed_data
iter imp variable
1 1
1 2
1 3
1 4
1 5
2 1
2 2
2 3
2 4
2 5
3 1
3 2
3 3
3 4
3 5
4 1
4 2
4 3
4 4
4 5
5 1
5 2
5 3
5 4
5 5
6 1
6 2
6 3
6 4
6 5
7 1
7 2
7 3
7 4
7 5
8 1
8 2
8 3
8 4
8 5
9 1
9 2
9 3
9 4
9 5
10 1
10 2
10 3
10 4
10 5
11 1
11 2
11 3
11 4
11 5
12 1
12 2
12 3
12 4
12 5
13 1
13 2
13 3
13 4
13 5
14 1
14 2
14 3
14 4
14 5
15 1
15 2
15 3
15 4
15 5
16 1
16 2
16 3
16 4
16 5
17 1
17 2
17 3
17 4
17 5
18 1
18 2
18 3
18 4
18 5
19 1
19 2
19 3
19 4
19 5
20 1
20 2
20 3
20 4
20 5
21 1
21 2
21 3
21 4
21 5
22 1
22 2
22 3
22 4
22 5
23 1
23 2
23 3
23 4
23 5
24 1
24 2
24 3
24 4
24 5
25 1
25 2
25 3
25 4
25 5
26 1
26 2
26 3
26 4
26 5
27 1
27 2
27 3
27 4
27 5
28 1
28 2
28 3
28 4
28 5
29 1
29 2
29 3
29 4
29 5
30 1
30 2
30 3
30 4
30 5
31 1
31 2
31 3
31 4
31 5
32 1
32 2
32 3
32 4
32 5
33 1
33 2
33 3
33 4
33 5
34 1
34 2
34 3
34 4
34 5
35 1
35 2
35 3
35 4
35 5
36 1
36 2
36 3
36 4
36 5
37 1
37 2
37 3
37 4
37 5
38 1
38 2
38 3
38 4
38 5
39 1
39 2
39 3
39 4
39 5
40 1
40 2
40 3
40 4
40 5
41 1
41 2
41 3
41 4
41 5
42 1
42 2
42 3
42 4
42 5
43 1
43 2
43 3
43 4
43 5
44 1
44 2
44 3
44 4
44 5
45 1
45 2
45 3
45 4
45 5
46 1
46 2
46 3
46 4
46 5
47 1
47 2
47 3
47 4
47 5
48 1
48 2
48 3
48 4
48 5
49 1
49 2
49 3
49 4
49 5
50 1
50 2
50 3
50 4
50 5
# Choose one of the completed datasets
<- complete(imputed_data, 1)
df_times
ggplot(df_times, aes(x = real_time_diff)) +
geom_histogram(binwidth = 50, fill = "blue", alpha = 0.7) +
labs(title = "Distribution of Real Time Differences", x = "Real Time Diff", y = "Frequency")
ggplot(df_times, aes(x = cross_sectional_diff)) +
geom_histogram(binwidth = 50, fill = "green", alpha = 0.7) +
labs(title = "Distribution of Cross Sectional Differences", x = "Cross Sectional Diff", y = "Frequency")
library(fitdistrplus)
# Fit distributions
<- fitdist(df_times$real_time_diff, "lnorm") # Example: gamma
fit_real_time
# Summary of the fits
summary(fit_real_time)
Fitting of the distribution ' lnorm ' by maximum likelihood
Parameters :
estimate Std. Error
meanlog 3.330683 0.10431079
sdlog 1.331751 0.07375868
Loglikelihood: -820.887 AIC: 1645.774 BIC: 1651.961
Correlation matrix:
meanlog sdlog
meanlog 1.000000e+00 3.061404e-09
sdlog 3.061404e-09 1.000000e+00
set.seed(123) # For reproducibility
# Simulate 1000 values for real_time_diff and cross_sectional_diff
<- rgamma(1000, shape = fit_real_time$estimate["shape"], rate = fit_real_time$estimate["rate"])
sim_real_time $estimate fit_real_time
meanlog sdlog
3.330683 1.331751
summary(df_times$real_time_diff)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.00 12.00 22.00 73.53 61.00 621.00
any(df_times$real_time_diff <= 0, na.rm = TRUE) # Check for non-positive values
[1] FALSE
<- fitdist(df_times$real_time_diff, "gamma")
fit_real_time summary(fit_real_time)
Fitting of the distribution ' gamma ' by maximum likelihood
Parameters :
estimate Std. Error
shape 0.629339091 0.058327967
rate 0.008621599 0.001146604
Loglikelihood: -849.987 AIC: 1703.974 BIC: 1710.162
Correlation matrix:
shape rate
shape 1.0000000 0.6810348
rate 0.6810348 1.0000000
library(mixtools)
<- normalmixEM(df_times$cross_sectional_diff, k = 2) fit
number of iterations= 21
summary(fit)
summary of normalmixEM object:
comp 1 comp 2
lambda 0.585493 0.414507
mu 194.625596 903.118105
sigma 98.929108 185.688352
loglik at estimate: -1129.279
# Number of samples to simulate
<- 10000
n
# Simulate component membership based on mixing proportions
<- sample(1:2, size = n, replace = TRUE, prob = fit$lambda)
component
# Simulate data based on the assigned components
<- sapply(component, function(k) rnorm(1, mean = fit$mu[k], sd = fit$sigma[k]))
sim_cross_sectional
<- rgamma(10000, shape = fit_real_time$estimate["shape"], rate = fit_real_time$estimate["rate"])
sim_real_time
wilcox.test(sim_cross_sectional, sim_real_time)
Wilcoxon rank sum test with continuity correction
data: sim_cross_sectional and sim_real_time
W = 90469820, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
hist(sim_cross_sectional, breaks = 30, col = 'skyblue', freq = FALSE, main = "Density-Normalized Histograms")
hist(df_times$cross_sectional_diff, breaks = 60, col = rgb(1, 0, 0, 0.5), freq = FALSE, add = TRUE)
legend("topright", legend = c("Simulated times (n = 10000)", "Original times (n = 166)"),
fill = c("skyblue", rgb(1, 0, 0, 0.5)))
hist(sim_real_time, breaks = 30, col = 'skyblue', freq = FALSE, main = "Density-Normalized Histograms")
hist(df_times$real_time_diff, breaks = 60, col = rgb(1, 0, 0, 0.5), freq = FALSE, add = TRUE)
legend("topright", legend = c("Simulated times (n = 10000)", "Original times (n = 166)"),
fill = c("skyblue", rgb(1, 0, 0, 0.5)))
# Boxplots for a quick summary
boxplot(sim_cross_sectional, sim_real_time,
names = c("Cross Sectional", "Real Time"),
col = c("skyblue", "salmon"), main = "Boxplot Comparison")
# Extract the p-value from the test result
<- "< 0.001"
p_value
# Add the p-value as text to the plot
text(x = 1.5, y = max(c(sim_cross_sectional, sim_real_time)) * 0.95,
labels = "Wilcoxon test P < 0.001")