# pull ED data: ------------
# today_date <- "20190901"
today_date <- Sys.Date() %>% as.character() %>% gsub("-", "", .)
vw_ed_real_time <<- dplyr::tbl(cnx, dbplyr::in_schema("publish",
"emergency_nrt"))
df1.ed_data <-
vw_ed_real_time %>%
filter(facility_name == "LGH Lions Gate",
start_date_id == today_date) %>%
select(start_date_id,
start_dt_tm,
start_to_left_ed_elapsed_time_minutes,
first_triage_acuity_cd,
age_at_start_date,
is_admitted) %>%
collect()
# df1.ed_data
# str(df1.ed_data)
# summary(df1.ed_data)
df2.ed_modified <-
df1.ed_data %>%
rename(ed_los = start_to_left_ed_elapsed_time_minutes) %>%
mutate(time_now = ymd_hms(Sys.time()),
ed_los_filled = case_when(is.na(ed_los) ~ as.numeric(time_now - start_dt_tm,
units = "hours"),
TRUE ~ as.numeric(ed_los/60)),
left_ed = ifelse(is.na(ed_los), 0, 1),
is_admitted = as.factor(is_admitted) %>% fct_recode(non_admit = "0",
admit = "1")) %>%
select(start_dt_tm,
ed_los,
ed_los_filled,
left_ed,
is_admitted)
# df2.ed_modified
# survival model 1: -------
km1 <- survfit(Surv(ed_los_filled, left_ed) ~ 1,
data = df2.ed_modified)
num_still_in_ed <-
df2.ed_modified %>%
filter(left_ed == 0) %>%
pull %>%
as.numeric() %>%
sum
p1.survival <- autoplot(km1,
conf.int = FALSE,
firsty = 1,
surv.colour = "skyblue4",
surv.size = 1.5,
censor.size = 5,
censor.colour = "firebrick") +
scale_y_continuous(limits = c(0,1),
expand = c(0, 0),
breaks = seq(0, 1, .1)) +
scale_x_continuous(expand = c(0, 0),
breaks = seq(0, 50000, 1)) +
geom_vline(xintercept = 10,
col = "grey70") +
labs(x = "Time from start in ED (hours)",
y = "Probability of staying longer than specified time",
title = sprintf("LGH ED - Patients arriving on %s",
ymd(today_date)),
subtitle = sprintf("%i patients arrived at the ED so far today \n%i patients admitted \n%i still in ED",
nrow(df2.ed_modified),
df2.ed_modified %>% filter(is_admitted == "admit") %>% nrow(),
num_still_in_ed),
caption = sprintf("\n\nReport created at %s",
Sys.time())) +
theme_light() +
theme(panel.grid.minor = element_line(colour="grey95"),
panel.grid.major = element_line(colour="grey95")); p1.survival
# survival model 2: -----------
km2 <- survfit(Surv(ed_los_filled, left_ed) ~ is_admitted,
data = df2.ed_modified)
num_admits <-
df2.ed_modified %>%
filter(is_admitted == "admit") %>%
nrow()
if (num_admits < 1){
print("No admits so far today")
} else {
p2.survival <- autoplot(km2,
conf.int = FALSE,
firsty = 1,
#surv.colour = "skyblue4",
surv.size = 1.5,
censor.size = 5,
censor.colour = "black") +
scale_y_continuous(limits = c(0,1),
expand = c(0, 0),
breaks = seq(0, 1, .1)) +
scale_x_continuous(expand = c(0, 0),
breaks = seq(0, 50000, 1)) +
geom_vline(xintercept = 4,
col = "grey70") +
geom_vline(xintercept = 10,
col = "grey70") +
labs(x = "Time from start in ED (hours)",
y = "Probability of staying longer than specified time",
title = sprintf("LGH ED - Patients arriving on %s",
ymd(today_date)),
subtitle = sprintf("%i patients arrived at the ED so far today \n%i patients admitted \n%i still in ED",
nrow(df2.ed_modified),
df2.ed_modified %>% filter(is_admitted == "admit") %>% nrow(),
num_still_in_ed),
caption = sprintf("\n\nReport created at %s",
Sys.time())) +
theme_light() +
theme(panel.grid.minor = element_line(colour="grey95"),
panel.grid.major = element_line(colour="grey95"),
legend.position = "bottom"); p2.survival
}
This paper is a good general discussion on Kaplan-Meier curves. Here are some specifics related to the ED LOS context:
The x-axis is time, representing ED LOS. On this graph, the exact start time of a patient doesn’t matter, only the ED LOS does.
The blue line shows the pattern of patient discharges, according to their ED LOS. Whenever the line dips downward, that means that a patient was discharged after spending that amount of time in the ED. This pattern allows us to choose any LOS cutoff and see how many patients were achieving that goal. For example, we can ask, “What proportion of patients had ED LOS < 4 hours” The answer is given by 1 - y, where y is the reading on the y-axis of the graph
Red/black points represent patients still in ED. Although we don’t know their final ED LOS yet, we can track how long they have already been in ED, and use the blue curve to try to predict how much longer they may be there.
You could get a similar viz by taking all ED LOS, and showing as a bar chart, arranged from lowest to max LOS. You could use colour to highlight the bars that are patients still in ED
I think the survival curve is better because:
Much easier to compare across days, sites, different timescales, because the y-axis always standardized to be between 0 and 1.
Easy to read off percentiles from the y-axis.
Shape of the graph is not distorted by the censored observations (pts still in ED). This correctly reflects the fact that we do not yet know their ED LOS.
Opens the door for more advanced methods: we can easily get confidence intervals, log-rank tests to test whether 2 different sites/time periods are significantly different in terms of their LOS patterns.
# output: -----
ggsave(here::here("results",
"dst",
sprintf("%s_lgh_ed-los-survival-curves.pdf",
Sys.time() %>% gsub(":", ".", .))),
p2.survival)
## Saving 7 x 5 in image