Load packages
# set working directory as needed
setwd("~/DATA 604 Simulation and Modeling Techniques/Final Project")
library(ggplot2)
library(MASS)
library(fitdistrplus)
library(mc2d)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)
Import data
appts <- read.csv("Patient_Flow_Wait_Times/Appt_Scheduling_Jan_Feb_Mar.csv", stringsAsFactors = FALSE)
enc_times <- read.csv("Patient_Flow_Wait_Times/Enc_Time_Stamps_Jan_2016.csv", stringsAsFactors = FALSE)
no_shows <- read.csv("Patient_Flow_Wait_Times/No_Show_Rate_per_Day.csv",
stringsAsFactors = FALSE)
providers <- read.csv("Patient_Flow_Wait_Times/Provider_Counts_Jan_Feb_Mar_2016.csv",
stringsAsFactors = FALSE)
visits <- read.csv("Patient_Flow_Wait_Times/Visits_per_Day_Jan_2016.csv", stringsAsFactors = FALSE)
No-Show Rate
no_shows <- no_shows %>%
mutate(No_Show_Rate = No_Shows / Total_Visits_Arrived_and_No_Show,
Date = ymd(Date)) %>%
filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun")) & ymd(Date) != ymd("2016-01-29"))
## Warning: package 'bindrcpp' was built under R version 3.3.2
knitr::kable(
no_shows %>%
group_by(Clinic) %>%
summarize(Monthly_No_Show_Rate = sum(No_Shows) / sum(Total_Visits_Arrived_and_No_Show),
Avg_Daily_No_Show_Rate = mean(No_Show_Rate),
SD_Daily_No_Show_Rate = sd(No_Show_Rate))
)
| Adult Medicine |
0.2340764 |
0.2350831 |
0.041808 |
ggplot(no_shows, aes(No_Show_Rate)) +
geom_histogram(bins = 10)

ggplot(no_shows, aes(Date, No_Show_Rate)) +
geom_line() +
scale_x_date()

Appointment slots
appts_5min <- appts %>%
mutate(Date = ymd(Date),
Start_Time = hms(Start_Time),
End_Time = hms(End_Time))
# head(appts_5min[(minute(appts_5min$Start_Time) %% 5) != 0, ])
minute(appts_5min$Start_Time) <- round(minute(appts_5min$Start_Time) / 5) * 5
minute(appts_5min$End_Time) <- round(minute(appts_5min$End_Time) / 5) * 5
appts_5min <- appts_5min %>%
mutate(Start_Hr = hour(Start_Time),
Start_Min = minute(Start_Time),
End_Hr = hour(End_Time),
End_Min = minute(End_Time)) %>%
group_by(Clinic, Date, Start_Hr, Start_Min, End_Hr, End_Min) %>%
summarize(Visits = sum(Total_Scheduled_Visits)) %>%
ungroup() %>%
filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
arrange(Clinic, Date, Start_Hr, Start_Min)
appts_5min <- appts_5min %>%
mutate(Start_Time = hm(str_c(Start_Hr, ":", Start_Min)),
End_Time = hm(str_c(End_Hr, ":", End_Min)))
appt_len <- appts_5min %>%
mutate(Duration = (End_Hr * 60) - (Start_Hr * 60) + (End_Min - Start_Min)) %>%
group_by(Clinic, Duration) %>%
summarize(Visits = sum(Visits))
knitr::kable(appt_len %>% mutate(Proportion = Visits / sum(Visits)))
| Adult Medicine |
0 |
1 |
0.0000860 |
| Adult Medicine |
5 |
7 |
0.0006018 |
| Adult Medicine |
10 |
412 |
0.0354195 |
| Adult Medicine |
15 |
10862 |
0.9338033 |
| Adult Medicine |
20 |
64 |
0.0055021 |
| Adult Medicine |
25 |
2 |
0.0001719 |
| Adult Medicine |
30 |
207 |
0.0177957 |
| Adult Medicine |
35 |
1 |
0.0000860 |
| Adult Medicine |
45 |
37 |
0.0031809 |
| Adult Medicine |
60 |
38 |
0.0032669 |
| Adult Medicine |
120 |
1 |
0.0000860 |
ggplot(appt_len, aes(Duration, Visits)) +
geom_bar(stat = "identity")

appts_15min <- appts %>%
mutate(Date = ymd(Date),
Start_Time = hms(Start_Time),
End_Time = hms(End_Time))
minute(appts_15min$Start_Time) <- round(minute(appts_15min$Start_Time) / 15) * 15
minute(appts_15min$End_Time) <- round(minute(appts_15min$End_Time) / 15) * 15
appts_15min <- appts_15min %>%
mutate(Start_Hr = hour(Start_Time),
Start_Min = minute(Start_Time),
End_Hr = hour(End_Time),
End_Min = minute(End_Time)) %>%
group_by(Clinic, Date, Start_Hr, Start_Min) %>%
summarize(Visits = sum(Total_Scheduled_Visits)) %>%
ungroup()
appts_15min$Start_Time <- hm(str_c(appts_15min$Start_Hr, ":", appts_15min$Start_Min), roll = TRUE)
appts_15min <- appts_15min %>%
mutate(Start_Hr = hour(Start_Time),
Start_Min = minute(Start_Time)) %>%
group_by(Clinic, Date, Start_Hr, Start_Min) %>%
summarize(Visits = sum(Visits)) %>%
ungroup() %>%
filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
arrange(Clinic, Date, Start_Hr, Start_Min)
avg_day <- appts_15min %>%
group_by(Clinic, Start_Hr, Start_Min) %>%
summarize(Total_Visits = sum(Visits),
Days = n_distinct(Date),
Avg_Visits = round((Total_Visits / Days) / 0.25) * 0.25)
schedule <- avg_day %>%
ungroup() %>%
mutate(Start_Time = hm(str_c(Start_Hr, ":", Start_Min))) %>%
select(Clinic, Start_Time, Avg_Visits) %>%
mutate(Visits_Int = round(Avg_Visits),
Avg_Vts_per_Hr = Avg_Visits * 4)
knitr::kable(schedule)
| Adult Medicine |
8H 0M 0S |
1.00 |
1 |
4 |
| Adult Medicine |
8H 15M 0S |
1.25 |
1 |
5 |
| Adult Medicine |
8H 30M 0S |
1.25 |
1 |
5 |
| Adult Medicine |
8H 45M 0S |
1.75 |
2 |
7 |
| Adult Medicine |
9H 0M 0S |
9.00 |
9 |
36 |
| Adult Medicine |
9H 15M 0S |
3.00 |
3 |
12 |
| Adult Medicine |
9H 30M 0S |
7.75 |
8 |
31 |
| Adult Medicine |
9H 45M 0S |
10.00 |
10 |
40 |
| Adult Medicine |
10H 0M 0S |
8.75 |
9 |
35 |
| Adult Medicine |
10H 15M 0S |
5.25 |
5 |
21 |
| Adult Medicine |
10H 30M 0S |
10.00 |
10 |
40 |
| Adult Medicine |
10H 45M 0S |
4.75 |
5 |
19 |
| Adult Medicine |
11H 0M 0S |
9.25 |
9 |
37 |
| Adult Medicine |
11H 15M 0S |
7.00 |
7 |
28 |
| Adult Medicine |
11H 30M 0S |
6.50 |
6 |
26 |
| Adult Medicine |
11H 45M 0S |
1.25 |
1 |
5 |
| Adult Medicine |
12H 0M 0S |
2.00 |
2 |
8 |
| Adult Medicine |
12H 15M 0S |
1.50 |
2 |
6 |
| Adult Medicine |
12H 30M 0S |
2.25 |
2 |
9 |
| Adult Medicine |
12H 45M 0S |
2.50 |
2 |
10 |
| Adult Medicine |
13H 0M 0S |
9.50 |
10 |
38 |
| Adult Medicine |
13H 15M 0S |
4.75 |
5 |
19 |
| Adult Medicine |
13H 30M 0S |
5.75 |
6 |
23 |
| Adult Medicine |
13H 45M 0S |
7.75 |
8 |
31 |
| Adult Medicine |
14H 0M 0S |
8.25 |
8 |
33 |
| Adult Medicine |
14H 15M 0S |
6.00 |
6 |
24 |
| Adult Medicine |
14H 30M 0S |
6.50 |
6 |
26 |
| Adult Medicine |
14H 45M 0S |
3.75 |
4 |
15 |
| Adult Medicine |
15H 0M 0S |
7.50 |
8 |
30 |
| Adult Medicine |
15H 15M 0S |
5.50 |
6 |
22 |
| Adult Medicine |
15H 30M 0S |
5.25 |
5 |
21 |
| Adult Medicine |
15H 45M 0S |
2.00 |
2 |
8 |
| Adult Medicine |
16H 0M 0S |
4.25 |
4 |
17 |
| Adult Medicine |
16H 15M 0S |
2.25 |
2 |
9 |
| Adult Medicine |
16H 30M 0S |
3.25 |
3 |
13 |
| Adult Medicine |
16H 45M 0S |
4.00 |
4 |
16 |
| Adult Medicine |
17H 0M 0S |
7.25 |
7 |
29 |
| Adult Medicine |
17H 15M 0S |
3.25 |
3 |
13 |
| Adult Medicine |
17H 30M 0S |
4.00 |
4 |
16 |
| Adult Medicine |
17H 45M 0S |
3.25 |
3 |
13 |
| Adult Medicine |
18H 0M 0S |
4.00 |
4 |
16 |
| Adult Medicine |
18H 15M 0S |
1.50 |
2 |
6 |
| Adult Medicine |
18H 30M 0S |
1.25 |
1 |
5 |
| Adult Medicine |
18H 45M 0S |
1.00 |
1 |
4 |
| Adult Medicine |
19H 0M 0S |
1.50 |
2 |
6 |
| Adult Medicine |
19H 15M 0S |
1.00 |
1 |
4 |
Patient Arrivals & Arrival Time Deviation
enc_times <- enc_times %>%
filter(hms(Arrival_Time) >= hms("07:00:00") & Vitals_Done != "NULL") %>%
mutate(early_late = as.numeric(as.duration(hms(Arrival_Time) - hms(Start_Time))) / 60,
length = as.numeric(as.duration(hms(Departure_Time) - hms(Arrival_Time))) / 60)
## Note: method with signature 'Period#ANY' chosen for function '-',
## target signature 'Period#Period'.
## "ANY#Period" would also be valid
ggplot(enc_times, aes(early_late)) +
geom_histogram(binwidth = 15)

ggplot(enc_times, aes(early_late)) +
geom_histogram(binwidth = 5) +
coord_cartesian(xlim = c(-120, 120))

nrm <- fitdistr(enc_times$early_late, "normal")
enc_filt <- enc_times %>%
filter(early_late > -65 & early_late < 65)
tri <- fitdist(enc_filt$early_late, "triang",
start = list(min = min(enc_filt$early_late) - 5,
mode = unique(enc_filt$early_late)[which.max(
tabulate(match(enc_filt$early_late, unique(enc_filt$early_late))))],
max = max(enc_filt$early_late + 5)))
## Warning in sqrt(diag(varcovar)): NaNs produced
## Warning in sqrt(1/diag(V)): NaNs produced
## Warning in cov2cor(varcovar): diag(.) had 0 or NA entries; non-finite
## result is doubtful
ggplot(enc_times, aes(early_late)) +
geom_histogram(binwidth = 15, aes(y = ..density..)) +
coord_cartesian(xlim = c(-120, 120)) +
geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
stat_function(fun = dnorm, args = list(nrm$estimate[1], nrm$estimate[2]),
aes(color = "normal", linetype = "normal")) +
stat_function(fun = dtriang, args = list(min = tri$estimate[1],
mode = tri$estimate[2],
max = tri$estimate[3]),
aes(color = "triangle", linetype = "triangle")) +
scale_color_manual(name = "",
values = c("empirical density" = "black",
"normal" = "red",
"triangle" = "blue"),
breaks = c("empirical density",
"normal",
"triangle")) +
scale_linetype_manual(name = "",
values = c("empirical density" = "solid",
"normal" = "dashed",
"triangle" = "dashed"),
breaks = c("empirical density",
"normal",
"triangle")) +
xlab("arrival time deviation (minutes)")

ks.test(enc_times$early_late, "pnorm", nrm$estimate[1], nrm$estimate[2])
## Warning in ks.test(enc_times$early_late, "pnorm", nrm$estimate[1], nrm
## $estimate[2]): ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: enc_times$early_late
## D = 0.0866, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(enc_times$early_late, "ptriang", tri$estimate[1], tri$estimate[2], tri$estimate[3])
## Warning in ks.test(enc_times$early_late, "ptriang", tri$estimate[1], tri
## $estimate[2], : ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: enc_times$early_late
## D = 0.0829, p-value < 2.2e-16
## alternative hypothesis: two-sided
ggplot(enc_times, aes(early_late)) +
stat_ecdf(aes(color = "empirical cumulative distribution",
linetype = "empirical cumulative distribution")) +
stat_function(fun = pnorm, args = list(nrm$estimate[1], nrm$estimate[2]),
aes(color = "normal", linetype = "normal")) +
stat_function(fun = ptriang, args = list(min = tri$estimate[1],
mode = tri$estimate[2],
max = tri$estimate[3]),
aes(color = "triangle", linetype = "triangle")) +
scale_color_manual(name = "",
values = c("empirical cumulative distribution" = "black",
"normal" = "red",
"triangle" = "blue"),
breaks = c("empirical cumulative distribution",
"normal",
"triangle")) +
scale_linetype_manual(name = "",
values = c("empirical cumulative distribution" = "solid",
"normal" = "dashed",
"triangle" = "dashed"),
breaks = c("empirical cumulative distribution",
"normal",
"triangle")) +
labs(x = "interarrival time (minutes)", y = "cumulative probability")

ggplot(enc_times) +
geom_line(aes(x = ppoints(length(early_late), 1),
y = pnorm(sort(early_late), nrm$estimate[1], nrm$estimate[2]),
color = "normal")) +
geom_line(aes(x = ppoints(length(early_late), 1),
y = ptriang(sort(early_late), tri$estimate[1], tri$estimate[2], tri$estimate[3]),
color = "triangle")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("normal" = "red",
"triangle" = "blue"),
breaks = c("normal",
"triangle")) +
labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(enc_times, aes(sample = early_late)) +
stat_qq(distribution = qnorm,
dparams = list(nrm$estimate[1], nrm$estimate[2]),
geom = "line",
aes(color = "normal")) +
stat_qq(distribution = qtriang,
dparams = list(tri$estimate[1], tri$estimate[2], tri$estimate[3]),
geom = "line",
aes(color = "triangle")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("normal" = "red",
"triangle" = "blue"),
breaks = c("normal", "triangle")) +
labs(title = "Q-Q Plot")

nrm
## mean sd
## -5.9178235 46.2293202
## ( 0.8633833) ( 0.6105042)
enc_times %>%
filter(length > 0) %>%
ggplot(aes(length)) +
geom_histogram(binwidth = 10)

interarrival <- enc_times %>%
filter(hms(Arrival_Time) >= hms("07:00:00") & Vitals_Done != "NULL") %>%
select(Clinic, Date, Arrival_Time) %>%
mutate(Date = ymd(Date),
Day = wday(Date, label = TRUE),
Weekday = ifelse(Day %in% c("Sat", "Sun"), 0, 1))
Interarrival Time
adult_arr <- interarrival %>%
filter(Clinic == "Adult Medicine" & Weekday == 1) %>%
arrange(Date, Arrival_Time) %>%
mutate(Interarrival = ifelse(ymd(Date) == ymd(lag(Date)),
as.numeric(as.duration(hms(Arrival_Time) - hms(lag(Arrival_Time)))) / 60,
NA))
adult_arr <- adult_arr[!is.na(adult_arr$Interarrival) &
adult_arr$Interarrival > 0 &
adult_arr$Interarrival < 20, ]
summary(adult_arr$Interarrival)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01667 1.13300 2.38300 3.37300 4.43300 19.82000
ggplot(adult_arr, aes(Interarrival)) +
geom_histogram(binwidth = 1)

e_fit <- fitdistr(adult_arr$Interarrival, "exponential")
g_fit <- fitdistr(adult_arr$Interarrival, "gamma")
lnrm_fit <- fitdistr(adult_arr$Interarrival, "lognormal")
w_fit <- fitdistr(adult_arr$Interarrival, "weibull")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
ggplot(adult_arr, aes(Interarrival)) +
geom_histogram(aes(y = ..density..), binwidth = 1) +
geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
stat_function(fun = dexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical density" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical density" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "interarrival time (minutes)")

ks.test(adult_arr$Interarrival, "pexp", e_fit$estimate[1])
## Warning in ks.test(adult_arr$Interarrival, "pexp", e_fit$estimate[1]): ties
## should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_arr$Interarrival
## D = 0.038403, p-value = 0.0005134
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "pgamma", g_fit$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_arr$Interarrival
## D = 0.025077, p-value = 0.05889
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "plnorm", lnrm_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_arr$Interarrival
## D = 0.078957, p-value = 1.332e-15
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "pweibull", w_fit$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_arr$Interarrival
## D = 0.027132, p-value = 0.03227
## alternative hypothesis: two-sided
ggplot(adult_arr, aes(Interarrival)) +
stat_ecdf(aes(color = "empirical cumulative distribution",
linetype = "empirical cumulative distribution")) +
stat_function(fun = pexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical cumulative distribution" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical cumulative distribution" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "interarrival time (minutes)", y = "cumulative probability")

ggplot(adult_arr) +
geom_line(aes(x = ppoints(length(Interarrival), 1),
y = pexp(sort(Interarrival), e_fit$estimate[1]),
color = "exponential")) +
geom_line(aes(x = ppoints(length(Interarrival), 1),
y = pgamma(sort(Interarrival), g_fit$estimate[1], g_fit$estimate[2]),
color = "gamma")) +
geom_line(aes(x = ppoints(length(Interarrival), 1),
y = plnorm(sort(Interarrival), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
color = "log normal")) +
geom_line(aes(x = ppoints(length(Interarrival), 1),
y = pweibull(sort(Interarrival), w_fit$estimate[1], w_fit$estimate[2]),
color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(adult_arr, aes(sample = Interarrival)) +
stat_qq(distribution = qexp,
dparams = list(e_fit$estimate[1]),
geom = "line",
aes(color = "exponential")) +
stat_qq(distribution = qgamma,
dparams = list(g_fit$estimate[1], g_fit$estimate[2]),
geom = "line",
aes(color = "gamma")) +
stat_qq(distribution = qlnorm,
dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
geom = "line",
aes(color = "log normal")) +
stat_qq(distribution = qweibull,
dparams = list(w_fit$estimate[1], w_fit$estimate[2]),
geom = "line",
aes(color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "Q-Q Plot")

Waiting Time Between Registration and Initiation of Care
enc_times <- enc_times %>%
mutate(Wait_Time_After_Reg =
as.numeric(as.duration(
hms(format(ymd_hms(Vitals_Done), "%H:%M:%S")) - hms(Arrival_Time))) / 60)
enc_times %>%
filter(Wait_Time_After_Reg > 0 & Wait_Time_After_Reg <= 360) %>%
ggplot(aes(Wait_Time_After_Reg)) +
geom_histogram(binwidth = 10)

Provider Service Times
serv_times <- read.csv("Patient_Flow_Wait_Times/Enc_Time_Stamps_Jan_2016.csv", stringsAsFactors = FALSE)
serv_times <- serv_times %>%
mutate(Service_Time = as.numeric(as.duration(
hms(Departure_Time) - hms(format(ymd_hms(Vitals_Done), "%H:%M:%S")))) / 60)
## Warning: 130 failed to parse.
serv_times %>%
filter(Service_Time > 0 & Service_Time < 180) %>%
ggplot(aes(Service_Time)) +
geom_histogram(binwidth = 5)

adult_service <- serv_times %>%
filter(Clinic == "Adult Medicine" &
Service_Time > 0 &
Service_Time < 180 &
!(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
select(Clinic, Date, Service_Time)
e_fit <- fitdistr(adult_service$Service_Time, "exponential")
g_fit <- fitdistr(adult_service$Service_Time, "gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
lnrm_fit <- fitdistr(adult_service$Service_Time, "lognormal")
w_fit <- fitdistr(adult_service$Service_Time, "weibull")
ggplot(adult_service, aes(Service_Time)) +
geom_histogram(aes(y = ..density..), binwidth = 5) +
geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
stat_function(fun = dexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical density" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical density" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "service time (minutes)")

ks.test(adult_service$Service_Time, "pexp", e_fit$estimate[1])
## Warning in ks.test(adult_service$Service_Time, "pexp", e_fit$estimate[1]):
## ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_service$Service_Time
## D = 0.12206, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "pgamma", g_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_service$Service_Time
## D = 0.013058, p-value = 0.7297
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "plnorm", lnrm_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_service$Service_Time
## D = 0.067189, p-value = 2.447e-11
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "pweibull", w_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: adult_service$Service_Time
## D = 0.017316, p-value = 0.3744
## alternative hypothesis: two-sided
ggplot(adult_service, aes(Service_Time)) +
stat_ecdf(aes(color = "empirical cumulative distribution",
linetype = "empirical cumulative distribution")) +
stat_function(fun = pexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical cumulative distribution" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical cumulative distribution" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "service time (minutes)", y = "cumulative probability")

ggplot(adult_service) +
geom_line(aes(x = ppoints(length(Service_Time), 1),
y = pexp(sort(Service_Time), e_fit$estimate[1]),
color = "exponential")) +
geom_line(aes(x = ppoints(length(Service_Time), 1),
y = pgamma(sort(Service_Time), g_fit$estimate[1], g_fit$estimate[2]),
color = "gamma")) +
geom_line(aes(x = ppoints(length(Service_Time), 1),
y = plnorm(sort(Service_Time), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
color = "log normal")) +
geom_line(aes(x = ppoints(length(Service_Time), 1),
y = pweibull(sort(Service_Time), w_fit$estimate[1], w_fit$estimate[2]),
color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(adult_service, aes(sample = Service_Time)) +
stat_qq(distribution = qexp,
dparams = list(e_fit$estimate[1]),
geom = "line",
aes(color = "exponential")) +
stat_qq(distribution = qgamma,
dparams = list(g_fit$estimate[1], g_fit$estimate[2]),
geom = "line",
aes(color = "gamma")) +
stat_qq(distribution = qlnorm,
dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
geom = "line",
aes(color = "log normal")) +
stat_qq(distribution = qweibull,
dparams = list(w_fit$estimate[1], w_fit$estimate[2]),
geom = "line",
aes(color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "Q-Q Plot")

g_fit
## shape rate
## 1.699797193 0.038216491
## (0.041833922) (0.001091857)
Monte Carlo Simulation to Identify Distributions of Visit Times with Medical Assistants and with Physicians
set.seed(2017)
n <- 10000
total <- rgamma(n, shape = g_fit$estimate[1], rate = g_fit$estimate[2])
ma_time <- rtriang(n, min = 2, mode = 10, max = 16)
md_time <- total - ma_time
md_time <- md_time[md_time > 0]
df <- data.frame(phys_time = md_time)
ggplot(df, aes(phys_time)) +
geom_histogram(binwidth = 10) +
xlab("physician service time (minutes)")

e_fit <- fitdistr(df$phys_time, "exponential")
g_fit <- fitdistr(df$phys_time, "gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
lnrm_fit <- fitdistr(df$phys_time, "lognormal")
w_fit <- fitdistr(df$phys_time, "weibull")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
ggplot(df, aes(phys_time)) +
geom_histogram(aes(y = ..density..), binwidth = 5) +
geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
stat_function(fun = dexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical density" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical density" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical density",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "physician service time (minutes)")

ks.test(df$phys_time, "pexp", e_fit$estimate[1])
##
## One-sample Kolmogorov-Smirnov test
##
## data: df$phys_time
## D = 0.063045, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(df$phys_time, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
##
## One-sample Kolmogorov-Smirnov test
##
## data: df$phys_time
## D = 0.019333, p-value = 0.002189
## alternative hypothesis: two-sided
ks.test(df$phys_time, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
##
## One-sample Kolmogorov-Smirnov test
##
## data: df$phys_time
## D = 0.081045, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(df$phys_time, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
##
## One-sample Kolmogorov-Smirnov test
##
## data: df$phys_time
## D = 0.0089321, p-value = 0.4608
## alternative hypothesis: two-sided
ggplot(df, aes(phys_time)) +
stat_ecdf(aes(color = "empirical cumulative distribution",
linetype = "empirical cumulative distribution")) +
stat_function(fun = pexp, args = list(e_fit$estimate),
aes(color = "exponential", linetype = "exponential")) +
stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
aes(color = "gamma", linetype = "gamma")) +
stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
aes(color = "log normal", linetype = "log normal")) +
stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
aes(color = "Weibull", linetype = "Weibull")) +
scale_color_manual(name = "",
values = c("empirical cumulative distribution" = "black",
"exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
scale_linetype_manual(name = "",
values = c("empirical cumulative distribution" = "solid",
"exponential" = "dashed",
"gamma" = "dashed",
"log normal" = "dashed",
"Weibull" = "dashed"),
breaks = c("empirical cumulative distribution",
"exponential",
"gamma",
"log normal",
"Weibull")) +
labs(x = "physician service time (minutes)", y = "cumulative probability")

ggplot(df) +
geom_line(aes(x = ppoints(length(phys_time), 1),
y = pexp(sort(phys_time), e_fit$estimate[1]),
color = "exponential")) +
geom_line(aes(x = ppoints(length(phys_time), 1),
y = pgamma(sort(phys_time), g_fit$estimate[1], g_fit$estimate[2]),
color = "gamma")) +
geom_line(aes(x = ppoints(length(phys_time), 1),
y = plnorm(sort(phys_time), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
color = "log normal")) +
geom_line(aes(x = ppoints(length(phys_time), 1),
y = pweibull(sort(phys_time), w_fit$estimate[1], w_fit$estimate[2]),
color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(df, aes(sample = phys_time)) +
stat_qq(distribution = qexp,
dparams = list(e_fit$estimate[1]),
geom = "line",
aes(color = "exponential")) +
stat_qq(distribution = qgamma,
dparams = list(g_fit$estimate[1], g_fit$estimate[2]),
geom = "line",
aes(color = "gamma")) +
stat_qq(distribution = qlnorm,
dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
geom = "line",
aes(color = "log normal")) +
stat_qq(distribution = qweibull,
dparams = list(w_fit$estimate[1], w_fit$estimate[2]),
geom = "line",
aes(color = "Weibull")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("exponential" = "red",
"gamma" = "blue",
"log normal" = "green",
"Weibull" = "orange"),
breaks = c("exponential",
"gamma",
"log normal",
"Weibull")) +
labs(title = "Q-Q Plot")

w_fit
## shape scale
## 1.176475743 41.349116214
## ( 0.009657111) ( 0.387141958)
Number of Staff
staff <- providers %>%
select(Clinic, Date, Providers) %>%
mutate(Date = ymd(Date),
Day = wday(Date, label = TRUE),
Weekday = ifelse(Day %in% c("Sat", "Sun"), 0, 1))
ggplot(staff, aes(Day, Providers)) +
geom_bar(stat = "summary", fun.y = "mean")

ggplot(staff, aes(Providers)) +
geom_histogram(binwidth = 1) +
facet_grid(~ Day)

staff %>%
filter(Clinic == "Adult Medicine" & Weekday == 1) %>%
ggplot(aes(Providers)) +
geom_histogram(aes(y = ..density..), binwidth = 1) +
geom_line(stat = "density") #, aes(color = "empirical density", linetype = "empirical density")) +

adult_wkday <- staff %>%
filter(Clinic == "Adult Medicine" & Weekday == 1)
adult_wkday <- adult_wkday$Providers
unif <- c(min = min(adult_wkday), max = max(adult_wkday))
geo <- fitdistr(adult_wkday, "geometric")
nb <- fitdist(adult_wkday, "nbinom")
## Warning in sqrt(diag(varcovar)): NaNs produced
## Warning in sqrt(1/diag(V)): NaNs produced
## Warning in cov2cor(varcovar): diag(.) had 0 or NA entries; non-finite
## result is doubtful
ggplot(data.frame(adult_wkday = adult_wkday), aes(adult_wkday)) +
geom_histogram(aes(y = ..density..), binwidth = 1) +
geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
geom_line(aes(y = dunif(adult_wkday, min = unif[1], max = unif[2]),
color = "uniform", linetype = "uniform")) +
geom_line(aes(y = dgeom(adult_wkday, prob = geo$estimate[1]),
color = "geometric", linetype = "geometric")) +
geom_line(aes(y = dnbinom(adult_wkday, size = nb$estimate[[1]], mu = nb$estimate[[2]]),
color = "negative binomial", linetype = "negative binomial")) +
scale_color_manual(name = "",
values = c("empirical density" = "black",
"uniform" = "red",
"geometric" = "blue",
"negative binomial" = "green"),
breaks = c("empirical density",
"uniform",
"geometric",
"negative binomial")) +
scale_linetype_manual(name = "",
values = c("empirical density" = "solid",
"uniform" = "dashed",
"geometric" = "dashed",
"negative binomial" = "dashed"),
breaks = c("empirical density",
"uniform",
"geometric",
"negative binomial"))

tab <- as.data.frame(table(factor(adult_wkday, levels = min(adult_wkday):max(adult_wkday))))
counts <- data.frame(
observed = tab$Freq,
expected_unif = dunif(min(adult_wkday):max(adult_wkday),
min = min(adult_wkday),
max = max(adult_wkday)) * length(adult_wkday),
expected_geom = dgeom(min(adult_wkday):max(adult_wkday),
prob = geo$estimate[1]) * length(adult_wkday),
expected_nbinom = dnbinom(min(adult_wkday):max(adult_wkday),
size = nb$estimate[[1]],
mu = nb$estimate[[2]]) * length(adult_wkday))
row.names(counts) <- tab$Var1
(t1 <- as.table(t(counts[, 1:2])))
## 7 8 9 10 11 12
## observed 2.000000 2.000000 2.000000 6.000000 6.000000 4.000000
## expected_unif 5.545455 5.545455 5.545455 5.545455 5.545455 5.545455
## 13 14 15 16 17 18
## observed 9.000000 12.000000 8.000000 4.000000 1.000000 5.000000
## expected_unif 5.545455 5.545455 5.545455 5.545455 5.545455 5.545455
chisq.test(t1)
## Warning in chisq.test(t1): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: t1
## X-squared = 12.14, df = 11, p-value = 0.3532
chisq.test(t1, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: t1
## X-squared = 12.14, df = NA, p-value = 0.6322
(t2 <- as.table(t(counts[, c(1, 3)])))
## 7 8 9 10 11 12
## observed 2.000000 2.000000 2.000000 6.000000 6.000000 4.000000
## expected_geom 2.586632 2.402948 2.232308 2.073785 1.926520 1.789712
## 13 14 15 16 17 18
## observed 9.000000 12.000000 8.000000 4.000000 1.000000 5.000000
## expected_geom 1.662620 1.544553 1.434870 1.332975 1.238317 1.150381
chisq.test(t2)
## Warning in chisq.test(t2): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: t2
## X-squared = 9.586, df = 11, p-value = 0.568
chisq.test(t2, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: t2
## X-squared = 9.586, df = NA, p-value = 0.928
(t3 <- as.table(t(counts[, c(1, 4)])))
## 7 8 9 10 11
## observed 2.000000 2.000000 2.000000 6.000000 6.000000
## expected_nbinom 1.652308 2.702026 3.927673 5.138351 6.111101
## 12 13 14 15 16
## observed 4.000000 9.000000 12.000000 8.000000 4.000000
## expected_nbinom 6.662338 6.704582 6.265160 5.464235 4.467842
## 17 18
## observed 1.000000 5.000000
## expected_nbinom 3.438250 2.498926
chisq.test(t3)
## Warning in chisq.test(t3): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: t3
## X-squared = 6.0195, df = 11, p-value = 0.8721
chisq.test(t3, simulate.p.value = TRUE)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: t3
## X-squared = 6.0195, df = NA, p-value = 0.999
ggplot(data.frame(adult_wkday = adult_wkday), aes(adult_wkday)) +
stat_ecdf(aes(color = "empirical cumulative distribution",
linetype = "empirical cumulative distribution")) +
stat_function(fun = punif, args = list(unif[1], unif[2]),
aes(color = "uniform", linetype = "uniform")) +
stat_function(fun = pgeom, args = list(prob = geo$estimate[1]),
aes(color = "geometric", linetype = "geometric")) +
stat_function(fun = pnbinom, args = list(size = nb$estimate[1], mu = nb$estimate[2]),
aes(color = "negative binomial", linetype = "negative binomial")) +
scale_color_manual(name = "",
values = c("empirical cumulative distribution" = "black",
"uniform" = "red",
"geometric" = "blue",
"negative binomial" = "green"),
breaks = c("empirical cumulative distribution",
"uniform",
"geometric",
"negative binomial")) +
scale_linetype_manual(name = "",
values = c("empirical cumulative distribution" = "solid",
"uniform" = "dashed",
"geometric" = "dashed",
"negative binomial" = "dashed"),
breaks = c("empirical cumulative distribution",
"uniform",
"geometric",
"negative binomial")) +
labs(y = "cumulative probability")

ggplot(data.frame(adult_wkday = adult_wkday)) +
geom_line(aes(x = ppoints(length(adult_wkday), 1),
y = punif(sort(adult_wkday), min = unif[1], max = unif[2]),
color = "uniform")) +
geom_line(aes(x = ppoints(length(adult_wkday), 1),
y = pgeom(sort(adult_wkday), prob = geo$estimate[1]),
color = "geometric")) +
geom_line(aes(x = ppoints(length(adult_wkday), 1),
y = pnbinom(sort(adult_wkday),
size = nb$estimate[1],
mu = nb$estimate[2]),
color = "negative binomial")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
scale_color_manual(name = "",
values = c("uniform" = "red",
"geometric" = "blue",
"negative binomial" = "green"),
breaks = c("uniform",
"geometric",
"negative binomial")) +
labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(data.frame(adult_wkday = adult_wkday), aes(sample = adult_wkday)) +
stat_qq(distribution = qunif,
dparams = list(min = unif[1], max = unif[2]),
geom = "line",
aes(color = "uniform")) +
stat_qq(distribution = qgeom,
dparams = list(prob = geo$estimate[1]),
geom = "line",
aes(color = "geometric")) +
stat_qq(distribution = qnbinom,
dparams = list(size = nb$estimate[1], mu = nb$estimate[2]),
geom = "line",
aes(color = "negative binomial")) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
scale_color_manual(name = "",
values = c("uniform" = "red",
"geometric" = "blue",
"negative binomial" = "green"),
breaks = c("uniform",
"geometric",
"negative binomial")) +
labs(title = "Q-Q Plot")

nb
## Fitting of the distribution ' nbinom ' by maximum likelihood
## Parameters:
## estimate Std. Error
## size 3.665819e+07 NaN
## mu 1.308243e+01 0.4631131
mean(adult_wkday)
## [1] 13.08197
Import Simulation Results for Model Validation and Comparison
fixed <- read.csv("Adult_Amb_Clinic_Fixed_Exam_Rms_Model_Experiment1_ResultsSummary.csv", header = TRUE, stringsAsFactors = FALSE)
flex <- read.csv("Adult_Amb_Clinic_Flex_Exam_Rms_Model_Experiment1_ResultsSummary.csv", header = TRUE, stringsAsFactors = FALSE)
Model Verification and Validation
model_vv <- data.frame(Source = c("Observed Data",
"Status Quo Model (Fixed Exam Rooms)",
"Intervention Model (Flexible Exam Rooms)"),
Total_Visits = numeric(3),
Time_In_System = numeric(3),
Average_Wait_Time = numeric(3))
model_vv[model_vv$Source == "Observed Data", ]$Total_Visits <- visits %>%
filter(!(wday(mdy(Date), label = TRUE) %in% c("Sat", "Sun"))) %>%
summarize(visits_per_day = mean(Visits)) * (52 * 5)
model_vv[model_vv$Source == "Observed Data", ]$Time_In_System <- enc_times %>%
filter(length > 0) %>%
summarize(avg_length = mean(length, na.rm = TRUE))
model_vv[model_vv$Source == "Observed Data", ]$Average_Wait_Time <- enc_times %>%
filter(Wait_Time_After_Reg > 0 & Wait_Time_After_Reg <= 240) %>%
summarize(avg_wait = mean(Wait_Time_After_Reg, na.rm = TRUE))
model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Total_Visits <- fixed %>%
filter(Object.Name == "Patient" & Data.Item == "NumberDestroyed" & Statistic.Type == "Total") %>%
select(Average)
model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Time_In_System <- fixed %>%
filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
select(Average)
model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Average_Wait_Time <- fixed %>%
filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
select(Average)
model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Total_Visits <- flex %>%
filter(Object.Name == "Patient" & Data.Item == "NumberDestroyed" & Statistic.Type == "Total") %>%
select(Average)
model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Time_In_System <- flex %>%
filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
select(Average)
model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Average_Wait_Time <- flex %>%
filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
select(Average)
knitr::kable(model_vv, digits = 2)
| Observed Data |
41011.58 |
65.3478 |
24.72246 |
| Status Quo Model (Fixed Exam Rooms) |
42259 |
89.05314 |
32.36252 |
| Intervention Model (Flexible Exam Rooms) |
42255.7 |
70.32399 |
9.327603 |
Status Quo versus Intervention Models:
Effects of Flexible Exam Room Allocation on Average Wait Time Between Registration and Escort to Exam Room and Average Total Time in System
\(t = \frac{\bar{x_1} - \bar{x_2}}{\sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}}\)
\(d.f. = \frac{(s_1^2 / n_1 + s_2^2/ n_2)^2}{(s_1^2 / n_1)^2 / (n_1 - 1) + (s_2^2 / n_2)^2 / (n_2 - 1)}\)
(fixed_wait <- fixed %>%
filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
select(Average, Standard.Deviation, Half.Width))
## Average Standard.Deviation Half.Width
## 1 32.36252 1.394246 0.997314
(fixed_tis <- fixed %>%
filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
select(Average, Standard.Deviation, Half.Width))
## Average Standard.Deviation Half.Width
## 1 89.05314 1.634038 1.168839
(flex_wait <- flex %>%
filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
select(Average, Standard.Deviation, Half.Width))
## Average Standard.Deviation Half.Width
## 1 9.327603 0.1084512 0.07757589
(flex_tis <- flex %>%
filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
select(Average, Standard.Deviation, Half.Width))
## Average Standard.Deviation Half.Width
## 1 70.32399 0.4667451 0.3338661
df_wait <- (fixed_wait$Standard.Deviation^2 / 10 + flex_wait$Standard.Deviation^2 / 10)^2 /
(((fixed_wait$Standard.Deviation^2 / 10)^2 / 9) + ((flex_wait$Standard.Deviation^2 / 10)^2 / 9))
(t_wait <- (flex_wait$Average - fixed_wait$Average) /
(sqrt(flex_wait$Standard.Deviation^2 / 10 + fixed_wait$Standard.Deviation^2 / 10)))
## [1] -52.08798
(p_value_wait <- pt(t_wait, df_wait))
## [1] 6.846879e-13
df_tis <- (fixed_tis$Standard.Deviation^2 / 10 + flex_tis$Standard.Deviation^2 / 10)^2 /
(((fixed_tis$Standard.Deviation^2 / 10)^2 / 9) + ((flex_tis$Standard.Deviation^2 / 10)^2 / 9))
(t_tis <- (flex_tis$Average - fixed_tis$Average) /
(sqrt(flex_tis$Standard.Deviation^2 / 10 + fixed_tis$Standard.Deviation^2 / 10)))
## [1] -34.85176
(p_value_tis <- pt(t_tis, df_tis))
## [1] 1.837663e-12
diff_means <- data.frame(Parameter = c("Average Wait Time (minutes)", "Average Time In System (minutes)"),
t_statistic = c(t_wait, t_tis),
p_value_1_sided = formatC(c(p_value_wait, p_value_tis), format = "e"))
knitr::kable(diff_means)
| Average Wait Time (minutes) |
-52.08798 |
6.8469e-13 |
| Average Time In System (minutes) |
-34.85176 |
1.8377e-12 |