Load packages

# set working directory as needed
setwd("~/DATA 604 Simulation and Modeling Techniques/Final Project")
library(ggplot2)
library(MASS)
library(fitdistrplus)
library(mc2d)
library(dplyr)
library(tidyr)
library(stringr)
library(lubridate)

Import data

appts <- read.csv("Patient_Flow_Wait_Times/Appt_Scheduling_Jan_Feb_Mar.csv", stringsAsFactors = FALSE)
enc_times <- read.csv("Patient_Flow_Wait_Times/Enc_Time_Stamps_Jan_2016.csv", stringsAsFactors = FALSE)
no_shows <- read.csv("Patient_Flow_Wait_Times/No_Show_Rate_per_Day.csv", 
                     stringsAsFactors = FALSE)
providers <- read.csv("Patient_Flow_Wait_Times/Provider_Counts_Jan_Feb_Mar_2016.csv", 
                      stringsAsFactors = FALSE)
visits <- read.csv("Patient_Flow_Wait_Times/Visits_per_Day_Jan_2016.csv", stringsAsFactors = FALSE)

No-Show Rate

no_shows <- no_shows %>% 
  mutate(No_Show_Rate = No_Shows / Total_Visits_Arrived_and_No_Show,
         Date = ymd(Date)) %>%
  filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun")) & ymd(Date) != ymd("2016-01-29"))
## Warning: package 'bindrcpp' was built under R version 3.3.2
knitr::kable(
  no_shows %>% 
  group_by(Clinic) %>% 
  summarize(Monthly_No_Show_Rate = sum(No_Shows) / sum(Total_Visits_Arrived_and_No_Show),
            Avg_Daily_No_Show_Rate = mean(No_Show_Rate),
            SD_Daily_No_Show_Rate = sd(No_Show_Rate))
  )
Clinic Monthly_No_Show_Rate Avg_Daily_No_Show_Rate SD_Daily_No_Show_Rate
Adult Medicine 0.2340764 0.2350831 0.041808
ggplot(no_shows, aes(No_Show_Rate)) +
  geom_histogram(bins = 10)

ggplot(no_shows, aes(Date, No_Show_Rate)) +
  geom_line() +
  scale_x_date()

Appointment slots

appts_5min <- appts %>%
  mutate(Date = ymd(Date),
         Start_Time = hms(Start_Time),
         End_Time = hms(End_Time))

# head(appts_5min[(minute(appts_5min$Start_Time) %% 5) != 0, ])
minute(appts_5min$Start_Time) <- round(minute(appts_5min$Start_Time) / 5) * 5
minute(appts_5min$End_Time) <- round(minute(appts_5min$End_Time) / 5) * 5

appts_5min <- appts_5min %>%
  mutate(Start_Hr = hour(Start_Time), 
         Start_Min = minute(Start_Time),
         End_Hr = hour(End_Time),
         End_Min = minute(End_Time)) %>%
  group_by(Clinic, Date, Start_Hr, Start_Min, End_Hr, End_Min) %>%
  summarize(Visits = sum(Total_Scheduled_Visits)) %>%
  ungroup() %>%
  filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
  arrange(Clinic, Date, Start_Hr, Start_Min)

appts_5min <- appts_5min %>%
  mutate(Start_Time = hm(str_c(Start_Hr, ":", Start_Min)),
         End_Time = hm(str_c(End_Hr, ":", End_Min)))

appt_len <- appts_5min %>% 
  mutate(Duration = (End_Hr * 60) - (Start_Hr * 60) + (End_Min - Start_Min)) %>% 
  group_by(Clinic, Duration) %>%
  summarize(Visits = sum(Visits))

knitr::kable(appt_len %>% mutate(Proportion = Visits / sum(Visits)))
Clinic Duration Visits Proportion
Adult Medicine 0 1 0.0000860
Adult Medicine 5 7 0.0006018
Adult Medicine 10 412 0.0354195
Adult Medicine 15 10862 0.9338033
Adult Medicine 20 64 0.0055021
Adult Medicine 25 2 0.0001719
Adult Medicine 30 207 0.0177957
Adult Medicine 35 1 0.0000860
Adult Medicine 45 37 0.0031809
Adult Medicine 60 38 0.0032669
Adult Medicine 120 1 0.0000860
ggplot(appt_len, aes(Duration, Visits)) +
  geom_bar(stat = "identity")

appts_15min <- appts %>%
  mutate(Date = ymd(Date),
         Start_Time = hms(Start_Time),
         End_Time = hms(End_Time))

minute(appts_15min$Start_Time) <- round(minute(appts_15min$Start_Time) / 15) * 15
minute(appts_15min$End_Time) <- round(minute(appts_15min$End_Time) / 15) * 15

appts_15min <- appts_15min %>%
  mutate(Start_Hr = hour(Start_Time), 
         Start_Min = minute(Start_Time),
         End_Hr = hour(End_Time),
         End_Min = minute(End_Time)) %>%
  group_by(Clinic, Date, Start_Hr, Start_Min) %>%
  summarize(Visits = sum(Total_Scheduled_Visits)) %>%
  ungroup()

appts_15min$Start_Time <- hm(str_c(appts_15min$Start_Hr, ":", appts_15min$Start_Min), roll = TRUE)

appts_15min <- appts_15min %>%
  mutate(Start_Hr = hour(Start_Time), 
         Start_Min = minute(Start_Time)) %>%
  group_by(Clinic, Date, Start_Hr, Start_Min) %>%
  summarize(Visits = sum(Visits)) %>%
  ungroup() %>%
  filter(!(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
  arrange(Clinic, Date, Start_Hr, Start_Min)

avg_day <- appts_15min %>% 
  group_by(Clinic, Start_Hr, Start_Min) %>%
  summarize(Total_Visits = sum(Visits),
            Days = n_distinct(Date),
            Avg_Visits = round((Total_Visits / Days) / 0.25) * 0.25)
schedule <- avg_day %>%
  ungroup() %>%
  mutate(Start_Time = hm(str_c(Start_Hr, ":", Start_Min))) %>%
  select(Clinic, Start_Time, Avg_Visits) %>%
  mutate(Visits_Int = round(Avg_Visits),
         Avg_Vts_per_Hr = Avg_Visits * 4)

knitr::kable(schedule)
Clinic Start_Time Avg_Visits Visits_Int Avg_Vts_per_Hr
Adult Medicine 8H 0M 0S 1.00 1 4
Adult Medicine 8H 15M 0S 1.25 1 5
Adult Medicine 8H 30M 0S 1.25 1 5
Adult Medicine 8H 45M 0S 1.75 2 7
Adult Medicine 9H 0M 0S 9.00 9 36
Adult Medicine 9H 15M 0S 3.00 3 12
Adult Medicine 9H 30M 0S 7.75 8 31
Adult Medicine 9H 45M 0S 10.00 10 40
Adult Medicine 10H 0M 0S 8.75 9 35
Adult Medicine 10H 15M 0S 5.25 5 21
Adult Medicine 10H 30M 0S 10.00 10 40
Adult Medicine 10H 45M 0S 4.75 5 19
Adult Medicine 11H 0M 0S 9.25 9 37
Adult Medicine 11H 15M 0S 7.00 7 28
Adult Medicine 11H 30M 0S 6.50 6 26
Adult Medicine 11H 45M 0S 1.25 1 5
Adult Medicine 12H 0M 0S 2.00 2 8
Adult Medicine 12H 15M 0S 1.50 2 6
Adult Medicine 12H 30M 0S 2.25 2 9
Adult Medicine 12H 45M 0S 2.50 2 10
Adult Medicine 13H 0M 0S 9.50 10 38
Adult Medicine 13H 15M 0S 4.75 5 19
Adult Medicine 13H 30M 0S 5.75 6 23
Adult Medicine 13H 45M 0S 7.75 8 31
Adult Medicine 14H 0M 0S 8.25 8 33
Adult Medicine 14H 15M 0S 6.00 6 24
Adult Medicine 14H 30M 0S 6.50 6 26
Adult Medicine 14H 45M 0S 3.75 4 15
Adult Medicine 15H 0M 0S 7.50 8 30
Adult Medicine 15H 15M 0S 5.50 6 22
Adult Medicine 15H 30M 0S 5.25 5 21
Adult Medicine 15H 45M 0S 2.00 2 8
Adult Medicine 16H 0M 0S 4.25 4 17
Adult Medicine 16H 15M 0S 2.25 2 9
Adult Medicine 16H 30M 0S 3.25 3 13
Adult Medicine 16H 45M 0S 4.00 4 16
Adult Medicine 17H 0M 0S 7.25 7 29
Adult Medicine 17H 15M 0S 3.25 3 13
Adult Medicine 17H 30M 0S 4.00 4 16
Adult Medicine 17H 45M 0S 3.25 3 13
Adult Medicine 18H 0M 0S 4.00 4 16
Adult Medicine 18H 15M 0S 1.50 2 6
Adult Medicine 18H 30M 0S 1.25 1 5
Adult Medicine 18H 45M 0S 1.00 1 4
Adult Medicine 19H 0M 0S 1.50 2 6
Adult Medicine 19H 15M 0S 1.00 1 4

Patient Arrivals & Arrival Time Deviation

enc_times <- enc_times %>%
  filter(hms(Arrival_Time) >= hms("07:00:00") & Vitals_Done != "NULL") %>%
  mutate(early_late = as.numeric(as.duration(hms(Arrival_Time) - hms(Start_Time))) / 60,
         length = as.numeric(as.duration(hms(Departure_Time) - hms(Arrival_Time))) / 60)
## Note: method with signature 'Period#ANY' chosen for function '-',
##  target signature 'Period#Period'.
##  "ANY#Period" would also be valid
ggplot(enc_times, aes(early_late)) +
  geom_histogram(binwidth = 15)

ggplot(enc_times, aes(early_late)) +
  geom_histogram(binwidth = 5) +
  coord_cartesian(xlim = c(-120, 120))

nrm <- fitdistr(enc_times$early_late, "normal")
enc_filt <- enc_times %>%
  filter(early_late > -65 & early_late < 65)
tri <- fitdist(enc_filt$early_late, "triang", 
               start = list(min = min(enc_filt$early_late) - 5, 
                            mode = unique(enc_filt$early_late)[which.max(
                              tabulate(match(enc_filt$early_late, unique(enc_filt$early_late))))], 
                            max = max(enc_filt$early_late + 5)))
## Warning in sqrt(diag(varcovar)): NaNs produced
## Warning in sqrt(1/diag(V)): NaNs produced
## Warning in cov2cor(varcovar): diag(.) had 0 or NA entries; non-finite
## result is doubtful
ggplot(enc_times, aes(early_late)) +
  geom_histogram(binwidth = 15, aes(y = ..density..)) +
  coord_cartesian(xlim = c(-120, 120)) +
  geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
  stat_function(fun = dnorm, args = list(nrm$estimate[1], nrm$estimate[2]), 
                aes(color = "normal", linetype = "normal")) +
  stat_function(fun = dtriang, args = list(min = tri$estimate[1], 
                                           mode = tri$estimate[2], 
                                           max = tri$estimate[3]), 
                aes(color = "triangle", linetype = "triangle")) +
  scale_color_manual(name = "", 
                     values = c("empirical density" = "black",
                                "normal" = "red",
                                "triangle" = "blue"),
                     breaks = c("empirical density", 
                                "normal",
                                "triangle")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical density" = "solid",
                                   "normal" = "dashed",
                                   "triangle" = "dashed"),
                        breaks = c("empirical density", 
                                   "normal",
                                   "triangle")) +
  xlab("arrival time deviation (minutes)")

ks.test(enc_times$early_late, "pnorm", nrm$estimate[1], nrm$estimate[2])
## Warning in ks.test(enc_times$early_late, "pnorm", nrm$estimate[1], nrm
## $estimate[2]): ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  enc_times$early_late
## D = 0.0866, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(enc_times$early_late, "ptriang", tri$estimate[1], tri$estimate[2], tri$estimate[3])
## Warning in ks.test(enc_times$early_late, "ptriang", tri$estimate[1], tri
## $estimate[2], : ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  enc_times$early_late
## D = 0.0829, p-value < 2.2e-16
## alternative hypothesis: two-sided
ggplot(enc_times, aes(early_late)) +
  stat_ecdf(aes(color = "empirical cumulative distribution",
                linetype = "empirical cumulative distribution")) +
  stat_function(fun = pnorm, args = list(nrm$estimate[1], nrm$estimate[2]),
                aes(color = "normal", linetype = "normal")) +
  stat_function(fun = ptriang, args = list(min = tri$estimate[1],
                                           mode = tri$estimate[2],
                                           max = tri$estimate[3]),
                aes(color = "triangle", linetype = "triangle")) +
  scale_color_manual(name = "", 
                     values = c("empirical cumulative distribution" = "black",
                                "normal" = "red",
                                "triangle" = "blue"),
                     breaks = c("empirical cumulative distribution", 
                                "normal",
                                "triangle")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical cumulative distribution" = "solid",
                                   "normal" = "dashed",
                                   "triangle" = "dashed"),
                        breaks = c("empirical cumulative distribution", 
                                   "normal",
                                   "triangle")) +
  labs(x = "interarrival time (minutes)", y = "cumulative probability")

ggplot(enc_times) +
  geom_line(aes(x = ppoints(length(early_late), 1),
                y = pnorm(sort(early_late), nrm$estimate[1], nrm$estimate[2]),
                color = "normal")) +
  geom_line(aes(x = ppoints(length(early_late), 1),
                y = ptriang(sort(early_late), tri$estimate[1], tri$estimate[2], tri$estimate[3]),
                color = "triangle")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "",
                     values = c("normal" = "red",
                                "triangle" = "blue"),
                     breaks = c("normal",
                                "triangle")) +
  labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(enc_times, aes(sample = early_late)) +
  stat_qq(distribution = qnorm,
          dparams = list(nrm$estimate[1], nrm$estimate[2]),
          geom = "line",
          aes(color = "normal")) +
  stat_qq(distribution = qtriang,
          dparams = list(tri$estimate[1], tri$estimate[2], tri$estimate[3]),
          geom = "line",
          aes(color = "triangle")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "",
                     values = c("normal" = "red",
                                "triangle" = "blue"),
                     breaks = c("normal", "triangle")) +
  labs(title = "Q-Q Plot")

nrm
##       mean          sd    
##   -5.9178235   46.2293202 
##  ( 0.8633833) ( 0.6105042)
enc_times %>%
  filter(length > 0) %>%
  ggplot(aes(length)) +
  geom_histogram(binwidth = 10)

interarrival <- enc_times %>%
  filter(hms(Arrival_Time) >= hms("07:00:00") & Vitals_Done != "NULL") %>%
  select(Clinic, Date, Arrival_Time) %>%
  mutate(Date = ymd(Date),
         Day = wday(Date, label = TRUE), 
         Weekday = ifelse(Day %in% c("Sat", "Sun"), 0, 1))

Interarrival Time

adult_arr <- interarrival %>%
  filter(Clinic == "Adult Medicine" & Weekday == 1) %>%
  arrange(Date, Arrival_Time) %>%
  mutate(Interarrival = ifelse(ymd(Date) == ymd(lag(Date)),
                               as.numeric(as.duration(hms(Arrival_Time) - hms(lag(Arrival_Time)))) / 60,
                               NA))

adult_arr <- adult_arr[!is.na(adult_arr$Interarrival) & 
                         adult_arr$Interarrival > 0 &
                         adult_arr$Interarrival < 20, ]
summary(adult_arr$Interarrival)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##  0.01667  1.13300  2.38300  3.37300  4.43300 19.82000
ggplot(adult_arr, aes(Interarrival)) +
  geom_histogram(binwidth = 1)

e_fit <- fitdistr(adult_arr$Interarrival, "exponential")
g_fit <- fitdistr(adult_arr$Interarrival, "gamma")
lnrm_fit <- fitdistr(adult_arr$Interarrival, "lognormal")
w_fit <- fitdistr(adult_arr$Interarrival, "weibull")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
ggplot(adult_arr, aes(Interarrival)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
  stat_function(fun = dexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]), 
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]), 
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "", 
                     values = c("empirical density" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical density", 
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical density" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical density", 
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "interarrival time (minutes)")

ks.test(adult_arr$Interarrival, "pexp", e_fit$estimate[1])
## Warning in ks.test(adult_arr$Interarrival, "pexp", e_fit$estimate[1]): ties
## should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_arr$Interarrival
## D = 0.038403, p-value = 0.0005134
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "pgamma", g_fit$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_arr$Interarrival
## D = 0.025077, p-value = 0.05889
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "plnorm", lnrm_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_arr$Interarrival
## D = 0.078957, p-value = 1.332e-15
## alternative hypothesis: two-sided
ks.test(adult_arr$Interarrival, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
## Warning in ks.test(adult_arr$Interarrival, "pweibull", w_fit$estimate[1], :
## ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_arr$Interarrival
## D = 0.027132, p-value = 0.03227
## alternative hypothesis: two-sided
ggplot(adult_arr, aes(Interarrival)) +
  stat_ecdf(aes(color = "empirical cumulative distribution",
                linetype = "empirical cumulative distribution")) +
  stat_function(fun = pexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]),
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]),
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "",
                     values = c("empirical cumulative distribution" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical cumulative distribution",
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "",
                        values = c("empirical cumulative distribution" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical cumulative distribution",
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "interarrival time (minutes)", y = "cumulative probability")

ggplot(adult_arr) +
  geom_line(aes(x = ppoints(length(Interarrival), 1),
                y = pexp(sort(Interarrival), e_fit$estimate[1]),
                color = "exponential")) +
  geom_line(aes(x = ppoints(length(Interarrival), 1),
                y = pgamma(sort(Interarrival), g_fit$estimate[1], g_fit$estimate[2]),
                color = "gamma")) +
  geom_line(aes(x = ppoints(length(Interarrival), 1),
                y = plnorm(sort(Interarrival), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                color = "log normal")) +
  geom_line(aes(x = ppoints(length(Interarrival), 1),
                y = pweibull(sort(Interarrival), w_fit$estimate[1], w_fit$estimate[2]),
                color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "",
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(adult_arr, aes(sample = Interarrival)) +
  stat_qq(distribution = qexp,
          dparams = list(e_fit$estimate[1]),
          geom = "line",
          aes(color = "exponential")) +
  stat_qq(distribution = qgamma,
          dparams = list(g_fit$estimate[1], g_fit$estimate[2]),
          geom = "line",
          aes(color = "gamma")) +
  stat_qq(distribution = qlnorm,
          dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
          geom = "line",
          aes(color = "log normal")) +
  stat_qq(distribution = qweibull,
          dparams = list(w_fit$estimate[1], w_fit$estimate[2]),
          geom = "line",
          aes(color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "",
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "Q-Q Plot")

Waiting Time Between Registration and Initiation of Care

enc_times <- enc_times %>%
  mutate(Wait_Time_After_Reg = 
           as.numeric(as.duration(
              hms(format(ymd_hms(Vitals_Done), "%H:%M:%S")) - hms(Arrival_Time))) / 60)
         
enc_times %>%
  filter(Wait_Time_After_Reg > 0 & Wait_Time_After_Reg <= 360) %>%
  ggplot(aes(Wait_Time_After_Reg)) +
  geom_histogram(binwidth = 10)

Provider Service Times

serv_times <- read.csv("Patient_Flow_Wait_Times/Enc_Time_Stamps_Jan_2016.csv", stringsAsFactors = FALSE)
serv_times <- serv_times %>%
  mutate(Service_Time = as.numeric(as.duration(
    hms(Departure_Time) - hms(format(ymd_hms(Vitals_Done), "%H:%M:%S")))) / 60)
## Warning: 130 failed to parse.
serv_times %>%
  filter(Service_Time > 0 & Service_Time < 180) %>%
  ggplot(aes(Service_Time)) +
  geom_histogram(binwidth = 5)

adult_service <- serv_times %>%
  filter(Clinic == "Adult Medicine" & 
           Service_Time > 0 & 
           Service_Time < 180 & 
           !(wday(Date, label = TRUE) %in% c("Sat", "Sun"))) %>%
  select(Clinic, Date, Service_Time)

e_fit <- fitdistr(adult_service$Service_Time, "exponential")
g_fit <- fitdistr(adult_service$Service_Time, "gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
lnrm_fit <- fitdistr(adult_service$Service_Time, "lognormal")
w_fit <- fitdistr(adult_service$Service_Time, "weibull")

ggplot(adult_service, aes(Service_Time)) +
  geom_histogram(aes(y = ..density..), binwidth = 5) +
  geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
  stat_function(fun = dexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]), 
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]), 
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "", 
                     values = c("empirical density" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical density", 
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical density" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical density", 
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "service time (minutes)")

ks.test(adult_service$Service_Time, "pexp", e_fit$estimate[1])
## Warning in ks.test(adult_service$Service_Time, "pexp", e_fit$estimate[1]):
## ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_service$Service_Time
## D = 0.12206, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "pgamma", g_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_service$Service_Time
## D = 0.013058, p-value = 0.7297
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "plnorm", lnrm_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_service$Service_Time
## D = 0.067189, p-value = 2.447e-11
## alternative hypothesis: two-sided
ks.test(adult_service$Service_Time, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
## Warning in ks.test(adult_service$Service_Time, "pweibull", w_fit
## $estimate[1], : ties should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  adult_service$Service_Time
## D = 0.017316, p-value = 0.3744
## alternative hypothesis: two-sided
ggplot(adult_service, aes(Service_Time)) +
  stat_ecdf(aes(color = "empirical cumulative distribution", 
                linetype = "empirical cumulative distribution")) +
  stat_function(fun = pexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]), 
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]), 
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "", 
                     values = c("empirical cumulative distribution" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical cumulative distribution", 
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical cumulative distribution" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical cumulative distribution", 
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "service time (minutes)", y = "cumulative probability")

ggplot(adult_service) +
  geom_line(aes(x = ppoints(length(Service_Time), 1),
                y = pexp(sort(Service_Time), e_fit$estimate[1]),
                color = "exponential")) +
  geom_line(aes(x = ppoints(length(Service_Time), 1),
                y = pgamma(sort(Service_Time), g_fit$estimate[1], g_fit$estimate[2]),
                color = "gamma")) +
  geom_line(aes(x = ppoints(length(Service_Time), 1),
                y = plnorm(sort(Service_Time), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                color = "log normal")) +
  geom_line(aes(x = ppoints(length(Service_Time), 1),
                y = pweibull(sort(Service_Time), w_fit$estimate[1], w_fit$estimate[2]),
                color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "", 
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(adult_service, aes(sample = Service_Time)) +
  stat_qq(distribution = qexp, 
          dparams = list(e_fit$estimate[1]), 
          geom = "line",
          aes(color = "exponential")) +
  stat_qq(distribution = qgamma, 
          dparams = list(g_fit$estimate[1], g_fit$estimate[2]), 
          geom = "line",
          aes(color = "gamma")) +
  stat_qq(distribution = qlnorm, 
          dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
          geom = "line",
          aes(color = "log normal")) +
  stat_qq(distribution = qweibull, 
          dparams = list(w_fit$estimate[1], w_fit$estimate[2]), 
          geom = "line",
          aes(color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "", 
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "Q-Q Plot")

g_fit
##       shape         rate    
##   1.699797193   0.038216491 
##  (0.041833922) (0.001091857)

Monte Carlo Simulation to Identify Distributions of Visit Times with Medical Assistants and with Physicians

set.seed(2017)
n <- 10000
total <- rgamma(n, shape = g_fit$estimate[1], rate = g_fit$estimate[2])
ma_time <- rtriang(n, min = 2, mode = 10, max = 16)
md_time <- total - ma_time
md_time <- md_time[md_time > 0]

df <- data.frame(phys_time = md_time)

ggplot(df, aes(phys_time)) +
  geom_histogram(binwidth = 10) +
  xlab("physician service time (minutes)")

e_fit <- fitdistr(df$phys_time, "exponential")
g_fit <- fitdistr(df$phys_time, "gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
lnrm_fit <- fitdistr(df$phys_time, "lognormal")
w_fit <- fitdistr(df$phys_time, "weibull")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
ggplot(df, aes(phys_time)) +
  geom_histogram(aes(y = ..density..), binwidth = 5) +
  geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
  stat_function(fun = dexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = dgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]), 
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = dlnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = dweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]), 
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "", 
                     values = c("empirical density" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical density", 
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical density" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical density", 
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "physician service time (minutes)")

ks.test(df$phys_time, "pexp", e_fit$estimate[1])
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  df$phys_time
## D = 0.063045, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(df$phys_time, "pgamma", g_fit$estimate[1], g_fit$estimate[2])
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  df$phys_time
## D = 0.019333, p-value = 0.002189
## alternative hypothesis: two-sided
ks.test(df$phys_time, "plnorm", lnrm_fit$estimate[1], lnrm_fit$estimate[2])
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  df$phys_time
## D = 0.081045, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(df$phys_time, "pweibull", w_fit$estimate[1], w_fit$estimate[2])
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  df$phys_time
## D = 0.0089321, p-value = 0.4608
## alternative hypothesis: two-sided
ggplot(df, aes(phys_time)) +
  stat_ecdf(aes(color = "empirical cumulative distribution", 
                linetype = "empirical cumulative distribution")) +
  stat_function(fun = pexp, args = list(e_fit$estimate),
                aes(color = "exponential", linetype = "exponential")) +
  stat_function(fun = pgamma, args = list(g_fit$estimate[1], g_fit$estimate[2]), 
                aes(color = "gamma", linetype = "gamma")) +
  stat_function(fun = plnorm, args = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                aes(color = "log normal", linetype = "log normal")) +
  stat_function(fun = pweibull, args = list(w_fit$estimate[1], w_fit$estimate[2]), 
                aes(color = "Weibull", linetype = "Weibull")) +
  scale_color_manual(name = "", 
                     values = c("empirical cumulative distribution" = "black",
                                "exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("empirical cumulative distribution", 
                                "exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical cumulative distribution" = "solid",
                                   "exponential" = "dashed",
                                   "gamma" = "dashed",
                                   "log normal" = "dashed",
                                   "Weibull" = "dashed"),
                        breaks = c("empirical cumulative distribution", 
                                   "exponential",
                                   "gamma",
                                   "log normal",
                                   "Weibull")) +
  labs(x = "physician service time (minutes)", y = "cumulative probability")

ggplot(df) +
  geom_line(aes(x = ppoints(length(phys_time), 1),
                y = pexp(sort(phys_time), e_fit$estimate[1]),
                color = "exponential")) +
  geom_line(aes(x = ppoints(length(phys_time), 1),
                y = pgamma(sort(phys_time), g_fit$estimate[1], g_fit$estimate[2]),
                color = "gamma")) +
  geom_line(aes(x = ppoints(length(phys_time), 1),
                y = plnorm(sort(phys_time), lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
                color = "log normal")) +
  geom_line(aes(x = ppoints(length(phys_time), 1),
                y = pweibull(sort(phys_time), w_fit$estimate[1], w_fit$estimate[2]),
                color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "", 
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(df, aes(sample = phys_time)) +
  stat_qq(distribution = qexp, 
          dparams = list(e_fit$estimate[1]), 
          geom = "line",
          aes(color = "exponential")) +
  stat_qq(distribution = qgamma, 
          dparams = list(g_fit$estimate[1], g_fit$estimate[2]), 
          geom = "line",
          aes(color = "gamma")) +
  stat_qq(distribution = qlnorm, 
          dparams = list(lnrm_fit$estimate[1], lnrm_fit$estimate[2]),
          geom = "line",
          aes(color = "log normal")) +
  stat_qq(distribution = qweibull, 
          dparams = list(w_fit$estimate[1], w_fit$estimate[2]), 
          geom = "line",
          aes(color = "Weibull")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "", 
                     values = c("exponential" = "red",
                                "gamma" = "blue",
                                "log normal" = "green",
                                "Weibull" = "orange"),
                     breaks = c("exponential",
                                "gamma",
                                "log normal",
                                "Weibull")) +
  labs(title = "Q-Q Plot")

w_fit
##       shape          scale    
##    1.176475743   41.349116214 
##  ( 0.009657111) ( 0.387141958)

Number of Staff

staff <- providers %>%
  select(Clinic, Date, Providers) %>%
  mutate(Date = ymd(Date),
         Day = wday(Date, label = TRUE), 
         Weekday = ifelse(Day %in% c("Sat", "Sun"), 0, 1))

ggplot(staff, aes(Day, Providers)) + 
  geom_bar(stat = "summary", fun.y = "mean")

ggplot(staff, aes(Providers)) + 
  geom_histogram(binwidth = 1) +
  facet_grid(~ Day)

staff %>% 
  filter(Clinic == "Adult Medicine" & Weekday == 1) %>%
  ggplot(aes(Providers)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  geom_line(stat = "density") #, aes(color = "empirical density", linetype = "empirical density")) +

adult_wkday <- staff %>% 
  filter(Clinic == "Adult Medicine" & Weekday == 1)
adult_wkday <- adult_wkday$Providers

unif <- c(min = min(adult_wkday), max = max(adult_wkday))
geo <- fitdistr(adult_wkday, "geometric")
nb <- fitdist(adult_wkday, "nbinom")
## Warning in sqrt(diag(varcovar)): NaNs produced
## Warning in sqrt(1/diag(V)): NaNs produced
## Warning in cov2cor(varcovar): diag(.) had 0 or NA entries; non-finite
## result is doubtful
ggplot(data.frame(adult_wkday = adult_wkday), aes(adult_wkday)) +
  geom_histogram(aes(y = ..density..), binwidth = 1) +
  geom_line(stat = "density", aes(color = "empirical density", linetype = "empirical density")) +
  geom_line(aes(y = dunif(adult_wkday, min = unif[1], max = unif[2]),
                color = "uniform", linetype = "uniform")) +
  geom_line(aes(y = dgeom(adult_wkday, prob = geo$estimate[1]),
                color = "geometric", linetype = "geometric")) +
  geom_line(aes(y = dnbinom(adult_wkday, size = nb$estimate[[1]], mu = nb$estimate[[2]]),
                color = "negative binomial", linetype = "negative binomial")) +
  scale_color_manual(name = "", 
                     values = c("empirical density" = "black",
                                "uniform" = "red",
                                "geometric" = "blue",
                                "negative binomial" = "green"),
                     breaks = c("empirical density", 
                                "uniform",
                                "geometric",
                                "negative binomial")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical density" = "solid",
                                "uniform" = "dashed",
                                "geometric" = "dashed",
                                "negative binomial" = "dashed"),
                     breaks = c("empirical density", 
                                "uniform",
                                "geometric",
                                "negative binomial"))

tab <- as.data.frame(table(factor(adult_wkday, levels = min(adult_wkday):max(adult_wkday))))

counts <- data.frame(
  observed = tab$Freq,
  expected_unif = dunif(min(adult_wkday):max(adult_wkday), 
                        min = min(adult_wkday), 
                        max = max(adult_wkday)) * length(adult_wkday),
  expected_geom = dgeom(min(adult_wkday):max(adult_wkday), 
                        prob = geo$estimate[1]) * length(adult_wkday),
  expected_nbinom = dnbinom(min(adult_wkday):max(adult_wkday),
                            size = nb$estimate[[1]],
                            mu = nb$estimate[[2]]) * length(adult_wkday))
row.names(counts) <- tab$Var1

(t1 <- as.table(t(counts[, 1:2])))
##                       7         8         9        10        11        12
## observed       2.000000  2.000000  2.000000  6.000000  6.000000  4.000000
## expected_unif  5.545455  5.545455  5.545455  5.545455  5.545455  5.545455
##                      13        14        15        16        17        18
## observed       9.000000 12.000000  8.000000  4.000000  1.000000  5.000000
## expected_unif  5.545455  5.545455  5.545455  5.545455  5.545455  5.545455
chisq.test(t1)
## Warning in chisq.test(t1): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t1
## X-squared = 12.14, df = 11, p-value = 0.3532
chisq.test(t1, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  t1
## X-squared = 12.14, df = NA, p-value = 0.6322
(t2 <- as.table(t(counts[, c(1, 3)])))
##                       7         8         9        10        11        12
## observed       2.000000  2.000000  2.000000  6.000000  6.000000  4.000000
## expected_geom  2.586632  2.402948  2.232308  2.073785  1.926520  1.789712
##                      13        14        15        16        17        18
## observed       9.000000 12.000000  8.000000  4.000000  1.000000  5.000000
## expected_geom  1.662620  1.544553  1.434870  1.332975  1.238317  1.150381
chisq.test(t2)
## Warning in chisq.test(t2): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t2
## X-squared = 9.586, df = 11, p-value = 0.568
chisq.test(t2, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  t2
## X-squared = 9.586, df = NA, p-value = 0.928
(t3 <- as.table(t(counts[, c(1, 4)])))
##                         7         8         9        10        11
## observed         2.000000  2.000000  2.000000  6.000000  6.000000
## expected_nbinom  1.652308  2.702026  3.927673  5.138351  6.111101
##                        12        13        14        15        16
## observed         4.000000  9.000000 12.000000  8.000000  4.000000
## expected_nbinom  6.662338  6.704582  6.265160  5.464235  4.467842
##                        17        18
## observed         1.000000  5.000000
## expected_nbinom  3.438250  2.498926
chisq.test(t3)
## Warning in chisq.test(t3): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t3
## X-squared = 6.0195, df = 11, p-value = 0.8721
chisq.test(t3, simulate.p.value = TRUE)
## 
##  Pearson's Chi-squared test with simulated p-value (based on 2000
##  replicates)
## 
## data:  t3
## X-squared = 6.0195, df = NA, p-value = 0.999
ggplot(data.frame(adult_wkday = adult_wkday), aes(adult_wkday)) +
  stat_ecdf(aes(color = "empirical cumulative distribution", 
                linetype = "empirical cumulative distribution")) +
  stat_function(fun = punif, args = list(unif[1], unif[2]),
                aes(color = "uniform", linetype = "uniform")) +
  stat_function(fun = pgeom, args = list(prob = geo$estimate[1]), 
                aes(color = "geometric", linetype = "geometric")) +
  stat_function(fun = pnbinom, args = list(size = nb$estimate[1], mu = nb$estimate[2]),
                aes(color = "negative binomial", linetype = "negative binomial")) +
  scale_color_manual(name = "", 
                     values = c("empirical cumulative distribution" = "black",
                                "uniform" = "red",
                                "geometric" = "blue",
                                "negative binomial" = "green"),
                     breaks = c("empirical cumulative distribution", 
                                "uniform",
                                "geometric",
                                "negative binomial")) +
  scale_linetype_manual(name = "", 
                        values = c("empirical cumulative distribution" = "solid",
                                   "uniform" = "dashed",
                                   "geometric" = "dashed",
                                   "negative binomial" = "dashed"),
                        breaks = c("empirical cumulative distribution", 
                                   "uniform",
                                   "geometric",
                                   "negative binomial")) +
  labs(y = "cumulative probability")

ggplot(data.frame(adult_wkday = adult_wkday)) +
  geom_line(aes(x = ppoints(length(adult_wkday), 1),
                y = punif(sort(adult_wkday), min = unif[1], max = unif[2]),
                color = "uniform")) +
  geom_line(aes(x = ppoints(length(adult_wkday), 1),
                y = pgeom(sort(adult_wkday), prob = geo$estimate[1]),
                color = "geometric")) +
  geom_line(aes(x = ppoints(length(adult_wkday), 1),
                y = pnbinom(sort(adult_wkday), 
                            size = nb$estimate[1], 
                            mu = nb$estimate[2]),
                color = "negative binomial")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
  scale_color_manual(name = "", 
                     values = c("uniform" = "red",
                                "geometric" = "blue",
                                "negative binomial" = "green"),
                     breaks = c("uniform",
                                "geometric",
                                "negative binomial")) +
  labs(title = "P-P Plot", x = "sample probability", y = "theoretical probability")

ggplot(data.frame(adult_wkday = adult_wkday), aes(sample = adult_wkday)) +
  stat_qq(distribution = qunif, 
          dparams = list(min = unif[1], max = unif[2]), 
          geom = "line",
          aes(color = "uniform")) +
  stat_qq(distribution = qgeom, 
          dparams = list(prob = geo$estimate[1]), 
          geom = "line",
          aes(color = "geometric")) +
  stat_qq(distribution = qnbinom, 
          dparams = list(size = nb$estimate[1], mu = nb$estimate[2]),
          geom = "line",
          aes(color = "negative binomial")) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  scale_color_manual(name = "", 
                     values = c("uniform" = "red",
                                "geometric" = "blue",
                                "negative binomial" = "green"),
                     breaks = c("uniform",
                                "geometric",
                                "negative binomial")) +
  labs(title = "Q-Q Plot")

nb
## Fitting of the distribution ' nbinom ' by maximum likelihood 
## Parameters:
##          estimate Std. Error
## size 3.665819e+07        NaN
## mu   1.308243e+01  0.4631131
mean(adult_wkday)
## [1] 13.08197

Import Simulation Results for Model Validation and Comparison

fixed <- read.csv("Adult_Amb_Clinic_Fixed_Exam_Rms_Model_Experiment1_ResultsSummary.csv", header = TRUE, stringsAsFactors = FALSE)

flex <- read.csv("Adult_Amb_Clinic_Flex_Exam_Rms_Model_Experiment1_ResultsSummary.csv", header = TRUE, stringsAsFactors = FALSE)

Model Verification and Validation

model_vv <- data.frame(Source = c("Observed Data", 
                                  "Status Quo Model (Fixed Exam Rooms)", 
                                  "Intervention Model (Flexible Exam Rooms)"),
                       Total_Visits = numeric(3),
                       Time_In_System = numeric(3),
                       Average_Wait_Time = numeric(3))

model_vv[model_vv$Source == "Observed Data", ]$Total_Visits <- visits %>%
  filter(!(wday(mdy(Date), label = TRUE) %in% c("Sat", "Sun"))) %>%
  summarize(visits_per_day = mean(Visits)) * (52 * 5)

model_vv[model_vv$Source == "Observed Data", ]$Time_In_System <- enc_times %>%
  filter(length > 0) %>% 
  summarize(avg_length = mean(length, na.rm = TRUE))

model_vv[model_vv$Source == "Observed Data", ]$Average_Wait_Time <- enc_times %>%
  filter(Wait_Time_After_Reg > 0 & Wait_Time_After_Reg <= 240) %>%
  summarize(avg_wait = mean(Wait_Time_After_Reg, na.rm = TRUE))

model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Total_Visits <- fixed %>% 
  filter(Object.Name == "Patient" & Data.Item == "NumberDestroyed" & Statistic.Type == "Total") %>%
  select(Average)

model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Time_In_System <- fixed %>% 
  filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
  select(Average)

model_vv[model_vv$Source == "Status Quo Model (Fixed Exam Rooms)", ]$Average_Wait_Time <- fixed %>% 
  filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
  select(Average)

model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Total_Visits <- flex %>% 
  filter(Object.Name == "Patient" & Data.Item == "NumberDestroyed" & Statistic.Type == "Total") %>%
  select(Average)

model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Time_In_System <- flex %>% 
  filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
  select(Average)

model_vv[model_vv$Source == "Intervention Model (Flexible Exam Rooms)", ]$Average_Wait_Time <- flex %>% 
  filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
  select(Average)

knitr::kable(model_vv, digits = 2)
Source Total_Visits Time_In_System Average_Wait_Time
Observed Data 41011.58 65.3478 24.72246
Status Quo Model (Fixed Exam Rooms) 42259 89.05314 32.36252
Intervention Model (Flexible Exam Rooms) 42255.7 70.32399 9.327603

Status Quo versus Intervention Models:

Effects of Flexible Exam Room Allocation on Average Wait Time Between Registration and Escort to Exam Room and Average Total Time in System

\(t = \frac{\bar{x_1} - \bar{x_2}}{\sqrt{\frac{s_1^2}{n_1} + \frac{s_2^2}{n_2}}}\)

\(d.f. = \frac{(s_1^2 / n_1 + s_2^2/ n_2)^2}{(s_1^2 / n_1)^2 / (n_1 - 1) + (s_2^2 / n_2)^2 / (n_2 - 1)}\)

(fixed_wait <- fixed %>% 
  filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
  select(Average, Standard.Deviation, Half.Width))
##    Average Standard.Deviation Half.Width
## 1 32.36252           1.394246   0.997314
(fixed_tis <- fixed %>% 
  filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
  select(Average, Standard.Deviation, Half.Width))
##    Average Standard.Deviation Half.Width
## 1 89.05314           1.634038   1.168839
(flex_wait <- flex %>% 
  filter(Object.Name == "Model" & Data.Source == "WaitTime" & Statistic.Type == "Average (Minutes)") %>%
  select(Average, Standard.Deviation, Half.Width))
##    Average Standard.Deviation Half.Width
## 1 9.327603          0.1084512 0.07757589
(flex_tis <- flex %>% 
  filter(Object.Name == "Patient" & Data.Item == "TimeInSystem" & Statistic.Type == "Average (Minutes)") %>%
  select(Average, Standard.Deviation, Half.Width))
##    Average Standard.Deviation Half.Width
## 1 70.32399          0.4667451  0.3338661
df_wait <- (fixed_wait$Standard.Deviation^2 / 10 + flex_wait$Standard.Deviation^2 / 10)^2 /
  (((fixed_wait$Standard.Deviation^2 / 10)^2 / 9) + ((flex_wait$Standard.Deviation^2 / 10)^2 / 9))

(t_wait <- (flex_wait$Average - fixed_wait$Average) / 
  (sqrt(flex_wait$Standard.Deviation^2 / 10 + fixed_wait$Standard.Deviation^2 / 10)))
## [1] -52.08798
(p_value_wait <- pt(t_wait, df_wait))
## [1] 6.846879e-13
df_tis <- (fixed_tis$Standard.Deviation^2 / 10 + flex_tis$Standard.Deviation^2 / 10)^2 /
  (((fixed_tis$Standard.Deviation^2 / 10)^2 / 9) + ((flex_tis$Standard.Deviation^2 / 10)^2 / 9))

(t_tis <- (flex_tis$Average - fixed_tis$Average) / 
  (sqrt(flex_tis$Standard.Deviation^2 / 10 + fixed_tis$Standard.Deviation^2 / 10)))
## [1] -34.85176
(p_value_tis <- pt(t_tis, df_tis))
## [1] 1.837663e-12
diff_means <- data.frame(Parameter = c("Average Wait Time (minutes)", "Average Time In System (minutes)"),
                         t_statistic = c(t_wait, t_tis),
                         p_value_1_sided = formatC(c(p_value_wait, p_value_tis), format = "e"))

knitr::kable(diff_means)
Parameter t_statistic p_value_1_sided
Average Wait Time (minutes) -52.08798 6.8469e-13
Average Time In System (minutes) -34.85176 1.8377e-12