Simulating Bias in Performance Measurement from ED Congestion Effects

Let’s generate some data assuming no congestion effect and try to estimate the average time each type of doc spends on each type of patient.

library(data.table)

times = seq_len(10)
type = rep(c("Quiet","Busy"), 5)
days_dt = data.table(t=times, type=type)

patients_dt = rbindlist(lapply(seq_len(nrow(days_dt)), function(t) {
  
  if(days_dt[t,type] == "Quiet") {
    return(rbind(
      # Type B docs take 75/100 simple patients
      data.table(
        t = t, t_type = "Quiet", dr = rep("B",75), patient_type = rep("Simple", 75),
        time_to_treat_min = pmax(0,pmin(120,rnorm(75, 60, 30)))),
      
      # Type A docs take 25/100 simple patients
      data.table(
        t = t, t_type = "Quiet", dr = rep("A",25), patient_type = rep("Simple", 25),
        time_to_treat_min = pmax(0,pmin(120,rnorm(25, 60, 30)))),
      
      # Type B docs take 15/20 complex patients
      data.table(
        t = t, t_type = "Quiet", dr = rep("B",15), patient_type = rep("Complex", 15),
        time_to_treat_min = pmax(0,pmin(360,rnorm(15, 180, 30)))),
      
      # Type A docs take 5/20 complex patients
      data.table(
        t = t, t_type = "Quiet", dr = rep("A",5), patient_type = rep("Complex", 5),
        time_to_treat_min = pmax(0,pmin(240,rnorm(5, 120, 30))))
      ))
    
  } else { # BUSY
    return(rbind(
      # Type B docs take 25/100 simple patients
      data.table(
        t = t, t_type = "Busy", dr = rep("B",25), patient_type = rep("Simple", 25),
        time_to_treat_min = pmax(0,pmin(120,rnorm(25, 60, 30)))),
      
      # Type A docs take 75/100 simple patients
      data.table(
        t = t, t_type = "Busy", dr = rep("A",75), patient_type = rep("Simple", 75),
        time_to_treat_min = pmax(0,pmin(120,rnorm(75, 60, 30)))),
      
      # Type B docs take 25/20 complex patients
      data.table(
        t = t, t_type = "Busy", dr = rep("B",25), patient_type = rep("Complex", 25),
        time_to_treat_min = pmax(0,pmin(360,rnorm(25, 180, 30)))),
      
      # Type A docs take 75/20 complex patients
      data.table(
        t = t, t_type = "Busy", dr = rep("A",75), patient_type = rep("Complex", 75),
        time_to_treat_min = pmax(0,pmin(240,rnorm(75, 120, 30))))
      ))
  }
}))


# Calculate average time each doc type takes on patients, controlling for patient type
summary(lm(time_to_treat_min ~ -1 + dr*patient_type, data=patients_dt))
## 
## Call:
## lm(formula = time_to_treat_min ~ -1 + dr * patient_type, data = patients_dt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -86.104 -20.949   1.056  21.190  81.416 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## drA                     119.382      1.456   82.01   <2e-16 ***
## drB                     179.485      2.059   87.19   <2e-16 ***
## patient_typeSimple      -60.179      1.953  -30.81   <2e-16 ***
## drB:patient_typeSimple  -59.016      3.122  -18.90   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 29.11 on 1596 degrees of freedom
## Multiple R-squared:  0.9207, Adjusted R-squared:  0.9205 
## F-statistic:  4635 on 4 and 1596 DF,  p-value: < 2.2e-16

Model estimates correctly that:

Adding congestion effect

Suppose now that on Busy days, every patient takes 50% longer because of congestion. Let’s try estimating the times DrA and DrB spend on each patient again, without controlling for congestion.

patients_dt[, time_to_treat_min_w_congestion_effect := time_to_treat_min + 0.5*time_to_treat_min*(t_type=="Busy")]
summary(lm(time_to_treat_min_w_congestion_effect ~ -1 + dr*patient_type, data=patients_dt))
## 
## Call:
## lm(formula = time_to_treat_min_w_congestion_effect ~ -1 + dr * 
##     patient_type, data = patients_dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -131.688  -31.173   -1.731   29.376  156.308 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## drA                     175.367      2.188   80.13   <2e-16 ***
## drB                     235.043      3.095   75.95   <2e-16 ***
## patient_typeSimple      -94.336      2.936  -32.13   <2e-16 ***
## drB:patient_typeSimple  -72.412      4.694  -15.43   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 43.77 on 1596 degrees of freedom
## Multiple R-squared:  0.9045, Adjusted R-squared:  0.9043 
## F-statistic:  3780 on 4 and 1596 DF,  p-value: < 2.2e-16

Now we get “biased” estimates of the time each doc spends on each type of patient:

  • Dr A now spends ~3 hours on complex patients, Dr B spends less than ~3.9 hours.
  • Dr A now spends ~80 minutes on simple patients, Dr B spends ~69 minutes

Let’s finally try controlling for the type of day to see if we can recover the original time estimates for each doc.

summary(lm(time_to_treat_min_w_congestion_effect ~ -1 + dr*patient_type*t_type, data=patients_dt))
## 
## Call:
## lm(formula = time_to_treat_min_w_congestion_effect ~ -1 + dr * 
##     patient_type * t_type, data = patients_dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -129.237  -25.946    1.213   26.313  124.669 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## drA                                 179.153      2.026  88.436  < 2e-16 ***
## drB                                 266.682      3.509  76.004  < 2e-16 ***
## patient_typeSimple                  -91.842      2.865 -32.057  < 2e-16 ***
## t_typeQuiet                         -60.578      8.103  -7.476 1.26e-13 ***
## drB:patient_typeSimple              -78.772      5.730 -13.748  < 2e-16 ***
## drB:t_typeQuiet                     -23.791      9.924  -2.397   0.0166 *  
## patient_typeSimple:t_typeQuiet       35.457      9.060   3.914 9.47e-05 ***
## drB:patient_typeSimple:t_typeQuiet   11.882     11.460   1.037   0.3000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.23 on 1592 degrees of freedom
## Multiple R-squared:  0.9235, Adjusted R-squared:  0.9231 
## F-statistic:  2402 on 8 and 1592 DF,  p-value: < 2.2e-16

Now we get these estimates for time spent on Quiet days:

  • Dr A spends ~124 minutes on Complex patients.
  • Dr B spends ~175 minutes on Complex patients.
  • Dr A spends ~62 minutes on Simple patients.
  • Dr B spends ~61 minutes on Simple patients.

SUCCESS! Controlling for the congestion level recovered the “true” (independent of congestion) performance properties of each Doctor