Load packages and data
library(data.table)
library(ggplot2)
library(parallel)
options(repr.plot.height = 480, repr.plot.width = 1080)
dt = fread('~/Downloads/feats_and_interventions.csv')
eval_dt = copy(dt)[intervention>=0 & week>=13]
eval_dt[,
num_flags :=
ifelse(TIR < 65, 1, 0) +
ifelse(PW < 75, 1, 0) +
ifelse(TBR54 > 1, 1, 0) +
ifelse(TBR70 > 4, 1, 0)]
Define some helper functions
N_PROVIDERS = 5
NUMBER_OF_PATIENTS = 500
create_bootstrap_population = function(dt) {
all_patient_ids = dt[,unique(patient_id)]
bs_sample_ids = sample(all_patient_ids, NUMBER_OF_PATIENTS, replace = T)
bs_sample_dt = data.table(patient_id = bs_sample_ids, new_patient_id = seq(1,NUMBER_OF_PATIENTS))
bs_sample_dt = merge(bs_sample_dt, eval_dt, by='patient_id', allow.cartesian = T)
return(bs_sample_dt)
}
get_stats_for_top_k_reviewed = function(k, dt) {
return(data.table(
K = k,
n_reviewed = dt[rnk <= k, ceiling(.N/uniqueN(week))],
precision = dt[rnk <= k, mean(intervention)],
recall = dt[intervention==1, sum(rnk <= k, na.rm = TRUE)/.N]
))
}
get_stats_from_ranked_data = function(dt) {
return(rbindlist(lapply(seq(1,max(dt$rnk, na.rm = T)), function(x) get_stats_for_top_k_reviewed(x, dt))))
}
assign_patients_to_providers_w_partial_segmentation = function(p_permanently_assigned_providers, dt, n_providers_out) {
# Assign p_permanently_assigned_providers of patients permanently to providers. This is 0 for pooled and 1 for completely segmented.
copy_dt = copy(dt)
n_patients_to_assign_permanently = ceiling(p_permanently_assigned_providers*uniqueN(copy_dt$new_patient_id))
# message(paste("n_patients_to_assign_permanently:", n_patients_to_assign_permanently))
patient_ids_to_assign_permanently = sample(unique(copy_dt$new_patient_id), n_patients_to_assign_permanently)
copy_dt[,provider_assignment := as.numeric(NA)]
copy_dt[new_patient_id %in% patient_ids_to_assign_permanently, provider_assignment := 1 + new_patient_id %% N_PROVIDERS]
# sample providers out by week if any are out
if(n_providers_out>0) {
providers_out_by_week = sapply(as.character(copy_dt[,unique(week)]), function(x) {
return(list(x = sample(seq(1,N_PROVIDERS), n_providers_out)))
}, simplify = F, USE.NAMES = T)
# remove permanent assignments to providers who are out if p_permanently_assigned_providers < 1.
if(p_permanently_assigned_providers < 1) {
for(wk in copy_dt[,unique(week)]) {
copy_dt[week == wk & provider_assignment %in% providers_out_by_week[[as.character(wk)]], provider_assignment:=NA]
}
}
}
# Rank patients globally and then step through table by row assigning each patient to their permanently assigned provider if they have one,
# and otherwise to the provider with the fewest assigned patients to review so far.
setorder(copy_dt, week, -num_flags, TIR)
n_assigned_to_each_prov = rep(0, N_PROVIDERS-n_providers_out)
current_week = copy_dt$week[1]
for(i in 1:nrow(copy_dt)) {
# reset provider counts if new week
if(current_week != copy_dt$week[i]) {
n_assigned_to_each_prov = n_assigned_to_each_prov*0
current_week = copy_dt$week[i]
}
# provider working this week (lookup vector)
providers_working_this_week = seq(1,N_PROVIDERS)
if(n_providers_out>0) providers_working_this_week = setdiff(providers_working_this_week, providers_out_by_week[[as.character(current_week)]])
if(is.na(copy_dt$provider_assignment[i])) {
# patient is not permanently assigned a provider, so assign them (for this week only) to the provider with the fewest assigned patients so far.
copy_dt$provider_assignment[i] = providers_working_this_week[which.min(n_assigned_to_each_prov)]
n_assigned_to_each_prov[which.min(n_assigned_to_each_prov)] = 1 + n_assigned_to_each_prov[which.min(n_assigned_to_each_prov)]
} else {
if(n_providers_out>0 & p_permanently_assigned_providers==1) {
if(copy_dt$provider_assignment[i] %in% providers_out_by_week[[as.character(current_week)]]) {
# Patient is permanently assigned a provider who is out and we're doing full segmentation so this patients will not be reviewed.
copy_dt$provider_assignment[i] = NA
next
}
}
# otherwise, respect the permanent assignment of the patient to a provider and just increment the counter of the provider
index_of_assigmnent = match(copy_dt$provider_assignment[i], providers_working_this_week)
n_assigned_to_each_prov[index_of_assigmnent] = 1 + n_assigned_to_each_prov[index_of_assigmnent]
}
}
# rank patients within week and assigned provider and output results
copy_dt[,rnk := as.numeric(NA)]
copy_dt[!is.na(provider_assignment), rnk := seq_len(.N), by = c('week','provider_assignment')]
return(copy_dt)
}
run_one_bs_simulation = function(seed, dt, n_providers_out = 0) {
set.seed(seed)
bs_sample = create_bootstrap_population(dt)
results_by_p_segmentation = rbindlist(lapply(c(0, 0.9, 0.95, 0.99, 1), function(x) {
dt = assign_patients_to_providers_w_partial_segmentation(x, bs_sample, n_providers_out)
res = get_stats_from_ranked_data(dt)
res[,p_permanently_assigned_providers:=x]
res[,seed:=seed]
return(res)
}))
return(results_by_p_segmentation)
}
res = rbindlist(mclapply(seq(1,100), function(x) run_one_bs_simulation(x, eval_dt), mc.cores=4))
aggdt = res[,.(recall=mean(recall), precision=mean(precision)), by=c('p_permanently_assigned_providers','K')]
ggplot(
aggdt,
aes(x=K, y=recall, color=factor(p_permanently_assigned_providers), group=factor(p_permanently_assigned_providers))) + geom_line() + coord_cartesian(xlim=c(30,60), ylim=c(0.9,1)) + theme_minimal() + guides(color=guide_legend(title="Patients\npermanently\nassigned\nprovider")) +
ggtitle("Pooling provides limited benefits when all providers are working every week") + theme(legend.position="bottom") +
geom_vline(xintercept = 40)
Let’s see how the results change if one of the five providers are off each week.
res_out = rbindlist(mclapply(seq(1,100), function(x) run_one_bs_simulation(x, eval_dt, n_providers_out=1), mc.cores=4))
aggdtout = res_out[,.(recall=mean(recall), precision=mean(precision)), by=c('p_permanently_assigned_providers','K')]
ggplot(
aggdtout,
aes(x=K, y=recall, color=factor(p_permanently_assigned_providers), group=factor(p_permanently_assigned_providers))) + geom_line() +
coord_cartesian(xlim=c(30,60), ylim=c(0.7,1)) + theme_minimal() + guides(color=guide_legend(title="Patients\npermanently\nassigned\nprovider")) +
ggtitle("Non-zero pooling provides larger benefit when one providers is out each week") + theme(legend.position="bottom") +
geom_vline(xintercept = 50)
Not doing pooling is very slightly suboptimal when all providers are working, but very harmful when a provider is off in a given week. Even a small amount of pooling resolves this problem.
Let’s now see what happens if we assign patients to the same provider they were reviewed by before if they were reviewed in the past month. This would line up nicely with potential monthly billing of remote view. It should hopefully also make a decent number of reviews repeat reviews of the same patient by the same provider.
Let’s continue with 500 patients, and 5 providers. Only 4 of 5 providers are working each week. Let’s assume each provider reviews the top 50 patients sent to them in a given week.
PROVIDER_CAPACITY = 50
# new function that assigns reviews based on who the patient was reviewed by last time
assign_patients_to_providers_for_review = function(dt, n_providers_out=1) {
copy_dt = copy(dt)
copy_dt[,provider_assignment := as.numeric(NA)]
copy_dt[,reviewed := as.numeric(NA)]
copy_dt[,repeat_review := as.numeric(NA)]
# sample providers out by week if any are out
if(n_providers_out>0) {
providers_out_by_week = sapply(as.character(copy_dt[,unique(week)]), function(x) {
return(list(x = sample(seq(1,N_PROVIDERS), n_providers_out)))
}, simplify = F, USE.NAMES = T)
}
# iterate over weeks
for(wk in copy_dt[,unique(week)]) {
week_dt = copy_dt[week==wk]
setorder(week_dt, -num_flags, TIR)
n_assigned_to_each_prov = rep(0, N_PROVIDERS - n_providers_out)
providers_working_this_week = seq(1,N_PROVIDERS)
if(n_providers_out>0) providers_working_this_week = setdiff(providers_working_this_week, providers_out_by_week[[as.character(wk)]])
# loop over patients until all providers capacity filled
i = 1;
while(min(n_assigned_to_each_prov) < PROVIDER_CAPACITY) {
# check if patient has been reviewed in past month
pat_reviewed_bf_dt = copy_dt[week<wk & week>wk-5 & new_patient_id == week_dt$new_patient_id[i] & reviewed == 1 &
provider_assignment %in% providers_working_this_week,
.(week, provider_assignment)]
if(nrow(pat_reviewed_bf_dt)>0) {
# message('Patient reviewed in past month by a provider working this week!')
setorder(pat_reviewed_bf_dt, -week)
last_reviewed_by = pat_reviewed_bf_dt$provider_assignment[1]
last_reviewed_by_idx = match(last_reviewed_by, providers_working_this_week)
# assign to past provider if they have capacity
if(n_assigned_to_each_prov[last_reviewed_by_idx] < PROVIDER_CAPACITY) {
week_dt$provider_assignment[i] = last_reviewed_by
week_dt$reviewed[i] = 1
week_dt$repeat_review[i] = 1
n_assigned_to_each_prov[last_reviewed_by_idx] = n_assigned_to_each_prov[last_reviewed_by_idx] + 1
i = i + 1
next
}
}
# otherwise assign to provider with fewest assignments this week
assign_idx = which.min(n_assigned_to_each_prov)
week_dt$provider_assignment[i] = providers_working_this_week[assign_idx]
week_dt$reviewed[i] = 1
n_assigned_to_each_prov[assign_idx] = n_assigned_to_each_prov[assign_idx] + 1
i = i + 1
}
copy_dt[week==wk] = week_dt
}
return(copy_dt)
}
monthly_reass = rbindlist(mclapply(seq(1,100), function(x) assign_patients_to_providers_for_review(create_bootstrap_population(eval_dt)), mc.cores=4))
week_stats = monthly_reass[,.(
p_repeat_reviews = sum(repeat_review, na.rm = T)/sum(reviewed, na.rm = T)
), by = 'week']
ggplot(
week_stats,
aes(x=week, y=p_repeat_reviews)) + geom_line() + theme_minimal() +
ggtitle("About 3/4 of reviews are for patients reviewed in the past month.\nWe can safely assign these to same provider and maintain pooling efficiency.") +
theme(legend.position="bottom") + ylim(0,NA)