Background

Pharmacies typically engage with a single pharmaceutical distributor to purchase all the drugs they need. This type of arrangement is governed by a contract called a Prime Vendor Agreement (PVA), which the pharmacy enters into with the distributor for a specific number of years defined in the agreement itself. The PVA also stipulates what percentage of its drugs a pharmacy must buy from the distributor in order to be compliant with the agreement. A pharmacy may choose to purchase drugs from a distributor other than the one it has a PVA with; this is referred to as “leakage,” and it can sometimes be significant. Your challenge is to determine which drugs are being bought outside the PVA from other distributors, hypothesize why this is happening, and recommend where AmerisourceBergen should concentrate efforts to reduce this leakage and regain lost revenue and profit.

Data
Extracted, anonymized and modified from the AmerisourceBergen Data Warehouse.

To retrieve actual data go to:
https://github.com/SupChanda/ML-Supratik
- For the data select “Dispense Usage Transactions Top 50 GCN Seq Nbrs Week 201836.xlsx”
- For an explanation of acronyms select “AmerisourceBergen Challenge Data Dictionary.xlsx”

Complete Data set

Amerisource <- read_excel("~/Documents/ITKM 549- Technical Concepts/Final Project/Dispense Usage.xlsx")

head(Amerisource)

New table with the mean of the variable COGS_PRC_MTRC. Grouped by NDC_DESC.

Note: The mean calculation does not calculate NA’s.

MEAN_COGS <- Amerisource %>% 
  group_by(NDC_DESC) %>% 
  summarise(mean_cost = mean(COGS_PRC_MTRC, na.rm = TRUE))

head(MEAN_COGS)

Previous 2 tables joined by NDC_DESC

JOIN <- inner_join(Amerisource,MEAN_COGS,
                 by = c('NDC_DESC' = 'NDC_DESC'))
head(JOIN)

Replaced NA’s in COGS_PRC_MTRC with the mean of COGS_PRC_MTRC.

NO_NA <- JOIN %>% 
  mutate(COGS_PRC_MTRC = ifelse(is.na(COGS_PRC_MTRC) == TRUE , mean_cost , COGS_PRC_MTRC)) %>% 
  na.omit(DSPN_COGS_NDC_1WK, DSPN_COGS_NDC_4WKS , DSPN_COGS_NDC_13WKS , DSPN_COGS_NDC_26WKS)

head(NO_NA)

Removed all negative records from the following fields:

PURCH_QTY_NDC_1WK_MTRC, PURCH_QTY_NDC_4WK_MTRC, PURCH_QTY_NDC_13WK_MTRC , and PURCH_QTY_NDC_26WK_MTRC

NO_NEG <- NO_NA %>% 
  filter(PURCH_QTY_NDC_1WK_MTRC > 0) %>%
  filter(PURCH_QTY_NDC_4WKS_MTRC > 0) %>%  
  filter(PURCH_QTY_NDC_13WKS_MTRC > 0) %>% 
  filter(PURCH_QTY_NDC_26WKS_MTRC > 0)

head(NO_NEG)

New leakage variables for each time interval

Leakage = dispense - purchase quantities

LEAK <- NO_NEG %>% 
  mutate(LEAK_NDC_1WK = DSPN_COGS_NDC_1WK - PURCH_QTY_NDC_1WK_MTRC, 
         LEAK_NDC_4WKS = DSPN_COGS_NDC_4WKS - PURCH_QTY_NDC_4WKS_MTRC,
         LEAK_NDC_13WKS = DSPN_COGS_NDC_13WKS - PURCH_QTY_NDC_13WKS_MTRC,
         LEAK_NDC_26WKS = DSPN_COGS_NDC_26WKS - PURCH_QTY_NDC_26WKS_MTRC) 

head(LEAK)

Standardized per week metric for the four leakage variables

STD_LEAK <- LEAK %>% 
  mutate(STD_LEAK_NDC_1WK = LEAK_NDC_1WK,
         STD_LEAK_NDC_4WKS = LEAK_NDC_4WKS/4,
         STD_LEAK_NDC_13WKS = LEAK_NDC_13WKS/13,
         STD_LEAK_NDC_26WKS = LEAK_NDC_26WKS/26)

head(STD_LEAK)

Mean per week leakage for each time interval

MEAN_LEAK <- STD_LEAK %>% 
  summarise(MEAN_LEAK_NDC_1WK = mean(STD_LEAK_NDC_1WK),
            MEAN_LEAK_NDC_4WKS = mean(STD_LEAK_NDC_4WKS),
            MEAN_LEAK_NDC_13WKS = mean(STD_LEAK_NDC_13WKS),
            MEAN_LEAK_NDC_26WKS = mean(STD_LEAK_NDC_26WKS))

head(MEAN_LEAK)

Differing leakage by time period

LEAK_BY_TIME <- gather(MEAN_LEAK, mean) 
  ggplot(LEAK_BY_TIME) +
    geom_bar(aes(x=mean, y= value), stat= 'summary', fun.y= 'mean', fill="black", alpha=1/2 )

AmerisourceBergen’s most relevant way to contain leakage regarding Leakage in Week 13

Amount of leakage

AMOUNT_LEAK <- STD_LEAK %>% select(PHRMCY_NAME, LEAK_NDC_13WKS , COGS_PRC_MTRC) %>% 
    mutate(DOLLAR_AMOUNT_LEAK = LEAK_NDC_13WKS * COGS_PRC_MTRC)  
    
head(AMOUNT_LEAK)

Total leakage for week 13

TOTAL_WK13_LEAK <- sum(AMOUNT_LEAK$DOLLAR_AMOUNT_LEAK)

  head(TOTAL_WK13_LEAK)
## [1] 144497652654

Leakage by pharmacy - highest to lowest

 LEAK_BY_PHARM <- AMOUNT_LEAK %>% 
     group_by(PHRMCY_NAME) %>% 
     mutate(TOTAL_LEAK_PER_PHARM = sum(DOLLAR_AMOUNT_LEAK)) %>% 
     arrange(desc(TOTAL_LEAK_PER_PHARM)) %>% 
     select(PHRMCY_NAME , TOTAL_LEAK_PER_PHARM)

head(LEAK_BY_PHARM)

Individual pharmacies

UNIQUE<- LEAK_BY_PHARM %>% 
     distinct(PHRMCY_NAME , .keep_all = TRUE)

head(UNIQUE)

Percent leakage by each pharmacy

PERCT_LEAK <- UNIQUE %>% 
     mutate(PERCT_LEAK = (TOTAL_LEAK_PER_PHARM / TOTAL_WK13_LEAK)*100) %>% 
     select(PHRMCY_NAME , TOTAL_LEAK_PER_PHARM , PERCT_LEAK)
   
head(PERCT_LEAK)