CNX

Downloaded data from https://www.cdc.gov/nchs/nvss/vsrr/drug-overdose-data.htm

Note that the data includes Puerto Rico & DC, does not include drug specific counts for Pennsylvania or Louisiana, and is a rolling 12 month sum

show code
knitr::opts_chunk$set(warning = TRUE)
library('tidyverse'); library('rempsyc'); library('flextable'); library('ggh4x'); library('tidyr'); library('stringr'); library('lubridate'); library('slider')
setwd("~/Library/CloudStorage/Box-Box/CNX")

##### load and clean data
# odd_dat_raw = read.csv('VSRR_Provisional_Drug_Overdose_Death_Counts.csv', stringsAsFactors=TRUE)
## 
# #make clean dataframe for plot w/ transaction counts

# odd_dat = odd_dat_raw %>% 
#   filter(Indicator !='Percent with drugs specified' & State != 'US') %>% 
#   mutate(month_year = as.Date(paste(as.character(Year), '-', Month, sep=''))) %>% 
#   aggregate(Data.Value ~ Indicator * month_year, FUN=sum) %>% 
#   rename(OverdoseDeaths = Data.Value, OverdoseCategory = Indicator) %>% 
#   filter(month_year < '2025-08-01') %>% 
#   filter(OverdoseCategory == 'Synthetic opioids, excl. methadone (T40.4)') %>% 
#   mutate(OverdoseCategory = factor(OverdoseCategory, labels = 'Synthetic opioid overdose deaths')) %>% 
#   rename(variable = OverdoseCategory, date = month_year, count = OverdoseDeaths)
# 
# write_csv(odd_dat_cleaned, 'overdoseDeathsData_cleaned.csv')

#### when odd_dat is already cleaned and save, only run this line 
odd_dat = read.csv('overdoseDeathsData_cleaned.csv') %>% 
  mutate(date = as.Date(date))

# plot
ggplot(data = odd_dat, aes(x = date, y = count, color=variable)) +
  geom_line(linewidth = 1.25) +
  labs(title="Overdose deaths in the U.S.", y = 'Deaths', x = '', color='')+
   scale_x_date(
    breaks = "1 year",
    labels = scales::label_date("%Y"),
    minor_breaks = "1 month") +
  guides(x = ggplot2::guide_axis(minor.ticks = TRUE))+
  theme_classic(base_size=15)+
  theme(
    axis.ticks.length = unit(0.2, "cm"))+
  geom_vline(xintercept=as.numeric(as.Date('2023-05-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2023-02-01')), y = 60000, label = "Mexico Anti-Fentanyl amendment", angle=90, color='black', check_overlap = TRUE, size=3.5)+
  geom_vline(xintercept=as.numeric(as.Date('2019-05-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2019-02-01')), y = 40000, label = "China schedules fentanyl-related substances", angle=90, color='black', check_overlap = TRUE, size=3.5)

Next add in shipments from Altana’s CNX dasbhoard. First, the CNX dashboard was filtered to companies that are flagged as risky receivers or senders of fentanyl shipments. Then I queried the transactions table in databricks to grab all transactions where a company on the list was either the sender or receiver. That yields the data and figures below.

show code
library('lubridate'); library('slider')

#load in and clean data. uncomment these lines if your cleaned dataframe is not already written and stored
# fent_transactions = read.csv('altana_cnx_transactions.csv') %>% 
#   select(transaction_date, sender_company_name, sender_country_iso2, receiver_company_name, receiver_country_iso2, hs_code, goods_description, 
#          country_of_origin_iso_alpha2, country_of_destination_iso_alpha2)
# 
# fent_transactions_plot = fent_transactions %>% 
#    mutate(transaction_date = as.Date(transaction_date),
#           month = floor_date(transaction_date, unit = "month")) %>% 
#   count(month) %>% 
#   rename(date = month, count = n) %>% 
#   mutate(variable = 'risky transactions') %>% 
#   filter(!is.na(date))
#
#write.csv(fent_transactions_plot, 'altanaRiskyTransactions_cleaned.csv', row.names=FALSE)

#only run this line when the cleaned data is already written
fent_transactions_plot = read.csv('altanaRiskyTransactions_cleaned.csv')

#visualize just the raw risky transactions
ggplot(data = fent_transactions_plot, aes(x = as.Date(date), y = count)) +
  geom_line(linewidth = 1.25) +
  labs(title="Fentanyl risky shipments", y = 'shipments', x = '', color='')+
  theme_classic()

show code
# combine transactions and overdose deaths into one plot
CNX_overdoseXshipments = rbind(odd_dat, fent_transactions_plot)
#write.csv(CNX_overdoseXshipments, 'overdoseDeaths_altanaRiskyTransactions_cleaned.csv')

scale_factor = CNX_overdoseXshipments %>% #make common scale to plot both variables on one plot
  summarise(max_transactions = max(count[variable == "risky transactions"], na.rm = TRUE),
            max_overdoses    = max(count[variable == "Synthetic opioid overdose deaths"], na.rm = TRUE)) %>%
  transmute(scale = max_transactions / max_overdoses) %>%
  pull(scale)

ggplot(data = CNX_overdoseXshipments, aes(x = date, color=variable)) +
  geom_smooth(data = CNX_overdoseXshipments %>% filter(variable == "risky transactions"), aes(y = count), linewidth = 1.5, span = 0.075)+
  geom_line(data = CNX_overdoseXshipments %>% filter(variable == "Synthetic opioid overdose deaths"), aes(y = count * scale_factor), linewidth = 1.5)+
  labs(title="Global fentanyl-risky shipments and fentanyl overdose deaths in the US", y = 'shipments', x = '', color='', subtitle="transactions N = 337,243") +
  scale_y_continuous(name = "Global Risky Transactions", sec.axis = sec_axis( ~ . / scale_factor, name = "Overdose Deaths")) +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y", limits = as.Date(c("2015-12-01","2025-06-01")))+
  theme_minimal(base_size=15)+
  theme(axis.ticks.length = unit(0.2, "cm"),  axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_vline(xintercept=as.numeric(as.Date('2022-08-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2022-07-01')), y = 1000, label = "Operation Mongoos Azeteca begins", angle=90, color='black', check_overlap = TRUE, size=3.5)+
  geom_vline(xintercept=as.numeric(as.Date('2023-01-05')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2022-12-01')), y = 3500, label = "El Raton captured", angle=90, color='#0B1F3A', check_overlap = TRUE, size=3.5)+
  geom_vline(xintercept=as.numeric(as.Date('2023-05-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2023-04-01')), y = 3500, label = "Los Chapitos bans fentanyl", angle=90, color='#0B1F3A', check_overlap = TRUE, size=3.5)

show code
#make long data
CNX_overdoseXshipments_long = pivot_wider(CNX_overdoseXshipments, id_cols=date, names_from=variable, values_from=count) %>% 
  rename(overdoseDeaths = `Synthetic opioid overdose deaths`, riskyTransactions = `risky transactions`)

ggplot(data = CNX_overdoseXshipments_long, aes(x = riskyTransactions, y=overdoseDeaths)) +
  geom_smooth(linewidth = 1.5, span = 0.5)+
  geom_point(alpha=0.3)

Drug specific counts for Pennsylvania and Louisiana are missing from the CDC overdose deaths data. The missingness would pose a threat to our analysis in the particular circumstance that the overdose mortality trends in Pennsylvania and Louisiana go in the opposite direction of the mortality trends observed in the CDC data that includes the remaining 48 states, D.C. and Puerto Rico. It could be the case, though unlikely, that the downward shift in mortality rates beginning in May of 2023 — the inflection point that our analysis centers on — does not exist when Pennsylvania and Louisiana mortality counts are incorporated into the data. Such a case would fundamentally change our analysis. To address this possibility, we here include robustness checks to bolster our confidence that the trends we observed in national fentanyl mortality rates hold when accounting for missingness.

Pennsylvania

Pennsylvania reports monthly overdose deaths to the CDC, but the counts are not specified by type of drug overdose. The Pennsylvania Department of Health reports the annual rate of fentanyl-related overdoses relative to overall overdose death counts for 2022 - 2024 [1] and Overdose Free PA reports the 2015 - 2018 rates [2]. Annual fentanyl-related overdose counts for 2020 - 2024 are available via the CDC’s SUDORS system [3]. Accordingly, we can achieve a good approximation of monthly fentanyl overdose counts by applying the annual ratio of fentanyl-specific overdose rates to monthly overall overdose counts that Pennsylvania provides. But note that the data for 2019 are still missing in this methodology. We identified a source that reports the percentage of opioid deaths in all drug overdose deaths per state, which we use to fill in 2019 [3]. However, note that this source’s figures are inconsistent with the Pennsylvania DOH reports and does not report fentanyl-specific or synthetic-opioid-specific deaths.

Year Percent of overdose deaths involving fentanyl Source
2024 67.3% Pennsylvania DOH annual report
2023 77% Pennsylvania DOH annual report
2022 78% Pennsylvania DOH annual report
2021 78% CDC’s SUDORS
2020 77.7% CDC’s SUDORS
2019 69%

KFF.org opioid crisis monitor

*note that this is all opioids, not fentanyl specific, and the rates are inconsistent with those reported by Pennsylvania DOH.

2018 69.9% Overdose Free PA
2017 66.5% Overdose Free PA
2016 46.9% Overdose Free PA
2015 26.6% Overdose Free PA

Louisiana

Louisiana also reports monthly overdose death counts to the CDC but without specification by drug-type. The Louisiana Department of Health reports quarterly overdose death counts specified by overdose type. Using the LDOH data source, we compiled the monthly counts of overdose deaths caused by fentanyl.

Year Fentanyl ODD - Q1 Fentanyl ODD - Q2 Fentanyl ODD - Q3 Fentanyl ODD - Q4 Fentanyl ODD - Total Source
2024 202 178 137 NA Louisiana DOH
2023 271 281 232 221 1005 Louisiana DOH
2022 317 313 289 298 1217 Louisiana DOH
2021 299 279 327 277 1182 Louisiana DOH
2020 121 195 193 199 708 Louisiana DOH
2019 56 59 84 128 327 Louisiana DOH
2018 69 56 36 49 210 Louisiana DOH
2017 39 30 29 50 148 Louisiana DOH
2016 26 18 16 20 80 Louisiana DOH

Robustness check

We transformed the Pennsylvania and Louisiana data to fill in the missingness from the national CDC overdose death data. We integrated the Pennsylvania data into the national data by applying the annual percentage of fentanyl-caused deaths to the monthly overdose death counts that Pennsylvania reports to the CDC. We integrated the Louisiana data by dividing each quarterly fentanyl overdose count by 3 to achieve monthly approximations.

show code
odd_dat_raw = read.csv('VSRR_Provisional_Drug_Overdose_Death_Counts.csv', stringsAsFactors=TRUE)

odd_dat_robustnessCheck_temp = odd_dat_raw %>% 
   filter(Indicator !='Percent with drugs specified' & State != 'US') %>% 
   mutate(month_year = as.Date(paste(as.character(Year), '-', Month, sep=''))) %>% 
  select(State, Year, month_year, Indicator, Data.Value)


#transform PA data
PA_data = subset(odd_dat_robustnessCheck_temp, State == "PA") %>% 
  filter(Indicator == "Number of Drug Overdose Deaths" & Year < 2025) %>% 
  mutate(Data.Value = ifelse(Year == 2015, Data.Value * 0.266,
                        ifelse(Year == 2016, Data.Value * 0.469,
                          ifelse(Year == 2017, Data.Value * 0.665,
                              ifelse(Year == 2018, Data.Value * 0.699,
                                     ifelse(Year == 2019, Data.Value * 0.69, 
                                            ifelse(Year == 2020, Data.Value * 0.77,
                                                   ifelse( Year == 2021 | Year == 2022, Data.Value * 0.78,
                                                           ifelse(Year == 2023, Data.Value * 0.77,
                                                                  ifelse(Year == 2024, Data.Value * 0.673, NA)))))))))) %>% 
  mutate(Indicator = "Synthetic opioids, excl. methadone (T40.4)")
  
#transform LA data
LA_data = read.csv('Louisiana_fentanylDeaths.csv') %>% 
  pivot_longer(cols = starts_with("X"), names_to = "month", values_to = "Raw.Value") %>%
  mutate(month = str_extract(month, "\\d{2}") |> as.integer(), month_year = make_date(Year, month, 1)) %>%
  arrange(month_year) %>%
  mutate(Data.Value = slide_dbl(Raw.Value, sum, .before = 11, .complete = TRUE, na.rm = TRUE),
         State = "LA",
         Indicator = "Synthetic opioids, excl. methadone (T40.4)") %>% 
  select(Year, month_year, Data.Value, State, Indicator)

#combine original data, LA transformed and PA transformed data
odd_allDat = rbind(odd_dat_robustnessCheck_temp, PA_data, LA_data) %>% 
    filter(month_year < '2025-08-01' & Year >2016 & Indicator == 'Synthetic opioids, excl. methadone (T40.4)') %>% 
  aggregate(Data.Value ~ month_year, FUN=sum) %>% 
  mutate(variable = 'W/ LA and PA patched') %>% 
    rename(date = month_year, count = Data.Value)

odd_CDC = odd_dat_robustnessCheck_temp %>% 
  filter(month_year < '2025-08-01' & Indicator == 'Synthetic opioids, excl. methadone (T40.4)') %>% 
  aggregate(Data.Value ~ month_year, FUN=sum) %>% 
  mutate(variable = 'Original CDC data') %>% 
    rename(date = month_year, count = Data.Value)
  
odd_dat_robustnessCheck = rbind(odd_allDat, odd_CDC)
  
ggplot(data = odd_dat_robustnessCheck, aes(x = date, color=variable, y = count)) +
  geom_line(linewidth = 1.5)+
  labs(title="Original CDC data vs Patched data to include PA and LA", y = 'Overdose Deaths', x = '', color='', subtitle = 'Rolling 12-month sum') +
  scale_x_date(date_breaks = "6 months", date_labels = "%b %Y", limits = as.Date(c("2015-12-01","2025-06-01")))+
  theme_minimal(base_size=15)+
  theme(axis.ticks.length = unit(0.2, "cm"),  axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_vline(xintercept=as.numeric(as.Date('2022-08-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2022-07-01')), y = 40000, label = "Operation Mongoos Azeteca begins", angle=90, color='black', check_overlap = TRUE, size=3.5)+
  geom_vline(xintercept=as.numeric(as.Date('2023-01-05')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2022-12-01')), y = 50000, label = "El Raton captured", angle=90, color='#0B1F3A', check_overlap = TRUE, size=3.5)+
  geom_vline(xintercept=as.numeric(as.Date('2023-05-01')), linetype='dashed')+
  geom_text(x = as.numeric(as.Date('2023-04-01')), y = 55000, label = "Los Chapitos bans fentanyl", angle=90, color='#0B1F3A', check_overlap = TRUE, size=3.5)

got this data by specifying the following options in “transaction search”:

  1. HS code

  2. Product country of origin

  3. Receiver facility country

Note that the product country of origin is different from the sender facility country, and the same goes for the receiver side. The right options to use here is a matter of how we operationalize the trends of interest and is up for discussion.

HScode

Altana_description

compound.s.

included

285000

Compounds of rare earth metals, radioactive elements, or isotopes

sodium borohydride

290369

Halogenated derivatives of hydrocarbons: brominated or iodinated derivatives of acyclic hydrocarbons: other

phenethyl bromide

2918XX

Carboxylic acids with additional oxygen function...

291830

- with aldehyde or ketone function but without other oxygen function

diethyl 2-(2-phenylacetyl)malonate

X

291899

- other

3-methyl-3-phenyloxirane-2-carboxylic acid

X

29333X

Compounds containing an unfused pyridine ring ...

293332

- piperidine and its salts

4-piperidinol

X

293334

- other fentanyls and their derivatives

1-boc-norcarfentanil

293339

- other

1-boc-4-piperidinol, 4-anilino-1-benzylpiperidine, 4-carbomethoxy 4-ANPP, norcarfentanil, para-methyl boc-4-AP, 1-benzylpiperidin-4-one

X

293999

vegetable alkaloids - other

propionyl chloride

294200

Other organic compounds

sodium triacetoxyborohydride

Shipments to US from China per HS code

Some - but not all - shipments note the weight in kilograms. Sometimes they are only noted as “# piece”, and there seem to be other messy issues with the quantity column. Will need to be cleaned more thoroughly.

Shipments to Mexico from China per HS code