Step 0: Loading Library
library(data.table)
Step 1: Defining Parameters
# Folder locations: raw FAERS extracts are read from `raw_folder`,
# cleaned tables are written to `processed_folder`.
raw_folder <- "C://Users//William//Desktop//DDI Codes//data//raw//"
processed_folder <- "C://Users//William//Desktop//DDI Codes//data//processed//"
# Processing mode: "single" or "iterative"
process_type <- "iterative"
Step 2: Defining All Drug Names and Adverse Reaction SMQs (Broad)
# Drugs ----
# Each vector lists the generic and brand names of one drug, upper-cased.
# The entries are later joined with "|" and used as a regular expression,
# so a literal backslash has to be written as a regex-escaped backslash.

# Macrolides
azithromycin <- toupper(c(
  "Azithromycin", # generic name, so salt/variant spellings are caught too
  "Azasite", "Azithromycin 3 Day Dose Pack",
  "Azithromycin 5 Day Dose Pack", "Zithromax",
  "Zithromax TRI-PAK", "Zithromax Z-Pak", "Zmax"
))
erythromycin <- toupper(c(
  "erythromycin", "E.E.S. Granules", "E.E.S.-200",
  "E.E.S.-400 Filmtab", "EryPed 200", "EryPed 400",
  "Ery-Tab", "Erythrocin Lactobionate",
  "Erythrocin Stearate Filmtab"
))
clarithromycin <- toupper(c("CLARITHROMYCIN", "Biaxin", "Biaxin XL"))

# Cardiac glycoside (heart failure and atrial fibrillation medication).
# Note: digoxin is not an ACE inhibitor.
digoxin <- toupper(c(
  "DIGOXIN", "Digitek", "Digox", "Lanoxin", "Lanoxicaps", "Cardoxin"
))

# Immunosuppressant
tacrolimus <- toupper(c(
  "TACROLIMUS", "Astagraf XL", "Envarsus XR", "Prograf", "Hecoria"
))

# Antiviral
# "Kaletra" and "Lopinavir and Ritonavir" are separate entries; as a single
# string the brand name alone could never match.
# "\\\\" is a regex-escaped literal backslash ("LOPINAVIR\RITONAVIR" in the
# data); a bare "\r" would be a carriage return.
lopinavir_ritonavir <- toupper(c(
  "Kaletra", "Lopinavir and Ritonavir", "Lopinavir-Ritonavir",
  "Lopinavir\\\\ritonavir", "Lopinavir/ritonavir"
))
lopinavir <- toupper(c("lopinavir"))
ritonavir <- toupper(c("ritonavir"))

# Statins
atorvastatin <- toupper(c("ATORVASTATIN", "Atorvaliq", "Lipitor"))
rosuvastatin <- toupper(c("ROSUVASTATIN", "Crestor", "Ezallor Sprinkle"))
simvastatin <- toupper(c("SIMVASTATIN", "Zocor"))

# Adverse events ----
# Each vector lists the preferred terms (upper-cased) that are collapsed
# into one SMQ label.

# SMQ for ARRHYTHMIA
arrhythmia_smq <- toupper(c(
  "Arrhythmia",
  "Heart alternation",
  "Heart rate irregular",
  "Holiday heart syndrome",
  "Pacemaker generated arrhythmia",
  "Pacemaker syndrome",
  "Paroxysmal arrhythmia",
  "Pulseless electrical activity",
  "Reperfusion arrhythmia",
  "Withdrawal arrhythmia"
))

# SMQ for QT PROLONGED
qt_prolonged_smq <- toupper(c(
  "Electrocardiogram QT interval abnormal",
  "Electrocardiogram QT prolonged",
  "Long QT syndrome",
  "Long QT syndrome congenital",
  "Torsade de pointes",
  "Ventricular tachycardia",
  "Arrhythmic storm",
  "Cardiac arrest",
  "Cardiac death",
  "Cardiac fibrillation",
  "Cardio-respiratory arrest",
  "Electrocardiogram repolarisation abnormality",
  "Electrocardiogram U wave inversion",
  "Electrocardiogram U wave present",
  "Electrocardiogram U-wave abnormality",
  "Loss of consciousness",
  "Seizure",
  "Sudden cardiac death",
  "Sudden death",
  "Syncope",
  "Ventricular arrhythmia",
  "Ventricular fibrillation",
  "Ventricular flutter",
  "Ventricular tachyarrhythmia"
))

# SMQ for MYOPATHY, RHABDOMYOLYSIS
myopathy_rhabdomyolysis_smq <- toupper(c(
  "Diabetic myonecrosis",
  "Exertional rhabdomyolysis",
  "Hypothyroid myopathy",
  "Muscle infarction",
  "Muscle necrosis",
  "Myoglobin blood increased",
  "Myoglobin blood present",
  "Myoglobin urine present",
  "Myoglobinaemia",
  "Myoglobinuria",
  "Myopathy",
  "Myopathy toxic",
  "Necrotising myositis",
  "Rhabdomyolysis",
  "Thyrotoxic myopathy",
  "Acute kidney injury",
  "Anuria",
  "Biopsy muscle abnormal",
  "Blood calcium decreased",
  "Blood creatine phosphokinase abnormal",
  "Blood creatine phosphokinase increased",
  "Blood creatine phosphokinase MM increased",
  "Blood creatinine abnormal",
  "Blood creatinine increased",
  "Chromaturia",
  "Chronic kidney disease",
  "Compartment syndrome",
  "Creatinine renal clearance abnormal",
  "Creatinine renal clearance decreased",
  "Diaphragm muscle weakness",
  "Electromyogram abnormal",
  "End stage renal disease",
  "Glomerular filtration rate abnormal",
  "Glomerular filtration rate decreased",
  "Haematoma muscle",
  "Hypercreatininaemia",
  "Hypocalcaemia",
  "Muscle discomfort",
  "Muscle disorder",
  "Muscle enzyme abnormal",
  "Muscle enzyme increased",
  "Muscle fatigue",
  "Muscle haemorrhage",
  "Muscle rupture",
  "Muscle strength abnormal",
  "Muscular weakness",
  "Musculoskeletal discomfort",
  "Musculoskeletal disorder",
  "Musculoskeletal pain",
  "Musculoskeletal toxicity",
  "Myalgia",
  "Myalgia intercostal",
  "Myositis",
  "Oliguria",
  "Renal failure",
  "Renal impairment",
  "Renal tubular necrosis",
  "Subacute kidney injury",
  "Tendon discomfort"
))
Step 3: Writing a Function for Cleaning Data
# Merge one drug table with one reaction table and normalise the result.
#
# Arguments:
#   drug  data.table with at least the columns caseid, role_cod, drugname,
#         trade_name and active_ingredient.
#   reac  data.table with at least the columns caseid and pt.
#
# Returns a data.table with the columns caseid, role_cod, active_ingredient
# and pt, where
#   * pt is upper-cased and every term belonging to one of the SMQ vectors
#     (arrhythmia_smq, qt_prolonged_smq, myopathy_rhabdomyolysis_smq) is
#     replaced by the SMQ label;
#   * active_ingredient is replaced by a canonical drug name whenever any
#     name from the matching drug vector occurs in active_ingredient,
#     drugname or trade_name.
#
# The drug-name and SMQ vectors are read from the enclosing environment.
# Progress messages are printed with cat().
merge_and_clean <- function(drug, reac) { # change caseid, primaryid as necessary
  # Work on the tables passed in by the caller instead of reaching for
  # globals, so the function processes exactly what it is given.
  drug <- drug[, .(caseid, role_cod, drugname, trade_name, active_ingredient)]
  reac <- reac[, .(caseid, pt)] # only need these columns

  # One row per (drug row, reaction row) pair sharing a caseid
  merged <- drug[reac, on = "caseid", allow.cartesian = TRUE]
  merged[, pt := toupper(pt)]

  cat("Assigning SMQ", "\n")
  merged[pt %in% arrhythmia_smq, pt := "ARRHYTHMIA"]
  merged[pt %in% qt_prolonged_smq, pt := "QT PROLONGED"]
  merged[pt %in% myopathy_rhabdomyolysis_smq, pt := "MYOPATHY/RHABDOMYOLYSIS"]

  # TRUE for every row in which any of `drug_names` (treated as regex
  # alternatives) occurs in one of the three name columns. Reads the
  # current state of `merged` each time it is called.
  drug_mask <- function(drug_names) {
    pattern <- paste(drug_names, collapse = "|")
    grepl(pattern, merged$active_ingredient) |
      grepl(pattern, merged$drugname) |
      grepl(pattern, merged$trade_name)
  }

  cat("Greplling Macrolides", "\n")
  # Macrolides
  merged[drug_mask(azithromycin), active_ingredient := "AZITHROMYCIN"]
  merged[drug_mask(erythromycin), active_ingredient := "ERYTHROMYCIN"]
  merged[drug_mask(clarithromycin), active_ingredient := "CLARITHROMYCIN"]

  cat("ACE inhibitors", "\n")
  # Digoxin (the progress message is kept as-is; digoxin is a cardiac
  # glycoside rather than an ACE inhibitor)
  merged[drug_mask(digoxin), active_ingredient := "DIGOXIN"]

  cat("Greplling Immunosuppressant","\n")
  # Immunosuppressant
  merged[drug_mask(tacrolimus), active_ingredient := "TACROLIMUS"]

  cat("Greplling Antiviral","\n")
  # Antiviral
  # All three masks are computed before anything is relabelled: the label
  # "LOPINAVIR_RITONAVIR" itself contains both single-drug names, so
  # relabelling step by step would overwrite the combination with
  # "LOPINAVIR" or "RITONAVIR".
  has_combination <- drug_mask(lopinavir_ritonavir)
  has_lopinavir <- drug_mask(lopinavir)
  has_ritonavir <- drug_mask(ritonavir)
  # A row naming both components is the combination product
  is_combination <- has_combination | (has_lopinavir & has_ritonavir)
  merged[has_lopinavir & !is_combination, active_ingredient := "LOPINAVIR"]
  merged[has_ritonavir & !is_combination, active_ingredient := "RITONAVIR"]
  merged[is_combination, active_ingredient := "LOPINAVIR_RITONAVIR"]

  cat("Greplling Statins","\n")
  # Statins
  merged[drug_mask(atorvastatin), active_ingredient := "ATORVASTATIN"]
  merged[drug_mask(rosuvastatin), active_ingredient := "ROSUVASTATIN"]
  merged[drug_mask(simvastatin), active_ingredient := "SIMVASTATIN"]

  # The raw name columns are no longer needed once active_ingredient is set
  merged[, c("drugname", "trade_name") := NULL]
  merged
}
Step 4: Calling the Processed Data Function
# Run the cleaning step according to `process_type`.
#   "single"    reads drug.csv and reac.csv directly from `raw_folder` and
#               writes processed.csv.
#   "iterative" treats every immediate sub-folder of `raw_folder` as one
#               extract and writes <index>.csv for each of them.
# Make sure the folders are created in processed data
if (process_type == "single") {
  drug_table <- fread(paste0(raw_folder, "drug.csv"))
  reac_table <- fread(paste0(raw_folder, "reac.csv"))
  processed <- merge_and_clean(drug = drug_table, reac = reac_table)
  fwrite(processed, paste0(processed_folder, "processed.csv")) # stored as processed.csv
} else if (process_type == "iterative") {
  folder_list <- list.dirs(raw_folder, full.names = TRUE, recursive = FALSE)
  # seq_along() yields an empty loop when there are no sub-folders,
  # whereas 1:length(x) would iterate over c(1, 0).
  for (i in seq_along(folder_list)) {
    cat("\014") # Clear console (if running in interactive mode)
    cat("Processing Folder:", i, "\n")
    raw_folder_i <- folder_list[i]
    # Find the drug and reac files by filename prefix
    drug_file <- list.files(raw_folder_i, pattern = "^drug", full.names = TRUE)
    reac_file <- list.files(raw_folder_i, pattern = "^reac", full.names = TRUE)
    # Skip folders that lack either file (scalar test, hence ||)
    if (length(drug_file) == 0 || length(reac_file) == 0) {
      warning(sprintf("Missing drug or reac file in folder %s", raw_folder_i))
      next
    }
    # Read the files; only the first match of each kind is used
    cat("Reading Drug File:", drug_file[1], "\n")
    drug_table <- fread(drug_file[1])
    cat("Reading Reac File:", reac_file[1], "\n")
    reac_table <- fread(reac_file[1])
    cat("Starting Processing Procedure for file: ", i, "\n")
    processed <- merge_and_clean(drug = drug_table, reac = reac_table)
    fwrite(processed, paste0(processed_folder, i, ".csv")) # stored as 1.csv, 2.csv, ...
  }
} else {
  # Fail loudly instead of silently producing no output
  stop(
    "Unknown process_type '", process_type,
    "': expected \"single\" or \"iterative\".",
    call. = FALSE
  )
}
## Processing Folder: 1
## Reading Drug File: C://Users//William//Desktop//DDI Codes//data//raw//1/drug1739165203.csv
## Reading Reac File: C://Users//William//Desktop//DDI Codes//data//raw//1/reac1739165203.csv
## Starting Processing Procedure for file: 1
## Assigning SMQ
## Greplling Macrolides
## ACE inhibitors
## Greplling Immunosuppressant
## Greplling Antiviral
## Greplling Statins
Processed Data
head(processed) # Preview the first rows of the most recently processed table
## caseid role_cod active_ingredient
## 1: 7763600 PS FENTANYL
## 2: 7763600 SS FLUOXETINE\\FLUOXETINE CAPSULES\\FLUOXETINE HYDROCHLORIDE
## 3: 7763600 SS FENTANYL\\FENTANYL CITRATE\\FENTANYL TRANSDERMAL SYSTEM
## 4: 7763600 PS FENTANYL
## 5: 7763600 SS FLUOXETINE\\FLUOXETINE CAPSULES\\FLUOXETINE HYDROCHLORIDE
## 6: 7763600 SS FENTANYL\\FENTANYL CITRATE\\FENTANYL TRANSDERMAL SYSTEM
## pt
## 1: MYOCARDITIS
## 2: MYOCARDITIS
## 3: MYOCARDITIS
## 4: PULMONARY OEDEMA
## 5: PULMONARY OEDEMA
## 6: PULMONARY OEDEMA
head(processed[role_cod == "C"]) # Preview the first rows whose role_cod is "C" (concomitant medications)
## caseid role_cod active_ingredient pt
## 1: 8755883 C CISPLATIN CONSTIPATION
## 2: 8755883 C CISPLATIN CONSTIPATION
## 3: 8755883 C CISPLATIN CONSTIPATION
## 4: 8755883 C CISPLATIN CONSTIPATION
## 5: 8755883 C CISPLATIN CONSTIPATION
## 6: 8755883 C CISPLATIN CONSTIPATION
length(unique(processed$caseid)) # Number of distinct caseid values in the processed table
## [1] 162650