Step 0: Loading Library

library(data.table)

Step 1: Defining Parameters

  # Input and output locations. Both end with a path separator because file
  # names are appended to them with paste0().
  raw_folder <- "C://Users//William//Desktop//DDI Codes//data//raw//"
  processed_folder <- "C://Users//William//Desktop//DDI Codes//data//processed//"

  # Processing mode: "single" or "iterative".
  process_type <- "iterative"

Step 2: Defining All Drug Names and Adverse Reaction SMQs (Broad)

# Drugs ----
# Each vector holds the upper-cased generic and brand names of one drug. The
# entries of a vector are later joined with "|" and used as a regular
# expression, so a literal backslash in a name has to be regex-escaped here
# (four backslashes in the R source).

# Macrolides
# The generic name is listed first so that salt/hydrate forms of the active
# ingredient are matched as well, like in the other lists.
azithromycin <- toupper(c("Azithromycin", "Azasite",
                          "Azithromycin 3 Day Dose Pack",
                          "Azithromycin 5 Day Dose Pack", "Zithromax",
                          "Zithromax TRI-PAK", "Zithromax Z-Pak", "Zmax"))

erythromycin <- toupper(c("erythromycin", "E.E.S. Granules", "E.E.S.-200",
                          "E.E.S.-400 Filmtab", "EryPed 200", "EryPed 400",
                          "Ery-Tab", "Erythrocin Lactobionate",
                          "Erythrocin Stearate Filmtab"))

clarithromycin <- toupper(c("CLARITHROMYCIN", "Biaxin", "Biaxin XL"))

# Cardiac glycoside (heart failure and atrial fibrillation medication)
digoxin <- toupper(c("DIGOXIN", "Digitek", "Digox", "Lanoxin", "Lanoxicaps",
                     "Cardoxin"))

# Immunosuppressant
tacrolimus <- toupper(c("TACROLIMUS", "Astagraf XL", "Envarsus XR", "Prograf",
                        "Hecoria"))

# Anti-viral
# "Kaletra" and "Lopinavir and Ritonavir" are separate names. The
# backslash-separated spelling is written with an escaped backslash: a bare
# "\r" inside an R string is a carriage return, not a backslash followed by r.
lopinavir_ritonavir <- toupper(c("Kaletra", "Lopinavir and Ritonavir",
                                 "Lopinavir-Ritonavir",
                                 "Lopinavir\\\\ritonavir",
                                 "Lopinavir/ritonavir"))

lopinavir <- toupper(c("lopinavir"))

ritonavir <- toupper(c("ritonavir"))

# Statins
atorvastatin <- toupper(c("ATORVASTATIN", "Atorvaliq", "Lipitor"))

rosuvastatin <- toupper(c("ROSUVASTATIN", "Crestor", "Ezallor Sprinkle"))

simvastatin <- toupper(c("SIMVASTATIN", "Zocor"))
  


# Adverse events (ae)
# SMQ term list for ARRHYTHMIA, upper-cased so it can be compared against the
# upper-cased `pt` column.
arrhythmia_smq <- toupper(c(
  "Arrhythmia", "Heart alternation", "Heart rate irregular",
  "Holiday heart syndrome", "Pacemaker generated arrhythmia",
  "Pacemaker syndrome", "Paroxysmal arrhythmia",
  "Pulseless electrical activity", "Reperfusion arrhythmia",
  "Withdrawal arrhythmia"
))

# SMQ term list for QT PROLONGED, upper-cased so it can be compared against
# the upper-cased `pt` column.
qt_prolonged_smq <- toupper(c(
  "Electrocardiogram QT interval abnormal", "Electrocardiogram QT prolonged",
  "Long QT syndrome", "Long QT syndrome congenital",
  "Torsade de pointes", "Ventricular tachycardia",
  "Arrhythmic storm", "Cardiac arrest", "Cardiac death",
  "Cardiac fibrillation", "Cardio-respiratory arrest",
  "Electrocardiogram repolarisation abnormality",
  "Electrocardiogram U wave inversion", "Electrocardiogram U wave present",
  "Electrocardiogram U-wave abnormality",
  "Loss of consciousness", "Seizure",
  "Sudden cardiac death", "Sudden death", "Syncope",
  "Ventricular arrhythmia", "Ventricular fibrillation",
  "Ventricular flutter", "Ventricular tachyarrhythmia"
))

# SMQ term list for MYOPATHY, RHABDOMYOLYSIS, upper-cased so it can be
# compared against the upper-cased `pt` column. Every term is listed once.

myopathy_rhabdomyolysis_smq <- toupper(c(
  "Diabetic myonecrosis",
  "Exertional rhabdomyolysis",
  "Hypothyroid myopathy",
  "Muscle infarction",
  "Muscle necrosis",
  "Myoglobin blood increased",
  "Myoglobin blood present",
  "Myoglobin urine present",
  "Myoglobinaemia",
  "Myoglobinuria",
  "Myopathy",
  "Myopathy toxic",
  "Necrotising myositis",
  "Rhabdomyolysis",
  "Thyrotoxic myopathy",
  "Acute kidney injury",
  "Anuria",
  "Biopsy muscle abnormal",
  "Blood calcium decreased",
  "Blood creatine phosphokinase abnormal",
  "Blood creatine phosphokinase increased",
  "Blood creatine phosphokinase MM increased",
  "Blood creatinine abnormal",
  "Blood creatinine increased",
  "Chromaturia",
  "Chronic kidney disease",
  "Compartment syndrome",
  "Creatinine renal clearance abnormal",
  "Creatinine renal clearance decreased",
  "Diaphragm muscle weakness",
  "Electromyogram abnormal",
  "End stage renal disease",
  "Glomerular filtration rate abnormal",
  "Glomerular filtration rate decreased",
  "Haematoma muscle",
  "Hypercreatininaemia",
  "Hypocalcaemia",
  "Muscle discomfort",
  "Muscle disorder",
  "Muscle enzyme abnormal",
  "Muscle enzyme increased",
  "Muscle fatigue",
  "Muscle haemorrhage",
  "Muscle rupture",
  "Muscle strength abnormal",
  "Muscular weakness",
  "Musculoskeletal discomfort",
  "Musculoskeletal disorder",
  "Musculoskeletal pain",
  "Musculoskeletal toxicity",
  "Myalgia",
  "Myalgia intercostal",
  "Myositis",
  "Oliguria",
  "Renal failure",
  "Renal impairment",
  "Renal tubular necrosis",
  "Subacute kidney injury",
  "Tendon discomfort"
))

Step 3: Writing a Function for Cleaning Data

# Join drug and reaction records on caseid and normalise the terms of interest.
#
# Arguments:
#   drug  data.table with (at least) the columns caseid, role_cod, drugname,
#         trade_name and active_ingredient.
#   reac  data.table with (at least) the columns caseid and pt.
#
# Returns a data.table with the columns caseid, role_cod, active_ingredient
# and pt, in which
#   * pt is upper-cased and every term found in one of the SMQ vectors is
#     replaced by the SMQ label, and
#   * active_ingredient is replaced by a canonical drug name whenever
#     active_ingredient, drugname or trade_name matches one of the drug
#     synonym vectors.
#
# The SMQ and drug synonym vectors are read from the enclosing environment.
# Progress messages are written with cat().
merge_and_clean <- function(drug, reac) { # change caseid, primaryid as necessary

  # Use the arguments that were passed in (not same-named global tables) and
  # keep only the columns that are needed.
  drug <- drug[, .(caseid, role_cod, drugname, trade_name, active_ingredient)]
  reac <- reac[, .(caseid, pt)]

  # Every drug row is paired with every reaction row of the same caseid; all
  # rows of reac are kept.
  # NOTE(review): the sample output contains repeated identical rows, which
  # suggests caseid is not unique per report version - confirm whether
  # primaryid should be the join key.
  merged <- drug[reac, on = "caseid", allow.cartesian = TRUE]

  merged[, pt := toupper(pt)]

  cat("Assigning SMQ", "\n")

  merged[pt %in% arrhythmia_smq, pt := "ARRHYTHMIA"]
  merged[pt %in% qt_prolonged_smq, pt := "QT PROLONGED"]
  merged[pt %in% myopathy_rhabdomyolysis_smq, pt := "MYOPATHY/RHABDOMYOLYSIS"]

  # TRUE for every row in which one of the three drug-name columns matches at
  # least one synonym. The pattern is built once per drug.
  matches_any <- function(synonyms) {
    pattern <- paste(synonyms, collapse = "|")
    merged[, grepl(pattern, active_ingredient) |
             grepl(pattern, drugname) |
             grepl(pattern, trade_name)]
  }

  # Overwrite active_ingredient with `label` on every matching row.
  relabel <- function(synonyms, label) {
    hit <- matches_any(synonyms)
    merged[hit, active_ingredient := label]
    invisible(NULL)
  }

  cat("Greplling Macrolides", "\n")

  relabel(azithromycin, "AZITHROMYCIN")
  relabel(erythromycin, "ERYTHROMYCIN")
  relabel(clarithromycin, "CLARITHROMYCIN")

  # Digoxin; the message text is kept as it appears in earlier run logs.
  cat("ACE inhibitors", "\n")

  relabel(digoxin, "DIGOXIN")

  cat("Greplling Immunosuppressant", "\n")

  relabel(tacrolimus, "TACROLIMUS")

  cat("Greplling Antiviral", "\n")

  # All three masks are computed before any antiviral label is written.
  # Applying the patterns one after another would let "LOPINAVIR" match the
  # freshly written "LOPINAVIR_RITONAVIR" label and overwrite it.
  is_lopinavir <- matches_any(lopinavir)
  is_ritonavir <- matches_any(ritonavir)
  # A row that names both ingredients is the combination product, even when
  # none of the combination spellings matches.
  is_combination <- matches_any(lopinavir_ritonavir) |
    (is_lopinavir & is_ritonavir)

  merged[is_lopinavir & !is_combination, active_ingredient := "LOPINAVIR"]
  merged[is_ritonavir & !is_combination, active_ingredient := "RITONAVIR"]
  merged[is_combination, active_ingredient := "LOPINAVIR_RITONAVIR"]

  cat("Greplling Statins", "\n")

  relabel(atorvastatin, "ATORVASTATIN")
  relabel(rosuvastatin, "ROSUVASTATIN")
  relabel(simvastatin, "SIMVASTATIN")

  # The name columns were only needed for matching.
  merged[, c("drugname", "trade_name") := NULL]

  return(merged)
}

Step 4: Running the Data Processing Function

# Make sure the processed-data folder already exists before running this step; the script does not create it.

# Run the cleaning step in the mode selected by process_type.
#   "single"    - reads drug.csv and reac.csv directly from raw_folder and
#                 writes processed.csv to processed_folder.
#   "iterative" - processes every immediate sub-folder of raw_folder and
#                 writes one <index>.csv per sub-folder to processed_folder.
# Any other value is an error.
if (process_type == "single") {

  drug_table <- fread(paste0(raw_folder, "drug.csv"))

  reac_table <- fread(paste0(raw_folder, "reac.csv"))

  processed <- merge_and_clean(drug = drug_table, reac = reac_table)

  fwrite(processed, paste0(processed_folder, "processed.csv")) # stored as processed.csv

} else if (process_type == "iterative") {

  folder_list <- list.dirs(raw_folder, full.names = TRUE, recursive = FALSE)
  folders <- length(folder_list) # counts how many folders

  # seq_len() yields an empty sequence when there are no sub-folders, whereas
  # 1:folders would iterate over c(1, 0).
  for (i in seq_len(folders)) {

    cat("\014")  # Clear console (if running in interactive mode)
    cat("Processing Folder:", i, "\n")

    raw_folder_i <- folder_list[i]

    # Locate the drug and reac files by their file-name prefix
    drug_file <- list.files(raw_folder_i, pattern = "^drug", full.names = TRUE)
    reac_file <- list.files(raw_folder_i, pattern = "^reac", full.names = TRUE)

    # Skip folders that lack either file
    if (length(drug_file) == 0 || length(reac_file) == 0) {
      warning(sprintf("Missing drug or reac file in folder %s", raw_folder_i))
      next
    }

    # Only the first match of each prefix is read
    cat("Reading Drug File:", drug_file[1], "\n")
    drug_table <- fread(drug_file[1])

    cat("Reading Reac File:", reac_file[1], "\n")
    reac_table <- fread(reac_file[1])

    cat("Starting Processing Procedure for file: ", i, "\n")

    processed <- merge_and_clean(drug = drug_table, reac = reac_table)

    fwrite(processed, paste0(processed_folder, i, ".csv")) # stored as 1.csv, 2.csv, ...
  }
} else {
  stop(
    sprintf(
      "Unknown process_type '%s': expected \"single\" or \"iterative\"",
      process_type
    ),
    call. = FALSE
  )
}
## Processing Folder: 1 
## Reading Drug File: C://Users//William//Desktop//DDI Codes//data//raw//1/drug1739165203.csv 
## Reading Reac File: C://Users//William//Desktop//DDI Codes//data//raw//1/reac1739165203.csv 
## Starting Processing Procedure for file:  1 
## Assigning SMQ 
## Greplling Macrolides 
## ACE inhibitors 
## Greplling Immunosuppressant 
## Greplling Antiviral 
## Greplling Statins

Processed Data

head(processed) # First rows of `processed`; in iterative mode this is the table from the most recently processed folder
##     caseid role_cod                                         active_ingredient
## 1: 7763600       PS                                                  FENTANYL
## 2: 7763600       SS FLUOXETINE\\FLUOXETINE CAPSULES\\FLUOXETINE HYDROCHLORIDE
## 3: 7763600       SS   FENTANYL\\FENTANYL CITRATE\\FENTANYL TRANSDERMAL SYSTEM
## 4: 7763600       PS                                                  FENTANYL
## 5: 7763600       SS FLUOXETINE\\FLUOXETINE CAPSULES\\FLUOXETINE HYDROCHLORIDE
## 6: 7763600       SS   FENTANYL\\FENTANYL CITRATE\\FENTANYL TRANSDERMAL SYSTEM
##                  pt
## 1:      MYOCARDITIS
## 2:      MYOCARDITIS
## 3:      MYOCARDITIS
## 4: PULMONARY OEDEMA
## 5: PULMONARY OEDEMA
## 6: PULMONARY OEDEMA
head(processed[role_cod == "C"]) # First rows whose role_cod is "C" (concomitant medications)
##     caseid role_cod active_ingredient           pt
## 1: 8755883        C         CISPLATIN CONSTIPATION
## 2: 8755883        C         CISPLATIN CONSTIPATION
## 3: 8755883        C         CISPLATIN CONSTIPATION
## 4: 8755883        C         CISPLATIN CONSTIPATION
## 5: 8755883        C         CISPLATIN CONSTIPATION
## 6: 8755883        C         CISPLATIN CONSTIPATION
length(unique(processed$caseid)) # Number of distinct caseid values in `processed`
## [1] 162650