First, download the data from medDRA repository (manually) and put it in the root folder;
Unzip it with provided Password as below:
- English and non-Japanese translations: *hide*
- Japanese translation: *hide*
Please find related emails to get the password
File downloaded from MedDRA are encoded as .asc, which is not a valid format for data import in SQL developer;
#Convert the data format from .asc to .tsv
#Two tables: "meddra_history_english", "meddra_release" are not used.
file_name <- c("hlgt", "hlgt_hlt", "hlt", "hlt_pt", "intl_ord", "llt", "mdhier", "pt", "smq_content", "smq_list", "soc", "soc_hlgt")
if(!file.exists("Convert")){
dir.create("Convert")
}
statistics = NULL
for (i in 1:length(file_name)){
tmp_name <- paste("meddra_20_1_english/MedAscii/",file_name[i],".asc",sep="")
data <- read.table(tmp_name, header = F, sep="$", fill=T, quote="")
statistics <- rbind(statistics, c(file_name[i], nrow(data)))
out_name <- paste("Convert/",file_name[i],".tsv",sep="")
write.table(data, file = out_name, row.names = F, col.names = F, quote = F, na = "", sep = "\t")
}
colnames(statistics) = c("Table - Category","Counts")
statistics[,1] = toupper(statistics[,1])
statistics
## Table - Category Counts
## [1,] "HLGT" "337"
## [2,] "HLGT_HLT" "1756"
## [3,] "HLT" "1738"
## [4,] "HLT_PT" "32912"
## [5,] "INTL_ORD" "27"
## [6,] "LLT" "78026"
## [7,] "MDHIER" "34830"
## [8,] "PT" "22774"
## [9,] "SMQ_CONTENT" "77125"
## [10,] "SMQ_LIST" "222"
## [11,] "SOC" "27"
## [12,] "SOC_HLGT" "354"
statistics_add <- read.xlsx("meddra_20_1_english/version_report_20_1_English.xlsx", sheetIndex = 1)
head(statistics_add, 13)
## Report.Title Total.Changes.between.selected.versions
## 1 New LLTs including New PTs 778
## 2 New PTs 288
## 3 Promotions 24
## 4 Demotions 37
## 5 LLTs under different PTs 394
## 6 LLT (excl PT) Primary SOC Chg 34
## 7 PT Primary SOC Changes 9
## 8 MedDRA Term Name Changes 2
## 9 MedDRA Code Switches 0
## 10 Currency Changes 16
## 11 Complex Changes 0
## 12 SMQ Changes 2
## 13 PT Changes in SMQs 487
…
Some updating job in SQL developer to update MedDRA tables respectively.
…
statistics_percent <- statistic_updates %>% spread(key = Version, value = COUNT) %>% rename(Version20.1="201", Version19.0="190") %>% mutate(Version20.1 = round(Version20.1/Version19.0*100,2), Version19.0=100) %>% gather(key = Version, value = COUNT, -TABLE_NAME)
g <- ggplot(statistics_percent,aes(x=TABLE_NAME, y=COUNT))
g <- g + geom_bar(aes(fill=as.character(Version)),position=position_dodge(width = 0),stat = "identity", alpha=0.95)
g <- g + scale_fill_manual(values = c("gray","darkGreen"))
g <- g + coord_flip()
g <- g + labs(x="Tables", y = "Total Counts Percentage (%)")
g
statistics_new <- statistic_updates %>% spread(key = Version, value = COUNT) %>% rename(Version20.1="201", Version19.0="190") %>% mutate(diff = Version20.1 - Version19.0, Percent.Increase = paste(round(diff/Version19.0*100,2),"%",sep=""))
statistics_new
## TABLE_NAME Version19.0 Version20.1 diff Percent.Increase
## 1 HIGH_LEVEL_GROUPING_TERM 335 337 2 0.6%
## 2 HIGH_LEVEL_TERM 1732 1738 6 0.35%
## 3 HLGT_HLT 1750 1756 6 0.34%
## 4 HLT_PT 31536 32912 1376 4.36%
## 5 LOW_LEVEL_TERM 75818 78026 2208 2.91%
## 6 MEDDRA_HIERARCHY 33395 34830 1435 4.3%
## 7 PREFERRED_TERM 21920 22774 854 3.9%
## 8 SMQ_CONTENT 69839 77125 7286 10.43%
## 9 SMQ_LIST 214 222 8 3.74%
## 10 SOC_HLGT 352 354 2 0.57%
## 11 SOC_INTL_ORDER 26 27 1 3.85%
## 12 SOC_TERM 27 27 0 0%
MedDRA ver 20.1 Development Server: http://ncsvmweb01.fda.gov/fdalabel/ui/search
MedDRA ver 18.1 Production Server: http://fdalabel.fda.gov/fdalabel/ui/search
The Entire lists of changed Terms are saved in “New_Version_CheckList.xlsx”.
**sql query:**
select count(pt_name) from preferred_term where pt_name not in (select pt_name from preferred_term_190);
> 983
select count(pt_name) from preferred_term_190 where pt_name not in (select pt_name from preferred_term);
> 129
select count(llt_name) from low_level_term where llt_name not in (select llt_name from low_level_term_190);
> 2226
select count(llt_name) from low_level_term_190 where llt_name not in (select llt_name from low_level_term);
> 16
set.seed(123)
PT_added <- read.xlsx("New_Version_CheckList.xlsx",sheetIndex = 1,colIndex = 1)
PT_removed <- read.xlsx("New_Version_CheckList.xlsx",sheetIndex = 1,colIndex = 3)
LLT_added <- read.xlsx("New_Version_CheckList.xlsx",sheetIndex = 2,colIndex = 1)
LLT_removed <- read.xlsx("New_Version_CheckList.xlsx",sheetIndex = 2,colIndex = 3)
Stats <- c(nrow(PT_added),nrow(PT_removed),nrow(LLT_added),nrow(LLT_removed))
names(Stats) <- c("PT_added", "PT_removed", "LLT_added", "LLT_removed")
Stats
## PT_added PT_removed LLT_added LLT_removed
## 983 129 2226 16
shown_data <- cbind(as.matrix(PT_added[sample(nrow(PT_added),15),1]), as.matrix(PT_removed[sample(nrow(PT_removed),15),1]), as.matrix(LLT_added[sample(nrow(LLT_added),15),1]), as.matrix(LLT_removed[sample(nrow(LLT_removed),15),1]))
colnames(shown_data) <- c("PT_added", "PT_removed", "LLT_added", "LLT_removed")
shown_data[,1:2]
## PT_added
## [1,] "Drug use disorder, postpartum"
## [2,] "Renal tubular injury"
## [3,] "Haemosiderin stain"
## [4,] "Tender joint count decreased"
## [5,] "Tumour cavitation"
## [6,] "Antemortem blood drug level increased"
## [7,] "Lymphoid hyperplasia of intestine"
## [8,] "Therapeutic product cross-reactivity"
## [9,] "Manufacturing stability testing issue"
## [10,] "Indeterminate glucose tolerance"
## [11,] "Ureter biopsy"
## [12,] "Inappropriate release of product for distribution"
## [13,] "Paraneoplastic thrombosis"
## [14,] "Microsatellite instability cancer"
## [15,] "Benign keratocystic odontogenic neoplasm"
## PT_removed
## [1,] "Systemic sclerosis"
## [2,] "Cystopexy"
## [3,] "Allergic granulomatous angiitis"
## [4,] "Follicle-stimulating hormone deficiency"
## [5,] "Trapezectomy"
## [6,] "Sleep phase rhythm disturbance"
## [7,] "Parovarian cyst"
## [8,] "Obstructive uropathy"
## [9,] "Upper extremity mass"
## [10,] "Urea cycle enzyme deficiency"
## [11,] "Pancreatic insufficiency"
## [12,] "Lower extremity mass"
## [13,] "Meningoencephalitis adenoviral"
## [14,] "Dental subluxation"
## [15,] "Blood cortisol normal"
shown_data[,3:4]
## LLT_added
## [1,] "Vasopressin deficiency"
## [2,] "Throat clearing"
## [3,] "Pediatric-type follicular lymphoma"
## [4,] "Renal vein stenosis"
## [5,] "Adrenal nodule"
## [6,] "Joubert syndrome"
## [7,] "Profundaplasty"
## [8,] "Cracked product"
## [9,] "Exposure via unknown route"
## [10,] "Deep anterior chamber of the eye"
## [11,] "Cardiac ultrasound"
## [12,] "Hip impingement syndrome"
## [13,] "High-grade B-cell lymphoma, NOS"
## [14,] "Fuchs coloboma"
## [15,] "Category III foetal heart rate"
## LLT_removed
## [1,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification) recurrent"
## [2,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification) refractory"
## [3,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification) stage III"
## [4,] "Trihammer pulse"
## [5,] "deQuervain's thyroiditis"
## [6,] "Aeromona infection"
## [7,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification) stage I"
## [8,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification) stage IV"
## [9,] "Finger top hypertrophy"
## [10,] "Follicle-stimulating hormone deficiency"
## [11,] "Angiomimmunoblastic (AILD, LgX (Kiel Classification)"
## [12,] "Bordet-Wasserman reaction"
## [13,] "Parovarian cyst"
## [14,] "Urinary bladder perforation interoperative"
## [15,] "Cornelia de-Lange syndrome"