See this website for database explanation
http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
library(downloader)
library(pander);panderOptions('table.split.table',Inf);options(knitr.table.format = 'pandoc');panderOptions("table.alignment.default", "left")
#doInstall <- T;toInstall <- c("downloader");if(doInstall){install.packages(toInstall)};lapply(toInstall, library, character.only = T)
#if (!require(downloader)) {install.packages("downloader"); require(downloader)}
#url <- 'http://www.fda.gov/downloads/Drugs/InformationOnDrugs/UCM054599.zip'
#download(url,dest="data.zip", mode = "wb")
#unzip ("data.zip",exdir = ".")
product = read.delim(file='Product.txt',as.is=T)
regActDt = read.delim(file='RegActionDate.txt',as.is=T)
#fix dates
regActDt$ActionDate2 <- as.Date(regActDt$ActionDate)
#table3
appDoc = read.delim(file='AppDoc.txt',as.is=T)
#table 4
app = read.delim(file='application.txt',as.is=T)
#19k applications with 40k app documents
#for 31k products and 123k regulatory acct Dates
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(data.table)
## Loading required package: data.table
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:dplyr':
##
## between, last
#because of the like command but dplyr now has a similar command
tta<- product %>% filter(tolower(activeingred) %like% 'limus')
pander(tta)
ApplNo | ProductNo | Form | Dosage | ProductMktStatus | TECode | ReferenceDrug | drugname | activeingred |
---|---|---|---|---|---|---|---|---|
21083 | 1 | SOLUTION;ORAL | 1MG/ML | 1 | 1 | RAPAMUNE | SIROLIMUS | |
21110 | 1 | TABLET;ORAL | 1MG | 1 | AB | 0 | RAPAMUNE | SIROLIMUS |
21110 | 2 | TABLET;ORAL | 2MG | 1 | AB | 1 | RAPAMUNE | SIROLIMUS |
21110 | 3 | TABLET;ORAL | 5MG Federal Register determination that product was not discontinued or withdrawn for safety or efficacy reasons | 3 | 0 | RAPAMUNE | SIROLIMUS | |
21110 | 4 | TABLET;ORAL | 0.5MG | 1 | AB | 0 | RAPAMUNE | SIROLIMUS |
21302 | 1 | CREAM;TOPICAL | 1% | 1 | 1 | ELIDEL | PIMECROLIMUS | |
21560 | 1 | TABLET;ORAL | 0.25MG | 1 | 0 | ZORTRESS | EVEROLIMUS | |
21560 | 2 | TABLET;ORAL | 0.5MG | 1 | 0 | ZORTRESS | EVEROLIMUS | |
21560 | 3 | TABLET;ORAL | 0.75MG | 1 | 1 | ZORTRESS | EVEROLIMUS | |
22088 | 1 | SOLUTION;INTRAVENOUS | 25MG/ML (25MG/ML) | 1 | 1 | TORISEL | TEMSIROLIMUS | |
22334 | 1 | TABLET;ORAL | 5MG | 1 | 0 | AFINITOR | EVEROLIMUS | |
22334 | 2 | TABLET;ORAL | 10MG | 1 | 1 | AFINITOR | EVEROLIMUS | |
22334 | 3 | TABLET;ORAL | 2.5MG | 1 | 0 | AFINITOR | EVEROLIMUS | |
22334 | 4 | TABLET;ORAL | 7.5MG | 1 | 0 | AFINITOR | EVEROLIMUS | |
50708 | 1 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | PROGRAF | TACROLIMUS |
50708 | 2 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 1 | PROGRAF | TACROLIMUS |
50708 | 3 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | PROGRAF | TACROLIMUS |
50709 | 1 | INJECTABLE;INJECTION | EQ 5MG BASE/ML | 1 | 1 | PROGRAF | TACROLIMUS | |
50777 | 1 | OINTMENT;TOPICAL | 0.03% | 1 | AB | 1 | PROTOPIC | TACROLIMUS |
50777 | 2 | OINTMENT;TOPICAL | 0.1% | 1 | AB | 1 | PROTOPIC | TACROLIMUS |
65461 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
65461 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
65461 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90402 | 1 | CAPSULE;ORAL | EQ 5MG BASE | 3 | 0 | TACROLIMUS | TACROLIMUS | |
90509 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90509 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90509 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90596 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90596 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90596 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90687 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90687 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90687 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90802 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90802 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
90802 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
91195 | 1 | CAPSULE;ORAL | EQ 0.5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
91195 | 2 | CAPSULE;ORAL | EQ 1MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
91195 | 3 | CAPSULE;ORAL | EQ 5MG BASE | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
200744 | 1 | OINTMENT;TOPICAL | 0.03% | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
200744 | 2 | OINTMENT;TOPICAL | 0.1% | 1 | AB | 0 | TACROLIMUS | TACROLIMUS |
201578 | 1 | TABLET;ORAL | 1MG | 1 | AB | 0 | SIROLIMUS | SIROLIMUS |
201578 | 2 | TABLET;ORAL | 2MG | 1 | AB | 0 | SIROLIMUS | SIROLIMUS |
201676 | 1 | TABLET; ORAL | 0.5MG | 1 | 0 | SIROLIMUS | SIROLIMUS | |
201676 | 2 | TABLET; ORAL | 1MG | 4 | 0 | SIROLIMUS | SIROLIMUS | |
201676 | 3 | TABLET;ORAL | 0.5MG | 1 | AB | 0 | SIROLIMUS | SIROLIMUS |
203985 | 1 | TABLET, FOR SUSPENSION;ORAL | 2MG | 1 | 0 | AFINITOR DISPERZ | EVEROLIMUS | |
203985 | 2 | TABLET, FOR SUSPENSION;ORAL | 3MG | 1 | 0 | AFINITOR DISPERZ | EVEROLIMUS | |
203985 | 3 | TABLET, FOR SUSPENSION;ORAL | 5MG | 1 | 1 | AFINITOR DISPERZ | EVEROLIMUS | |
204096 | 1 | CAPSULE, EXTENDED RELEASE;ORAL | EQ 0.5MG BASE | 1 | 0 | ASTAGRAF XL | TACROLIMUS | |
204096 | 2 | CAPSULE, EXTENDED RELEASE;ORAL | EQ 1MG BASE | 1 | 0 | ASTAGRAF XL | TACROLIMUS | |
204096 | 3 | CAPSULE, EXTENDED RELEASE;ORAL | EQ 5MG BASE | 1 | 1 | ASTAGRAF XL | TACROLIMUS | |
206406 | 1 | TABLET, EXTENDED RELEASE;ORAL | 0.75MG | 4 | 0 | ENVARSUS XR | TACROLIMUS | |
206406 | 2 | TABLET, EXTENDED RELEASE;ORAL | 1MG | 4 | 0 | ENVARSUS XR | TACROLIMUS | |
206406 | 3 | TABLET, EXTENDED RELEASE;ORAL | 4MG | 4 | 0 | ENVARSUS XR | TACROLIMUS |
product %>% filter(tolower(activeingred) %like% 'vacizu')
## ApplNo ProductNo Form Dosage ProductMktStatus TECode
## 1 125085 1 VIAL; INTRAVENOUS 100MG 1
## 2 125085 2 VIAL; INTRAVENOUS 400MG 1
## ReferenceDrug drugname activeingred
## 1 0 AVASTIN BEVACIZUMAB
## 2 0 AVASTIN BEVACIZUMAB
#iris %>% filter(tolower(Species) %like% 'nica')
tta<-product %>% filter(tolower(drugname) %like% 'ntyvi')
tta
## ApplNo ProductNo Form Dosage ProductMktStatus TECode
## 1 125476 1 INJECTABLE;INJECTION 300MG 1
## ReferenceDrug drugname activeingred
## 1 0 ENTYVIO VEDOLIZUMAB
#end of exploration
#next theme, what actions are taken
table(regActDt$ActionType)
##
## AP TA
## 121843 1973
#in what year
as.data.frame(table(format(regActDt$ActionDate2, "%Y")))
## Var1 Freq
## 1 1939 12
## 2 1940 11
## 3 1941 14
## 4 1942 11
## 5 1943 6
## 6 1944 9
## 7 1945 5
## 8 1946 13
## 9 1947 11
## 10 1948 22
## 11 1949 11
## 12 1950 27
## 13 1951 23
## 14 1952 36
## 15 1953 53
## 16 1954 51
## 17 1955 55
## 18 1956 42
## 19 1957 67
## 20 1958 50
## 21 1959 60
## 22 1960 74
## 23 1961 102
## 24 1962 70
## 25 1963 96
## 26 1964 120
## 27 1965 125
## 28 1966 143
## 29 1967 158
## 30 1968 112
## 31 1969 148
## 32 1970 202
## 33 1971 250
## 34 1972 287
## 35 1973 289
## 36 1974 601
## 37 1975 1017
## 38 1976 1428
## 39 1977 1354
## 40 1978 1938
## 41 1979 1867
## 42 1980 1986
## 43 1981 2156
## 44 1982 2938
## 45 1983 2416
## 46 1984 2845
## 47 1985 3373
## 48 1986 3507
## 49 1987 3512
## 50 1988 4457
## 51 1989 3817
## 52 1990 2826
## 53 1991 3728
## 54 1992 3743
## 55 1993 3584
## 56 1994 3806
## 57 1995 3892
## 58 1996 4638
## 59 1997 4475
## 60 1998 5084
## 61 1999 4827
## 62 2000 5280
## 63 2001 5181
## 64 2002 5740
## 65 2003 1627
## 66 2004 1774
## 67 2005 1473
## 68 2006 1652
## 69 2007 1941
## 70 2008 1759
## 71 2009 1733
## 72 2010 1797
## 73 2011 2280
## 74 2012 2217
## 75 2013 3217
## 76 2014 3298
## 77 2015 262
#how many OTC medications there are?
#status
table(product$ProductMktStatus)
##
## 1 2 3 4
## 16653 629 13562 991
#(1=prescription, 2=OTC, 3=discontinued, 4=tentative approval) (Primary Key)
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
#629 OTC meds
#what ingredients they tend to have ?
ttc<-product %>% filter(ProductMktStatus==2) %>% select (i=activeingred) %>% distinct()
nrow(ttc)
## [1] 85
#85 rows for otc drugs
#Well.....there are semicolons
ttc
## i
## 1 CHLORPHENIRAMINE MALEATE
## 2 ACETAMINOPHEN
## 3 MICONAZOLE NITRATE
## 4 CLOTRIMAZOLE
## 5 CHLORHEXIDINE GLUCONATE
## 6 DOXYLAMINE SUCCINATE
## 7 PSEUDOEPHEDRINE SULFATE
## 8 CHLORPHENIRAMINE MALEATE; PSEUDOEPHEDRINE SULFATE
## 9 OXYMETAZOLINE HYDROCHLORIDE
## 10 NICOTINE POLACRILEX
## 11 DEXTROMETHORPHAN POLISTIREX
## 12 POTASSIUM IODIDE
## 13 NONOXYNOL-9
## 14 ALUMINUM HYDROXIDE; MAGNESIUM TRISILICATE
## 15 INSULIN RECOMBINANT HUMAN
## 16 INSULIN SUSP ISOPHANE RECOMBINANT HUMAN
## 17 IBUPROFEN
## 18 POVIDONE-IODINE
## 19 LOPERAMIDE HYDROCHLORIDE
## 20 MINOXIDIL
## 21 LORATADINE
## 22 LORATADINE; PSEUDOEPHEDRINE SULFATE
## 23 INSULIN RECOMBINANT HUMAN; INSULIN SUSP ISOPHANE RECOMBINANT HUMAN
## 24 IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 25 CETIRIZINE HYDROCHLORIDE
## 26 SODIUM CHLORIDE
## 27 PERMETHRIN
## 28 PSEUDOEPHEDRINE HYDROCHLORIDE
## 29 AVOBENZONE; OCTINOXATE; OXYBENZONE
## 30 NAPHAZOLINE HYDROCHLORIDE; PHENIRAMINE MALEATE
## 31 NICOTINE
## 32 NAPROXEN SODIUM
## 33 SODIUM FLUORIDE; TRICLOSAN
## 34 CIMETIDINE
## 35 KETOCONAZOLE
## 36 FAMOTIDINE
## 37 BUTOCONAZOLE NITRATE
## 38 TRIAMCINOLONE ACETONIDE
## 39 RANITIDINE HYDROCHLORIDE
## 40 BENTOQUATAM
## 41 NIZATIDINE
## 42 LOPERAMIDE HYDROCHLORIDE; SIMETHICONE
## 43 TIOCONAZOLE
## 44 FEXOFENADINE HYDROCHLORIDE; PSEUDOEPHEDRINE HYDROCHLORIDE
## 45 ACETAMINOPHEN; ASPIRIN; CAFFEINE
## 46 CHLORHEXIDINE GLUCONATE; ISOPROPYL ALCOHOL
## 47 FEXOFENADINE HYDROCHLORIDE
## 48 CLEMASTINE FUMARATE
## 49 DOCOSANOL
## 50 CALCIUM CARBONATE; FAMOTIDINE; MAGNESIUM HYDROXIDE
## 51 TERBINAFINE HYDROCHLORIDE
## 52 LEVONORGESTREL
## 53 KETOTIFEN FUMARATE
## 54 ALCOHOL; CHLORHEXIDINE GLUCONATE
## 55 NAPROXEN SODIUM; PSEUDOEPHEDRINE HYDROCHLORIDE
## 56 CETIRIZINE HYDROCHLORIDE; PSEUDOEPHEDRINE HYDROCHLORIDE
## 57 OMEPRAZOLE MAGNESIUM
## 58 GUAIFENESIN
## 59 BUTENAFINE HYDROCHLORIDE
## 60 DIPHENHYDRAMINE HYDROCHLORIDE; IBUPROFEN
## 61 DIPHENHYDRAMINE CITRATE; IBUPROFEN
## 62 CHLORPHENIRAMINE MALEATE; IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 63 AVOBENZONE; ECAMSULE; OCTOCRYLENE; TITANIUM DIOXIDE
## 64 AVOBENZONE; ECAMSULE; OCTOCRYLENE
## 65 GUAIFENESIN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 66 IODINE POVACRYLEX; ISOPROPYL ALCOHOL
## 67 DEXTROMETHORPHAN HYDROBROMIDE; GUAIFENESIN
## 68 ORLISTAT
## 69 TERBINAFINE
## 70 POLYETHYLENE GLYCOL 3350
## 71 MENTHOL; METHYL SALICYLATE
## 72 OMEPRAZOLE
## 73 CHLORPHENIRAMINE MALEATE; IBUPROFEN; PHENYLEPHRINE HYDROCHLORIDE
## 74 OMEPRAZOLE; SODIUM BICARBONATE
## 75 PURIFIED WATER
## 76 LANSOPRAZOLE
## 77 IBUPROFEN; PHENYLEPHRINE HYDROCHLORIDE
## 78 CROMOLYN SODIUM
## 79 DEXBROMPHENIRAMINE MALEATE; PSEUDOEPHEDRINE SULFATE
## 80 IBUPROFEN SODIUM
## 81 OXYBUTYNIN
## 82 ASPIRIN
## 83 ESOMEPRAZOLE MAGNESIUM
## 84 DIPHENHYDRAMINE HYDROCHLORIDE; NAPROXEN SODIUM
## 85 FLUTICASONE PROPIONATE
#has 2
ttc[24,]
## [1] "IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE"
#let's split those
ttd<-strsplit(ttc$i,';')
#more readable version would be: ttd<-strsplit(ttc$i,split=';')
length(ttd)
## [1] 85
#still 85 rows and semicolons
ttd[[24]]
## [1] "IBUPROFEN" " PSEUDOEPHEDRINE HYDROCHLORIDE"
length(ttd[[24]])
## [1] 2
length(ttd[[25]])
## [1] 1
#little bit of advanced R, don't listen if you want your life simple
lapply(ttd,length)
## [[1]]
## [1] 1
##
## [[2]]
## [1] 1
##
## [[3]]
## [1] 1
##
## [[4]]
## [1] 1
##
## [[5]]
## [1] 1
##
## [[6]]
## [1] 1
##
## [[7]]
## [1] 1
##
## [[8]]
## [1] 2
##
## [[9]]
## [1] 1
##
## [[10]]
## [1] 1
##
## [[11]]
## [1] 1
##
## [[12]]
## [1] 1
##
## [[13]]
## [1] 1
##
## [[14]]
## [1] 2
##
## [[15]]
## [1] 1
##
## [[16]]
## [1] 1
##
## [[17]]
## [1] 1
##
## [[18]]
## [1] 1
##
## [[19]]
## [1] 1
##
## [[20]]
## [1] 1
##
## [[21]]
## [1] 1
##
## [[22]]
## [1] 2
##
## [[23]]
## [1] 2
##
## [[24]]
## [1] 2
##
## [[25]]
## [1] 1
##
## [[26]]
## [1] 1
##
## [[27]]
## [1] 1
##
## [[28]]
## [1] 1
##
## [[29]]
## [1] 3
##
## [[30]]
## [1] 2
##
## [[31]]
## [1] 1
##
## [[32]]
## [1] 1
##
## [[33]]
## [1] 2
##
## [[34]]
## [1] 1
##
## [[35]]
## [1] 1
##
## [[36]]
## [1] 1
##
## [[37]]
## [1] 1
##
## [[38]]
## [1] 1
##
## [[39]]
## [1] 1
##
## [[40]]
## [1] 1
##
## [[41]]
## [1] 1
##
## [[42]]
## [1] 2
##
## [[43]]
## [1] 1
##
## [[44]]
## [1] 2
##
## [[45]]
## [1] 3
##
## [[46]]
## [1] 2
##
## [[47]]
## [1] 1
##
## [[48]]
## [1] 1
##
## [[49]]
## [1] 1
##
## [[50]]
## [1] 3
##
## [[51]]
## [1] 1
##
## [[52]]
## [1] 1
##
## [[53]]
## [1] 1
##
## [[54]]
## [1] 2
##
## [[55]]
## [1] 2
##
## [[56]]
## [1] 2
##
## [[57]]
## [1] 1
##
## [[58]]
## [1] 1
##
## [[59]]
## [1] 1
##
## [[60]]
## [1] 2
##
## [[61]]
## [1] 2
##
## [[62]]
## [1] 3
##
## [[63]]
## [1] 4
##
## [[64]]
## [1] 3
##
## [[65]]
## [1] 2
##
## [[66]]
## [1] 2
##
## [[67]]
## [1] 2
##
## [[68]]
## [1] 1
##
## [[69]]
## [1] 1
##
## [[70]]
## [1] 1
##
## [[71]]
## [1] 2
##
## [[72]]
## [1] 1
##
## [[73]]
## [1] 3
##
## [[74]]
## [1] 2
##
## [[75]]
## [1] 1
##
## [[76]]
## [1] 1
##
## [[77]]
## [1] 2
##
## [[78]]
## [1] 1
##
## [[79]]
## [1] 2
##
## [[80]]
## [1] 1
##
## [[81]]
## [1] 1
##
## [[82]]
## [1] 1
##
## [[83]]
## [1] 1
##
## [[84]]
## [1] 2
##
## [[85]]
## [1] 1
sapply(ttd,length) #sapply is a user friendly version and wrapper for lapply that returns a vector
## [1] 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2 2 2 1 1 1 1 3 2 1 1 2 1 1
## [36] 1 1 1 1 1 1 2 1 2 3 2 1 1 1 3 1 1 1 2 2 2 1 1 1 2 2 3 4 3 2 2 2 1 1 1
## [71] 2 1 3 2 1 1 2 1 2 1 1 1 1 2 1
#final step for spliting
tte<-unlist(ttd)
length(tte)
## [1] 123
head(tte)
## [1] "CHLORPHENIRAMINE MALEATE" "ACETAMINOPHEN"
## [3] "MICONAZOLE NITRATE" "CLOTRIMAZOLE"
## [5] "CHLORHEXIDINE GLUCONATE" "DOXYLAMINE SUCCINATE"
#end demo here
#Next problem
mega<-left_join(product,regActDt)
## Joining by: "ApplNo"
#only column in common is ApplNo
#let's not explore the full complexity here
#entyvio example
ttg<- mega %>% filter(tolower(drugname) %like% 'entyvio')
ttg
## ApplNo ProductNo Form Dosage ProductMktStatus TECode
## 1 125476 1 INJECTABLE;INJECTION 300MG 1
## ReferenceDrug drugname activeingred ActionType InDocTypeSeqNo
## 1 0 ENTYVIO VEDOLIZUMAB AP 0
## DuplicateCounter ActionDate DocType ActionDate2
## 1 0 2014-05-20 00:00:00 N 2014-05-20
#we know more compared to
tta
## ApplNo ProductNo Form Dosage ProductMktStatus TECode
## 1 125476 1 INJECTABLE;INJECTION 300MG 1
## ReferenceDrug drugname activeingred
## 1 0 ENTYVIO VEDOLIZUMAB
ncol(ttg);ncol(tta)
## [1] 15
## [1] 9
#we have more columns, we know that the last action date was 2014
#and DocType was N probably new application
#using example for the drug entyvio
#more joining with appDoc ...
#end demo here
#notes
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm
#other zip file (NDC)
#http://www.fda.gov/downloads/Drugs/DevelopmentApprovalProcess/UCM070838.zip
cat(readr::read_file('drugsAtFDA-demoA.Rmd'))
---
title: "DrugAtFDA"
output:
html_document:
toc: yes
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
See this website for database explanation
http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
```{r cars}
library(downloader)
library(pander);panderOptions('table.split.table',Inf);options(knitr.table.format = 'pandoc');panderOptions("table.alignment.default", "left")
#doInstall <- T;toInstall <- c("downloader");if(doInstall){install.packages(toInstall)};lapply(toInstall, library, character.only = T)
#if (!require(downloader)) {install.packages("downloader"); require(downloader)}
#url <- 'http://www.fda.gov/downloads/Drugs/InformationOnDrugs/UCM054599.zip'
#download(url,dest="data.zip", mode = "wb")
#unzip ("data.zip",exdir = ".")
product = read.delim(file='Product.txt',as.is=T)
regActDt = read.delim(file='RegActionDate.txt',as.is=T)
#fix dates
regActDt$ActionDate2 <- as.Date(regActDt$ActionDate)
#table3
appDoc = read.delim(file='AppDoc.txt',as.is=T)
#table 4
app = read.delim(file='application.txt',as.is=T)
#19k applications with 40k app documents
#for 31k products and 123k regulatory acct Dates
require(dplyr)
require(data.table)
#because of the like command but dplyr now has a similar command
tta<- product %>% filter(tolower(activeingred) %like% 'limus')
pander(tta)
product %>% filter(tolower(activeingred) %like% 'vacizu')
#iris %>% filter(tolower(Species) %like% 'nica')
tta<-product %>% filter(tolower(drugname) %like% 'ntyvi')
tta
#end of exploration
#next theme, what actions are taken
table(regActDt$ActionType)
#in what year
as.data.frame(table(format(regActDt$ActionDate2, "%Y")))
#how many OTC medications there are?
#status
table(product$ProductMktStatus)
#(1=prescription, 2=OTC, 3=discontinued, 4=tentative approval) (Primary Key)
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
#629 OTC meds
#what ingredients they tend to have ?
ttc<-product %>% filter(ProductMktStatus==2) %>% select (i=activeingred) %>% distinct()
nrow(ttc)
#85 rows for otc drugs
#Well.....there are semicolons
ttc
#has 2
ttc[24,]
#let's split those
ttd<-strsplit(ttc$i,';')
#more readable version would be: ttd<-strsplit(ttc$i,split=';')
length(ttd)
#still 85 rows and semicolons
ttd[[24]]
length(ttd[[24]])
length(ttd[[25]])
#little bit of advanced R, don't listen if you want your life simple
lapply(ttd,length)
sapply(ttd,length) #sapply is a user friendly version and wrapper for lapply that returns a vector
#final step for spliting
tte<-unlist(ttd)
length(tte)
head(tte)
#end demo here
#Next problem
mega<-left_join(product,regActDt)
#only column in common is ApplNo
#let's not explore the full complexity here
#entyvio example
ttg<- mega %>% filter(tolower(drugname) %like% 'entyvio')
ttg
#we know more compared to
tta
ncol(ttg);ncol(tta)
#we have more columns, we know that the last action date was 2014
#and DocType was N probably new application
#using example for the drug entyvio
#more joining with appDoc ...
#end demo here
#notes
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm
#other zip file (NDC)
#http://www.fda.gov/downloads/Drugs/DevelopmentApprovalProcess/UCM070838.zip
```
#code export
```{r comment=''}
cat(readr::read_file('drugsAtFDA-demoA.Rmd'))
```
## end