See this website for database explanation

http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm

library(downloader)
library(pander);panderOptions('table.split.table',Inf);options(knitr.table.format = 'pandoc');panderOptions("table.alignment.default", "left")
  

#doInstall <- T;toInstall <- c("downloader");if(doInstall){install.packages(toInstall)};lapply(toInstall, library, character.only = T)
#if (!require(downloader)) {install.packages("downloader"); require(downloader)}

#url <- 'http://www.fda.gov/downloads/Drugs/InformationOnDrugs/UCM054599.zip'
#download(url,dest="data.zip", mode = "wb") 
#unzip ("data.zip",exdir = ".")




product = read.delim(file='Product.txt',as.is=T)
regActDt = read.delim(file='RegActionDate.txt',as.is=T)

      #fix dates
      regActDt$ActionDate2 <- as.Date(regActDt$ActionDate)

#table3
appDoc = read.delim(file='AppDoc.txt',as.is=T)
#table 4
app = read.delim(file='application.txt',as.is=T)

#19k applications with 40k app documents 
  #for 31k products and 123k regulatory acct Dates


require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(data.table)  
## Loading required package: data.table
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, last
#because of the like command but dplyr now has a similar command

tta<- product %>% filter(tolower(activeingred) %like% 'limus')
pander(tta)
ApplNo ProductNo Form Dosage ProductMktStatus TECode ReferenceDrug drugname activeingred
21083 1 SOLUTION;ORAL 1MG/ML 1 1 RAPAMUNE SIROLIMUS
21110 1 TABLET;ORAL 1MG 1 AB 0 RAPAMUNE SIROLIMUS
21110 2 TABLET;ORAL 2MG 1 AB 1 RAPAMUNE SIROLIMUS
21110 3 TABLET;ORAL 5MG Federal Register determination that product was not discontinued or withdrawn for safety or efficacy reasons 3 0 RAPAMUNE SIROLIMUS
21110 4 TABLET;ORAL 0.5MG 1 AB 0 RAPAMUNE SIROLIMUS
21302 1 CREAM;TOPICAL 1% 1 1 ELIDEL PIMECROLIMUS
21560 1 TABLET;ORAL 0.25MG 1 0 ZORTRESS EVEROLIMUS
21560 2 TABLET;ORAL 0.5MG 1 0 ZORTRESS EVEROLIMUS
21560 3 TABLET;ORAL 0.75MG 1 1 ZORTRESS EVEROLIMUS
22088 1 SOLUTION;INTRAVENOUS 25MG/ML (25MG/ML) 1 1 TORISEL TEMSIROLIMUS
22334 1 TABLET;ORAL 5MG 1 0 AFINITOR EVEROLIMUS
22334 2 TABLET;ORAL 10MG 1 1 AFINITOR EVEROLIMUS
22334 3 TABLET;ORAL 2.5MG 1 0 AFINITOR EVEROLIMUS
22334 4 TABLET;ORAL 7.5MG 1 0 AFINITOR EVEROLIMUS
50708 1 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 PROGRAF TACROLIMUS
50708 2 CAPSULE;ORAL EQ 5MG BASE 1 AB 1 PROGRAF TACROLIMUS
50708 3 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 PROGRAF TACROLIMUS
50709 1 INJECTABLE;INJECTION EQ 5MG BASE/ML 1 1 PROGRAF TACROLIMUS
50777 1 OINTMENT;TOPICAL 0.03% 1 AB 1 PROTOPIC TACROLIMUS
50777 2 OINTMENT;TOPICAL 0.1% 1 AB 1 PROTOPIC TACROLIMUS
65461 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
65461 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
65461 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90402 1 CAPSULE;ORAL EQ 5MG BASE 3 0 TACROLIMUS TACROLIMUS
90509 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90509 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90509 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90596 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90596 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90596 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90687 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90687 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90687 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90802 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90802 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
90802 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
91195 1 CAPSULE;ORAL EQ 0.5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
91195 2 CAPSULE;ORAL EQ 1MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
91195 3 CAPSULE;ORAL EQ 5MG BASE 1 AB 0 TACROLIMUS TACROLIMUS
200744 1 OINTMENT;TOPICAL 0.03% 1 AB 0 TACROLIMUS TACROLIMUS
200744 2 OINTMENT;TOPICAL 0.1% 1 AB 0 TACROLIMUS TACROLIMUS
201578 1 TABLET;ORAL 1MG 1 AB 0 SIROLIMUS SIROLIMUS
201578 2 TABLET;ORAL 2MG 1 AB 0 SIROLIMUS SIROLIMUS
201676 1 TABLET; ORAL 0.5MG 1 0 SIROLIMUS SIROLIMUS
201676 2 TABLET; ORAL 1MG 4 0 SIROLIMUS SIROLIMUS
201676 3 TABLET;ORAL 0.5MG 1 AB 0 SIROLIMUS SIROLIMUS
203985 1 TABLET, FOR SUSPENSION;ORAL 2MG 1 0 AFINITOR DISPERZ EVEROLIMUS
203985 2 TABLET, FOR SUSPENSION;ORAL 3MG 1 0 AFINITOR DISPERZ EVEROLIMUS
203985 3 TABLET, FOR SUSPENSION;ORAL 5MG 1 1 AFINITOR DISPERZ EVEROLIMUS
204096 1 CAPSULE, EXTENDED RELEASE;ORAL EQ 0.5MG BASE 1 0 ASTAGRAF XL TACROLIMUS
204096 2 CAPSULE, EXTENDED RELEASE;ORAL EQ 1MG BASE 1 0 ASTAGRAF XL TACROLIMUS
204096 3 CAPSULE, EXTENDED RELEASE;ORAL EQ 5MG BASE 1 1 ASTAGRAF XL TACROLIMUS
206406 1 TABLET, EXTENDED RELEASE;ORAL 0.75MG 4 0 ENVARSUS XR TACROLIMUS
206406 2 TABLET, EXTENDED RELEASE;ORAL 1MG 4 0 ENVARSUS XR TACROLIMUS
206406 3 TABLET, EXTENDED RELEASE;ORAL 4MG 4 0 ENVARSUS XR TACROLIMUS
product %>% filter(tolower(activeingred) %like% 'vacizu')
##   ApplNo ProductNo              Form Dosage ProductMktStatus TECode
## 1 125085         1 VIAL; INTRAVENOUS  100MG                1       
## 2 125085         2 VIAL; INTRAVENOUS  400MG                1       
##   ReferenceDrug drugname activeingred
## 1             0  AVASTIN  BEVACIZUMAB
## 2             0  AVASTIN  BEVACIZUMAB
#iris %>% filter(tolower(Species) %like% 'nica')

tta<-product %>% filter(tolower(drugname) %like% 'ntyvi')
tta
##   ApplNo ProductNo                 Form Dosage ProductMktStatus TECode
## 1 125476         1 INJECTABLE;INJECTION  300MG                1       
##   ReferenceDrug drugname activeingred
## 1             0  ENTYVIO  VEDOLIZUMAB
#end of exploration




#next theme, what actions are taken
table(regActDt$ActionType)
## 
##     AP     TA 
## 121843   1973
#in what year
as.data.frame(table(format(regActDt$ActionDate2, "%Y")))
##    Var1 Freq
## 1  1939   12
## 2  1940   11
## 3  1941   14
## 4  1942   11
## 5  1943    6
## 6  1944    9
## 7  1945    5
## 8  1946   13
## 9  1947   11
## 10 1948   22
## 11 1949   11
## 12 1950   27
## 13 1951   23
## 14 1952   36
## 15 1953   53
## 16 1954   51
## 17 1955   55
## 18 1956   42
## 19 1957   67
## 20 1958   50
## 21 1959   60
## 22 1960   74
## 23 1961  102
## 24 1962   70
## 25 1963   96
## 26 1964  120
## 27 1965  125
## 28 1966  143
## 29 1967  158
## 30 1968  112
## 31 1969  148
## 32 1970  202
## 33 1971  250
## 34 1972  287
## 35 1973  289
## 36 1974  601
## 37 1975 1017
## 38 1976 1428
## 39 1977 1354
## 40 1978 1938
## 41 1979 1867
## 42 1980 1986
## 43 1981 2156
## 44 1982 2938
## 45 1983 2416
## 46 1984 2845
## 47 1985 3373
## 48 1986 3507
## 49 1987 3512
## 50 1988 4457
## 51 1989 3817
## 52 1990 2826
## 53 1991 3728
## 54 1992 3743
## 55 1993 3584
## 56 1994 3806
## 57 1995 3892
## 58 1996 4638
## 59 1997 4475
## 60 1998 5084
## 61 1999 4827
## 62 2000 5280
## 63 2001 5181
## 64 2002 5740
## 65 2003 1627
## 66 2004 1774
## 67 2005 1473
## 68 2006 1652
## 69 2007 1941
## 70 2008 1759
## 71 2009 1733
## 72 2010 1797
## 73 2011 2280
## 74 2012 2217
## 75 2013 3217
## 76 2014 3298
## 77 2015  262
#how many OTC medications there are?
#status
table(product$ProductMktStatus)
## 
##     1     2     3     4 
## 16653   629 13562   991
#(1=prescription, 2=OTC, 3=discontinued, 4=tentative approval) (Primary Key)
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
#629 OTC meds



#what ingredients they tend to have ?
ttc<-product %>% filter(ProductMktStatus==2) %>% select (i=activeingred) %>% distinct()
nrow(ttc)
## [1] 85
#85 rows for otc drugs

#Well.....there are semicolons
ttc
##                                                                     i
## 1                                            CHLORPHENIRAMINE MALEATE
## 2                                                       ACETAMINOPHEN
## 3                                                  MICONAZOLE NITRATE
## 4                                                        CLOTRIMAZOLE
## 5                                             CHLORHEXIDINE GLUCONATE
## 6                                                DOXYLAMINE SUCCINATE
## 7                                             PSEUDOEPHEDRINE SULFATE
## 8                   CHLORPHENIRAMINE MALEATE; PSEUDOEPHEDRINE SULFATE
## 9                                         OXYMETAZOLINE HYDROCHLORIDE
## 10                                                NICOTINE POLACRILEX
## 11                                        DEXTROMETHORPHAN POLISTIREX
## 12                                                   POTASSIUM IODIDE
## 13                                                        NONOXYNOL-9
## 14                          ALUMINUM HYDROXIDE; MAGNESIUM TRISILICATE
## 15                                          INSULIN RECOMBINANT HUMAN
## 16                            INSULIN SUSP ISOPHANE RECOMBINANT HUMAN
## 17                                                          IBUPROFEN
## 18                                                    POVIDONE-IODINE
## 19                                           LOPERAMIDE HYDROCHLORIDE
## 20                                                          MINOXIDIL
## 21                                                         LORATADINE
## 22                                LORATADINE; PSEUDOEPHEDRINE SULFATE
## 23 INSULIN RECOMBINANT HUMAN; INSULIN SUSP ISOPHANE RECOMBINANT HUMAN
## 24                           IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 25                                           CETIRIZINE HYDROCHLORIDE
## 26                                                    SODIUM CHLORIDE
## 27                                                         PERMETHRIN
## 28                                      PSEUDOEPHEDRINE HYDROCHLORIDE
## 29                                 AVOBENZONE; OCTINOXATE; OXYBENZONE
## 30                     NAPHAZOLINE HYDROCHLORIDE; PHENIRAMINE MALEATE
## 31                                                           NICOTINE
## 32                                                    NAPROXEN SODIUM
## 33                                         SODIUM FLUORIDE; TRICLOSAN
## 34                                                         CIMETIDINE
## 35                                                       KETOCONAZOLE
## 36                                                         FAMOTIDINE
## 37                                               BUTOCONAZOLE NITRATE
## 38                                            TRIAMCINOLONE ACETONIDE
## 39                                           RANITIDINE HYDROCHLORIDE
## 40                                                        BENTOQUATAM
## 41                                                         NIZATIDINE
## 42                              LOPERAMIDE HYDROCHLORIDE; SIMETHICONE
## 43                                                        TIOCONAZOLE
## 44          FEXOFENADINE HYDROCHLORIDE; PSEUDOEPHEDRINE HYDROCHLORIDE
## 45                                   ACETAMINOPHEN; ASPIRIN; CAFFEINE
## 46                         CHLORHEXIDINE GLUCONATE; ISOPROPYL ALCOHOL
## 47                                         FEXOFENADINE HYDROCHLORIDE
## 48                                                CLEMASTINE FUMARATE
## 49                                                          DOCOSANOL
## 50                 CALCIUM CARBONATE; FAMOTIDINE; MAGNESIUM HYDROXIDE
## 51                                          TERBINAFINE HYDROCHLORIDE
## 52                                                     LEVONORGESTREL
## 53                                                 KETOTIFEN FUMARATE
## 54                                   ALCOHOL; CHLORHEXIDINE GLUCONATE
## 55                     NAPROXEN SODIUM; PSEUDOEPHEDRINE HYDROCHLORIDE
## 56            CETIRIZINE HYDROCHLORIDE; PSEUDOEPHEDRINE HYDROCHLORIDE
## 57                                               OMEPRAZOLE MAGNESIUM
## 58                                                        GUAIFENESIN
## 59                                           BUTENAFINE HYDROCHLORIDE
## 60                           DIPHENHYDRAMINE HYDROCHLORIDE; IBUPROFEN
## 61                                 DIPHENHYDRAMINE CITRATE; IBUPROFEN
## 62 CHLORPHENIRAMINE MALEATE; IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 63                AVOBENZONE; ECAMSULE; OCTOCRYLENE; TITANIUM DIOXIDE
## 64                                  AVOBENZONE; ECAMSULE; OCTOCRYLENE
## 65                         GUAIFENESIN; PSEUDOEPHEDRINE HYDROCHLORIDE
## 66                               IODINE POVACRYLEX; ISOPROPYL ALCOHOL
## 67                         DEXTROMETHORPHAN HYDROBROMIDE; GUAIFENESIN
## 68                                                           ORLISTAT
## 69                                                        TERBINAFINE
## 70                                           POLYETHYLENE GLYCOL 3350
## 71                                         MENTHOL; METHYL SALICYLATE
## 72                                                         OMEPRAZOLE
## 73   CHLORPHENIRAMINE MALEATE; IBUPROFEN; PHENYLEPHRINE HYDROCHLORIDE
## 74                                     OMEPRAZOLE; SODIUM BICARBONATE
## 75                                                     PURIFIED WATER
## 76                                                       LANSOPRAZOLE
## 77                             IBUPROFEN; PHENYLEPHRINE HYDROCHLORIDE
## 78                                                    CROMOLYN SODIUM
## 79                DEXBROMPHENIRAMINE MALEATE; PSEUDOEPHEDRINE SULFATE
## 80                                                   IBUPROFEN SODIUM
## 81                                                         OXYBUTYNIN
## 82                                                            ASPIRIN
## 83                                             ESOMEPRAZOLE MAGNESIUM
## 84                     DIPHENHYDRAMINE HYDROCHLORIDE; NAPROXEN SODIUM
## 85                                             FLUTICASONE PROPIONATE
#has 2
ttc[24,]
## [1] "IBUPROFEN; PSEUDOEPHEDRINE HYDROCHLORIDE"
#let's split those
ttd<-strsplit(ttc$i,';')

#more readable version would be:  ttd<-strsplit(ttc$i,split=';')

length(ttd)
## [1] 85
#still 85 rows and semicolons
ttd[[24]]
## [1] "IBUPROFEN"                      " PSEUDOEPHEDRINE HYDROCHLORIDE"
length(ttd[[24]])
## [1] 2
length(ttd[[25]])
## [1] 1
#little bit of advanced R, don't listen if you want your life simple
lapply(ttd,length)
## [[1]]
## [1] 1
## 
## [[2]]
## [1] 1
## 
## [[3]]
## [1] 1
## 
## [[4]]
## [1] 1
## 
## [[5]]
## [1] 1
## 
## [[6]]
## [1] 1
## 
## [[7]]
## [1] 1
## 
## [[8]]
## [1] 2
## 
## [[9]]
## [1] 1
## 
## [[10]]
## [1] 1
## 
## [[11]]
## [1] 1
## 
## [[12]]
## [1] 1
## 
## [[13]]
## [1] 1
## 
## [[14]]
## [1] 2
## 
## [[15]]
## [1] 1
## 
## [[16]]
## [1] 1
## 
## [[17]]
## [1] 1
## 
## [[18]]
## [1] 1
## 
## [[19]]
## [1] 1
## 
## [[20]]
## [1] 1
## 
## [[21]]
## [1] 1
## 
## [[22]]
## [1] 2
## 
## [[23]]
## [1] 2
## 
## [[24]]
## [1] 2
## 
## [[25]]
## [1] 1
## 
## [[26]]
## [1] 1
## 
## [[27]]
## [1] 1
## 
## [[28]]
## [1] 1
## 
## [[29]]
## [1] 3
## 
## [[30]]
## [1] 2
## 
## [[31]]
## [1] 1
## 
## [[32]]
## [1] 1
## 
## [[33]]
## [1] 2
## 
## [[34]]
## [1] 1
## 
## [[35]]
## [1] 1
## 
## [[36]]
## [1] 1
## 
## [[37]]
## [1] 1
## 
## [[38]]
## [1] 1
## 
## [[39]]
## [1] 1
## 
## [[40]]
## [1] 1
## 
## [[41]]
## [1] 1
## 
## [[42]]
## [1] 2
## 
## [[43]]
## [1] 1
## 
## [[44]]
## [1] 2
## 
## [[45]]
## [1] 3
## 
## [[46]]
## [1] 2
## 
## [[47]]
## [1] 1
## 
## [[48]]
## [1] 1
## 
## [[49]]
## [1] 1
## 
## [[50]]
## [1] 3
## 
## [[51]]
## [1] 1
## 
## [[52]]
## [1] 1
## 
## [[53]]
## [1] 1
## 
## [[54]]
## [1] 2
## 
## [[55]]
## [1] 2
## 
## [[56]]
## [1] 2
## 
## [[57]]
## [1] 1
## 
## [[58]]
## [1] 1
## 
## [[59]]
## [1] 1
## 
## [[60]]
## [1] 2
## 
## [[61]]
## [1] 2
## 
## [[62]]
## [1] 3
## 
## [[63]]
## [1] 4
## 
## [[64]]
## [1] 3
## 
## [[65]]
## [1] 2
## 
## [[66]]
## [1] 2
## 
## [[67]]
## [1] 2
## 
## [[68]]
## [1] 1
## 
## [[69]]
## [1] 1
## 
## [[70]]
## [1] 1
## 
## [[71]]
## [1] 2
## 
## [[72]]
## [1] 1
## 
## [[73]]
## [1] 3
## 
## [[74]]
## [1] 2
## 
## [[75]]
## [1] 1
## 
## [[76]]
## [1] 1
## 
## [[77]]
## [1] 2
## 
## [[78]]
## [1] 1
## 
## [[79]]
## [1] 2
## 
## [[80]]
## [1] 1
## 
## [[81]]
## [1] 1
## 
## [[82]]
## [1] 1
## 
## [[83]]
## [1] 1
## 
## [[84]]
## [1] 2
## 
## [[85]]
## [1] 1
sapply(ttd,length) #sapply is a user friendly version and wrapper for lapply that returns a vector
##  [1] 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1 1 1 1 1 1 1 2 2 2 1 1 1 1 3 2 1 1 2 1 1
## [36] 1 1 1 1 1 1 2 1 2 3 2 1 1 1 3 1 1 1 2 2 2 1 1 1 2 2 3 4 3 2 2 2 1 1 1
## [71] 2 1 3 2 1 1 2 1 2 1 1 1 1 2 1
#final step for spliting
tte<-unlist(ttd)
length(tte)
## [1] 123
head(tte)
## [1] "CHLORPHENIRAMINE MALEATE" "ACETAMINOPHEN"           
## [3] "MICONAZOLE NITRATE"       "CLOTRIMAZOLE"            
## [5] "CHLORHEXIDINE GLUCONATE"  "DOXYLAMINE SUCCINATE"
#end demo here




#Next problem

mega<-left_join(product,regActDt) 
## Joining by: "ApplNo"
#only column in common is ApplNo
#let's not explore the full complexity here

#entyvio example
ttg<- mega %>% filter(tolower(drugname) %like% 'entyvio')
ttg
##   ApplNo ProductNo                 Form Dosage ProductMktStatus TECode
## 1 125476         1 INJECTABLE;INJECTION  300MG                1       
##   ReferenceDrug drugname activeingred ActionType InDocTypeSeqNo
## 1             0  ENTYVIO  VEDOLIZUMAB         AP              0
##   DuplicateCounter          ActionDate DocType ActionDate2
## 1                0 2014-05-20 00:00:00       N  2014-05-20
#we know more compared to 
tta
##   ApplNo ProductNo                 Form Dosage ProductMktStatus TECode
## 1 125476         1 INJECTABLE;INJECTION  300MG                1       
##   ReferenceDrug drugname activeingred
## 1             0  ENTYVIO  VEDOLIZUMAB
ncol(ttg);ncol(tta)
## [1] 15
## [1] 9
#we have more columns, we know that the last action date was 2014 
#and DocType was N probably new application

#using example for the drug entyvio




#more joining with appDoc ...





#end demo here







#notes
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm

#other zip file    (NDC)
#http://www.fda.gov/downloads/Drugs/DevelopmentApprovalProcess/UCM070838.zip

code export

cat(readr::read_file('drugsAtFDA-demoA.Rmd'))
---
title: "DrugAtFDA"
output: 
  html_document: 
    toc: yes
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

See this website for database explanation

http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm

```{r cars}

library(downloader)
library(pander);panderOptions('table.split.table',Inf);options(knitr.table.format = 'pandoc');panderOptions("table.alignment.default", "left")
  

#doInstall <- T;toInstall <- c("downloader");if(doInstall){install.packages(toInstall)};lapply(toInstall, library, character.only = T)
#if (!require(downloader)) {install.packages("downloader"); require(downloader)}

#url <- 'http://www.fda.gov/downloads/Drugs/InformationOnDrugs/UCM054599.zip'
#download(url,dest="data.zip", mode = "wb") 
#unzip ("data.zip",exdir = ".")




product = read.delim(file='Product.txt',as.is=T)
regActDt = read.delim(file='RegActionDate.txt',as.is=T)

      #fix dates
      regActDt$ActionDate2 <- as.Date(regActDt$ActionDate)

#table3
appDoc = read.delim(file='AppDoc.txt',as.is=T)
#table 4
app = read.delim(file='application.txt',as.is=T)

#19k applications with 40k app documents 
  #for 31k products and 123k regulatory acct Dates


require(dplyr)

require(data.table)  
#because of the like command but dplyr now has a similar command

tta<- product %>% filter(tolower(activeingred) %like% 'limus')
pander(tta)

product %>% filter(tolower(activeingred) %like% 'vacizu')
#iris %>% filter(tolower(Species) %like% 'nica')

tta<-product %>% filter(tolower(drugname) %like% 'ntyvi')
tta


#end of exploration




#next theme, what actions are taken
table(regActDt$ActionType)

#in what year
as.data.frame(table(format(regActDt$ActionDate2, "%Y")))


#how many OTC medications there are?
#status
table(product$ProductMktStatus)
#(1=prescription, 2=OTC, 3=discontinued, 4=tentative approval) (Primary Key)
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm079750.htm
#629 OTC meds



#what ingredients they tend to have ?
ttc<-product %>% filter(ProductMktStatus==2) %>% select (i=activeingred) %>% distinct()
nrow(ttc)
#85 rows for otc drugs

#Well.....there are semicolons
ttc
#has 2
ttc[24,]


#let's split those
ttd<-strsplit(ttc$i,';')

#more readable version would be:  ttd<-strsplit(ttc$i,split=';')

length(ttd)
#still 85 rows and semicolons
ttd[[24]]

length(ttd[[24]])
length(ttd[[25]])

#little bit of advanced R, don't listen if you want your life simple
lapply(ttd,length)
sapply(ttd,length) #sapply is a user friendly version and wrapper for lapply that returns a vector

#final step for spliting
tte<-unlist(ttd)
length(tte)
head(tte)








#end demo here




#Next problem

mega<-left_join(product,regActDt) 
#only column in common is ApplNo
#let's not explore the full complexity here

#entyvio example
ttg<- mega %>% filter(tolower(drugname) %like% 'entyvio')
ttg

#we know more compared to 
tta

ncol(ttg);ncol(tta)
#we have more columns, we know that the last action date was 2014 
#and DocType was N probably new application

#using example for the drug entyvio




#more joining with appDoc ...





#end demo here







#notes
#http://www.fda.gov/Drugs/InformationOnDrugs/ucm142438.htm

#other zip file    (NDC)
#http://www.fda.gov/downloads/Drugs/DevelopmentApprovalProcess/UCM070838.zip


```

#code export
```{r comment=''}
cat(readr::read_file('drugsAtFDA-demoA.Rmd'))
```

## end

end