This is a clinical case study. We’ll extract the data from different tables to answers all the questions.

We manipulate data using “Dplyr” package.

library("dplyr")

We load all the tables.

ae <- tbl_df(ae)
ae
## Source: local data frame [151 x 10]
## 
##    Patient_Id           AE Date_of_Visit Serious Start_Date Stop.Date
## 1        T004     Vomiting      05-12-06     Yes   05-12-06        NA
## 2        T028        Fever      23-05-07      No   23-05-07        NA
## 3        T018       Nausea      07-07-05     Yes   07-07-05        NA
## 4        T038         Acne      24-03-06     Yes   24-03-06        NA
## 5        T042     Headache      15-02-06      No   15-02-06        NA
## 6        T027     Vomiting      05-12-06      No   05-12-06        NA
## 7        T035     Epilepsy      23-05-07      No   23-05-07        NA
## 8        T052        Ulser      07-07-05     Yes   07-07-05        NA
## 9        T093 Stomach Ache      24-03-06      No   24-03-06        NA
## 10       T085         Acne      30-05-06      No   30-05-06        NA
## ..        ...          ...           ...     ...        ...       ...
## Variables not shown: Severity (fctr), Relationship_to_TestArticle (fctr),
##   Outcome (fctr), Action (fctr)
cpp <- tbl_df(cpp)
cpp
## Source: local data frame [20 x 5]
## 
##      Id Visit Date_of_Conclusion Screen_Failure
## 1  T004    NA           27-12-06            Yes
## 2  T038    NA           15-05-07            Yes
## 3  T044    NA           06-06-06             No
## 4  T052    NA           21-03-05            Yes
## 5  T027    NA           28-06-05             No
## 6  T077    NA           17-08-06             No
## 7  T081    NA           18-10-06             No
## 8  T099    NA           08-04-05            Yes
## 9  T105    NA           27-03-06            Yes
## 10 T118    NA           06-07-06             No
## 11 T122    NA           16-02-06             No
## 12 T142    NA           22-07-06            Yes
## 13 T169    NA           28-09-06            Yes
## 14 T183    NA           19-04-05            Yes
## 15 T199    NA           26-07-06             No
## 16 T164    NA           21-03-05             No
## 17 T053    NA           27-03-06            Yes
## 18 T070    NA           06-07-06             No
## 19 T087    NA           18-10-06            Yes
## 20 T028    NA           29-04-05             No
## Variables not shown: Reason_for_Conclusion_of_Subjectparticipation (fctr)
demo <- tbl_df(demo)
demo
## Source: local data frame [1,075 x 6]
## 
##      Id Visit Date_of_Visit Date_of_Birth Sex      Race
## 1  T001     9      05-12-06      04-12-57   F  Japanese
## 2  T002     6      23-05-07      23-02-66   M     Asian
## 3  T003     1      07-07-05      14-08-59   M American 
## 4  T004     4      24-03-06      17-09-74   F     Black
## 5  T005     2      15-02-06      28-10-76   F     Asian
## 6  T006     9      05-12-06      18-05-79   F  Japanese
## 7  T007    10      23-05-07      25-05-65   M     Black
## 8  T008     4      07-07-05      21-03-58   F  American
## 9  T009     4      24-03-06      23-06-73   M  Japanese
## 10 T010     5      30-05-06      30-04-75   M  American
## ..  ...   ...           ...           ... ...       ...
hos <- tbl_df(hos)
hos
## Source: local data frame [120 x 5]
## 
##    Patient_Id Admission_Date Discharge_Date Continuing          Reason
## 1        T028       23-05-07                       Yes Disease Related
## 2        T038       24-03-06                       Yes Disease Related
## 3        T077       26-06-06      5-20-2007         No   Adverse Event
## 4        T106       24-11-06      3-17-2007         No   Adverse Event
## 5        T118       15-03-06     12-28-2006         No Disease Related
## 6        T142       15-02-06                       Yes   Adverse Event
## 7        T134       05-12-06      6-22-2007         No           Other
## 8        T148       23-05-07      3-12-2007         No Disease Related
## 9        T174       22-07-06                       Yes Disease Related
## 10       T193       13-09-06                       Yes   Adverse Event
## ..        ...            ...            ...        ...             ...
sm <- tbl_df(sm)
sm
## Source: local data frame [1,000 x 5]
## 
##    Patient_Id Dose         Route Frequency                 Outcome
## 1        T001  269          Oral         3          No Improvement
## 2        T002  343   Intravenous         5      Condition Worsened
## 3        T003  198  Subcutaneous         3      Slight Improvement
## 4        T004  298    Inhalation         2 Significant Improvement
## 5        T005  323         Nasal         6      Slight Improvement
## 6        T006  475          Oral         6      Condition Worsened
## 7        T007  162 Intraarterial         4 Significant Improvement
## 8        T008  421  Subcutaneous         5          No Improvement
## 9        T009  476         Nasal         4      Condition Worsened
## 10       T010  165    Inhalation         5      Slight Improvement
## ..        ...  ...           ...       ...                     ...
vss <- tbl_df(vss)
vss
## Source: local data frame [1,137 x 9]
## 
##    Patient_Id Date_of_Visit Height Unit.cm.in. Weight Unit.kg.lbsbs.
## 1        T001      05-12-06    146          cm     58             kg
## 2        T002      23-05-07     70          in     54             kg
## 3        T003      07-07-05    145          cm    104            lbs
## 4        T004      24-03-06     55          in     54             kg
## 5        T005      15-02-06    141          cm     45             kg
## 6        T006      05-12-06    174          cm    140            lbs
## 7        T007      23-05-07    158          cm     59             kg
## 8        T008      07-07-05    187          cm    150            lbs
## 9        T009      24-03-06     64          in     48             kg
## 10       T010      30-05-06     77          in     99             kg
## ..        ...           ...    ...         ...    ...            ...
## Variables not shown: Systolic_BP (int), Diastolic_BP (int), Pulse (int)

1.List of all side effects related to test article.

table(ae$AE)
## 
##                      Acne     Epilepsy        Fever     Headache 
##            1           28           20           15           27 
##       Nausea Stomach Ache        Ulser     Vomiting 
##           15           16           10           19

2.List of all patients suffering from fever as an AE (Related to test Article)

ae %>%
filter(AE == "Fever")
## Source: local data frame [15 x 10]
## 
##    Patient_Id    AE Date_of_Visit Serious Start_Date Stop.Date Severity
## 1        T028 Fever      23-05-07      No   23-05-07        NA     Mild
## 2        T106 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 3        T133 Fever      15-03-06     Yes   15-03-06        NA   Severe
## 4        T205 Fever      23-05-07      No   23-05-07        NA     Mild
## 5        T299 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 6        T418 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 7        T579 Fever      15-03-06     Yes   15-03-06        NA   Severe
## 8        T594 Fever      23-05-07      No   23-05-07        NA     Mild
## 9        T614 Fever      15-03-06     Yes   15-03-06        NA   Severe
## 10       T629 Fever      23-05-07      No   23-05-07        NA     Mild
## 11       T719 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 12       T819 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 13       T907 Fever      24-11-06     Yes   24-11-06        NA   Severe
## 14       T973 Fever      15-03-06     Yes   15-03-06        NA   Severe
## 15       T984 Fever      23-05-07      No   23-05-07        NA     Mild
## Variables not shown: Relationship_to_TestArticle (fctr), Outcome (fctr),
##   Action (fctr)

3.Demographic details of all expired patients.

expired <- ae %>%
  select(Id = Patient_Id, Outcome) %>%
  filter(Outcome == "Death")
demographic <- demo %>%
  select(Id, Race) %>%
  inner_join(expired)
## Joining by: "Id"
demographic
## Source: local data frame [30 x 3]
## 
##      Id     Race Outcome
## 1  T048    Asian   Death
## 2  T052 Japanese   Death
## 3  T106    Black   Death
## 4  T118    Black   Death
## 5  T133 Japanese   Death
## 6  T134    Black   Death
## 7  T231    Asian   Death
## 8  T299    Black   Death
## 9  T300 Japanese   Death
## 10 T377 Japanese   Death
## 11 T418 Japanese   Death
## 12 T425    Black   Death
## 13 T482    Black   Death
## 14 T549    Asian   Death
## 15 T579 Japanese   Death
## 16 T584 American   Death
## 17 T614 American   Death
## 18 T626 American   Death
## 19 T673 American   Death
## 20 T719 American   Death
## 21 T726    Black   Death
## 22 T782    Asian   Death
## 23 T819    Black   Death
## 24 T826 Japanese   Death
## 25 T853 American   Death
## 26 T907    Asian   Death
## 27 T916 Japanese   Death
## 28 T940 American   Death
## 29 T973 Japanese   Death
## 30 T977    Asian   Death

4.Chance of occurrence of Acne as an AE

ae %>%
  filter(AE == "Acne")
## Source: local data frame [28 x 10]
## 
##    Patient_Id   AE Date_of_Visit Serious Start_Date Stop.Date Severity
## 1        T038 Acne      24-03-06     Yes   24-03-06        NA   Severe
## 2        T085 Acne      30-05-06      No   30-05-06        NA     Mild
## 3        T115 Acne      26-02-06      No   26-02-06        NA Moderate
## 4        T138 Acne      11-03-06      No   11-03-06        NA     Mild
## 5        T163 Acne      30-05-06      No   30-05-06        NA     Mild
## 6        T174 Acne      22-07-06      No   22-07-06        NA Moderate
## 7        T211 Acne      24-03-06     Yes   24-03-06        NA   Severe
## 8        T255 Acne      30-05-06      No   30-05-06        NA     Mild
## 9        T308 Acne      26-02-06      No   26-02-06        NA Moderate
## 10       T352 Acne      11-03-06      No   11-03-06        NA     Mild
## 11       T433 Acne      26-02-06      No   26-02-06        NA Moderate
## 12       T469 Acne      11-03-06      No   11-03-06        NA     Mild
## 13       T511 Acne      30-05-06      No   30-05-06        NA     Mild
## 14       T538 Acne      22-07-06      No   22-07-06        NA Moderate
## 15       T603 Acne      24-03-06     Yes   24-03-06        NA   Severe
## 16       T642 Acne      24-03-06     Yes   24-03-06        NA   Severe
## 17       T693 Acne      30-05-06      No   30-05-06        NA     Mild
## 18       T739 Acne      26-02-06      No   26-02-06        NA Moderate
## 19       T762 Acne      11-03-06      No   11-03-06        NA     Mild
## 20       T830 Acne      26-02-06      No   26-02-06        NA Moderate
## 21       T843 Acne      11-03-06      No   11-03-06        NA     Mild
## 22       T870 Acne      30-05-06      No   30-05-06        NA     Mild
## 23       T881 Acne      22-07-06      No   22-07-06        NA Moderate
## 24       T893 Acne      30-05-06      No   30-05-06        NA     Mild
## 25       T911 Acne      26-02-06      No   26-02-06        NA Moderate
## 26       T931 Acne      11-03-06      No   11-03-06        NA     Mild
## 27       T993 Acne      24-03-06     Yes   24-03-06        NA   Severe
## 28       T566 Acne      30-05-06      No   30-05-06        NA     Mild
## Variables not shown: Relationship_to_TestArticle (fctr), Outcome (fctr),
##   Action (fctr)
28/151
## [1] 0.1854

5.List of patients who are withdrawn from study due to AE not related to Test article.

table(ae$Action)
## 
##                                                            Hospitalized 
##                                   8                                  47 
## Primary Reason For Study Withdrawal    Temporarily Stopped Test Article 
##                                  26                                  55 
##                Withdrawn From Study 
##                                  15
ae %>%
  select(Patient_Id, Action) %>%
  filter(Action == "Withdrawn From Study")
## Source: local data frame [15 x 2]
## 
##    Patient_Id               Action
## 1        T035 Withdrawn From Study
## 2        T068 Withdrawn From Study
## 3        T228 Withdrawn From Study
## 4        T273 Withdrawn From Study
## 5        T579 Withdrawn From Study
## 6        T584 Withdrawn From Study
## 7        T614 Withdrawn From Study
## 8        T626 Withdrawn From Study
## 9        T661 Withdrawn From Study
## 10       T705 Withdrawn From Study
## 11       T900 Withdrawn From Study
## 12       T973 Withdrawn From Study
## 13       T977 Withdrawn From Study
## 14       T534 Withdrawn From Study
## 15       T328 Withdrawn From Study

6.List of patients who face a life threatening AE and corresponding action taken.

ae %>%
  select(Patient_Id, Severity, Action) %>%
  filter(Severity == "Life Threatening")
## Source: local data frame [17 x 3]
## 
##    Patient_Id         Severity                           Action
## 1        T052 Life Threatening                                 
## 2        T134 Life Threatening                                 
## 3        T193 Life Threatening                     Hospitalized
## 4        T048 Life Threatening                                 
## 5        T231 Life Threatening Temporarily Stopped Test Article
## 6        T377 Life Threatening                                 
## 7        T482 Life Threatening                     Hospitalized
## 8        T558 Life Threatening                     Hospitalized
## 9        T584 Life Threatening             Withdrawn From Study
## 10       T626 Life Threatening             Withdrawn From Study
## 11       T673 Life Threatening Temporarily Stopped Test Article
## 12       T782 Life Threatening                     Hospitalized
## 13       T853 Life Threatening                     Hospitalized
## 14       T940 Life Threatening                     Hospitalized
## 15       T960 Life Threatening                     Hospitalized
## 16       T977 Life Threatening             Withdrawn From Study
## 17       T549 Life Threatening Temporarily Stopped Test Article

7.Min, max and average age of patients facing Ulser as an AE

ulser <- ae %>%
  select(Id = Patient_Id, AE) %>%
  filter(AE == "Ulser")
  
ulser
## Source: local data frame [10 x 2]
## 
##      Id    AE
## 1  T052 Ulser
## 2  T150 Ulser
## 3  T231 Ulser
## 4  T390 Ulser
## 5  T505 Ulser
## 6  T673 Ulser
## 7  T800 Ulser
## 8  T861 Ulser
## 9  T955 Ulser
## 10 T549 Ulser
demo_age <- demo %>%
  select(Id, dob = Date_of_Birth)
demo_age
## Source: local data frame [1,075 x 2]
## 
##      Id      dob
## 1  T001 04-12-57
## 2  T002 23-02-66
## 3  T003 14-08-59
## 4  T004 17-09-74
## 5  T005 28-10-76
## 6  T006 18-05-79
## 7  T007 25-05-65
## 8  T008 21-03-58
## 9  T009 23-06-73
## 10 T010 30-04-75
## ..  ...      ...
demo_age$dob <- as.Date(format(as.Date(demo_age$dob, format = "%d-%m-%y"), "%d-%m-19%y"), "%d-%m-%Y")
today <- rep(Sys.Date(), 1075)
demo2_age <- cbind(demo_age, today)
age <- ulser %>%
  inner_join(demo2_age)
## Joining by: "Id"
age
## Source: local data frame [10 x 4]
## 
##      Id    AE        dob      today
## 1  T052 Ulser 1975-04-30 2014-12-31
## 2  T150 Ulser 1974-09-17 2014-12-31
## 3  T231 Ulser 1970-08-27 2014-12-31
## 4  T390 Ulser 1973-06-13 2014-12-31
## 5  T505 Ulser 1973-06-13 2014-12-31
## 6  T549 Ulser 1979-05-28 2014-12-31
## 7  T673 Ulser 1969-04-21 2014-12-31
## 8  T800 Ulser 1974-09-17 2014-12-31
## 9  T861 Ulser 1966-01-08 2014-12-31
## 10 T955 Ulser 1966-01-08 2014-12-31
age$Age <- age$today - age$dob
age$Age <- as.numeric(age$Age)
age$Age <- age$Age/365
age$Age <- round(age$Age, 0)
age
## Source: local data frame [10 x 5]
## 
##      Id    AE        dob      today Age
## 1  T052 Ulser 1975-04-30 2014-12-31  40
## 2  T150 Ulser 1974-09-17 2014-12-31  40
## 3  T231 Ulser 1970-08-27 2014-12-31  44
## 4  T390 Ulser 1973-06-13 2014-12-31  42
## 5  T505 Ulser 1973-06-13 2014-12-31  42
## 6  T549 Ulser 1979-05-28 2014-12-31  36
## 7  T673 Ulser 1969-04-21 2014-12-31  46
## 8  T800 Ulser 1974-09-17 2014-12-31  40
## 9  T861 Ulser 1966-01-08 2014-12-31  49
## 10 T955 Ulser 1966-01-08 2014-12-31  49
summary(age$Age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    36.0    40.0    42.0    42.8    45.5    49.0

8.A list with Systolic and Diastolic pressure, Weight of patients died of AE

list <- vss %>%
  select(Id = Patient_Id, Systolic_BP, Diastolic_BP, Weight)
  
ae %>%
  select(Id = Patient_Id, Outcome) %>%
  filter(Outcome == "Death") %>%
  inner_join(list)
## Joining by: "Id"
## Source: local data frame [30 x 5]
## 
##      Id Outcome Systolic_BP Diastolic_BP Weight
## 1  T048   Death         200          110    150
## 2  T052   Death         112           60    128
## 3  T106   Death         154          105    128
## 4  T118   Death         238          109    128
## 5  T133   Death         177          100    126
## 6  T134   Death          94           99     60
## 7  T231   Death         204           51    109
## 8  T299   Death         204          110     55
## 9  T300   Death         238          100     60
## 10 T377   Death         166          101     48
## 11 T418   Death         158           81     52
## 12 T425   Death         227           87     70
## 13 T482   Death         227           51     66
## 14 T549   Death         109          101     54
## 15 T579   Death         199           89    148
## 16 T584   Death         255          104     65
## 17 T614   Death         196           92    128
## 18 T626   Death         115           91     52
## 19 T673   Death         220           71    128
## 20 T719   Death         217          110     62
## 21 T726   Death         138           96     53
## 22 T782   Death         178           89     99
## 23 T819   Death         198           78     58
## 24 T826   Death         177           78     46
## 25 T853   Death         223           64     54
## 26 T907   Death          98          108    148
## 27 T916   Death         257           87     65
## 28 T940   Death          94          109     52
## 29 T973   Death         136           64     63
## 30 T977   Death         200          108    117

9.Find the Dosage at which the condition of the patient worsened

sm %>%
  select(Patient_Id, Dose, Outcome) %>%
  filter(Outcome == "Condition Worsened")
## Source: local data frame [367 x 3]
## 
##    Patient_Id Dose            Outcome
## 1        T002  343 Condition Worsened
## 2        T006  475 Condition Worsened
## 3        T009  476 Condition Worsened
## 4        T013  455 Condition Worsened
## 5        T015  169 Condition Worsened
## 6        T016  479 Condition Worsened
## 7        T019  306 Condition Worsened
## 8        T022  477 Condition Worsened
## 9        T027  161 Condition Worsened
## 10       T029  137 Condition Worsened
## ..        ...  ...                ...

10.Find the List of best route, dosage and Frequency of the medication

sm %>%
  select(Patient_Id, Dose, Route, Frequency, Outcome) %>%
  filter(Outcome == "Significant Improvement")
## Source: local data frame [248 x 5]
## 
##    Patient_Id Dose         Route Frequency                 Outcome
## 1        T004  298    Inhalation         2 Significant Improvement
## 2        T007  162 Intraarterial         4 Significant Improvement
## 3        T012  256         Nasal         2 Significant Improvement
## 4        T017  185   Intravenous         1 Significant Improvement
## 5        T021  386    Inhalation         5 Significant Improvement
## 6        T024  319   Intravenous         3 Significant Improvement
## 7        T026  223         Nasal         6 Significant Improvement
## 8        T031  457    Inhalation         1 Significant Improvement
## 9        T035  464         Nasal         4 Significant Improvement
## 10       T039  201         Nasal         6 Significant Improvement
## ..        ...  ...           ...       ...                     ...

11.Find the average no of days spent in hospital for hospitalized patients

hos1 <- hos %>%
  select(Id = Patient_Id, Adm_date = Admission_Date, Dis_date = Discharge_Date)
hos1$Adm_date <- as.Date(hos1$Adm_date, format = "%d-%m-%y")
hos1$Dis_date <- as.Date(hos1$Dis_date, format = "%m-%d-%Y")
hos1$Dis_date[is.na(hos1$Dis_date)] <- Sys.Date()
hos2 <- na.omit(hos1)
hos2$Days <- hos2$Dis_date - hos2$Adm_date
hos2$Days <- as.numeric(hos2$Days)
mean(hos2$Days)
## [1] 1435

12.Report of patients who didn’t show any improvement (should have race, age, sysbp, diasbp,height, weight)

race <- demo %>%
  select(Id, Race)

stat <- vss %>%
  select(Id = Patient_Id, Height, Weight, Systolic_BP, Diastolic_BP)

report <- sm %>%
  select(Id = Patient_Id, Outcome) %>%
  filter(Outcome == "No Improvement") %>%
  inner_join(race) %>%
  inner_join(stat) %>%
  inner_join(demo2_age)
## Joining by: "Id"
## Joining by: "Id"
## Joining by: "Id"
report$Age <- report$today - report$dob
report$Age <- as.numeric(report$Age)
report$Age <- report$Age/365
report$Age <- round(report$Age, 0)
report <- report %>%
  select(Id:Diastolic_BP, Age)
report
## Source: local data frame [224 x 8]
## 
##      Id        Outcome     Race Height Weight Systolic_BP Diastolic_BP Age
## 1  T001 No Improvement Japanese    146     58         153           85  57
## 2  T008 No Improvement American    187    150         139           68  57
## 3  T011 No Improvement    Black    157     53         108           54  52
## 4  T014 No Improvement American    163     66         121           61  49
## 5  T020 No Improvement Japanese     73     56          95           54  36
## 6  T025 No Improvement    Asian    170     55         176           86  53
## 7  T028 No Improvement American    160    126         228           67  60
## 8  T034 No Improvement American    172    104         251          109  37
## 9  T036 No Improvement    Asian    171     58         182           86  46
## 10 T043 No Improvement American    130     69         170           78  57
## ..  ...            ...      ...    ...    ...         ...          ... ...

13.Demographic Details of all the screen failure patients

cpp %>%
  select(Id, Screen_Failure) %>%
  filter(Screen_Failure == "Yes") %>%
  inner_join(demo)
## Joining by: "Id"
## Source: local data frame [10 x 7]
## 
##      Id Screen_Failure Visit Date_of_Visit Date_of_Birth Sex     Race
## 1  T004            Yes     4      24-03-06      17-09-74   F    Black
## 2  T038            Yes    10      15-02-06      14-08-59   F Japanese
## 3  T052            Yes     9      15-03-06      30-04-75   F Japanese
## 4  T053            Yes    10      05-12-06      22-07-62   M    Black
## 5  T087            Yes     4      05-12-06      25-05-75   F Japanese
## 6  T099            Yes     2      05-12-06      03-06-73   F American
## 7  T105            Yes    10      23-05-07      27-08-70   F American
## 8  T142            Yes     4      07-07-05      20-04-75   M    Asian
## 9  T169            Yes     4      15-02-06      13-06-73   F Japanese
## 10 T183            Yes     1      22-07-06      27-04-75   F Japanese

14.List of patients who are continued to be hospitalized with pulserate above avg pulse rate of all patients

mean(vss$Pulse, na.rm = T)
## [1] 72.62
pulse <- vss %>%
  select(Id = Patient_Id, Pulse) %>%
  filter(Pulse > 73)
hos %>%
  select(Id = Patient_Id, Continuing) %>%
  filter(Continuing == "Yes") %>%
  inner_join(pulse)
## Joining by: "Id"
## Source: local data frame [11 x 3]
## 
##      Id Continuing Pulse
## 1  T174        Yes    91
## 2  T193        Yes    77
## 3  T205        Yes    83
## 4  T211        Yes    75
## 5  T594        Yes    83
## 6  T629        Yes    84
## 7  T642        Yes    88
## 8  T779        Yes    79
## 9  T937        Yes    75
## 10 T960        Yes    84
## 11 T993        Yes    81