This is a clinical case study. We’ll extract the data from different tables to answers all the questions.
We manipulate data using “Dplyr” package.
library("dplyr")
We load all the tables.
ae <- tbl_df(ae)
ae
## Source: local data frame [151 x 10]
##
## Patient_Id AE Date_of_Visit Serious Start_Date Stop.Date
## 1 T004 Vomiting 05-12-06 Yes 05-12-06 NA
## 2 T028 Fever 23-05-07 No 23-05-07 NA
## 3 T018 Nausea 07-07-05 Yes 07-07-05 NA
## 4 T038 Acne 24-03-06 Yes 24-03-06 NA
## 5 T042 Headache 15-02-06 No 15-02-06 NA
## 6 T027 Vomiting 05-12-06 No 05-12-06 NA
## 7 T035 Epilepsy 23-05-07 No 23-05-07 NA
## 8 T052 Ulser 07-07-05 Yes 07-07-05 NA
## 9 T093 Stomach Ache 24-03-06 No 24-03-06 NA
## 10 T085 Acne 30-05-06 No 30-05-06 NA
## .. ... ... ... ... ... ...
## Variables not shown: Severity (fctr), Relationship_to_TestArticle (fctr),
## Outcome (fctr), Action (fctr)
cpp <- tbl_df(cpp)
cpp
## Source: local data frame [20 x 5]
##
## Id Visit Date_of_Conclusion Screen_Failure
## 1 T004 NA 27-12-06 Yes
## 2 T038 NA 15-05-07 Yes
## 3 T044 NA 06-06-06 No
## 4 T052 NA 21-03-05 Yes
## 5 T027 NA 28-06-05 No
## 6 T077 NA 17-08-06 No
## 7 T081 NA 18-10-06 No
## 8 T099 NA 08-04-05 Yes
## 9 T105 NA 27-03-06 Yes
## 10 T118 NA 06-07-06 No
## 11 T122 NA 16-02-06 No
## 12 T142 NA 22-07-06 Yes
## 13 T169 NA 28-09-06 Yes
## 14 T183 NA 19-04-05 Yes
## 15 T199 NA 26-07-06 No
## 16 T164 NA 21-03-05 No
## 17 T053 NA 27-03-06 Yes
## 18 T070 NA 06-07-06 No
## 19 T087 NA 18-10-06 Yes
## 20 T028 NA 29-04-05 No
## Variables not shown: Reason_for_Conclusion_of_Subjectparticipation (fctr)
demo <- tbl_df(demo)
demo
## Source: local data frame [1,075 x 6]
##
## Id Visit Date_of_Visit Date_of_Birth Sex Race
## 1 T001 9 05-12-06 04-12-57 F Japanese
## 2 T002 6 23-05-07 23-02-66 M Asian
## 3 T003 1 07-07-05 14-08-59 M American
## 4 T004 4 24-03-06 17-09-74 F Black
## 5 T005 2 15-02-06 28-10-76 F Asian
## 6 T006 9 05-12-06 18-05-79 F Japanese
## 7 T007 10 23-05-07 25-05-65 M Black
## 8 T008 4 07-07-05 21-03-58 F American
## 9 T009 4 24-03-06 23-06-73 M Japanese
## 10 T010 5 30-05-06 30-04-75 M American
## .. ... ... ... ... ... ...
hos <- tbl_df(hos)
hos
## Source: local data frame [120 x 5]
##
## Patient_Id Admission_Date Discharge_Date Continuing Reason
## 1 T028 23-05-07 Yes Disease Related
## 2 T038 24-03-06 Yes Disease Related
## 3 T077 26-06-06 5-20-2007 No Adverse Event
## 4 T106 24-11-06 3-17-2007 No Adverse Event
## 5 T118 15-03-06 12-28-2006 No Disease Related
## 6 T142 15-02-06 Yes Adverse Event
## 7 T134 05-12-06 6-22-2007 No Other
## 8 T148 23-05-07 3-12-2007 No Disease Related
## 9 T174 22-07-06 Yes Disease Related
## 10 T193 13-09-06 Yes Adverse Event
## .. ... ... ... ... ...
sm <- tbl_df(sm)
sm
## Source: local data frame [1,000 x 5]
##
## Patient_Id Dose Route Frequency Outcome
## 1 T001 269 Oral 3 No Improvement
## 2 T002 343 Intravenous 5 Condition Worsened
## 3 T003 198 Subcutaneous 3 Slight Improvement
## 4 T004 298 Inhalation 2 Significant Improvement
## 5 T005 323 Nasal 6 Slight Improvement
## 6 T006 475 Oral 6 Condition Worsened
## 7 T007 162 Intraarterial 4 Significant Improvement
## 8 T008 421 Subcutaneous 5 No Improvement
## 9 T009 476 Nasal 4 Condition Worsened
## 10 T010 165 Inhalation 5 Slight Improvement
## .. ... ... ... ... ...
vss <- tbl_df(vss)
vss
## Source: local data frame [1,137 x 9]
##
## Patient_Id Date_of_Visit Height Unit.cm.in. Weight Unit.kg.lbsbs.
## 1 T001 05-12-06 146 cm 58 kg
## 2 T002 23-05-07 70 in 54 kg
## 3 T003 07-07-05 145 cm 104 lbs
## 4 T004 24-03-06 55 in 54 kg
## 5 T005 15-02-06 141 cm 45 kg
## 6 T006 05-12-06 174 cm 140 lbs
## 7 T007 23-05-07 158 cm 59 kg
## 8 T008 07-07-05 187 cm 150 lbs
## 9 T009 24-03-06 64 in 48 kg
## 10 T010 30-05-06 77 in 99 kg
## .. ... ... ... ... ... ...
## Variables not shown: Systolic_BP (int), Diastolic_BP (int), Pulse (int)
1.List of all side effects related to test article.
table(ae$AE)
##
## Acne Epilepsy Fever Headache
## 1 28 20 15 27
## Nausea Stomach Ache Ulser Vomiting
## 15 16 10 19
2.List of all patients suffering from fever as an AE (Related to test Article)
ae %>%
filter(AE == "Fever")
## Source: local data frame [15 x 10]
##
## Patient_Id AE Date_of_Visit Serious Start_Date Stop.Date Severity
## 1 T028 Fever 23-05-07 No 23-05-07 NA Mild
## 2 T106 Fever 24-11-06 Yes 24-11-06 NA Severe
## 3 T133 Fever 15-03-06 Yes 15-03-06 NA Severe
## 4 T205 Fever 23-05-07 No 23-05-07 NA Mild
## 5 T299 Fever 24-11-06 Yes 24-11-06 NA Severe
## 6 T418 Fever 24-11-06 Yes 24-11-06 NA Severe
## 7 T579 Fever 15-03-06 Yes 15-03-06 NA Severe
## 8 T594 Fever 23-05-07 No 23-05-07 NA Mild
## 9 T614 Fever 15-03-06 Yes 15-03-06 NA Severe
## 10 T629 Fever 23-05-07 No 23-05-07 NA Mild
## 11 T719 Fever 24-11-06 Yes 24-11-06 NA Severe
## 12 T819 Fever 24-11-06 Yes 24-11-06 NA Severe
## 13 T907 Fever 24-11-06 Yes 24-11-06 NA Severe
## 14 T973 Fever 15-03-06 Yes 15-03-06 NA Severe
## 15 T984 Fever 23-05-07 No 23-05-07 NA Mild
## Variables not shown: Relationship_to_TestArticle (fctr), Outcome (fctr),
## Action (fctr)
3.Demographic details of all expired patients.
expired <- ae %>%
select(Id = Patient_Id, Outcome) %>%
filter(Outcome == "Death")
demographic <- demo %>%
select(Id, Race) %>%
inner_join(expired)
## Joining by: "Id"
demographic
## Source: local data frame [30 x 3]
##
## Id Race Outcome
## 1 T048 Asian Death
## 2 T052 Japanese Death
## 3 T106 Black Death
## 4 T118 Black Death
## 5 T133 Japanese Death
## 6 T134 Black Death
## 7 T231 Asian Death
## 8 T299 Black Death
## 9 T300 Japanese Death
## 10 T377 Japanese Death
## 11 T418 Japanese Death
## 12 T425 Black Death
## 13 T482 Black Death
## 14 T549 Asian Death
## 15 T579 Japanese Death
## 16 T584 American Death
## 17 T614 American Death
## 18 T626 American Death
## 19 T673 American Death
## 20 T719 American Death
## 21 T726 Black Death
## 22 T782 Asian Death
## 23 T819 Black Death
## 24 T826 Japanese Death
## 25 T853 American Death
## 26 T907 Asian Death
## 27 T916 Japanese Death
## 28 T940 American Death
## 29 T973 Japanese Death
## 30 T977 Asian Death
4.Chance of occurrence of Acne as an AE
ae %>%
filter(AE == "Acne")
## Source: local data frame [28 x 10]
##
## Patient_Id AE Date_of_Visit Serious Start_Date Stop.Date Severity
## 1 T038 Acne 24-03-06 Yes 24-03-06 NA Severe
## 2 T085 Acne 30-05-06 No 30-05-06 NA Mild
## 3 T115 Acne 26-02-06 No 26-02-06 NA Moderate
## 4 T138 Acne 11-03-06 No 11-03-06 NA Mild
## 5 T163 Acne 30-05-06 No 30-05-06 NA Mild
## 6 T174 Acne 22-07-06 No 22-07-06 NA Moderate
## 7 T211 Acne 24-03-06 Yes 24-03-06 NA Severe
## 8 T255 Acne 30-05-06 No 30-05-06 NA Mild
## 9 T308 Acne 26-02-06 No 26-02-06 NA Moderate
## 10 T352 Acne 11-03-06 No 11-03-06 NA Mild
## 11 T433 Acne 26-02-06 No 26-02-06 NA Moderate
## 12 T469 Acne 11-03-06 No 11-03-06 NA Mild
## 13 T511 Acne 30-05-06 No 30-05-06 NA Mild
## 14 T538 Acne 22-07-06 No 22-07-06 NA Moderate
## 15 T603 Acne 24-03-06 Yes 24-03-06 NA Severe
## 16 T642 Acne 24-03-06 Yes 24-03-06 NA Severe
## 17 T693 Acne 30-05-06 No 30-05-06 NA Mild
## 18 T739 Acne 26-02-06 No 26-02-06 NA Moderate
## 19 T762 Acne 11-03-06 No 11-03-06 NA Mild
## 20 T830 Acne 26-02-06 No 26-02-06 NA Moderate
## 21 T843 Acne 11-03-06 No 11-03-06 NA Mild
## 22 T870 Acne 30-05-06 No 30-05-06 NA Mild
## 23 T881 Acne 22-07-06 No 22-07-06 NA Moderate
## 24 T893 Acne 30-05-06 No 30-05-06 NA Mild
## 25 T911 Acne 26-02-06 No 26-02-06 NA Moderate
## 26 T931 Acne 11-03-06 No 11-03-06 NA Mild
## 27 T993 Acne 24-03-06 Yes 24-03-06 NA Severe
## 28 T566 Acne 30-05-06 No 30-05-06 NA Mild
## Variables not shown: Relationship_to_TestArticle (fctr), Outcome (fctr),
## Action (fctr)
28/151
## [1] 0.1854
5.List of patients who are withdrawn from study due to AE not related to Test article.
table(ae$Action)
##
## Hospitalized
## 8 47
## Primary Reason For Study Withdrawal Temporarily Stopped Test Article
## 26 55
## Withdrawn From Study
## 15
ae %>%
select(Patient_Id, Action) %>%
filter(Action == "Withdrawn From Study")
## Source: local data frame [15 x 2]
##
## Patient_Id Action
## 1 T035 Withdrawn From Study
## 2 T068 Withdrawn From Study
## 3 T228 Withdrawn From Study
## 4 T273 Withdrawn From Study
## 5 T579 Withdrawn From Study
## 6 T584 Withdrawn From Study
## 7 T614 Withdrawn From Study
## 8 T626 Withdrawn From Study
## 9 T661 Withdrawn From Study
## 10 T705 Withdrawn From Study
## 11 T900 Withdrawn From Study
## 12 T973 Withdrawn From Study
## 13 T977 Withdrawn From Study
## 14 T534 Withdrawn From Study
## 15 T328 Withdrawn From Study
6.List of patients who face a life threatening AE and corresponding action taken.
ae %>%
select(Patient_Id, Severity, Action) %>%
filter(Severity == "Life Threatening")
## Source: local data frame [17 x 3]
##
## Patient_Id Severity Action
## 1 T052 Life Threatening
## 2 T134 Life Threatening
## 3 T193 Life Threatening Hospitalized
## 4 T048 Life Threatening
## 5 T231 Life Threatening Temporarily Stopped Test Article
## 6 T377 Life Threatening
## 7 T482 Life Threatening Hospitalized
## 8 T558 Life Threatening Hospitalized
## 9 T584 Life Threatening Withdrawn From Study
## 10 T626 Life Threatening Withdrawn From Study
## 11 T673 Life Threatening Temporarily Stopped Test Article
## 12 T782 Life Threatening Hospitalized
## 13 T853 Life Threatening Hospitalized
## 14 T940 Life Threatening Hospitalized
## 15 T960 Life Threatening Hospitalized
## 16 T977 Life Threatening Withdrawn From Study
## 17 T549 Life Threatening Temporarily Stopped Test Article
7.Min, max and average age of patients facing Ulser as an AE
ulser <- ae %>%
select(Id = Patient_Id, AE) %>%
filter(AE == "Ulser")
ulser
## Source: local data frame [10 x 2]
##
## Id AE
## 1 T052 Ulser
## 2 T150 Ulser
## 3 T231 Ulser
## 4 T390 Ulser
## 5 T505 Ulser
## 6 T673 Ulser
## 7 T800 Ulser
## 8 T861 Ulser
## 9 T955 Ulser
## 10 T549 Ulser
demo_age <- demo %>%
select(Id, dob = Date_of_Birth)
demo_age
## Source: local data frame [1,075 x 2]
##
## Id dob
## 1 T001 04-12-57
## 2 T002 23-02-66
## 3 T003 14-08-59
## 4 T004 17-09-74
## 5 T005 28-10-76
## 6 T006 18-05-79
## 7 T007 25-05-65
## 8 T008 21-03-58
## 9 T009 23-06-73
## 10 T010 30-04-75
## .. ... ...
demo_age$dob <- as.Date(format(as.Date(demo_age$dob, format = "%d-%m-%y"), "%d-%m-19%y"), "%d-%m-%Y")
today <- rep(Sys.Date(), 1075)
demo2_age <- cbind(demo_age, today)
age <- ulser %>%
inner_join(demo2_age)
## Joining by: "Id"
age
## Source: local data frame [10 x 4]
##
## Id AE dob today
## 1 T052 Ulser 1975-04-30 2014-12-31
## 2 T150 Ulser 1974-09-17 2014-12-31
## 3 T231 Ulser 1970-08-27 2014-12-31
## 4 T390 Ulser 1973-06-13 2014-12-31
## 5 T505 Ulser 1973-06-13 2014-12-31
## 6 T549 Ulser 1979-05-28 2014-12-31
## 7 T673 Ulser 1969-04-21 2014-12-31
## 8 T800 Ulser 1974-09-17 2014-12-31
## 9 T861 Ulser 1966-01-08 2014-12-31
## 10 T955 Ulser 1966-01-08 2014-12-31
age$Age <- age$today - age$dob
age$Age <- as.numeric(age$Age)
age$Age <- age$Age/365
age$Age <- round(age$Age, 0)
age
## Source: local data frame [10 x 5]
##
## Id AE dob today Age
## 1 T052 Ulser 1975-04-30 2014-12-31 40
## 2 T150 Ulser 1974-09-17 2014-12-31 40
## 3 T231 Ulser 1970-08-27 2014-12-31 44
## 4 T390 Ulser 1973-06-13 2014-12-31 42
## 5 T505 Ulser 1973-06-13 2014-12-31 42
## 6 T549 Ulser 1979-05-28 2014-12-31 36
## 7 T673 Ulser 1969-04-21 2014-12-31 46
## 8 T800 Ulser 1974-09-17 2014-12-31 40
## 9 T861 Ulser 1966-01-08 2014-12-31 49
## 10 T955 Ulser 1966-01-08 2014-12-31 49
summary(age$Age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36.0 40.0 42.0 42.8 45.5 49.0
8.A list with Systolic and Diastolic pressure, Weight of patients died of AE
list <- vss %>%
select(Id = Patient_Id, Systolic_BP, Diastolic_BP, Weight)
ae %>%
select(Id = Patient_Id, Outcome) %>%
filter(Outcome == "Death") %>%
inner_join(list)
## Joining by: "Id"
## Source: local data frame [30 x 5]
##
## Id Outcome Systolic_BP Diastolic_BP Weight
## 1 T048 Death 200 110 150
## 2 T052 Death 112 60 128
## 3 T106 Death 154 105 128
## 4 T118 Death 238 109 128
## 5 T133 Death 177 100 126
## 6 T134 Death 94 99 60
## 7 T231 Death 204 51 109
## 8 T299 Death 204 110 55
## 9 T300 Death 238 100 60
## 10 T377 Death 166 101 48
## 11 T418 Death 158 81 52
## 12 T425 Death 227 87 70
## 13 T482 Death 227 51 66
## 14 T549 Death 109 101 54
## 15 T579 Death 199 89 148
## 16 T584 Death 255 104 65
## 17 T614 Death 196 92 128
## 18 T626 Death 115 91 52
## 19 T673 Death 220 71 128
## 20 T719 Death 217 110 62
## 21 T726 Death 138 96 53
## 22 T782 Death 178 89 99
## 23 T819 Death 198 78 58
## 24 T826 Death 177 78 46
## 25 T853 Death 223 64 54
## 26 T907 Death 98 108 148
## 27 T916 Death 257 87 65
## 28 T940 Death 94 109 52
## 29 T973 Death 136 64 63
## 30 T977 Death 200 108 117
9.Find the Dosage at which the condition of the patient worsened
sm %>%
select(Patient_Id, Dose, Outcome) %>%
filter(Outcome == "Condition Worsened")
## Source: local data frame [367 x 3]
##
## Patient_Id Dose Outcome
## 1 T002 343 Condition Worsened
## 2 T006 475 Condition Worsened
## 3 T009 476 Condition Worsened
## 4 T013 455 Condition Worsened
## 5 T015 169 Condition Worsened
## 6 T016 479 Condition Worsened
## 7 T019 306 Condition Worsened
## 8 T022 477 Condition Worsened
## 9 T027 161 Condition Worsened
## 10 T029 137 Condition Worsened
## .. ... ... ...
10.Find the List of best route, dosage and Frequency of the medication
sm %>%
select(Patient_Id, Dose, Route, Frequency, Outcome) %>%
filter(Outcome == "Significant Improvement")
## Source: local data frame [248 x 5]
##
## Patient_Id Dose Route Frequency Outcome
## 1 T004 298 Inhalation 2 Significant Improvement
## 2 T007 162 Intraarterial 4 Significant Improvement
## 3 T012 256 Nasal 2 Significant Improvement
## 4 T017 185 Intravenous 1 Significant Improvement
## 5 T021 386 Inhalation 5 Significant Improvement
## 6 T024 319 Intravenous 3 Significant Improvement
## 7 T026 223 Nasal 6 Significant Improvement
## 8 T031 457 Inhalation 1 Significant Improvement
## 9 T035 464 Nasal 4 Significant Improvement
## 10 T039 201 Nasal 6 Significant Improvement
## .. ... ... ... ... ...
11.Find the average no of days spent in hospital for hospitalized patients
hos1 <- hos %>%
select(Id = Patient_Id, Adm_date = Admission_Date, Dis_date = Discharge_Date)
hos1$Adm_date <- as.Date(hos1$Adm_date, format = "%d-%m-%y")
hos1$Dis_date <- as.Date(hos1$Dis_date, format = "%m-%d-%Y")
hos1$Dis_date[is.na(hos1$Dis_date)] <- Sys.Date()
hos2 <- na.omit(hos1)
hos2$Days <- hos2$Dis_date - hos2$Adm_date
hos2$Days <- as.numeric(hos2$Days)
mean(hos2$Days)
## [1] 1435
12.Report of patients who didn’t show any improvement (should have race, age, sysbp, diasbp,height, weight)
race <- demo %>%
select(Id, Race)
stat <- vss %>%
select(Id = Patient_Id, Height, Weight, Systolic_BP, Diastolic_BP)
report <- sm %>%
select(Id = Patient_Id, Outcome) %>%
filter(Outcome == "No Improvement") %>%
inner_join(race) %>%
inner_join(stat) %>%
inner_join(demo2_age)
## Joining by: "Id"
## Joining by: "Id"
## Joining by: "Id"
report$Age <- report$today - report$dob
report$Age <- as.numeric(report$Age)
report$Age <- report$Age/365
report$Age <- round(report$Age, 0)
report <- report %>%
select(Id:Diastolic_BP, Age)
report
## Source: local data frame [224 x 8]
##
## Id Outcome Race Height Weight Systolic_BP Diastolic_BP Age
## 1 T001 No Improvement Japanese 146 58 153 85 57
## 2 T008 No Improvement American 187 150 139 68 57
## 3 T011 No Improvement Black 157 53 108 54 52
## 4 T014 No Improvement American 163 66 121 61 49
## 5 T020 No Improvement Japanese 73 56 95 54 36
## 6 T025 No Improvement Asian 170 55 176 86 53
## 7 T028 No Improvement American 160 126 228 67 60
## 8 T034 No Improvement American 172 104 251 109 37
## 9 T036 No Improvement Asian 171 58 182 86 46
## 10 T043 No Improvement American 130 69 170 78 57
## .. ... ... ... ... ... ... ... ...
13.Demographic Details of all the screen failure patients
cpp %>%
select(Id, Screen_Failure) %>%
filter(Screen_Failure == "Yes") %>%
inner_join(demo)
## Joining by: "Id"
## Source: local data frame [10 x 7]
##
## Id Screen_Failure Visit Date_of_Visit Date_of_Birth Sex Race
## 1 T004 Yes 4 24-03-06 17-09-74 F Black
## 2 T038 Yes 10 15-02-06 14-08-59 F Japanese
## 3 T052 Yes 9 15-03-06 30-04-75 F Japanese
## 4 T053 Yes 10 05-12-06 22-07-62 M Black
## 5 T087 Yes 4 05-12-06 25-05-75 F Japanese
## 6 T099 Yes 2 05-12-06 03-06-73 F American
## 7 T105 Yes 10 23-05-07 27-08-70 F American
## 8 T142 Yes 4 07-07-05 20-04-75 M Asian
## 9 T169 Yes 4 15-02-06 13-06-73 F Japanese
## 10 T183 Yes 1 22-07-06 27-04-75 F Japanese
14.List of patients who are continued to be hospitalized with pulserate above avg pulse rate of all patients
mean(vss$Pulse, na.rm = T)
## [1] 72.62
pulse <- vss %>%
select(Id = Patient_Id, Pulse) %>%
filter(Pulse > 73)
hos %>%
select(Id = Patient_Id, Continuing) %>%
filter(Continuing == "Yes") %>%
inner_join(pulse)
## Joining by: "Id"
## Source: local data frame [11 x 3]
##
## Id Continuing Pulse
## 1 T174 Yes 91
## 2 T193 Yes 77
## 3 T205 Yes 83
## 4 T211 Yes 75
## 5 T594 Yes 83
## 6 T629 Yes 84
## 7 T642 Yes 88
## 8 T779 Yes 79
## 9 T937 Yes 75
## 10 T960 Yes 84
## 11 T993 Yes 81