Exploratory data analysis:
Total patients:
# Treat the case outcome as a categorical variable.
dat[["dead_or_alive"]] <- as.factor(dat[["dead_or_alive"]])
# Count the patients that have a recorded (non-missing) outcome.
sum(!is.na(dat[["dead_or_alive"]]))
## [1] 222
# Patient counts split by outcome
table(dat[["dead_or_alive"]])
##
## Alive Dead
## 159 63
Patients by country:
# Number of distinct countries represented in the data
length(levels(as.factor(dat[["country"]])))
## [1] 22
# Long-format table: one row per outcome/country pair with its patient count.
country <- setNames(
  data.frame(table(dat[["dead_or_alive"]], dat[["country"]])),
  c("Case Outcome", "Country", "Number of Patient")
)
country
## Case Outcome Country Number of Patient
## 1 Alive Australia 4
## 2 Dead Australia 0
## 3 Alive Canada 2
## 4 Dead Canada 0
## 5 Alive China 3
## 6 Dead China 39
## 7 Alive Egypt 1
## 8 Dead Egypt 0
## 9 Alive France 2
## 10 Dead France 2
## 11 Alive Hong Kong 2
## 12 Dead Hong Kong 2
## 13 Alive Iran 0
## 14 Dead Iran 4
## 15 Alive Japan 13
## 16 Dead Japan 5
## 17 Alive Malaysia 19
## 18 Dead Malaysia 0
## 19 Alive Nepal 1
## 20 Dead Nepal 0
## 21 Alive Phillipines 1
## 22 Dead Phillipines 1
## 23 Alive Russia 2
## 24 Dead Russia 0
## 25 Alive Singapore 62
## 26 Dead Singapore 0
## 27 Alive South Korea 19
## 28 Dead South Korea 9
## 29 Alive Spain 2
## 30 Dead Spain 0
## 31 Alive Sri Lanka 1
## 32 Dead Sri Lanka 0
## 33 Alive Taiwan 0
## 34 Dead Taiwan 1
## 35 Alive Thailand 4
## 36 Dead Thailand 0
## 37 Alive UAE 4
## 38 Dead UAE 0
## 39 Alive UK 6
## 40 Dead UK 0
## 41 Alive USA 3
## 42 Dead USA 0
## 43 Alive Vietnam 8
## 44 Dead Vietnam 0
# Share of each outcome within a country (proportions are taken per column).
prop.table(
  table(dat[["dead_or_alive"]], dat[["country"]]),
  margin = 2
)
##
## Australia Canada China Egypt France Hong Kong
## Alive 1.00000000 1.00000000 0.07142857 1.00000000 0.50000000 0.50000000
## Dead 0.00000000 0.00000000 0.92857143 0.00000000 0.50000000 0.50000000
##
## Iran Japan Malaysia Nepal Phillipines Russia
## Alive 0.00000000 0.72222222 1.00000000 1.00000000 0.50000000 1.00000000
## Dead 1.00000000 0.27777778 0.00000000 0.00000000 0.50000000 0.00000000
##
## Singapore South Korea Spain Sri Lanka Taiwan Thailand
## Alive 1.00000000 0.67857143 1.00000000 1.00000000 0.00000000 1.00000000
## Dead 0.00000000 0.32142857 0.00000000 0.00000000 1.00000000 0.00000000
##
## UAE UK USA Vietnam
## Alive 1.00000000 1.00000000 1.00000000 1.00000000
## Dead 0.00000000 0.00000000 0.00000000 0.00000000
Patients by gender:
##
## female male
## 80 131
##
## female male
## Alive 66 87
## Dead 14 44
##
## female male
## Alive 0.8250000 0.6641221
## Dead 0.1750000 0.3358779
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: gen
## X-squared = 5.6675, df = 1, p-value = 0.01728
Patients age distribution:
# Distribution summary of patient age, including the count of missing values
summary(dat[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.25 35.50 51.00 49.75 65.00 89.00 19
# Median age within each outcome group; missing ages are ignored.
med_age <- setNames(
  aggregate(
    dat[["age"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = median,
    na.rm = TRUE
  ),
  c("Outcome", "Median age of the patient (years)")
)
med_age
## Outcome Median age of the patient (years)
## 1 Alive 42.0
## 2 Dead 69.5
# Mean age within each outcome group; missing ages are ignored.
avg_age <- setNames(
  aggregate(
    dat[["age"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = mean,
    na.rm = TRUE
  ),
  c("Outcome", "Average age of the patients (years)")
)
avg_age
## Outcome Average age of the patients (years)
## 1 Alive 42.21207
## 2 Dead 68.58621

##
## Welch Two Sample t-test
##
## data: age by dead_or_alive
## t = -11.405, df = 136.13, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -30.94709 -21.80118
## sample estimates:
## mean in group Alive mean in group Dead
## 42.21207 68.58621
Patients symptoms:
# Number of distinct symptom entries (a single entry may list several symptoms)
length(levels(as.factor(dat[["symptom"]])))
## [1] 24
# Cross-tabulate outcome against symptom entry, then display it in long form.
symp <- table(
  dat[["dead_or_alive"]],
  dat[["symptom"]]
)
data.frame(symp)
## Var1 Var2 Freq
## 1 Alive chills 1
## 2 Dead chills 0
## 3 Alive cold, fever, pneumonia 0
## 4 Dead cold, fever, pneumonia 1
## 5 Alive cough 1
## 6 Dead cough 2
## 7 Alive cough, runny nose, diarrhea 1
## 8 Dead cough, runny nose, diarrhea 0
## 9 Alive cough, shortness of breath 0
## 10 Dead cough, shortness of breath 1
## 11 Alive difficulty breathing 0
## 12 Dead difficulty breathing 1
## 13 Alive fatigue 1
## 14 Dead fatigue 1
## 15 Alive fatigue, loss of appetite 1
## 16 Dead fatigue, loss of appetite 0
## 17 Alive feaver, cough, difficult in breathing 2
## 18 Dead feaver, cough, difficult in breathing 0
## 19 Alive fever 19
## 20 Dead fever 0
## 21 Alive fever, cough 3
## 22 Dead fever, cough 0
## 23 Alive fever, cough, breathlessness 1
## 24 Dead fever, cough, breathlessness 0
## 25 Alive fever, cough, chills 1
## 26 Dead fever, cough, chills 0
## 27 Alive fever, cough, flu 1
## 28 Dead fever, cough, flu 0
## 29 Alive fever, cough, malaise 1
## 30 Dead fever, cough, malaise 0
## 31 Alive fever, cough, sore throat 1
## 32 Dead fever, cough, sore throat 1
## 33 Alive fever, cough, sputum 1
## 34 Dead fever, cough, sputum 0
## 35 Alive fever, headache, runny nose 1
## 36 Dead fever, headache, runny nose 0
## 37 Alive fever, pneumonia 0
## 38 Dead fever, pneumonia 1
## 39 Alive fever, sore throat 2
## 40 Dead fever, sore throat 0
## 41 Alive headache 1
## 42 Dead headache 0
## 43 Alive high fever, chills, muscle pain 1
## 44 Dead high fever, chills, muscle pain 0
## 45 Alive myalgia, fever 0
## 46 Dead myalgia, fever 1
## 47 Alive sore throat 2
## 48 Dead sore throat 0
# Symptom entries recorded for the patients whose outcome is "Dead"
table(dat[["symptom"]][dat[["dead_or_alive"]] == "Dead"])
##
## cold, fever, pneumonia cough
## 1 2
## cough, shortness of breath difficulty breathing
## 1 1
## fatigue fever, cough, sore throat
## 1 1
## fever, pneumonia myalgia, fever
## 1 1
Symptoms to hospital visit (days):
# Days between symptoms and the hospital visit: distribution summary
summary(dat[["symptom_to_hospital"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 2.000 3.148 5.000 15.000 107
# Median symptom-to-hospital delay per outcome group; missing values ignored.
hospital_vist_median_day <- setNames(
  aggregate(
    dat[["symptom_to_hospital"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = median,
    na.rm = TRUE
  ),
  c("Outcome", "Number of days (Median)")
)
hospital_vist_median_day
## Outcome Number of days (Median)
## 1 Alive 1
## 2 Dead 6
# Mean symptom-to-hospital delay per outcome group; missing values ignored.
hospital_vist_mean_day <- setNames(
  aggregate(
    dat[["symptom_to_hospital"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = mean,
    na.rm = TRUE
  ),
  c("Outcome", "Number of days (Mean)")
)
hospital_vist_mean_day
## Outcome Number of days (Mean)
## 1 Alive 2.125000
## 2 Dead 5.485714

##
## Welch Two Sample t-test
##
## data: symptom_to_hospital by dead_or_alive
## t = -5.1115, df = 50.609, p-value = 4.926e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.680915 -2.040513
## sample estimates:
## mean in group Alive mean in group Dead
## 2.125000 5.485714
Hospital visit to outcome (days):
# Days between the hospital visit and the outcome: distribution summary
summary(dat[["hosp_visit_to_outcome"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.00 13.00 16.00 15.73 19.00 25.00 189
# Median hospital-visit-to-outcome delay per outcome group; missing values ignored.
hospital_vist_to_outcome_median_day <- setNames(
  aggregate(
    dat[["hosp_visit_to_outcome"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = median,
    na.rm = TRUE
  ),
  c("Outcome", "Number of days (Median)")
)
hospital_vist_to_outcome_median_day
## Outcome Number of days (Median)
## 1 Alive 16.5
## 2 Dead 16.0
# Mean hospital-visit-to-outcome delay per outcome group; missing values ignored.
hospital_vist_to_outcome_mean_day <- setNames(
  aggregate(
    dat[["hosp_visit_to_outcome"]],
    by = list(dat[["dead_or_alive"]]),
    FUN = mean,
    na.rm = TRUE
  ),
  c("Outcome", "Number of days (Mean)")
)
hospital_vist_to_outcome_mean_day
## Outcome Number of days (Mean)
## 1 Alive 15.76667
## 2 Dead 15.33333
