Exploratory data analysis:

Total patients:

# Store the case outcome as a factor so it is tabulated and grouped by level.
dat$dead_or_alive <- as.factor(dat$dead_or_alive)
# Total patients: sum of the per-outcome counts (table() omits NA by default,
# so only patients with a recorded outcome are counted).
sum(table(dat$dead_or_alive))
## [1] 222
# Number of patients in each outcome group (dead or alive).
table(dat$dead_or_alive)
## 
## Alive  Dead 
##   159    63

Patients by country:

# Number of distinct countries represented in the data.
nlevels(as.factor(dat$country))
## [1] 22
# Long-format table: one row per (outcome, country) pair with its patient count.
country <- data.frame(
  table(dat$dead_or_alive, dat$country)
)
names(country) <- c("Case Outcome", "Country", "Number of Patient")
country
##    Case Outcome     Country Number of Patient
## 1         Alive   Australia                 4
## 2          Dead   Australia                 0
## 3         Alive      Canada                 2
## 4          Dead      Canada                 0
## 5         Alive       China                 3
## 6          Dead       China                39
## 7         Alive       Egypt                 1
## 8          Dead       Egypt                 0
## 9         Alive      France                 2
## 10         Dead      France                 2
## 11        Alive   Hong Kong                 2
## 12         Dead   Hong Kong                 2
## 13        Alive        Iran                 0
## 14         Dead        Iran                 4
## 15        Alive       Japan                13
## 16         Dead       Japan                 5
## 17        Alive    Malaysia                19
## 18         Dead    Malaysia                 0
## 19        Alive       Nepal                 1
## 20         Dead       Nepal                 0
## 21        Alive Phillipines                 1
## 22         Dead Phillipines                 1
## 23        Alive      Russia                 2
## 24         Dead      Russia                 0
## 25        Alive   Singapore                62
## 26         Dead   Singapore                 0
## 27        Alive South Korea                19
## 28         Dead South Korea                 9
## 29        Alive       Spain                 2
## 30         Dead       Spain                 0
## 31        Alive   Sri Lanka                 1
## 32         Dead   Sri Lanka                 0
## 33        Alive      Taiwan                 0
## 34         Dead      Taiwan                 1
## 35        Alive    Thailand                 4
## 36         Dead    Thailand                 0
## 37        Alive         UAE                 4
## 38         Dead         UAE                 0
## 39        Alive          UK                 6
## 40         Dead          UK                 0
## 41        Alive         USA                 3
## 42         Dead         USA                 0
## 43        Alive     Vietnam                 8
## 44         Dead     Vietnam                 0
# Share of each outcome within every country (each column sums to 1).
prop.table(table(dat$dead_or_alive, dat$country), margin = 2)
##        
##          Australia     Canada      China      Egypt     France  Hong Kong
##   Alive 1.00000000 1.00000000 0.07142857 1.00000000 0.50000000 0.50000000
##   Dead  0.00000000 0.00000000 0.92857143 0.00000000 0.50000000 0.50000000
##        
##               Iran      Japan   Malaysia      Nepal Phillipines     Russia
##   Alive 0.00000000 0.72222222 1.00000000 1.00000000  0.50000000 1.00000000
##   Dead  1.00000000 0.27777778 0.00000000 0.00000000  0.50000000 0.00000000
##        
##          Singapore South Korea      Spain  Sri Lanka     Taiwan   Thailand
##   Alive 1.00000000  0.67857143 1.00000000 1.00000000 0.00000000 1.00000000
##   Dead  0.00000000  0.32142857 0.00000000 0.00000000 1.00000000 0.00000000
##        
##                UAE         UK        USA    Vietnam
##   Alive 1.00000000 1.00000000 1.00000000 1.00000000
##   Dead  0.00000000 0.00000000 0.00000000 0.00000000

Patients by gender:

## 
## female   male 
##     80    131
##        
##         female male
##   Alive     66   87
##   Dead      14   44
##        
##            female      male
##   Alive 0.8250000 0.6641221
##   Dead  0.1750000 0.3358779
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  gen
## X-squared = 5.6675, df = 1, p-value = 0.01728

Patients age distribution:

# Distribution summary of patient age, including the count of missing values.
summary(dat$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.25   35.50   51.00   49.75   65.00   89.00      19
# Median age within each outcome group; missing ages are ignored.
med_age <- aggregate(
  x = dat$age,
  by = list(dat$dead_or_alive),
  FUN = median,
  na.rm = TRUE
)
names(med_age) <- c("Outcome", "Median age of the patient (years)")
med_age
##   Outcome Median age of the patient (years)
## 1   Alive                              42.0
## 2    Dead                              69.5
# Mean age within each outcome group; missing ages are ignored.
avg_age <- aggregate(
  x = dat$age,
  by = list(dat$dead_or_alive),
  FUN = mean,
  na.rm = TRUE
)
names(avg_age) <- c("Outcome", "Average age of the patients (years)")
avg_age
##   Outcome Average age of the patients (years)
## 1   Alive                            42.21207
## 2    Dead                            68.58621

## 
##  Welch Two Sample t-test
## 
## data:  age by dead_or_alive
## t = -11.405, df = 136.13, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -30.94709 -21.80118
## sample estimates:
## mean in group Alive  mean in group Dead 
##            42.21207            68.58621

Patients symptoms:

# Number of distinct symptom descriptions recorded.
nlevels(as.factor(dat$symptom))
## [1] 24
# Cross-tabulate outcome against symptom description, then show it in long
# format (one row per outcome/symptom pair).
symp <- table(dat$dead_or_alive, dat$symptom)
data.frame(symp)
##     Var1                                  Var2 Freq
## 1  Alive                                chills    1
## 2   Dead                                chills    0
## 3  Alive                cold, fever, pneumonia    0
## 4   Dead                cold, fever, pneumonia    1
## 5  Alive                                 cough    1
## 6   Dead                                 cough    2
## 7  Alive           cough, runny nose, diarrhea    1
## 8   Dead           cough, runny nose, diarrhea    0
## 9  Alive            cough, shortness of breath    0
## 10  Dead            cough, shortness of breath    1
## 11 Alive                  difficulty breathing    0
## 12  Dead                  difficulty breathing    1
## 13 Alive                               fatigue    1
## 14  Dead                               fatigue    1
## 15 Alive             fatigue, loss of appetite    1
## 16  Dead             fatigue, loss of appetite    0
## 17 Alive feaver, cough, difficult in breathing    2
## 18  Dead feaver, cough, difficult in breathing    0
## 19 Alive                                 fever   19
## 20  Dead                                 fever    0
## 21 Alive                          fever, cough    3
## 22  Dead                          fever, cough    0
## 23 Alive          fever, cough, breathlessness    1
## 24  Dead          fever, cough, breathlessness    0
## 25 Alive                  fever, cough, chills    1
## 26  Dead                  fever, cough, chills    0
## 27 Alive                     fever, cough, flu    1
## 28  Dead                     fever, cough, flu    0
## 29 Alive                 fever, cough, malaise    1
## 30  Dead                 fever, cough, malaise    0
## 31 Alive             fever, cough, sore throat    1
## 32  Dead             fever, cough, sore throat    1
## 33 Alive                  fever, cough, sputum    1
## 34  Dead                  fever, cough, sputum    0
## 35 Alive           fever, headache, runny nose    1
## 36  Dead           fever, headache, runny nose    0
## 37 Alive                      fever, pneumonia    0
## 38  Dead                      fever, pneumonia    1
## 39 Alive                    fever, sore throat    2
## 40  Dead                    fever, sore throat    0
## 41 Alive                              headache    1
## 42  Dead                              headache    0
## 43 Alive       high fever, chills, muscle pain    1
## 44  Dead       high fever, chills, muscle pain    0
## 45 Alive                        myalgia, fever    0
## 46  Dead                        myalgia, fever    1
## 47 Alive                           sore throat    2
## 48  Dead                           sore throat    0
# Symptom descriptions recorded for the patients who died.

table(dat$symptom[dat$dead_or_alive == "Dead"])
## 
##     cold, fever, pneumonia                      cough 
##                          1                          2 
## cough, shortness of breath       difficulty breathing 
##                          1                          1 
##                    fatigue  fever, cough, sore throat 
##                          1                          1 
##           fever, pneumonia             myalgia, fever 
##                          1                          1

Symptoms to hospital visit: (Days)

# Days from symptom onset to hospital visit: overall distribution.
summary(dat$symptom_to_hospital)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.000   2.000   3.148   5.000  15.000     107
# Median days from symptom onset to hospital visit, per outcome group.
hospital_vist_median_day <- aggregate(
  x = dat$symptom_to_hospital,
  by = list(dat$dead_or_alive),
  FUN = median,
  na.rm = TRUE
)
names(hospital_vist_median_day) <- c("Outcome", "Number of days (Median)")
hospital_vist_median_day
##   Outcome Number of days (Median)
## 1   Alive                       1
## 2    Dead                       6
# Mean days from symptom onset to hospital visit, per outcome group.
hospital_vist_mean_day <- aggregate(
  x = dat$symptom_to_hospital,
  by = list(dat$dead_or_alive),
  FUN = mean,
  na.rm = TRUE
)
names(hospital_vist_mean_day) <- c("Outcome", "Number of days (Mean)")
hospital_vist_mean_day
##   Outcome Number of days (Mean)
## 1   Alive              2.125000
## 2    Dead              5.485714

## 
##  Welch Two Sample t-test
## 
## data:  symptom_to_hospital by dead_or_alive
## t = -5.1115, df = 50.609, p-value = 4.926e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.680915 -2.040513
## sample estimates:
## mean in group Alive  mean in group Dead 
##            2.125000            5.485714

Hospital visit to outcome: (Days)

# Days from hospital visit to outcome: overall distribution.
summary(dat$hosp_visit_to_outcome)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    4.00   13.00   16.00   15.73   19.00   25.00     189
# Median days from hospital visit to outcome, per outcome group.
hospital_vist_to_outcome_median_day <- aggregate(
  x = dat$hosp_visit_to_outcome,
  by = list(dat$dead_or_alive),
  FUN = median,
  na.rm = TRUE
)
names(hospital_vist_to_outcome_median_day) <- c("Outcome", "Number of days (Median)")
hospital_vist_to_outcome_median_day
##   Outcome Number of days (Median)
## 1   Alive                    16.5
## 2    Dead                    16.0
# Mean days from hospital visit to outcome, per outcome group.
hospital_vist_to_outcome_mean_day <- aggregate(
  x = dat$hosp_visit_to_outcome,
  by = list(dat$dead_or_alive),
  FUN = mean,
  na.rm = TRUE
)
names(hospital_vist_to_outcome_mean_day) <- c("Outcome", "Number of days (Mean)")
hospital_vist_to_outcome_mean_day
##   Outcome Number of days (Mean)
## 1   Alive              15.76667
## 2    Dead              15.33333

Predictive modelling:

Logistic Regression:

## 
## Call:
## glm(formula = dependent ~ gender + age + symptom_to_hospital, 
##     family = "binomial", data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0083  -0.3139   0.1427   0.3906   1.8315  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         10.02979    1.95124   5.140 2.74e-07 ***
## gendermale          -0.76830    0.80551  -0.954 0.340183    
## age                 -0.12173    0.02598  -4.686 2.79e-06 ***
## symptom_to_hospital -0.38455    0.10857  -3.542 0.000397 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 137.505  on 111  degrees of freedom
## Residual deviance:  61.148  on 108  degrees of freedom
##   (110 observations deleted due to missingness)
## AIC: 69.148
## 
## Number of Fisher Scoring iterations: 6