Covid19 Survival

This document does the analysis of the patient level data set. The data was cloned from :https://github.com/beoutbreakprepared/nCoV2019 on April 2, 2020

Loading the libraries

library("FRESA.CAD")
repetitions = 300
trainFraction = 0.90

Loading the data

Loading the cloned https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv file Then curating the data set.

latestdata <- read.csv("~/GitHub/nCoV2019/latest_data/latestdata.csv", na.strings=c("NA",""), stringsAsFactors=FALSE)


SymptomsOutcome <- latestdata[,c("age",
                                 "sex",
                                 "country",
                                 "date_onset_symptoms",
                                 "date_admission_hospital",
                                 "date_confirmation",
                                 "symptoms",
                                 "chronic_disease_binary",
                                 "chronic_disease",
                                 "outcome",
                                 "date_death_or_discharge")]


SymptomsOutcome <- SymptomsOutcome[!is.na(SymptomsOutcome$symptoms) &
                                     !is.na(SymptomsOutcome$outcome) &
                                     !is.na(SymptomsOutcome$age),]

SymptomsOutcome[SymptomsOutcome$age == "50-59","age"] <- 55
SymptomsOutcome[SymptomsOutcome$age == "60-79","age"] <- 55
SymptomsOutcome[SymptomsOutcome$age == "60-69","age"] <- 65
SymptomsOutcome[SymptomsOutcome$age == "80-89","age"] <- 85
SymptomsOutcome$age <- as.numeric(SymptomsOutcome$age)

SymptomsOutcome <- SymptomsOutcome[!is.na(SymptomsOutcome$age),]
SymptomsOutcome <- SymptomsOutcome[!(SymptomsOutcome$symptoms=="none" | SymptomsOutcome$symptoms=="asymptomatic"),]

The reported symtoms and chronic diseases were reclasified using the following keywords Symptoms:

fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain


syn <- unlist(str_split(names(table(SymptomsOutcome$symptoms)),","))
syn <- unique(unlist(str_split(syn,":")))

SymptomsOutcome$fever_chills <- 1*(str_count(SymptomsOutcome$symptoms,"fever") > 0 | 
                                str_count(SymptomsOutcome$symptoms,"chills")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"cold") > 0)
table(SymptomsOutcome$fever_chills)  
#> 
#>  0  1 
#> 85 90

SymptomsOutcome$cough <- 1*(str_count(SymptomsOutcome$symptoms,"cough") > 0)
table(SymptomsOutcome$cough)  
#> 
#>   0   1 
#> 110  65

SymptomsOutcome$breath_dyspnea <- 1*(str_count(SymptomsOutcome$symptoms,"dyspnea") > 0 | 
                                str_count(SymptomsOutcome$symptoms,"difficulty breathing")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"acute respiratory")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"grasp")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"gasp") > 0)
table(SymptomsOutcome$breath_dyspnea)  
#> 
#>   0   1 
#> 133  42

SymptomsOutcome$fatigue <- 1*(str_count(SymptomsOutcome$symptoms,"fatigue") > 0 | 
                                str_count(SymptomsOutcome$symptoms,"weak")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"fatigure")  > 0 )
table(SymptomsOutcome$fatigue)  
#> 
#>   0   1 
#> 159  16

SymptomsOutcome$pain <- 1*(str_count(SymptomsOutcome$symptoms,"chest discomfort") > 0 | 
                                str_count(SymptomsOutcome$symptoms,"pain")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"myalgia")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"headache")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"body malaise")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"myalgias")  > 0 |
                                str_count(SymptomsOutcome$symptoms,"sore")  > 0 )
table(SymptomsOutcome$pain)  
#> 
#>   0   1 
#> 150  25
SymptomsOutcome$other_symptoms <- 1*((SymptomsOutcome$pain + 
                              SymptomsOutcome$fatigue + 
                              SymptomsOutcome$breath_dyspnea + 
                              SymptomsOutcome$fever_chills) == 0)
table(SymptomsOutcome$other_symptoms)  
#> 
#>   0   1 
#> 135  40
SymptomsOutcome$symptoms <- NULL  

SymptomsOutcome$chronic_disease <- NULL
SymptomsOutcome$chronic_disease_binary <- NULL

SymptomsOutcome$date_onset_symptoms <- NULL
SymptomsOutcome$date_admission_hospital <- NULL
SymptomsOutcome$date_confirmation <- NULL
SymptomsOutcome$date_death_or_discharge <- NULL


table(SymptomsOutcome$outcome)
#> 
#>                                           dead 
#>                                              2 
#>                                          death 
#>                                              8 
#>                                       Deceased 
#>                                              1 
#>                                           died 
#>                                             91 
#>                                      discharge 
#>                                             25 
#>                                     discharged 
#>                                             28 
#>                                     Discharged 
#>                                              2 
#>                                      recovered 
#>                                              8 
#>                                         stable 
#>                                              8 
#> treated in an intensive care unit (14.02.2020) 
#>                                              2

SymptomsOutcome$outcome <- 1*(str_count(SymptomsOutcome$outcome,"dead") > 0 | 
                                str_count(SymptomsOutcome$outcome,"Dead")  > 0 |
                                str_count(SymptomsOutcome$outcome,"Deceased")  > 0 |
                                str_count(SymptomsOutcome$outcome,"death")  > 0 |
                                str_count(SymptomsOutcome$outcome,"died")  > 0 ) + 
                                2*(str_count(SymptomsOutcome$outcome,"stable") > 0 |
                                str_count(SymptomsOutcome$outcome,"treated")  > 0 |
                                str_count(SymptomsOutcome$outcome,"recovered")  > 0 )

SymptomsOutcome <- SymptomsOutcome[SymptomsOutcome$outcome < 2,]
table(SymptomsOutcome$outcome)
#> 
#>   0   1 
#>  55 102

covid19ML <- SymptomsOutcome
table(covid19ML$sex)
#> 
#> female   male 
#>     57    100
covid19ML$sex <- 1*(str_count(SymptomsOutcome$sex,"female"))
table(covid19ML$sex)
#> 
#>   0   1 
#> 100  57
pander::pander(table(SymptomsOutcome$country))
Table continues below
Brazil China France Gambia Germany Guyana Italy Japan Malaysia
4 51 2 1 1 2 1 8 2
Nepal Philippines Romania Singapore South Korea Thailand Vietnam
2 53 2 6 4 2 14


covid19ML$country <- NULL

covid19ML_Philippines <- covid19ML[SymptomsOutcome$country=="Philippines",]
table(covid19ML_Philippines$outcome)
#> 
#>  1 
#> 53

covid19ML_China <- covid19ML[SymptomsOutcome$country == "China",]
covid19ML_China <- covid19ML_China[complete.cases(covid19ML_China),]
table(covid19ML_China$outcome)
#> 
#>  0  1 
#> 11 40

covid19ML_NoChina <- covid19ML[SymptomsOutcome$country != "China",]
table(covid19ML_NoChina$outcome)
#> 
#>  0  1 
#> 44 60
covid19ML_NoChina <- covid19ML_NoChina[complete.cases(covid19ML_NoChina),]

##Basic Description


pander::pander(sum(table(covid19ML$sex)))

157


plot(density(subset(covid19ML,outcome==1)$age),xlim=c(20,90),col="red",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(covid19ML,outcome==0)$age),xlim=c(20,90),col="blue")
legend("topleft",legend = c("Died","Discharged"),bty = "n",pch = 20,col = c("Red","Blue"))


op <- par(no.readonly = TRUE)


pander::pander(summary(covid19ML$age))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.25 44 62 58.27 73 89
pander::pander(table(covid19ML$outcome))
0 1
55 102
pander::pander(table(covid19ML$sex))
0 1
100 57
pander::pander(table(covid19ML$fever_chills))
0 1
80 77
pander::pander(table(covid19ML$cough))
0 1
104 53
pander::pander(table(covid19ML$breath_dyspnea))
0 1
115 42
pander::pander(table(covid19ML$fatigue))
0 1
141 16
pander::pander(table(covid19ML$pain))
0 1
136 21
pander::pander(table(covid19ML$other_symptoms))
0 1
120 37

Modeling no chinese patients

nochinamodel <- BSWiMS.model(outcome~.,covid19ML_NoChina,NumberofRepeats = -100)

Logistic Predict in China


cStats <- predictionStats_binary(cbind(covid19ML_China$outcome,predict(nochinamodel,covid19ML_China)),plotname = "China Prediction");

China Prediction

cStats <- predictionStats_binary(cbind(covid19ML_NoChina$outcome,predict(nochinamodel,covid19ML_NoChina)),plotname = "No Chinese Prediction");

No Chinese Prediction


sm <- summary(nochinamodel)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower OR upper u.Accuracy
cough -1.355 0.2008 0.2579 0.3311 0.7981
fever_chills -7.545 3.284e-05 0.0005288 0.008514 0.9038
breath_dyspnea 18.72 237921 134391539 7.591e+10 0.7596
pain -1.562 0.11 0.2097 0.4 0.6827
other_symptoms 1.089 1.839 2.971 4.801 0.5577
age 0.06011 1.026 1.062 1.099 0.8044
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
cough 0.6896 0.8634 0.7705 0.6401 0.8481
fever_chills 0.8503 0.9178 0.8894 0.8558 0.9126
breath_dyspnea 0.8653 0.9175 0.7917 0.8591 0.9143
pain 0.7979 0.8619 0.6311 0.7706 0.8461
other_symptoms 0.8544 0.9195 0.5955 0.8608 0.9189
age 0.9008 0.9181 0.8015 0.8897 0.915
  IDI NRI z.IDI z.NRI Frequency
cough 0.3285 1.093 8.056 8.105 0.7371
fever_chills 0.177 1.51 5.032 13.84 0.7443
breath_dyspnea 0.1668 1.098 4.805 8.684 1.272
pain 0.125 0.542 4.601 4.355 0.7154
other_symptoms 0.1286 0.9081 4.344 7.639 0.5203
age 0.08589 0.8002 3.259 5.769 1.243

logistic Model

lmodel <- glm(outcome~.,covid19ML,family=binomial(link=logit))

Logistic Results


cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(lmodel,covid19ML)),plotname = "All");

All

cStats <- predictionStats_binary(cbind(covid19ML_China$outcome,predict(lmodel,covid19ML_China)),plotname = "China Prediction");

China Prediction

cStats <- predictionStats_binary(cbind(covid19ML_NoChina$outcome,predict(lmodel,covid19ML_NoChina)),plotname = "No Chinese Prediction");

No Chinese Prediction


sm <- summary(lmodel)
pander::pander(sm$coefficients)
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.085 1.818 -3.347 0.0008167
age 0.1203 0.02325 5.176 2.272e-07
sex 0.2089 0.6241 0.3347 0.7379
fever_chills -1.387 1.164 -1.191 0.2336
cough 0.6383 0.6192 1.031 0.3026
breath_dyspnea 18.44 1414 0.01304 0.9896
fatigue 0.4636 0.9885 0.469 0.6391
pain 0.01336 0.7867 0.01699 0.9864
other_symptoms 0.217 1.335 0.1626 0.8708

ML Modeling

nochronic <- covid19ML

nochronic.mat <- as.data.frame(model.matrix(outcome ~ age*.,nochronic))
nochronic.mat$`(Intercept)` <- NULL
nochronic.mat$outcome <- as.numeric(nochronic$outcome)
fnames <- colnames(nochronic.mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(nochronic.mat) <- fnames
bm <- BSWiMS.model(outcome~.,nochronic.mat,NumberofRepeats = -100)

Results

par(op)
pander::pander(bm$univariate)
  Name RName ZUni
age age age 11.23
breath_dyspnea breath_dyspnea breath_dyspnea 8.408
age.breath_dyspnea age.breath_dyspnea age.breath_dyspnea 8.408
fever_chills fever_chills fever_chills 5.632
cough cough cough 2.925
pain pain pain 2.887
age.other_symptoms age.other_symptoms age.other_symptoms 2.685
age.sex age.sex age.sex 2.528
age.pain age.pain age.pain 1.97
age.fever_chills age.fever_chills age.fever_chills 1.677
other_symptoms other_symptoms other_symptoms 0.7889
fatigue fatigue fatigue 0.7308
sex sex sex 0.6959
age.cough age.cough age.cough 0.4095
age.fatigue age.fatigue age.fatigue 0.2718

cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(bm,nochronic.mat)),plotname = "BSWiMS");

BSWiMS


sm <- summary(bm)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower OR upper u.Accuracy
age 0.06912 1.061 1.072 1.083 0.8449
breath_dyspnea 2.047 4.381 7.747 13.7 0.6178
fever_chills -4.001 0.007554 0.0183 0.04431 0.6943
age.fever_chills 0.0559 1.042 1.057 1.073 0.6569
age.breath_dyspnea 0.3868 1.29 1.472 1.68 0.6178
age.other_symptoms 0.006667 1.005 1.007 1.009 0.4713
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
age 0.6957 0.8853 0.8413 0.7326
breath_dyspnea 0.8072 0.875 0.7059 0.8118
fever_chills 0.7397 0.8682 0.7102 0.7822
age.fever_chills 0.7666 0.8572 0.6587 0.7801
age.breath_dyspnea 0.8227 0.8692 0.7059 0.8137
age.other_symptoms 0.7761 0.863 0.5638 0.8059
  full.AUC IDI NRI z.IDI z.NRI Frequency
age 0.8886 0.3431 1.335 10.5 13.5 0.6339
breath_dyspnea 0.8819 0.1491 0.7904 5.967 7.817 0.1128
fever_chills 0.8566 0.1557 0.6541 5.914 5.667 0.6689
age.fever_chills 0.8468 0.145 0.7866 5.882 6.817 0.5833
age.breath_dyspnea 0.863 0.1123 0.6369 4.905 6.424 0.8633
age.other_symptoms 0.8707 0.09312 0.7403 4.196 6.84 0.2839

par(op)

gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

Riks Charts


colfeat <- rownames(sm$coefficients)

riskhealtyage <- as.data.frame(cbind(outcome=rep(0,71),age=c(20:90)))
for (cname in  colfeat[-1])
{
  riskhealtyage <- cbind(riskhealtyage,rep(0,nrow(riskhealtyage)))
}
colnames(riskhealtyage ) <- c("outcome",colfeat)

baseprobability <- 1.0-predict(bm,riskhealtyage);
plot(baseprobability~riskhealtyage$age,type="l",col="blue",xlab="Age",ylab="p(discharge)",ylim=c(0,1),main="Probability of Hospital Discharge (Postive COVID-19)",lty=1,lwd=5)

nc=2;
for (cname in  colfeat[-1])
{
  nriskhealtyage <- riskhealtyage; 
  nriskhealtyage[,cname] <- rep(1,nrow(riskhealtyage)) 
   diabetesprobability <- 1.0-predict(bm,nriskhealtyage);
  lines(diabetesprobability~riskhealtyage$age,col=nc,lty=nc,lwd=3)
   nc <- nc + 1
}
  
legend("topright",legend = colfeat,lty=c(1:length(colfeat)),col =c(1:length(colfeat)),lwd=3)

BSWiMS Cross-Validation of only symptoms

SymBSWIMScv <- randomCV(nochronic.mat,
                        "outcome",
                        fittingFunction=BSWiMS.model,
                        trainFraction = trainFraction,
                        repetitions = repetitions,
                        NumberofRepeats = -1)

Only Symptoms Results

par(op)
cStats <- predictionStats_binary(SymBSWIMScv$testPredictions,plotname = "BSWiMS");

BSWiMS

SymBSWIMScv$jaccard

$Jaccard.SM [1] 0.6701542

$averageLength [1] 4.656667

par(mar=c(4,10,4,4),pty="m")
barplot(SymBSWIMScv$featureFrequency/repetitions,xlim=c(0,1),las=2,cex.names =0.70,horiz = TRUE,main="Top Selected Features",xlab="Selection Frequency")

par(op)

fn <- baggedModel(SymBSWIMScv$selectedFeaturesSet,nochronic.mat,Outcome="outcome")

…………………………


gplots::heatmap.2(fn$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)