Covid19 Survival

This document analyzes the patient-level data set. The data were cloned from https://github.com/beoutbreakprepared/nCoV2019 on April 2, 2020.

Loading the libraries

library("FRESA.CAD")

Loading the data

I curated the original data: https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv

The reported symptoms and chronic diseases were reclassified using the following keywords:

Symptoms:

fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain

Chronic Diseases:

hypertension: hypertension
renal: kidney: renal
diabetes: diabetes
asthma: asthma

# Load the curated patient table (tab-delimited) and drop the Chronic_Disease
# column, which is removed before any model is fitted.
SymptomsOutcome <- read.delim("../Data/symptomsOutcome.txt", stringsAsFactors = FALSE)
SymptomsOutcome$Chronic_Disease <- NULL

# Symptoms-only copy: same rows, without the chronic-disease columns and Country.
chronic_cols <- c("hypertension", "diabetes", "renal", "other_chronic", "asthma", "Country")
nochronic <- SymptomsOutcome[setdiff(names(SymptomsOutcome), chronic_cols)]

# Design matrix holding Age, every other predictor, and each Age-by-predictor
# interaction; the intercept column is dropped.
nochronic.mat <- as.data.frame(model.matrix(Death ~ Age * ., nochronic))
nochronic.mat$`(Intercept)` <- NULL
# model.matrix() drops rows with missing values by default, so the outcome is
# looked up by row name instead of being attached by position.
nochronic.mat$Death <- as.numeric(nochronic[rownames(nochronic.mat), "Death"])

# Make the column names formula-safe: " " and "/" become "_", ":" becomes ".".
# chartr() is base R, so this step needs no string package to be attached.
fnames <- chartr(" /:", "__.", colnames(nochronic.mat))
colnames(nochronic.mat) <- fnames

# Seed R's RNG so the resampling further down can be repeated.
# NOTE(review): confirm that FRESA.CAD's compiled routines draw from R's RNG.
set.seed(20200402)

# Cross-validation settings used by the randomCV() calls.
repetitions <- 300
trainFraction <- 0.90

## Basic Description


# Number of patients with a recorded Sex value (table() ignores NA by default).
sex_counts <- table(SymptomsOutcome$Sex)
pander::pander(sum(sex_counts))

174


# Kernel density of Age for each outcome, drawn on shared axes.
age_died <- density(subset(SymptomsOutcome, Death == 1)$Age)
age_discharged <- density(subset(SymptomsOutcome, Death == 0)$Age)

# Size the y-axis from both curves so that neither one is clipped.
density_top <- max(age_died$y, age_discharged$y)
plot(age_died, xlim = c(20, 90), ylim = c(0, density_top), col = "red",
     main = "Age Distribution", xlab = "Age", ylab = "p(x)")
# lines() draws on the existing axes; xlim is not one of its graphical parameters.
lines(age_discharged, col = "blue")
# Line keys in the legend match the curves drawn above.
legend("topleft", legend = c("Died", "Discharged"), bty = "n", lty = 1, col = c("red", "blue"))

# Snapshot the current graphical parameters so later code can restore them with par(op).
op <- par(no.readonly = TRUE)


# Frequency tables for Country and for each 0/1 column, plus the Age summary.
# Each call is followed by its rendered output.
pander::pander(table(SymptomsOutcome$Country))
Table continues below
Brazil China France Gambia Germany Guyana Italy Japan Malaysia
4 59 2 1 1 2 1 10 2
Table continues below
Nepal Philippines Romania Singapore South Korea Taiwan Thailand
2 61 1 6 4 2 2
Vietnam
14
pander::pander(summary(SymptomsOutcome$Age))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.25 44 61 57.18 72 89
pander::pander(table(SymptomsOutcome$Death))
0 1
73 101
pander::pander(table(SymptomsOutcome$Sex))
0 1
61 113
pander::pander(table(SymptomsOutcome$fever_chills))
0 1
84 90
pander::pander(table(SymptomsOutcome$cough))
0 1
109 65
pander::pander(table(SymptomsOutcome$breath_dyspnea))
0 1
126 48
pander::pander(table(SymptomsOutcome$fatigue))
0 1
160 14
pander::pander(table(SymptomsOutcome$pain))
0 1
153 21
pander::pander(table(SymptomsOutcome$other_symthom))
0 1
144 30
pander::pander(table(SymptomsOutcome$hypertension))
0 1
121 53
pander::pander(table(SymptomsOutcome$renal))
0 1
162 12
pander::pander(table(SymptomsOutcome$diabetes))
0 1
135 39
pander::pander(table(SymptomsOutcome$asthma))
0 1
170 4
pander::pander(table(SymptomsOutcome$other_chronic))
0 1
160 14


# Remove Country so that `Death ~ .` in the models that follow does not include it.
SymptomsOutcome[["Country"]] <- NULL

Logistic Model

# Logistic regression of Death on every remaining column of SymptomsOutcome.
lmodel <- glm(
  Death ~ .,
  data = SymptomsOutcome,
  family = binomial(link = logit)
)

Logistic Results


# Pair each observed outcome with the logistic model's prediction
# (predict() on a glm returns the linear predictor by default) and pass the
# two-column matrix to predictionStats_binary under the plot name "Logit".
logit_scores <- predict(lmodel, SymptomsOutcome)
logit_pairs <- cbind(SymptomsOutcome$Death, logit_scores, deparse.level = 0)
cStats <- predictionStats_binary(logit_pairs, plotname = "Logit")

Logit


# Coefficient table of the logistic fit.
sm <- summary(lmodel)
logit_coef_table <- sm$coefficients
pander::pander(logit_coef_table)
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -5.89 1.752 -3.361 0.000777
Age 0.1025 0.02326 4.408 1.046e-05
Sex 0.1624 0.617 0.2631 0.7924
fever_chills -0.94 0.8155 -1.153 0.249
cough -0.1283 0.6313 -0.2032 0.839
breath_dyspnea 2.621 1.198 2.188 0.02864
fatigue 0.2342 1.134 0.2066 0.8363
pain 0.2427 0.8971 0.2706 0.7867
other_symthom -0.7833 1.196 -0.6549 0.5125
hypertension 1.594 1.017 1.568 0.1169
renal 17.73 3831 0.004629 0.9963
diabetes 18.78 2218 0.008467 0.9932
asthma 17.24 7148 0.002412 0.9981
other_chronic 0.02315 0.8975 0.02579 0.9794

Modeling

# BSWiMS model of Death on every remaining column of SymptomsOutcome.
# NOTE(review): the meaning of a negative NumberofRepeats is defined by
# FRESA.CAD - confirm against its documentation.
bm <- BSWiMS.model(
  Death ~ .,
  SymptomsOutcome,
  NumberofRepeats = -100
)

Results

# Restore the saved graphics state, then print the model's univariate table.
par(op)
bm_univariate <- bm$univariate
pander::pander(bm_univariate)
  Name RName ZUni
Age Age Age 11.51
hypertension hypertension hypertension 8.914
breath_dyspnea breath_dyspnea breath_dyspnea 8.018
diabetes diabetes diabetes 7.931
fever_chills fever_chills fever_chills 5.846
cough cough cough 3.778
renal renal renal 3.672
pain pain pain 2.315
asthma asthma asthma 2.031
other_symthom other_symthom other_symthom 1.508
other_chronic other_chronic other_chronic 1.101
fatigue fatigue fatigue 0.6182
Sex Sex Sex 0.1304

# Pair each observed outcome with the BSWiMS prediction for the same row and
# pass the two-column matrix to predictionStats_binary.
bm_scores <- predict(bm, SymptomsOutcome)
bm_pairs <- cbind(SymptomsOutcome$Death, bm_scores, deparse.level = 0)
cStats <- predictionStats_binary(bm_pairs, plotname = "BSWiMS")

BSWiMS


# Coefficient table reported by the BSWiMS model summary.
sm <- summary(bm)
bm_coef_table <- sm$coefficients
pander::pander(bm_coef_table)
Table continues below
  Estimate lower OR upper u.Accuracy
Age 0.09417 1.079 1.099 1.119 0.8213
breath_dyspnea 3.627 7.644 37.61 185 0.6724
diabetes 13.34 929.8 618842 411892132 0.6437
hypertension 1.455 2.04 4.285 9.001 0.7011
other_symthom 0.08956 1.033 1.094 1.158 0.4918
fever_chills -0.08304 0.8688 0.9203 0.9749 0.6954
renal 0.3072 1.086 1.36 1.702 0.4885
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
Age 0.7648 0.8711 0.8195 0.7894 0.8716
breath_dyspnea 0.8276 0.863 0.714 0.8323 0.8663
diabetes 0.831 0.8617 0.6931 0.835 0.8657
hypertension 0.8174 0.851 0.7388 0.8268 0.8572
other_symthom 0.8375 0.8651 0.5373 0.8376 0.8641
fever_chills 0.8377 0.8647 0.7034 0.8377 0.8666
renal 0.8332 0.8534 0.5594 0.8322 0.853
  IDI NRI z.IDI z.NRI Frequency
Age 0.2395 1.223 7.968 11.39 1
breath_dyspnea 0.08419 0.8704 4.371 8.299 1
diabetes 0.07193 0.7225 3.931 7.33 0.8202
hypertension 0.06645 0.8479 3.707 7.925 0.4941
other_symthom 0.03484 0.4061 2.898 3.613 0.07059
fever_chills 0.033 0.8195 2.666 6.498 0.08739
renal 0.03399 0.297 2.62 3.622 0.02185

# Restore the saved graphics state before drawing the heatmap.
par(op)

# Heatmap of the formula network stored in bm$bagging. `margins` is written
# out in full rather than relying on partial matching of `mar`.
gplots::heatmap.2(bm$bagging$formulaNetwork, trace = "none", margins = c(10, 10),
                  main = "B:SWiMS Formula Network", cexRow = 0.75, cexCol = 0.75)

Symptoms only Analysis

# BSWiMS model of Death on the symptoms-only design matrix (nochronic.mat).
noChronicsmodel <- BSWiMS.model(
  Death ~ .,
  nochronic.mat,
  NumberofRepeats = -100
)

Symptoms only Fit Results

# Univariate table of the symptoms-only model.
symptoms_univariate <- noChronicsmodel$univariate
pander::pander(symptoms_univariate)
  Name RName ZUni
Age Age Age 11.51
Age.breath_dyspnea Age.breath_dyspnea Age.breath_dyspnea 8.34
breath_dyspnea breath_dyspnea breath_dyspnea 8.018
fever_chills fever_chills fever_chills 5.846
cough cough cough 3.778
Age.Sex Age.Sex Age.Sex 2.878
Age.other_symthom Age.other_symthom Age.other_symthom 2.566
pain pain pain 2.315
Age.fever_chills Age.fever_chills Age.fever_chills 1.941
other_symthom other_symthom other_symthom 1.508
Age.cough Age.cough Age.cough 1.317
Age.pain Age.pain Age.pain 1.258
fatigue fatigue fatigue 0.6182
Age.fatigue Age.fatigue Age.fatigue 0.3703
Sex Sex Sex 0.1304

# Pair each observed outcome with the symptoms-only model's prediction and
# pass the two-column matrix to predictionStats_binary.
symptoms_scores <- predict(noChronicsmodel, nochronic.mat)
symptoms_pairs <- cbind(nochronic.mat$Death, symptoms_scores, deparse.level = 0)
cStats <- predictionStats_binary(symptoms_pairs, plotname = "BSWiMS")

BSWiMS


# Coefficient table reported by the symptoms-only model summary.
sm <- summary(noChronicsmodel)
symptoms_coef_table <- sm$coefficients
pander::pander(symptoms_coef_table)
Table continues below
  Estimate lower OR upper u.Accuracy
Age 0.06828 1.059 1.071 1.083 0.8214
Age.fever_chills 0.03319 1.025 1.034 1.042 0.6525
Age.breath_dyspnea 0.07848 1.054 1.082 1.11 0.6724
breath_dyspnea 0.8563 1.542 2.354 3.596 0.6724
fever_chills -2.838 0.02002 0.05855 0.1713 0.6954
Age.other_symthom 0.006055 1.003 1.006 1.009 0.5
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
Age 0.7108 0.8621 0.8199 0.7234
Age.fever_chills 0.7221 0.813 0.6529 0.7336
Age.breath_dyspnea 0.7962 0.8259 0.714 0.7926
breath_dyspnea 0.8419 0.8616 0.714 0.8382
fever_chills 0.7915 0.8326 0.7034 0.8037
Age.other_symthom 0.7451 0.7884 0.556 0.7547
  full.AUC IDI NRI z.IDI z.NRI Frequency
Age 0.864 0.2839 1.28 8.951 12.2 0.6327
Age.fever_chills 0.8088 0.182 1.008 6.711 8.777 0.2076
Age.breath_dyspnea 0.8304 0.1211 0.7915 5.087 7.383 0.7685
breath_dyspnea 0.8635 0.06985 0.7994 3.804 7.552 0.2036
fever_chills 0.8364 0.06905 0.7299 3.334 6.14 0.9142
Age.other_symthom 0.8009 0.0529 0.4554 3.139 4.128 0.2735

# Restore the saved graphics state before drawing the heatmap.
par(op)

# Heatmap of the formula network stored in noChronicsmodel$bagging. `margins`
# is written out in full rather than relying on partial matching of `mar`.
gplots::heatmap.2(noChronicsmodel$bagging$formulaNetwork, trace = "none", margins = c(10, 10),
                  main = "B:SWiMS Formula Network", cexRow = 0.75, cexCol = 0.75)

Cross Validation: Model with Chronic Disease


# Cross-validate BSWiMS.model on the full table with outcome column "Death",
# using the training fraction and repetition count set earlier in the script.
BSWiMScv <- randomCV(
  SymptomsOutcome,
  "Death",
  fittingFunction = BSWiMS.model,
  trainFraction = trainFraction,
  repetitions = repetitions,
  NumberofRepeats = -1
)

Chronic Disease Cross Validation Results

# Restore the graphics state, then summarise the held-out predictions
# collected during cross-validation.
par(op)
cv_test_predictions <- BSWiMScv$testPredictions
cStats <- predictionStats_binary(cv_test_predictions, plotname = "BSWiMS")

BSWiMS

# Auto-print the jaccard element of the cross-validation result
# (the output below shows Jaccard.SM and averageLength).
BSWiMScv$jaccard

$Jaccard.SM [1] 0.8385182

$averageLength [1] 4.36

# Widen the left margin so the horizontal bar labels fit.
par(mar = c(4, 10, 4, 4), pty = "m")
# Feature counts divided by the number of repetitions.
cv_selection_freq <- BSWiMScv$featureFrequency / repetitions
barplot(
  cv_selection_freq,
  xlim = c(0, 1),
  las = 2,
  cex.names = 0.70,
  horiz = TRUE,
  main = "Top Selected Features",
  xlab = "Selection Frequency"
)

# Back to the saved graphics state.
par(op)

# Bagged model built from the feature sets selected during cross-validation.
cv_feature_sets <- BSWiMScv$selectedFeaturesSet
fn <- baggedModel(cv_feature_sets, SymptomsOutcome, Outcome = "Death")

…………………………


# Heatmap of the bagged model's formula network. `margins` is written out in
# full rather than relying on partial matching of `mar`.
gplots::heatmap.2(fn$formulaNetwork, trace = "none", margins = c(10, 10),
                  main = "B:SWiMS Formula Network", cexRow = 0.75, cexCol = 0.75)

BSWiMS Cross-Validation of only symptoms

# Cross-validate BSWiMS.model on the symptoms-only matrix, reusing the
# training sample sets recorded in BSWiMScv.
shared_train_sets <- BSWiMScv$trainSamplesSets
SymBSWIMScv <- randomCV(
  nochronic.mat,
  "Death",
  fittingFunction = BSWiMS.model,
  trainSampleSets = shared_train_sets,
  NumberofRepeats = -1
)

Only Symptoms Results

# Restore the graphics state, then summarise the held-out predictions of the
# symptoms-only cross-validation.
par(op)
symptoms_test_predictions <- SymBSWIMScv$testPredictions
cStats <- predictionStats_binary(symptoms_test_predictions, plotname = "BSWiMS")

BSWiMS

# Auto-print the jaccard element of the symptoms-only cross-validation result
# (the output below shows Jaccard.SM and averageLength).
SymBSWIMScv$jaccard

$Jaccard.SM [1] 0.7740898

$averageLength [1] 5.346667

# Widen the left margin so the horizontal bar labels fit.
par(mar = c(4, 10, 4, 4), pty = "m")
# Feature counts divided by the number of repetitions.
symptoms_selection_freq <- SymBSWIMScv$featureFrequency / repetitions
barplot(
  symptoms_selection_freq,
  xlim = c(0, 1),
  las = 2,
  cex.names = 0.70,
  horiz = TRUE,
  main = "Top Selected Features",
  xlab = "Selection Frequency"
)

# Back to the saved graphics state.
par(op)

# Bagged model built from the feature sets selected during the symptoms-only
# cross-validation.
symptoms_feature_sets <- SymBSWIMScv$selectedFeaturesSet
fn <- baggedModel(symptoms_feature_sets, nochronic.mat, Outcome = "Death")

…………………………


# Heatmap of the bagged model's formula network. `margins` is written out in
# full rather than relying on partial matching of `mar`.
gplots::heatmap.2(fn$formulaNetwork, trace = "none", margins = c(10, 10),
                  main = "B:SWiMS Formula Network", cexRow = 0.75, cexCol = 0.75)