Covid19 Survival

This document does the analysis of the patient level data set. The data was cloned from :https://github.com/beoutbreakprepared/nCoV2019 on April 2, 2020

Loading the libraries

library("FRESA.CAD")
repetitions = 300
trainFraction = 0.90

Loading the data

Loading the cloned https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv file Then curating the data set.

latestdata <- read.csv("~/GitHub/nCoV2019/latest_data/latestdata.csv", na.strings=c("NA",""), stringsAsFactors=FALSE)


ChronicsOutcome <- latestdata[,c("age",
                                 "sex",
                                 "country",
                                 "date_onset_symptoms",
                                 "date_admission_hospital",
                                 "date_confirmation",
                                 "symptoms",
                                 "chronic_disease_binary",
                                 "chronic_disease",
                                 "outcome",
                                 "date_death_or_discharge")]


ChronicsOutcome <- ChronicsOutcome[ !is.na(ChronicsOutcome$outcome) &
                                     !is.na(ChronicsOutcome$age),]

ChronicsOutcome[ChronicsOutcome$age == "50-59","age"] <- 55
ChronicsOutcome[ChronicsOutcome$age == "60-69","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "70-79","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "80-89","age"] <- 85
ChronicsOutcome$age <- as.numeric(ChronicsOutcome$age)

ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$age),]
ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$sex),]

The reported symtoms and chronic diseases were reclasified using the following keywords Symptoms:

fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain

Chronic Diseases:

hypertension: hypertension
renal: kidney renal
diabetes: diabetes
asthma: asthma


ChronicsOutcome$symptoms <- NULL  

chronic <- unlist(str_split(names(table(ChronicsOutcome$chronic_disease)),","))
chronic <- unique(unlist(str_split(chronic,":")))

ChronicsOutcome$hypertension <- 1*(str_count(ChronicsOutcome$chronic_disease,"hypertension") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"hypertenstion")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"Hypertension")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"hypertensive")  > 0)
ChronicsOutcome$hypertension[is.na(ChronicsOutcome$hypertension)] <- 0
table(ChronicsOutcome$hypertension)  
#> 
#>   0   1 
#> 539  85

ChronicsOutcome$diabetes <- 1*(str_count(ChronicsOutcome$chronic_disease,"diabetes") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"Diabetes")  > 0 )
ChronicsOutcome$diabetes[is.na(ChronicsOutcome$diabetes)] <- 0
table(ChronicsOutcome$diabetes)  
#> 
#>   0   1 
#> 565  59

ChronicsOutcome$renal <- 1*(str_count(ChronicsOutcome$chronic_disease,"renal") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"kidney")  > 0 )
ChronicsOutcome$renal[is.na(ChronicsOutcome$renal)] <- 0
table(ChronicsOutcome$renal)  
#> 
#>   0   1 
#> 606  18

ChronicsOutcome$asthma<- 1*(str_count(ChronicsOutcome$chronic_disease,"asthma") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"chronic obstructive pulmonary disease")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"bronch")  > 0 )
ChronicsOutcome$asthma[is.na(ChronicsOutcome$asthma)] <- 0
table(ChronicsOutcome$asthma)  
#> 
#>   0   1 
#> 608  16

ChronicsOutcome$CVD<- 1*(str_count(ChronicsOutcome$chronic_disease,"coronary") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"atherosclerosis")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"cardi")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"fibrillation")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"cardi")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"heart")  > 0 )
ChronicsOutcome$CVD[is.na(ChronicsOutcome$CVD)] <- 0
table(ChronicsOutcome$CVD)  
#> 
#>   0   1 
#> 605  19

ChronicsOutcome$other_chronic <- 1*(((ChronicsOutcome$hypertension + 
                              ChronicsOutcome$diabetes + 
                              ChronicsOutcome$renal + 
                              ChronicsOutcome$CVD + 
                              ChronicsOutcome$asthma) == 0) & !is.na(ChronicsOutcome$chronic_disease))
table(ChronicsOutcome$other_chronic)  
#> 
#>   0   1 
#> 615   9
ChronicsOutcome$chronic_disease <- NULL
ChronicsOutcome$chronic_disease_binary <- NULL


ChronicsOutcome$date_onset_symptoms <- NULL
ChronicsOutcome$date_admission_hospital <- NULL
ChronicsOutcome$date_confirmation <- NULL
ChronicsOutcome$date_death_or_discharge <- NULL


#table(ChronicsOutcome$outcome)
discharged <- 1*(str_count(ChronicsOutcome$outcome,"disc") > 0 | 
                                str_count(ChronicsOutcome$outcome,"Disc")  > 0 |
                                str_count(ChronicsOutcome$outcome,"Recov")  > 0 |
                                str_count(ChronicsOutcome$outcome,"released")  > 0 )

ChronicsOutcome$outcome <- 1*(str_count(ChronicsOutcome$outcome,"dead") > 0 | 
                                str_count(ChronicsOutcome$outcome,"Dead")  > 0 |
                                str_count(ChronicsOutcome$outcome,"Deceased")  > 0 |
                                str_count(ChronicsOutcome$outcome,"death")  > 0 |
str_count(ChronicsOutcome$outcome,"Death")  > 0 |
str_count(ChronicsOutcome$outcome,"Died")  > 0 |
                                str_count(ChronicsOutcome$outcome,"died")  > 0 )


ChronicsOutcome <- ChronicsOutcome[ChronicsOutcome$outcome | discharged,]

table(ChronicsOutcome$outcome)
#> 
#>   0   1 
#> 235 193

covid19ML <- ChronicsOutcome
table(covid19ML$sex)
#> 
#> female   male 
#>    173    255
covid19ML$sex <- 1*(str_count(ChronicsOutcome$sex,"female"))
table(covid19ML$sex)
#> 
#>   0   1 
#> 255 173
pander::pander(table(ChronicsOutcome$country))
Table continues below
Algeria Australia Brazil Canada China Ethiopia France Gambia
7 24 4 4 74 1 6 3
Table continues below
Germany Guyana Italy Japan Malaysia Nepal Niger Philippines
1 2 4 8 6 2 1 93
Table continues below
Romania San Marino Singapore South Korea Thailand United States
8 1 100 12 2 29
Vietnam Zimbabwe
32 2


covid19ML$country <- NULL

##Basic Description


pander::pander(sum(table(covid19ML$sex)))

428


plot(density(subset(covid19ML,outcome==1)$age),xlim=c(20,90),col="red",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(covid19ML,outcome==0)$age),xlim=c(20,90),col="blue")
legend("topleft",legend = c("Died","Discharged"),bty = "n",pch = 20,col = c("Red","Blue"))


op <- par(no.readonly = TRUE)


pander::pander(summary(covid19ML$age))
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.25 38 55 53.75 69.25 95
pander::pander(table(covid19ML$outcome))
0 1
235 193
pander::pander(table(covid19ML$sex))
0 1
255 173
pander::pander(table(covid19ML$hypertension))
0 1
351 77
pander::pander(table(covid19ML$renal))
0 1
410 18
pander::pander(table(covid19ML$diabetes))
0 1
371 57
pander::pander(table(covid19ML$asthma))
0 1
415 13
pander::pander(table(covid19ML$CVD))
0 1
409 19
pander::pander(table(covid19ML$other_chronic))
0 1
419 9
nochronic <- covid19ML

nochronic.mat <- as.data.frame(model.matrix(outcome ~ sex*.,nochronic))
nochronic.mat$`(Intercept)` <- NULL
nochronic.mat$outcome <- as.numeric(nochronic$outcome)
fnames <- colnames(nochronic.mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(nochronic.mat) <- fnames


covid19ML <- nochronic.mat

logistic Model

lmodel <- glm(outcome~.,covid19ML,family=binomial(link=logit))

Logistic Results


cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(lmodel,covid19ML)),plotname = "Logit");

Logit


sm <- summary(lmodel)
pander::pander(sm$coefficients)
  Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.184 0.8445 -7.323 2.42e-13
sex -6.511 2.492 -2.613 0.008967
age 0.1073 0.01551 6.917 4.607e-12
hypertension 0.7777 0.6717 1.158 0.2469
diabetes 2.197 0.9214 2.384 0.01713
renal 17.58 2448 0.007181 0.9943
asthma 17.99 2734 0.006582 0.9947
CVD -1.377 1.07 -1.287 0.1981
other_chronic 0.7689 1.098 0.7002 0.4838
sex.age 0.08092 0.03864 2.094 0.03625
sex.hypertension 16.97 1858 0.009134 0.9927
sex.diabetes 17.86 2072 0.008619 0.9931
sex.renal 1.019 4334 0.0002351 0.9998
sex.asthma -17.16 7119 -0.00241 0.9981
sex.CVD 0.8567 5694 0.0001505 0.9999
sex.other_chronic -5.927 1.995 -2.972 0.002962

Modeling

bm <- BSWiMS.model(outcome~.,covid19ML,NumberofRepeats = -100)

Results

par(op)
pander::pander(bm$univariate)
  Name RName ZUni
age age age 21.78
hypertension hypertension hypertension 10.03
diabetes diabetes diabetes 8.346
sex.hypertension sex.hypertension sex.hypertension 5.222
renal renal renal 4.444
sex.diabetes sex.diabetes sex.diabetes 3.875
CVD CVD CVD 3.733
asthma asthma asthma 3.724
sex sex sex 2.603
sex.renal sex.renal sex.renal 2.26
sex.age sex.age sex.age 2.071
sex.CVD sex.CVD sex.CVD 1.741
sex.asthma sex.asthma sex.asthma 1.418
other_chronic other_chronic other_chronic 0.6237
sex.other_chronic sex.other_chronic sex.other_chronic 0.4198

cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(bm,covid19ML)),plotname = "BSWiMS");

BSWiMS


sm <- summary(bm)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower OR upper u.Accuracy
age 0.04072 1.036 1.042 1.048 0.8458
sex.age 0.07707 1.068 1.08 1.092 0.6145
sex -5.378 0.001472 0.004619 0.01449 0.5514
hypertension 2.155 3.167 8.626 23.49 0.7103
diabetes 2.778 3.886 16.09 66.64 0.6729
asthma 1.794 1.842 6.012 19.63 0.5794
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
age 0.7407 0.8692 0.8438 0.7143 0.866
sex.age 0.7407 0.7897 0.5892 0.7143 0.7729
sex 0.8048 0.8197 0.5614 0.7886 0.8077
hypertension 0.7873 0.8197 0.6806 0.7718 0.8077
diabetes 0.7967 0.8197 0.6382 0.7831 0.8077
asthma 0.7407 0.7547 0.5337 0.7143 0.7298
  IDI NRI z.IDI z.NRI Frequency
age 0.3451 1.242 14.57 16.31 0.4367
sex.age 0.1671 0.3066 9.031 3.343 0.4279
sex 0.0761 0.3385 5.342 3.77 1
hypertension 0.04352 0.1887 3.994 2.367 1
diabetes 0.0338 0.6411 3.791 9.166 1
asthma 0.02473 0.1347 2.972 3.733 0.1354

par(op)

gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

Riks Charts


colfeat <- rownames(sm$coefficients)

riskhealtyage <- as.data.frame(cbind(outcome=rep(0,71),age=c(20:90)))
for (cname in  colfeat[-1])
{
  riskhealtyage <- cbind(riskhealtyage,rep(0,nrow(riskhealtyage)))
}
colnames(riskhealtyage ) <- c("outcome",colfeat)

baseprobability <- 1.0-predict(bm,riskhealtyage);
plot(baseprobability~riskhealtyage$age,type="l",col="blue",xlab="Age",ylab="p(discharge)",ylim=c(0,1),main="Probability of Hospital Discharge (Postive COVID-19)",lty=1,lwd=5)

nc=2;
for (cname in  colfeat[-1])
{
  nriskhealtyage <- riskhealtyage; 
  nriskhealtyage[,cname] <- rep(1,nrow(riskhealtyage)) 
   diabetesprobability <- 1.0-predict(bm,nriskhealtyage);
  lines(diabetesprobability~riskhealtyage$age,col=nc,lty=nc,lwd=3)
   nc <- nc + 1
}
  
legend("topright",legend = colfeat,lty=c(1:length(colfeat)),col =c(1:length(colfeat)),lwd=3)