Covid19 Survival

This document does the analysis of the patient level data set. The data was cloned from :https://github.com/beoutbreakprepared/nCoV2019 on April 2, 2020

Loading the libraries

library("FRESA.CAD")
repetitions = 300
trainFraction = 0.90

Loading the data

Loading the cloned https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv file Then curating the data set.

latestdata <- read.csv("~/GitHub/nCoV2019/latest_data/latestdata.csv", na.strings=c("NA",""), stringsAsFactors=FALSE)


ChronicsOutcome <- latestdata[,c("age",
                                 "sex",
                                 "country",
                                 "date_onset_symptoms",
                                 "date_admission_hospital",
                                 "date_confirmation",
                                 "symptoms",
                                 "chronic_disease_binary",
                                 "chronic_disease",
                                 "outcome",
                                 "date_death_or_discharge")]


ChronicsOutcome <- ChronicsOutcome[ !is.na(ChronicsOutcome$outcome) &
                                     !is.na(ChronicsOutcome$age),]

ChronicsOutcome[ChronicsOutcome$age == "50-59","age"] <- 55
ChronicsOutcome[ChronicsOutcome$age == "60-69","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "70-79","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "80-89","age"] <- 85
ChronicsOutcome$age <- as.numeric(ChronicsOutcome$age)

ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$age),]
ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$sex),]

The reported symtoms and chronic diseases were reclasified using the following keywords Symptoms:

fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain

Chronic Diseases:

hypertension: hypertension
renal: kidney renal
diabetes: diabetes
asthma: asthma


ChronicsOutcome$symptoms <- NULL  

chronic <- unlist(str_split(names(table(ChronicsOutcome$chronic_disease)),","))
chronic <- unique(unlist(str_split(chronic,":")))

ChronicsOutcome$hypertension <- 1*(str_count(ChronicsOutcome$chronic_disease,"hypertension") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"hypertenstion")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"Hypertension")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"hypertensive")  > 0)
ChronicsOutcome$hypertension[is.na(ChronicsOutcome$hypertension)] <- 0
table(ChronicsOutcome$hypertension)  
#> 
#>   0   1 
#> 539  85

ChronicsOutcome$diabetes <- 1*(str_count(ChronicsOutcome$chronic_disease,"diabetes") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"Diabetes")  > 0 )
ChronicsOutcome$diabetes[is.na(ChronicsOutcome$diabetes)] <- 0
table(ChronicsOutcome$diabetes)  
#> 
#>   0   1 
#> 565  59

ChronicsOutcome$renal <- 1*(str_count(ChronicsOutcome$chronic_disease,"renal") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"kidney")  > 0 )
ChronicsOutcome$renal[is.na(ChronicsOutcome$renal)] <- 0
table(ChronicsOutcome$renal)  
#> 
#>   0   1 
#> 606  18

ChronicsOutcome$asthma<- 1*(str_count(ChronicsOutcome$chronic_disease,"asthma") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"chronic obstructive pulmonary disease")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"bronch")  > 0 )
ChronicsOutcome$asthma[is.na(ChronicsOutcome$asthma)] <- 0
table(ChronicsOutcome$asthma)  
#> 
#>   0   1 
#> 608  16

ChronicsOutcome$CVD<- 1*(str_count(ChronicsOutcome$chronic_disease,"coronary") > 0 | 
                                str_count(ChronicsOutcome$chronic_disease,"atherosclerosis")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"cardi")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"fibrillation")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"cardi")  > 0 |
                                str_count(ChronicsOutcome$chronic_disease,"heart")  > 0 )
ChronicsOutcome$CVD[is.na(ChronicsOutcome$CVD)] <- 0
table(ChronicsOutcome$CVD)  
#> 
#>   0   1 
#> 605  19

ChronicsOutcome$other_chronic <- 1*(((ChronicsOutcome$hypertension + 
                              ChronicsOutcome$diabetes + 
                              ChronicsOutcome$renal + 
                              ChronicsOutcome$CVD + 
                              ChronicsOutcome$asthma) == 0) & !is.na(ChronicsOutcome$chronic_disease))
table(ChronicsOutcome$other_chronic)  
#> 
#>   0   1 
#> 615   9
ChronicsOutcome$chronic_disease <- NULL
ChronicsOutcome$chronic_disease_binary <- NULL


ChronicsOutcome$date_onset_symptoms <- NULL
ChronicsOutcome$date_admission_hospital <- NULL
ChronicsOutcome$date_confirmation <- NULL
ChronicsOutcome$date_death_or_discharge <- NULL


#table(ChronicsOutcome$outcome)
discharged <- 1*(str_count(ChronicsOutcome$outcome,"disc") > 0 | 
                                str_count(ChronicsOutcome$outcome,"Disc")  > 0 |
                                str_count(ChronicsOutcome$outcome,"Recov")  > 0 |
                                str_count(ChronicsOutcome$outcome,"released")  > 0 )

ChronicsOutcome$outcome <- 1*(str_count(ChronicsOutcome$outcome,"dead") > 0 | 
                                str_count(ChronicsOutcome$outcome,"Dead")  > 0 |
                                str_count(ChronicsOutcome$outcome,"Deceased")  > 0 |
                                str_count(ChronicsOutcome$outcome,"death")  > 0 |
str_count(ChronicsOutcome$outcome,"Death")  > 0 |
str_count(ChronicsOutcome$outcome,"Died")  > 0 |
                                str_count(ChronicsOutcome$outcome,"died")  > 0 )


ChronicsOutcome <- ChronicsOutcome[ChronicsOutcome$outcome | discharged,]

table(ChronicsOutcome$outcome)
#> 
#>   0   1 
#> 235 193

covid19ML <- ChronicsOutcome
table(covid19ML$sex)
#> 
#> female   male 
#>    173    255
covid19ML$sex <- 1*(str_count(ChronicsOutcome$sex,"female"))
table(covid19ML$sex)
#> 
#>   0   1 
#> 255 173
pander::pander(table(ChronicsOutcome$country))

Table continues below
Algeria	Australia	Brazil	Canada	China	Ethiopia	France	Gambia
7	24	4	4	74	1	6	3

Table continues below
Germany	Guyana	Italy	Japan	Malaysia	Nepal	Niger	Philippines
1	2	4	8	6	2	1	93

Table continues below
Romania	San Marino	Singapore	South Korea	Thailand	United States
8	1	100	12	2	29

Vietnam	Zimbabwe
32	2



covid19ML$country <- NULL

##Basic Description


pander::pander(sum(table(covid19ML$sex)))

428


plot(density(subset(covid19ML,outcome==1)$age),xlim=c(20,90),col="red",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(covid19ML,outcome==0)$age),xlim=c(20,90),col="blue")
legend("topleft",legend = c("Died","Discharged"),bty = "n",pch = 20,col = c("Red","Blue"))


op <- par(no.readonly = TRUE)


pander::pander(summary(covid19ML$age))

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
0.25	38	55	53.75	69.25	95

pander::pander(table(covid19ML$outcome))

0	1
235	193

pander::pander(table(covid19ML$sex))

0	1
255	173

pander::pander(table(covid19ML$hypertension))

0	1
351	77

pander::pander(table(covid19ML$renal))

0	1
410	18

pander::pander(table(covid19ML$diabetes))

0	1
371	57

pander::pander(table(covid19ML$asthma))

0	1
415	13

pander::pander(table(covid19ML$CVD))

0	1
409	19

pander::pander(table(covid19ML$other_chronic))

0	1
419	9

nochronic <- covid19ML

nochronic.mat <- as.data.frame(model.matrix(outcome ~ sex*.,nochronic))
nochronic.mat$`(Intercept)` <- NULL
nochronic.mat$outcome <- as.numeric(nochronic$outcome)
fnames <- colnames(nochronic.mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(nochronic.mat) <- fnames


covid19ML <- nochronic.mat

logistic Model

lmodel <- glm(outcome~.,covid19ML,family=binomial(link=logit))

Logistic Results


cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(lmodel,covid19ML)),plotname = "Logit");

Logit


sm <- summary(lmodel)
pander::pander(sm$coefficients)

	Estimate	Std. Error	z value	Pr(>\|z\|)
(Intercept)	-6.184	0.8445	-7.323	2.42e-13
sex	-6.511	2.492	-2.613	0.008967
age	0.1073	0.01551	6.917	4.607e-12
hypertension	0.7777	0.6717	1.158	0.2469
diabetes	2.197	0.9214	2.384	0.01713
renal	17.58	2448	0.007181	0.9943
asthma	17.99	2734	0.006582	0.9947
CVD	-1.377	1.07	-1.287	0.1981
other_chronic	0.7689	1.098	0.7002	0.4838
sex.age	0.08092	0.03864	2.094	0.03625
sex.hypertension	16.97	1858	0.009134	0.9927
sex.diabetes	17.86	2072	0.008619	0.9931
sex.renal	1.019	4334	0.0002351	0.9998
sex.asthma	-17.16	7119	-0.00241	0.9981
sex.CVD	0.8567	5694	0.0001505	0.9999
sex.other_chronic	-5.927	1.995	-2.972	0.002962

Modeling

bm <- BSWiMS.model(outcome~.,covid19ML,NumberofRepeats = -100)

Results

par(op)
pander::pander(bm$univariate)

	Name	RName	ZUni
age	age	age	21.78
hypertension	hypertension	hypertension	10.03
diabetes	diabetes	diabetes	8.346
sex.hypertension	sex.hypertension	sex.hypertension	5.222
renal	renal	renal	4.444
sex.diabetes	sex.diabetes	sex.diabetes	3.875
CVD	CVD	CVD	3.733
asthma	asthma	asthma	3.724
sex	sex	sex	2.603
sex.renal	sex.renal	sex.renal	2.26
sex.age	sex.age	sex.age	2.071
sex.CVD	sex.CVD	sex.CVD	1.741
sex.asthma	sex.asthma	sex.asthma	1.418
other_chronic	other_chronic	other_chronic	0.6237
sex.other_chronic	sex.other_chronic	sex.other_chronic	0.4198


cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(bm,covid19ML)),plotname = "BSWiMS");

BSWiMS


sm <- summary(bm)
pander::pander(sm$coefficients)

Table continues below
	Estimate	lower	OR	upper	u.Accuracy
age	0.04072	1.036	1.042	1.048	0.8458
sex.age	0.07707	1.068	1.08	1.092	0.6145
sex	-5.378	0.001472	0.004619	0.01449	0.5514
hypertension	2.155	3.167	8.626	23.49	0.7103
diabetes	2.778	3.886	16.09	66.64	0.6729
asthma	1.794	1.842	6.012	19.63	0.5794

Table continues below
	r.Accuracy	full.Accuracy	u.AUC	r.AUC	full.AUC
age	0.7407	0.8692	0.8438	0.7143	0.866
sex.age	0.7407	0.7897	0.5892	0.7143	0.7729
sex	0.8048	0.8197	0.5614	0.7886	0.8077
hypertension	0.7873	0.8197	0.6806	0.7718	0.8077
diabetes	0.7967	0.8197	0.6382	0.7831	0.8077
asthma	0.7407	0.7547	0.5337	0.7143	0.7298

	IDI	NRI	z.IDI	z.NRI	Frequency
age	0.3451	1.242	14.57	16.31	0.4367
sex.age	0.1671	0.3066	9.031	3.343	0.4279
sex	0.0761	0.3385	5.342	3.77	1
hypertension	0.04352	0.1887	3.994	2.367	1
diabetes	0.0338	0.6411	3.791	9.166	1
asthma	0.02473	0.1347	2.972	3.733	0.1354


par(op)

gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

Riks Charts


colfeat <- rownames(sm$coefficients)

riskhealtyage <- as.data.frame(cbind(outcome=rep(0,71),age=c(20:90)))
for (cname in  colfeat[-1])
{
  riskhealtyage <- cbind(riskhealtyage,rep(0,nrow(riskhealtyage)))
}
colnames(riskhealtyage ) <- c("outcome",colfeat)

baseprobability <- 1.0-predict(bm,riskhealtyage);
plot(baseprobability~riskhealtyage$age,type="l",col="blue",xlab="Age",ylab="p(discharge)",ylim=c(0,1),main="Probability of Hospital Discharge (Postive COVID-19)",lty=1,lwd=5)

nc=2;
for (cname in  colfeat[-1])
{
  nriskhealtyage <- riskhealtyage; 
  nriskhealtyage[,cname] <- rep(1,nrow(riskhealtyage)) 
   diabetesprobability <- 1.0-predict(bm,nriskhealtyage);
  lines(diabetesprobability~riskhealtyage$age,col=nc,lty=nc,lwd=3)
   nc <- nc + 1
}
  
legend("topright",legend = colfeat,lty=c(1:length(colfeat)),col =c(1:length(colfeat)),lwd=3)

SurvivalPredictor

Jose Tamez

4/14/2020