Loading the data
Loading the cloned https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv file Then curating the data set.
latestdata <- read.csv("~/GitHub/nCoV2019/latest_data/latestdata.csv", na.strings=c("NA",""), stringsAsFactors=FALSE)
ChronicsOutcome <- latestdata[,c("age",
"sex",
"country",
"date_onset_symptoms",
"date_admission_hospital",
"date_confirmation",
"symptoms",
"chronic_disease_binary",
"chronic_disease",
"outcome",
"date_death_or_discharge")]
ChronicsOutcome <- ChronicsOutcome[ !is.na(ChronicsOutcome$outcome) &
!is.na(ChronicsOutcome$age),]
ChronicsOutcome[ChronicsOutcome$age == "50-59","age"] <- 55
ChronicsOutcome[ChronicsOutcome$age == "60-69","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "70-79","age"] <- 65
ChronicsOutcome[ChronicsOutcome$age == "80-89","age"] <- 85
ChronicsOutcome$age <- as.numeric(ChronicsOutcome$age)
ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$age),]
ChronicsOutcome <- ChronicsOutcome[!is.na(ChronicsOutcome$sex),]
The reported symtoms and chronic diseases were reclasified using the following keywords Symptoms:
fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain
Chronic Diseases:
hypertension: hypertension
renal: kidney renal
diabetes: diabetes
asthma: asthma
ChronicsOutcome$symptoms <- NULL
chronic <- unlist(str_split(names(table(ChronicsOutcome$chronic_disease)),","))
chronic <- unique(unlist(str_split(chronic,":")))
ChronicsOutcome$hypertension <- 1*(str_count(ChronicsOutcome$chronic_disease,"hypertension") > 0 |
str_count(ChronicsOutcome$chronic_disease,"hypertenstion") > 0 |
str_count(ChronicsOutcome$chronic_disease,"Hypertension") > 0 |
str_count(ChronicsOutcome$chronic_disease,"hypertensive") > 0)
ChronicsOutcome$hypertension[is.na(ChronicsOutcome$hypertension)] <- 0
table(ChronicsOutcome$hypertension)
#>
#> 0 1
#> 539 85
ChronicsOutcome$diabetes <- 1*(str_count(ChronicsOutcome$chronic_disease,"diabetes") > 0 |
str_count(ChronicsOutcome$chronic_disease,"Diabetes") > 0 )
ChronicsOutcome$diabetes[is.na(ChronicsOutcome$diabetes)] <- 0
table(ChronicsOutcome$diabetes)
#>
#> 0 1
#> 565 59
ChronicsOutcome$renal <- 1*(str_count(ChronicsOutcome$chronic_disease,"renal") > 0 |
str_count(ChronicsOutcome$chronic_disease,"kidney") > 0 )
ChronicsOutcome$renal[is.na(ChronicsOutcome$renal)] <- 0
table(ChronicsOutcome$renal)
#>
#> 0 1
#> 606 18
ChronicsOutcome$asthma<- 1*(str_count(ChronicsOutcome$chronic_disease,"asthma") > 0 |
str_count(ChronicsOutcome$chronic_disease,"chronic obstructive pulmonary disease") > 0 |
str_count(ChronicsOutcome$chronic_disease,"bronch") > 0 )
ChronicsOutcome$asthma[is.na(ChronicsOutcome$asthma)] <- 0
table(ChronicsOutcome$asthma)
#>
#> 0 1
#> 608 16
ChronicsOutcome$CVD<- 1*(str_count(ChronicsOutcome$chronic_disease,"coronary") > 0 |
str_count(ChronicsOutcome$chronic_disease,"atherosclerosis") > 0 |
str_count(ChronicsOutcome$chronic_disease,"cardi") > 0 |
str_count(ChronicsOutcome$chronic_disease,"fibrillation") > 0 |
str_count(ChronicsOutcome$chronic_disease,"cardi") > 0 |
str_count(ChronicsOutcome$chronic_disease,"heart") > 0 )
ChronicsOutcome$CVD[is.na(ChronicsOutcome$CVD)] <- 0
table(ChronicsOutcome$CVD)
#>
#> 0 1
#> 605 19
ChronicsOutcome$other_chronic <- 1*(((ChronicsOutcome$hypertension +
ChronicsOutcome$diabetes +
ChronicsOutcome$renal +
ChronicsOutcome$CVD +
ChronicsOutcome$asthma) == 0) & !is.na(ChronicsOutcome$chronic_disease))
table(ChronicsOutcome$other_chronic)
#>
#> 0 1
#> 615 9
ChronicsOutcome$chronic_disease <- NULL
ChronicsOutcome$chronic_disease_binary <- NULL
ChronicsOutcome$date_onset_symptoms <- NULL
ChronicsOutcome$date_admission_hospital <- NULL
ChronicsOutcome$date_confirmation <- NULL
ChronicsOutcome$date_death_or_discharge <- NULL
#table(ChronicsOutcome$outcome)
discharged <- 1*(str_count(ChronicsOutcome$outcome,"disc") > 0 |
str_count(ChronicsOutcome$outcome,"Disc") > 0 |
str_count(ChronicsOutcome$outcome,"Recov") > 0 |
str_count(ChronicsOutcome$outcome,"released") > 0 )
ChronicsOutcome$outcome <- 1*(str_count(ChronicsOutcome$outcome,"dead") > 0 |
str_count(ChronicsOutcome$outcome,"Dead") > 0 |
str_count(ChronicsOutcome$outcome,"Deceased") > 0 |
str_count(ChronicsOutcome$outcome,"death") > 0 |
str_count(ChronicsOutcome$outcome,"Death") > 0 |
str_count(ChronicsOutcome$outcome,"Died") > 0 |
str_count(ChronicsOutcome$outcome,"died") > 0 )
ChronicsOutcome <- ChronicsOutcome[ChronicsOutcome$outcome | discharged,]
table(ChronicsOutcome$outcome)
#>
#> 0 1
#> 235 193
covid19ML <- ChronicsOutcome
table(covid19ML$sex)
#>
#> female male
#> 173 255
covid19ML$sex <- 1*(str_count(ChronicsOutcome$sex,"female"))
table(covid19ML$sex)
#>
#> 0 1
#> 255 173
pander::pander(table(ChronicsOutcome$country))
Table continues below
| 7 |
24 |
4 |
4 |
74 |
1 |
6 |
3 |
Table continues below
| 1 |
2 |
4 |
8 |
6 |
2 |
1 |
93 |
Table continues below
| 8 |
1 |
100 |
12 |
2 |
29 |
covid19ML$country <- NULL
##Basic Description
pander::pander(sum(table(covid19ML$sex)))
428
plot(density(subset(covid19ML,outcome==1)$age),xlim=c(20,90),col="red",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(covid19ML,outcome==0)$age),xlim=c(20,90),col="blue")
legend("topleft",legend = c("Died","Discharged"),bty = "n",pch = 20,col = c("Red","Blue"))

op <- par(no.readonly = TRUE)
pander::pander(summary(covid19ML$age))
| 0.25 |
38 |
55 |
53.75 |
69.25 |
95 |
pander::pander(table(covid19ML$outcome))
pander::pander(table(covid19ML$sex))
pander::pander(table(covid19ML$hypertension))
pander::pander(table(covid19ML$renal))
pander::pander(table(covid19ML$diabetes))
pander::pander(table(covid19ML$asthma))
pander::pander(table(covid19ML$CVD))
pander::pander(table(covid19ML$other_chronic))
nochronic <- covid19ML
nochronic.mat <- as.data.frame(model.matrix(outcome ~ sex*.,nochronic))
nochronic.mat$`(Intercept)` <- NULL
nochronic.mat$outcome <- as.numeric(nochronic$outcome)
fnames <- colnames(nochronic.mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(nochronic.mat) <- fnames
covid19ML <- nochronic.mat
Modeling
bm <- BSWiMS.model(outcome~.,covid19ML,NumberofRepeats = -100)
Results
par(op)
pander::pander(bm$univariate)
| age |
age |
age |
21.78 |
| hypertension |
hypertension |
hypertension |
10.03 |
| diabetes |
diabetes |
diabetes |
8.346 |
| sex.hypertension |
sex.hypertension |
sex.hypertension |
5.222 |
| renal |
renal |
renal |
4.444 |
| sex.diabetes |
sex.diabetes |
sex.diabetes |
3.875 |
| CVD |
CVD |
CVD |
3.733 |
| asthma |
asthma |
asthma |
3.724 |
| sex |
sex |
sex |
2.603 |
| sex.renal |
sex.renal |
sex.renal |
2.26 |
| sex.age |
sex.age |
sex.age |
2.071 |
| sex.CVD |
sex.CVD |
sex.CVD |
1.741 |
| sex.asthma |
sex.asthma |
sex.asthma |
1.418 |
| other_chronic |
other_chronic |
other_chronic |
0.6237 |
| sex.other_chronic |
sex.other_chronic |
sex.other_chronic |
0.4198 |
cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(bm,covid19ML)),plotname = "BSWiMS");
BSWiMS 
sm <- summary(bm)
pander::pander(sm$coefficients)
Table continues below
| age |
0.04072 |
1.036 |
1.042 |
1.048 |
0.8458 |
| sex.age |
0.07707 |
1.068 |
1.08 |
1.092 |
0.6145 |
| sex |
-5.378 |
0.001472 |
0.004619 |
0.01449 |
0.5514 |
| hypertension |
2.155 |
3.167 |
8.626 |
23.49 |
0.7103 |
| diabetes |
2.778 |
3.886 |
16.09 |
66.64 |
0.6729 |
| asthma |
1.794 |
1.842 |
6.012 |
19.63 |
0.5794 |
Table continues below
| age |
0.7407 |
0.8692 |
0.8438 |
0.7143 |
0.866 |
| sex.age |
0.7407 |
0.7897 |
0.5892 |
0.7143 |
0.7729 |
| sex |
0.8048 |
0.8197 |
0.5614 |
0.7886 |
0.8077 |
| hypertension |
0.7873 |
0.8197 |
0.6806 |
0.7718 |
0.8077 |
| diabetes |
0.7967 |
0.8197 |
0.6382 |
0.7831 |
0.8077 |
| asthma |
0.7407 |
0.7547 |
0.5337 |
0.7143 |
0.7298 |
| age |
0.3451 |
1.242 |
14.57 |
16.31 |
0.4367 |
| sex.age |
0.1671 |
0.3066 |
9.031 |
3.343 |
0.4279 |
| sex |
0.0761 |
0.3385 |
5.342 |
3.77 |
1 |
| hypertension |
0.04352 |
0.1887 |
3.994 |
2.367 |
1 |
| diabetes |
0.0338 |
0.6411 |
3.791 |
9.166 |
1 |
| asthma |
0.02473 |
0.1347 |
2.972 |
3.733 |
0.1354 |
par(op)
gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

Riks Charts
colfeat <- rownames(sm$coefficients)
riskhealtyage <- as.data.frame(cbind(outcome=rep(0,71),age=c(20:90)))
for (cname in colfeat[-1])
{
riskhealtyage <- cbind(riskhealtyage,rep(0,nrow(riskhealtyage)))
}
colnames(riskhealtyage ) <- c("outcome",colfeat)
baseprobability <- 1.0-predict(bm,riskhealtyage);
plot(baseprobability~riskhealtyage$age,type="l",col="blue",xlab="Age",ylab="p(discharge)",ylim=c(0,1),main="Probability of Hospital Discharge (Postive COVID-19)",lty=1,lwd=5)
nc=2;
for (cname in colfeat[-1])
{
nriskhealtyage <- riskhealtyage;
nriskhealtyage[,cname] <- rep(1,nrow(riskhealtyage))
diabetesprobability <- 1.0-predict(bm,nriskhealtyage);
lines(diabetesprobability~riskhealtyage$age,col=nc,lty=nc,lwd=3)
nc <- nc + 1
}
legend("topright",legend = colfeat,lty=c(1:length(colfeat)),col =c(1:length(colfeat)),lwd=3)
