Loading the data
Loading the cloned https://github.com/beoutbreakprepared/nCoV2019/tree/master/latest_data/latestdata.csv file Then curating the data set.
latestdata <- read.csv("~/GitHub/nCoV2019/latest_data/latestdata.csv", na.strings=c("NA",""), stringsAsFactors=FALSE)
SymptomsOutcome <- latestdata[,c("age",
"sex",
"country",
"date_onset_symptoms",
"date_admission_hospital",
"date_confirmation",
"symptoms",
"chronic_disease_binary",
"chronic_disease",
"outcome",
"date_death_or_discharge")]
SymptomsOutcome <- SymptomsOutcome[!is.na(SymptomsOutcome$symptoms) &
!is.na(SymptomsOutcome$outcome) &
!is.na(SymptomsOutcome$age),]
SymptomsOutcome[SymptomsOutcome$age == "50-59","age"] <- 55
SymptomsOutcome[SymptomsOutcome$age == "60-79","age"] <- 55
SymptomsOutcome[SymptomsOutcome$age == "60-69","age"] <- 65
SymptomsOutcome[SymptomsOutcome$age == "80-89","age"] <- 85
SymptomsOutcome$age <- as.numeric(SymptomsOutcome$age)
SymptomsOutcome <- SymptomsOutcome[!is.na(SymptomsOutcome$age),]
SymptomsOutcome <- SymptomsOutcome[!(SymptomsOutcome$symptoms=="none" | SymptomsOutcome$symptoms=="asymptomatic"),]
The reported symtoms and chronic diseases were reclasified using the following keywords Symptoms:
fever_chills: fever : chills
cough: cough
breath_dyspnea: severe: pneumonia: grasp: dyspnea: breath: acute: respiratory: gasp
fatigue: weakness: fatigue
pain: headache: myalgias: sore: mialgia: pain
syn <- unlist(str_split(names(table(SymptomsOutcome$symptoms)),","))
syn <- unique(unlist(str_split(syn,":")))
SymptomsOutcome$fever_chills <- 1*(str_count(SymptomsOutcome$symptoms,"fever") > 0 |
str_count(SymptomsOutcome$symptoms,"chills") > 0 |
str_count(SymptomsOutcome$symptoms,"cold") > 0)
table(SymptomsOutcome$fever_chills)
#>
#> 0 1
#> 85 90
SymptomsOutcome$cough <- 1*(str_count(SymptomsOutcome$symptoms,"cough") > 0)
table(SymptomsOutcome$cough)
#>
#> 0 1
#> 110 65
SymptomsOutcome$breath_dyspnea <- 1*(str_count(SymptomsOutcome$symptoms,"dyspnea") > 0 |
str_count(SymptomsOutcome$symptoms,"difficulty breathing") > 0 |
str_count(SymptomsOutcome$symptoms,"acute respiratory") > 0 |
str_count(SymptomsOutcome$symptoms,"grasp") > 0 |
str_count(SymptomsOutcome$symptoms,"gasp") > 0)
table(SymptomsOutcome$breath_dyspnea)
#>
#> 0 1
#> 133 42
SymptomsOutcome$fatigue <- 1*(str_count(SymptomsOutcome$symptoms,"fatigue") > 0 |
str_count(SymptomsOutcome$symptoms,"weak") > 0 |
str_count(SymptomsOutcome$symptoms,"fatigure") > 0 )
table(SymptomsOutcome$fatigue)
#>
#> 0 1
#> 159 16
SymptomsOutcome$pain <- 1*(str_count(SymptomsOutcome$symptoms,"chest discomfort") > 0 |
str_count(SymptomsOutcome$symptoms,"pain") > 0 |
str_count(SymptomsOutcome$symptoms,"myalgia") > 0 |
str_count(SymptomsOutcome$symptoms,"headache") > 0 |
str_count(SymptomsOutcome$symptoms,"body malaise") > 0 |
str_count(SymptomsOutcome$symptoms,"myalgias") > 0 |
str_count(SymptomsOutcome$symptoms,"sore") > 0 )
table(SymptomsOutcome$pain)
#>
#> 0 1
#> 150 25
SymptomsOutcome$other_symptoms <- 1*((SymptomsOutcome$pain +
SymptomsOutcome$fatigue +
SymptomsOutcome$breath_dyspnea +
SymptomsOutcome$fever_chills) == 0)
table(SymptomsOutcome$other_symptoms)
#>
#> 0 1
#> 135 40
SymptomsOutcome$symptoms <- NULL
SymptomsOutcome$chronic_disease <- NULL
SymptomsOutcome$chronic_disease_binary <- NULL
SymptomsOutcome$date_onset_symptoms <- NULL
SymptomsOutcome$date_admission_hospital <- NULL
SymptomsOutcome$date_confirmation <- NULL
SymptomsOutcome$date_death_or_discharge <- NULL
table(SymptomsOutcome$outcome)
#>
#> dead
#> 2
#> death
#> 8
#> Deceased
#> 1
#> died
#> 91
#> discharge
#> 25
#> discharged
#> 28
#> Discharged
#> 2
#> recovered
#> 8
#> stable
#> 8
#> treated in an intensive care unit (14.02.2020)
#> 2
SymptomsOutcome$outcome <- 1*(str_count(SymptomsOutcome$outcome,"dead") > 0 |
str_count(SymptomsOutcome$outcome,"Dead") > 0 |
str_count(SymptomsOutcome$outcome,"Deceased") > 0 |
str_count(SymptomsOutcome$outcome,"death") > 0 |
str_count(SymptomsOutcome$outcome,"died") > 0 ) +
2*(str_count(SymptomsOutcome$outcome,"stable") > 0 |
str_count(SymptomsOutcome$outcome,"treated") > 0 |
str_count(SymptomsOutcome$outcome,"recovered") > 0 )
SymptomsOutcome <- SymptomsOutcome[SymptomsOutcome$outcome < 2,]
table(SymptomsOutcome$outcome)
#>
#> 0 1
#> 55 102
covid19ML <- SymptomsOutcome
table(covid19ML$sex)
#>
#> female male
#> 57 100
covid19ML$sex <- 1*(str_count(SymptomsOutcome$sex,"female"))
table(covid19ML$sex)
#>
#> 0 1
#> 100 57
pander::pander(table(SymptomsOutcome$country))
Table continues below
| 4 |
51 |
2 |
1 |
1 |
2 |
1 |
8 |
2 |
covid19ML$country <- NULL
covid19ML_Philippines <- covid19ML[SymptomsOutcome$country=="Philippines",]
table(covid19ML_Philippines$outcome)
#>
#> 1
#> 53
covid19ML_China <- covid19ML[SymptomsOutcome$country == "China",]
covid19ML_China <- covid19ML_China[complete.cases(covid19ML_China),]
table(covid19ML_China$outcome)
#>
#> 0 1
#> 11 40
covid19ML_NoChina <- covid19ML[SymptomsOutcome$country != "China",]
table(covid19ML_NoChina$outcome)
#>
#> 0 1
#> 44 60
covid19ML_NoChina <- covid19ML_NoChina[complete.cases(covid19ML_NoChina),]
##Basic Description
pander::pander(sum(table(covid19ML$sex)))
157
plot(density(subset(covid19ML,outcome==1)$age),xlim=c(20,90),col="red",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(covid19ML,outcome==0)$age),xlim=c(20,90),col="blue")
legend("topleft",legend = c("Died","Discharged"),bty = "n",pch = 20,col = c("Red","Blue"))

op <- par(no.readonly = TRUE)
pander::pander(summary(covid19ML$age))
pander::pander(table(covid19ML$outcome))
pander::pander(table(covid19ML$sex))
pander::pander(table(covid19ML$fever_chills))
pander::pander(table(covid19ML$cough))
pander::pander(table(covid19ML$breath_dyspnea))
pander::pander(table(covid19ML$fatigue))
pander::pander(table(covid19ML$pain))
pander::pander(table(covid19ML$other_symptoms))
ML Modeling
nochronic <- covid19ML
nochronic.mat <- as.data.frame(model.matrix(outcome ~ age*.,nochronic))
nochronic.mat$`(Intercept)` <- NULL
nochronic.mat$outcome <- as.numeric(nochronic$outcome)
fnames <- colnames(nochronic.mat)
fnames <- str_replace_all(fnames," ","_")
fnames <- str_replace_all(fnames,"/","_")
fnames <- str_replace_all(fnames,":",".")
colnames(nochronic.mat) <- fnames
bm <- BSWiMS.model(outcome~.,nochronic.mat,NumberofRepeats = -100)
Results
par(op)
pander::pander(bm$univariate)
| age |
age |
age |
11.23 |
| breath_dyspnea |
breath_dyspnea |
breath_dyspnea |
8.408 |
| age.breath_dyspnea |
age.breath_dyspnea |
age.breath_dyspnea |
8.408 |
| fever_chills |
fever_chills |
fever_chills |
5.632 |
| cough |
cough |
cough |
2.925 |
| pain |
pain |
pain |
2.887 |
| age.other_symptoms |
age.other_symptoms |
age.other_symptoms |
2.685 |
| age.sex |
age.sex |
age.sex |
2.528 |
| age.pain |
age.pain |
age.pain |
1.97 |
| age.fever_chills |
age.fever_chills |
age.fever_chills |
1.677 |
| other_symptoms |
other_symptoms |
other_symptoms |
0.7889 |
| fatigue |
fatigue |
fatigue |
0.7308 |
| sex |
sex |
sex |
0.6959 |
| age.cough |
age.cough |
age.cough |
0.4095 |
| age.fatigue |
age.fatigue |
age.fatigue |
0.2718 |
cStats <- predictionStats_binary(cbind(covid19ML$outcome,predict(bm,nochronic.mat)),plotname = "BSWiMS");
BSWiMS 
sm <- summary(bm)
pander::pander(sm$coefficients)
Table continues below
| age |
0.06912 |
1.061 |
1.072 |
1.083 |
0.8449 |
| breath_dyspnea |
2.047 |
4.381 |
7.747 |
13.7 |
0.6178 |
| fever_chills |
-4.001 |
0.007554 |
0.0183 |
0.04431 |
0.6943 |
| age.fever_chills |
0.0559 |
1.042 |
1.057 |
1.073 |
0.6569 |
| age.breath_dyspnea |
0.3868 |
1.29 |
1.472 |
1.68 |
0.6178 |
| age.other_symptoms |
0.006667 |
1.005 |
1.007 |
1.009 |
0.4713 |
Table continues below
| age |
0.6957 |
0.8853 |
0.8413 |
0.7326 |
| breath_dyspnea |
0.8072 |
0.875 |
0.7059 |
0.8118 |
| fever_chills |
0.7397 |
0.8682 |
0.7102 |
0.7822 |
| age.fever_chills |
0.7666 |
0.8572 |
0.6587 |
0.7801 |
| age.breath_dyspnea |
0.8227 |
0.8692 |
0.7059 |
0.8137 |
| age.other_symptoms |
0.7761 |
0.863 |
0.5638 |
0.8059 |
| age |
0.8886 |
0.3431 |
1.335 |
10.5 |
13.5 |
0.6339 |
| breath_dyspnea |
0.8819 |
0.1491 |
0.7904 |
5.967 |
7.817 |
0.1128 |
| fever_chills |
0.8566 |
0.1557 |
0.6541 |
5.914 |
5.667 |
0.6689 |
| age.fever_chills |
0.8468 |
0.145 |
0.7866 |
5.882 |
6.817 |
0.5833 |
| age.breath_dyspnea |
0.863 |
0.1123 |
0.6369 |
4.905 |
6.424 |
0.8633 |
| age.other_symptoms |
0.8707 |
0.09312 |
0.7403 |
4.196 |
6.84 |
0.2839 |
par(op)
gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

Riks Charts
colfeat <- rownames(sm$coefficients)
riskhealtyage <- as.data.frame(cbind(outcome=rep(0,71),age=c(20:90)))
for (cname in colfeat[-1])
{
riskhealtyage <- cbind(riskhealtyage,rep(0,nrow(riskhealtyage)))
}
colnames(riskhealtyage ) <- c("outcome",colfeat)
baseprobability <- 1.0-predict(bm,riskhealtyage);
plot(baseprobability~riskhealtyage$age,type="l",col="blue",xlab="Age",ylab="p(discharge)",ylim=c(0,1),main="Probability of Hospital Discharge (Postive COVID-19)",lty=1,lwd=5)
nc=2;
for (cname in colfeat[-1])
{
nriskhealtyage <- riskhealtyage;
nriskhealtyage[,cname] <- rep(1,nrow(riskhealtyage))
diabetesprobability <- 1.0-predict(bm,nriskhealtyage);
lines(diabetesprobability~riskhealtyage$age,col=nc,lty=nc,lwd=3)
nc <- nc + 1
}
legend("topright",legend = colfeat,lty=c(1:length(colfeat)),col =c(1:length(colfeat)),lwd=3)

BSWiMS Cross-Validation of only symptoms
SymBSWIMScv <- randomCV(nochronic.mat,
"outcome",
fittingFunction=BSWiMS.model,
trainFraction = trainFraction,
repetitions = repetitions,
NumberofRepeats = -1)
Only Symptoms Results
par(op)
cStats <- predictionStats_binary(SymBSWIMScv$testPredictions,plotname = "BSWiMS");
BSWiMS 
SymBSWIMScv$jaccard
$Jaccard.SM [1] 0.6701542
$averageLength [1] 4.656667
par(mar=c(4,10,4,4),pty="m")
barplot(SymBSWIMScv$featureFrequency/repetitions,xlim=c(0,1),las=2,cex.names =0.70,horiz = TRUE,main="Top Selected Features",xlab="Selection Frequency")

par(op)
fn <- baggedModel(SymBSWIMScv$selectedFeaturesSet,nochronic.mat,Outcome="outcome")
…………………………
gplots::heatmap.2(fn$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)
