Covid19 Diagnosis

COVID-19 Diagnosis based on https://github.com/BustByte/coronastatus The data was downloaded from the standarized coronastatus reports. This is a non-validated model of COVID-19 diagnosis based on self reported sympthoms and covid-19 diagnosis. Do not use this findings for self diagnosis.

library(readxl)
library("FRESA.CAD")
repetitions = 300
trainFraction = 0.90

The data

reports <- read_excel("~/GitHub/COVID19-Monitor/Data/reports.xlsx")
table(reports$testResult)
#> 
#> NEGATIVE  PENDING POSITIVE 
#>      220      140       61
PositiveNegative <- subset(reports,testResult == "NEGATIVE" | testResult == "POSITIVE")

PositiveNegative$ID <- NULL
PositiveNegative$profileId <- NULL
PositiveNegative$submissionOrder <- NULL
PositiveNegative$hasBeenTested <- NULL
PositiveNegative$hasBeenInContactWithInfected <- NULL
PositiveNegative$symptomStart <- NULL
PositiveNegative$submissionDate <- NULL
PositiveNegative$bodyTemperature <- NULL
PositiveNegative$smokingHabit <- NULL
PositiveNegative$isolationStatus <- NULL
PositiveNegative$diagnosedWithOtherConditions <- NULL


PositiveNegative$age[PositiveNegative$age == "0 - 9"] <- 5
PositiveNegative$age[PositiveNegative$age == "10 - 19"] <- 15
PositiveNegative$age[PositiveNegative$age == "20 - 29"] <- 25
PositiveNegative$age[PositiveNegative$age == "30 - 39"] <- 35
PositiveNegative$age[PositiveNegative$age == "40 - 49"] <- 45
PositiveNegative$age[PositiveNegative$age == "50 - 59"] <- 55
PositiveNegative$age[PositiveNegative$age == "60 - 69"] <- 65
PositiveNegative$age[PositiveNegative$age == "70 - 79"] <- 75
PositiveNegative$age[PositiveNegative$age == "80 - 89"] <- 85
PositiveNegative$age[PositiveNegative$age == "90 - 99"] <- 95
PositiveNegative <- PositiveNegative[!PositiveNegative$age == "44123",]
PositiveNegative$age <- as.numeric(PositiveNegative$age)
PositiveNegative$sex <- 1*(PositiveNegative$sex == "FEMALE")
PositiveNegative$testResult <- 1*(PositiveNegative$testResult == "POSITIVE")

PositiveNegative <- PositiveNegative[complete.cases(PositiveNegative),]

PositiveNegative[,1:ncol(PositiveNegative)] <- sapply(PositiveNegative,as.numeric)
PositiveNegative <- as.data.frame(PositiveNegative)
plot(density(subset(PositiveNegative,testResult==0)$age),xlim=c(20,90),col="blue",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(PositiveNegative,testResult==1)$age),xlim=c(20,90),col="red")
legend("topleft",legend = c("Positive","Negative"),bty = "n",pch = 20,col = c("Red","Blue"))


op <- par(no.readonly = TRUE)

Modeling

bm <- BSWiMS.model(testResult~.,PositiveNegative,NumberofRepeats = -100)
#> [+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+::::-+:::::-+::::-+::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-]..............................

Results

namlist <- list();
n = 1;
for (nam in names(PositiveNegative))
{
  namlist[[n]] <- table(PositiveNegative[,nam]) 
  n <- n+1;
}
names(namlist) <- names(PositiveNegative)
pander::pander(namlist)
  • age:

    5 25 35 45 55 65 85 95
    12 82 92 58 19 4 3 1
  • sex:

    0 1
    92 179
  • testResult:

    0 1
    212 59
  • DRY_COUGH:

    0 1
    123 148
  • EXHAUSTION:

    0 1
    121 150
  • FEVER:

    0 1
    175 96
  • HEAVY_BREATHING:

    0 1
    164 107
  • MUSCLE_ACHING:

    0 1
    187 84
  • DIARRHEA:

    0 1
    219 52
  • HEADACHE:

    0 1
    131 140
  • SORE_THROAT:

    0 1
    134 137
  • NO_TASTE:

    0 1
    239 32
  • NO_SMELL:

    0 1
    232 39
  • SLIME_COUGH:

    0 1
    224 47
  • RUNNY_NOSE:

    0 1
    188 83
  • NAUSEA_OR_VOMITING:

    0 1
    258 13

par(op)
pander::pander(bm$univariate)
  Name RName ZUni
NO_SMELL NO_SMELL NO_SMELL 5.025
SORE_THROAT SORE_THROAT SORE_THROAT 4.432
NO_TASTE NO_TASTE NO_TASTE 4.401
FEVER FEVER FEVER 4.264
NAUSEA_OR_VOMITING NAUSEA_OR_VOMITING NAUSEA_OR_VOMITING 3.432
sex sex sex 2.97
MUSCLE_ACHING MUSCLE_ACHING MUSCLE_ACHING 2.615
DIARRHEA DIARRHEA DIARRHEA 2.512
RUNNY_NOSE RUNNY_NOSE RUNNY_NOSE 1.735
EXHAUSTION EXHAUSTION EXHAUSTION 1.304
HEAVY_BREATHING HEAVY_BREATHING HEAVY_BREATHING 1.093
SLIME_COUGH SLIME_COUGH SLIME_COUGH 0.9253
DRY_COUGH DRY_COUGH DRY_COUGH 0.8238
age age age 0.4072
HEADACHE HEADACHE HEADACHE 0.1402

cStats <- predictionStats_binary(cbind(PositiveNegative$testResult,predict(bm,PositiveNegative)),plotname = "COVID-19");

COVID-19


sm <- summary(bm)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower OR upper u.Accuracy
NO_SMELL 1.956 5.302 7.073 9.437 0.8155
NO_TASTE 1.083 2.49 2.953 3.502 0.8118
NAUSEA_OR_VOMITING 6.005 131.7 405.3 1247 0.8155
SORE_THROAT -1.494 0.1268 0.2246 0.3977 0.6052
MUSCLE_ACHING 0.03491 1.022 1.036 1.05 0.6716
DIARRHEA 1.36 2.269 3.898 6.696 0.7306
FEVER 1.248 2.047 3.485 5.935 0.6863
RUNNY_NOSE -0.1173 0.796 0.8894 0.9937 0.4278
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC
NO_SMELL 0.761 0.8233 0.668 0.7383
NO_TASTE 0.77 0.8194 0.6412 0.739
NAUSEA_OR_VOMITING 0.7594 0.8191 0.5885 0.7318
SORE_THROAT 0.8298 0.816 0.6498 0.8031
MUSCLE_ACHING 0.6978 0.7887 0.5944 0.6678
DIARRHEA 0.8111 0.8191 0.5832 0.7941
FEVER 0.8406 0.8169 0.6527 0.8169
RUNNY_NOSE 0.8532 0.839 0.5544 0.8224
  full.AUC IDI NRI z.IDI z.NRI Frequency
NO_SMELL 0.8158 0.1853 0.7584 10.06 9.783 0.5974
NO_TASTE 0.8124 0.1622 0.8885 9.202 11.32 0.3214
NAUSEA_OR_VOMITING 0.8082 0.1303 0.6439 8.071 7.932 1
SORE_THROAT 0.8076 0.0599 0.5962 5.009 6.554 0.8636
MUSCLE_ACHING 0.7188 0.05382 0.3826 4.946 4.194 0.03896
DIARRHEA 0.8082 0.04638 0.4836 4.754 5.767 1
FEVER 0.8102 0.04814 0.6135 4.37 6.734 0.8766
RUNNY_NOSE 0.812 0.01298 0.1254 1.925 1.482 0.1364

par(op)

gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)

SymBSWIMScv <- randomCV(PositiveNegative,
                        "testResult",
                        fittingFunction=BSWiMS.model,
                        trainFraction = trainFraction,
                        repetitions = repetitions,
                        NumberofRepeats = -1)

Only Symptoms Results

par(op)
cStats <- predictionStats_binary(SymBSWIMScv$testPredictions,plotname = "COVID-19 Diagnosis");

COVID-19 Diagnosis

SymBSWIMScv$jaccard

$Jaccard.SM [1] 0.7818584

$averageLength [1] 6.883333

par(mar=c(4,10,4,4),pty="m")
barplot(SymBSWIMScv$featureFrequency/repetitions,xlim=c(0,1),las=2,cex.names =0.70,horiz = TRUE,main="Top Selected Features",xlab="Selection Frequency")

par(op)

fn <- baggedModel(SymBSWIMScv$selectedFeaturesSet,PositiveNegative,Outcome="testResult")

…………………………


gplots::heatmap.2(fn$formulaNetwork,trace="none",mar = c(10,10),main = "COVID-19 Features",cexRow = 0.75,cexCol = 0.75)

SymRFcv <- randomCV(fittingFunction=randomForest::randomForest,
                        asFactor = TRUE)

Only Symptoms Results

par(op)
cStats <- predictionStats_binary(SymRFcv$testPredictions,plotname = "COVID-19 Diagnosis RF");

COVID-19 Diagnosis RF

SymRFcv$jaccard

$Jaccard.SM [1] 1

$averageLength [1] 15