COVID-19 Diagnosis based on https://github.com/BustByte/coronastatus The data was downloaded from the standarized coronastatus reports. This is a non-validated model of COVID-19 diagnosis based on self reported sympthoms and covid-19 diagnosis. Do not use this findings for self diagnosis.
library(readxl)
library("FRESA.CAD")
repetitions = 300
trainFraction = 0.90
reports <- read_excel("~/GitHub/COVID19-Monitor/Data/reports.xlsx")
table(reports$testResult)
#>
#> NEGATIVE PENDING POSITIVE
#> 220 140 61
PositiveNegative <- subset(reports,testResult == "NEGATIVE" | testResult == "POSITIVE")
PositiveNegative$ID <- NULL
PositiveNegative$profileId <- NULL
PositiveNegative$submissionOrder <- NULL
PositiveNegative$hasBeenTested <- NULL
PositiveNegative$hasBeenInContactWithInfected <- NULL
PositiveNegative$symptomStart <- NULL
PositiveNegative$submissionDate <- NULL
PositiveNegative$bodyTemperature <- NULL
PositiveNegative$smokingHabit <- NULL
PositiveNegative$isolationStatus <- NULL
PositiveNegative$diagnosedWithOtherConditions <- NULL
PositiveNegative$age[PositiveNegative$age == "0 - 9"] <- 5
PositiveNegative$age[PositiveNegative$age == "10 - 19"] <- 15
PositiveNegative$age[PositiveNegative$age == "20 - 29"] <- 25
PositiveNegative$age[PositiveNegative$age == "30 - 39"] <- 35
PositiveNegative$age[PositiveNegative$age == "40 - 49"] <- 45
PositiveNegative$age[PositiveNegative$age == "50 - 59"] <- 55
PositiveNegative$age[PositiveNegative$age == "60 - 69"] <- 65
PositiveNegative$age[PositiveNegative$age == "70 - 79"] <- 75
PositiveNegative$age[PositiveNegative$age == "80 - 89"] <- 85
PositiveNegative$age[PositiveNegative$age == "90 - 99"] <- 95
PositiveNegative <- PositiveNegative[!PositiveNegative$age == "44123",]
PositiveNegative$age <- as.numeric(PositiveNegative$age)
PositiveNegative$sex <- 1*(PositiveNegative$sex == "FEMALE")
PositiveNegative$testResult <- 1*(PositiveNegative$testResult == "POSITIVE")
PositiveNegative <- PositiveNegative[complete.cases(PositiveNegative),]
PositiveNegative[,1:ncol(PositiveNegative)] <- sapply(PositiveNegative,as.numeric)
PositiveNegative <- as.data.frame(PositiveNegative)
plot(density(subset(PositiveNegative,testResult==0)$age),xlim=c(20,90),col="blue",main="Age Distribution",xlab="Age",ylab="p(x)")
lines(density(subset(PositiveNegative,testResult==1)$age),xlim=c(20,90),col="red")
legend("topleft",legend = c("Positive","Negative"),bty = "n",pch = 20,col = c("Red","Blue"))
op <- par(no.readonly = TRUE)
bm <- BSWiMS.model(testResult~.,PositiveNegative,NumberofRepeats = -100)
#> [+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+::::-+:::::-+::::-+::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-+::::-+:::::-+:::::-+:::::-+:::::-+:::::-+:::::-]..............................
namlist <- list();
n = 1;
for (nam in names(PositiveNegative))
{
namlist[[n]] <- table(PositiveNegative[,nam])
n <- n+1;
}
names(namlist) <- names(PositiveNegative)
pander::pander(namlist)
age:
| 5 | 25 | 35 | 45 | 55 | 65 | 85 | 95 |
|---|---|---|---|---|---|---|---|
| 12 | 82 | 92 | 58 | 19 | 4 | 3 | 1 |
sex:
| 0 | 1 |
|---|---|
| 92 | 179 |
testResult:
| 0 | 1 |
|---|---|
| 212 | 59 |
DRY_COUGH:
| 0 | 1 |
|---|---|
| 123 | 148 |
EXHAUSTION:
| 0 | 1 |
|---|---|
| 121 | 150 |
FEVER:
| 0 | 1 |
|---|---|
| 175 | 96 |
HEAVY_BREATHING:
| 0 | 1 |
|---|---|
| 164 | 107 |
MUSCLE_ACHING:
| 0 | 1 |
|---|---|
| 187 | 84 |
DIARRHEA:
| 0 | 1 |
|---|---|
| 219 | 52 |
HEADACHE:
| 0 | 1 |
|---|---|
| 131 | 140 |
SORE_THROAT:
| 0 | 1 |
|---|---|
| 134 | 137 |
NO_TASTE:
| 0 | 1 |
|---|---|
| 239 | 32 |
NO_SMELL:
| 0 | 1 |
|---|---|
| 232 | 39 |
SLIME_COUGH:
| 0 | 1 |
|---|---|
| 224 | 47 |
RUNNY_NOSE:
| 0 | 1 |
|---|---|
| 188 | 83 |
NAUSEA_OR_VOMITING:
| 0 | 1 |
|---|---|
| 258 | 13 |
par(op)
pander::pander(bm$univariate)
| Â | Name | RName | ZUni |
|---|---|---|---|
| NO_SMELL | NO_SMELL | NO_SMELL | 5.025 |
| SORE_THROAT | SORE_THROAT | SORE_THROAT | 4.432 |
| NO_TASTE | NO_TASTE | NO_TASTE | 4.401 |
| FEVER | FEVER | FEVER | 4.264 |
| NAUSEA_OR_VOMITING | NAUSEA_OR_VOMITING | NAUSEA_OR_VOMITING | 3.432 |
| sex | sex | sex | 2.97 |
| MUSCLE_ACHING | MUSCLE_ACHING | MUSCLE_ACHING | 2.615 |
| DIARRHEA | DIARRHEA | DIARRHEA | 2.512 |
| RUNNY_NOSE | RUNNY_NOSE | RUNNY_NOSE | 1.735 |
| EXHAUSTION | EXHAUSTION | EXHAUSTION | 1.304 |
| HEAVY_BREATHING | HEAVY_BREATHING | HEAVY_BREATHING | 1.093 |
| SLIME_COUGH | SLIME_COUGH | SLIME_COUGH | 0.9253 |
| DRY_COUGH | DRY_COUGH | DRY_COUGH | 0.8238 |
| age | age | age | 0.4072 |
| HEADACHE | HEADACHE | HEADACHE | 0.1402 |
cStats <- predictionStats_binary(cbind(PositiveNegative$testResult,predict(bm,PositiveNegative)),plotname = "COVID-19");
COVID-19
sm <- summary(bm)
pander::pander(sm$coefficients)
| Â | Estimate | lower | OR | upper | u.Accuracy |
|---|---|---|---|---|---|
| NO_SMELL | 1.956 | 5.302 | 7.073 | 9.437 | 0.8155 |
| NO_TASTE | 1.083 | 2.49 | 2.953 | 3.502 | 0.8118 |
| NAUSEA_OR_VOMITING | 6.005 | 131.7 | 405.3 | 1247 | 0.8155 |
| SORE_THROAT | -1.494 | 0.1268 | 0.2246 | 0.3977 | 0.6052 |
| MUSCLE_ACHING | 0.03491 | 1.022 | 1.036 | 1.05 | 0.6716 |
| DIARRHEA | 1.36 | 2.269 | 3.898 | 6.696 | 0.7306 |
| FEVER | 1.248 | 2.047 | 3.485 | 5.935 | 0.6863 |
| RUNNY_NOSE | -0.1173 | 0.796 | 0.8894 | 0.9937 | 0.4278 |
| Â | r.Accuracy | full.Accuracy | u.AUC | r.AUC |
|---|---|---|---|---|
| NO_SMELL | 0.761 | 0.8233 | 0.668 | 0.7383 |
| NO_TASTE | 0.77 | 0.8194 | 0.6412 | 0.739 |
| NAUSEA_OR_VOMITING | 0.7594 | 0.8191 | 0.5885 | 0.7318 |
| SORE_THROAT | 0.8298 | 0.816 | 0.6498 | 0.8031 |
| MUSCLE_ACHING | 0.6978 | 0.7887 | 0.5944 | 0.6678 |
| DIARRHEA | 0.8111 | 0.8191 | 0.5832 | 0.7941 |
| FEVER | 0.8406 | 0.8169 | 0.6527 | 0.8169 |
| RUNNY_NOSE | 0.8532 | 0.839 | 0.5544 | 0.8224 |
| Â | full.AUC | IDI | NRI | z.IDI | z.NRI | Frequency |
|---|---|---|---|---|---|---|
| NO_SMELL | 0.8158 | 0.1853 | 0.7584 | 10.06 | 9.783 | 0.5974 |
| NO_TASTE | 0.8124 | 0.1622 | 0.8885 | 9.202 | 11.32 | 0.3214 |
| NAUSEA_OR_VOMITING | 0.8082 | 0.1303 | 0.6439 | 8.071 | 7.932 | 1 |
| SORE_THROAT | 0.8076 | 0.0599 | 0.5962 | 5.009 | 6.554 | 0.8636 |
| MUSCLE_ACHING | 0.7188 | 0.05382 | 0.3826 | 4.946 | 4.194 | 0.03896 |
| DIARRHEA | 0.8082 | 0.04638 | 0.4836 | 4.754 | 5.767 | 1 |
| FEVER | 0.8102 | 0.04814 | 0.6135 | 4.37 | 6.734 | 0.8766 |
| RUNNY_NOSE | 0.812 | 0.01298 | 0.1254 | 1.925 | 1.482 | 0.1364 |
par(op)
gplots::heatmap.2(bm$bagging$formulaNetwork,trace="none",mar = c(10,10),main = "B:SWiMS Formula Network",cexRow = 0.75,cexCol = 0.75)
SymBSWIMScv <- randomCV(PositiveNegative,
"testResult",
fittingFunction=BSWiMS.model,
trainFraction = trainFraction,
repetitions = repetitions,
NumberofRepeats = -1)
par(op)
cStats <- predictionStats_binary(SymBSWIMScv$testPredictions,plotname = "COVID-19 Diagnosis");
COVID-19 Diagnosis
SymBSWIMScv$jaccard
$Jaccard.SM [1] 0.7818584
$averageLength [1] 6.883333
par(mar=c(4,10,4,4),pty="m")
barplot(SymBSWIMScv$featureFrequency/repetitions,xlim=c(0,1),las=2,cex.names =0.70,horiz = TRUE,main="Top Selected Features",xlab="Selection Frequency")
par(op)
fn <- baggedModel(SymBSWIMScv$selectedFeaturesSet,PositiveNegative,Outcome="testResult")
…………………………
gplots::heatmap.2(fn$formulaNetwork,trace="none",mar = c(10,10),main = "COVID-19 Features",cexRow = 0.75,cexCol = 0.75)
SymRFcv <- randomCV(fittingFunction=randomForest::randomForest,
asFactor = TRUE)
par(op)
cStats <- predictionStats_binary(SymRFcv$testPredictions,plotname = "COVID-19 Diagnosis RF");
COVID-19 Diagnosis RF
SymRFcv$jaccard
$Jaccard.SM [1] 1
$averageLength [1] 15