t = file.choose()
t = read.csv(t)
head(t)
## Age Gender Polyuria Polydipsia sudden.weight.loss weakness Polyphagia
## 1 40 Male No Yes No Yes No
## 2 58 Male No No No Yes No
## 3 41 Male Yes No No Yes Yes
## 4 45 Male No No Yes Yes Yes
## 5 60 Male Yes Yes Yes Yes Yes
## 6 55 Male Yes Yes No Yes Yes
## Genital.thrush visual.blurring Itching Irritability delayed.healing
## 1 No No Yes No Yes
## 2 No Yes No No No
## 3 No No Yes No Yes
## 4 Yes No Yes No Yes
## 5 No Yes Yes Yes Yes
## 6 No Yes Yes No Yes
## partial.paresis muscle.stiffness Alopecia Obesity diabetes
## 1 No Yes Yes Yes Positive
## 2 Yes No Yes No Positive
## 3 No Yes Yes No Positive
## 4 No No No No Positive
## 5 Yes Yes Yes Yes Positive
## 6 No Yes Yes Yes Positive
str(t)
## 'data.frame': 520 obs. of 17 variables:
## $ Age : int 40 58 41 45 60 55 57 66 67 70 ...
## $ Gender : chr "Male" "Male" "Male" "Male" ...
## $ Polyuria : chr "No" "No" "Yes" "No" ...
## $ Polydipsia : chr "Yes" "No" "No" "No" ...
## $ sudden.weight.loss: chr "No" "No" "No" "Yes" ...
## $ weakness : chr "Yes" "Yes" "Yes" "Yes" ...
## $ Polyphagia : chr "No" "No" "Yes" "Yes" ...
## $ Genital.thrush : chr "No" "No" "No" "Yes" ...
## $ visual.blurring : chr "No" "Yes" "No" "No" ...
## $ Itching : chr "Yes" "No" "Yes" "Yes" ...
## $ Irritability : chr "No" "No" "No" "No" ...
## $ delayed.healing : chr "Yes" "No" "Yes" "Yes" ...
## $ partial.paresis : chr "No" "Yes" "No" "No" ...
## $ muscle.stiffness : chr "Yes" "No" "Yes" "No" ...
## $ Alopecia : chr "Yes" "Yes" "Yes" "No" ...
## $ Obesity : chr "Yes" "No" "No" "No" ...
## $ diabetes : chr "Positive" "Positive" "Positive" "Positive" ...
summary(t)
## Age Gender Polyuria Polydipsia
## Min. :16.00 Length:520 Length:520 Length:520
## 1st Qu.:39.00 Class :character Class :character Class :character
## Median :47.50 Mode :character Mode :character Mode :character
## Mean :48.03
## 3rd Qu.:57.00
## Max. :90.00
## sudden.weight.loss weakness Polyphagia Genital.thrush
## Length:520 Length:520 Length:520 Length:520
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## visual.blurring Itching Irritability delayed.healing
## Length:520 Length:520 Length:520 Length:520
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## partial.paresis muscle.stiffness Alopecia Obesity
## Length:520 Length:520 Length:520 Length:520
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## diabetes
## Length:520
## Class :character
## Mode :character
##
##
##
names(t)
## [1] "Age" "Gender" "Polyuria"
## [4] "Polydipsia" "sudden.weight.loss" "weakness"
## [7] "Polyphagia" "Genital.thrush" "visual.blurring"
## [10] "Itching" "Irritability" "delayed.healing"
## [13] "partial.paresis" "muscle.stiffness" "Alopecia"
## [16] "Obesity" "diabetes"
t$diab = ifelse(t$diabetes == "Positive", 1, 0)
t$diab = as.factor(t$diab)
xvars=t[, c("Age","Gender","Polyuria","sudden.weight.loss","weakness","Polyphagia", "Genital.thrush","visual.blurring","Itching","Irritability","delayed.healing","partial.paresis","muscle.stiffness","Alopecia","Obesity")]
yvar=t[,c("diab")]
s=bic.glm(xvars,yvar, strict=F, OR=20, glm.family = "binomial")
summary(s)
##
## Call:
## bic.glm.data.frame(x = xvars, y = yvar, glm.family = "binomial", strict = F, OR = 20)
##
##
## 105 models were selected
## Best 5 models (cumulative posterior probability = 0.2082 ):
##
## p!=0 EV SD model 1 model 2
## Intercept 100 0.849456 0.66473 0.5925 0.6779
## Age 15.9 -0.005758 0.01513 . .
## Gender 100.0 -3.082734 0.42672 -3.2060 -2.9827
## Polyuria 100.0 3.767369 0.44997 3.8029 3.6971
## sudden.weight.loss 78.9 0.904674 0.57823 0.9844 1.1608
## weakness 43.6 0.432996 0.55848 0.9498 .
## Polyphagia 9.7 0.062986 0.22617 . .
## Genital.thrush 94.6 1.369749 0.55423 1.3886 1.4128
## visual.blurring 53.5 0.644774 0.69409 . 1.3127
## Itching 93.8 -1.297677 0.54867 -1.2417 -1.4224
## Irritability 100.0 2.199678 0.48063 2.1830 2.2412
## delayed.healing 55.7 -0.619196 0.63872 -1.2103 .
## partial.paresis 69.7 0.767240 0.60550 1.2234 .
## muscle.stiffness 0.0 0.000000 0.00000 . .
## Alopecia 38.2 -0.386852 0.55703 . -1.0879
## Obesity 1.6 -0.007413 0.08033 . .
##
## nVar 9 8
## BIC -2927.4764 -2927.2613
## post prob 0.051 0.046
## model 3 model 4 model 5
## Intercept 0.5235 0.7202 0.7774
## Age . . .
## Gender -3.0891 -3.1016 -3.2848
## Polyuria 3.6490 3.6851 3.9569
## sudden.weight.loss 1.2574 1.2243 .
## weakness . . 1.1910
## Polyphagia . . .
## Genital.thrush 1.6071 1.2772 1.5067
## visual.blurring 1.0452 . .
## Itching -1.4100 -1.0008 -1.3217
## Irritability 2.3178 2.3240 2.1056
## delayed.healing -1.0927 -0.9629 -1.2162
## partial.paresis 0.9861 1.2169 1.2353
## muscle.stiffness . . .
## Alopecia . . .
## Obesity . . .
##
## nVar 9 8 8
## BIC -2926.9100 -2926.9000 -2926.7735
## post prob 0.038 0.038 0.036
imageplot.bma(s)

train = createDataPartition(t$diab, p=0.7, list=FALSE)
training = t[ train, ]
testing = t[ -train, ]
mod_fit = train(diab ~ Gender + Polyuria + sudden.weight.loss + weakness + Genital.thrush + Itching + Irritability + delayed.healing + partial.paresis, data = training, method = "glm", family="binomial")
summary(mod_fit)
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.59188 -0.35804 0.04088 0.27038 2.58687
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7148 0.5019 1.424 0.15442
## GenderMale -3.4299 0.5256 -6.526 6.77e-11 ***
## PolyuriaYes 3.9161 0.5301 7.387 1.50e-13 ***
## sudden.weight.lossYes 1.1579 0.4657 2.487 0.01290 *
## weaknessYes 0.8451 0.4508 1.875 0.06084 .
## Genital.thrushYes 1.5964 0.5034 3.172 0.00152 **
## ItchingYes -1.2057 0.4754 -2.536 0.01121 *
## IrritabilityYes 1.7637 0.5405 3.263 0.00110 **
## delayed.healingYes -1.4400 0.5036 -2.859 0.00424 **
## partial.paresisYes 1.3353 0.4404 3.032 0.00243 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 485.05 on 363 degrees of freedom
## Residual deviance: 177.54 on 354 degrees of freedom
## AIC: 197.54
##
## Number of Fisher Scoring iterations: 7
pred = predict(mod_fit,newdata = testing, type = "raw")
confusionMatrix(testing$diab,pred)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 54 6
## 1 10 86
##
## Accuracy : 0.8974
## 95% CI : (0.8388, 0.9402)
## No Information Rate : 0.5897
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.786
##
## Mcnemar's Test P-Value : 0.4533
##
## Sensitivity : 0.8438
## Specificity : 0.9348
## Pos Pred Value : 0.9000
## Neg Pred Value : 0.8958
## Prevalence : 0.4103
## Detection Rate : 0.3462
## Detection Prevalence : 0.3846
## Balanced Accuracy : 0.8893
##
## 'Positive' Class : 0
##
prob=predict(mod_fit, newdata=testing, type="prob")
pred = data.frame(prob,testing$diab)
#head(pred)
roc(pred$testing.diab,pred$X1)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
##
## Call:
## roc.default(response = pred$testing.diab, predictor = pred$X1)
##
## Data: pred$X1 in 60 controls (pred$testing.diab 0) < 96 cases (pred$testing.diab 1).
## Area under the curve: 0.9483
auc = smooth(roc(pred$testing.diab,pred$X1))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(auc, col = "red")

#b=glm(diab ~ Gender + Polyuria + sudden.weight.loss + weakness + Genital.thrush + Itching + Irritability + delayed.healing + partial.paresis, family=binomial,data=t)
#DynNom(b,t)
ddist = datadist(t)
options(datadist = "ddist")
m = lrm(diab ~ Gender + Polyuria + sudden.weight.loss + weakness + Genital.thrush + Itching + Irritability + delayed.healing + partial.paresis,data=t)
p = nomogram(m, fun = function(x) 1/(1+exp(-x)), fun.at = c(0.01, 0.05, seq(0.1, 0.9, by = 0.1)), funlabel = "Risk of diabetes",lp = F)
plot(p, cex.axis = 0.7, lmgp = 0.1)
