library(mlbench)
# ---- Load and inspect the Pima Indians Diabetes data ----
# Bring the data set into the workspace.
data("PimaIndiansDiabetes")

# Number of rows and columns.
dim(PimaIndiansDiabetes)
## [1] 768 9

# The outcome column is a two-level factor.
levels(PimaIndiansDiabetes[["diabetes"]])
## [1] "neg" "pos"

# First six rows.
head(PimaIndiansDiabetes, n = 6L)
## pregnant glucose pressure triceps insulin mass pedigree age diabetes
## 1 6 148 72 35 0 33.6 0.627 50 pos
## 2 1 85 66 29 0 26.6 0.351 31 neg
## 3 8 183 64 0 0 23.3 0.672 32 pos
## 4 1 89 66 23 94 28.1 0.167 21 neg
## 5 0 137 40 35 168 43.1 2.288 33 pos
## 6 5 116 74 0 0 25.6 0.201 30 neg

# Column types: eight numeric predictors plus the factor outcome.
# NOTE(review): pressure, triceps, insulin and mass contain 0 in the rows
# shown below; these presumably encode missing measurements - confirm before
# relying on them.
str(PimaIndiansDiabetes)
## 'data.frame': 768 obs. of 9 variables:
## $ pregnant: num 6 1 8 1 0 5 3 10 2 8 ...
## $ glucose : num 148 85 183 89 137 116 78 115 197 125 ...
## $ pressure: num 72 66 64 66 40 74 50 0 70 96 ...
## $ triceps : num 35 29 0 23 35 0 32 0 45 0 ...
## $ insulin : num 0 0 0 94 168 0 88 0 543 0 ...
## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
## $ pedigree: num 0.627 0.351 0.672 0.167 2.288 ...
## $ age : num 50 31 32 21 33 30 26 29 53 54 ...
## $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(neuralnet)
library(vcd)
## Loading required package: grid
# ---- Recode the outcome as a numeric 0/1 indicator ----
# 1 marks rows whose level is "pos", 0 marks the rest. The logical comparison
# is converted straight to numeric, so no further coercion is needed.
PimaIndiansDiabetes$diabetes <- as.numeric(
  PimaIndiansDiabetes$diabetes == "pos"
)

# Class counts after recoding.
table(PimaIndiansDiabetes$diabetes)
##
## 0 1
## 500 268

# Every column is numeric now, including the outcome.
str(PimaIndiansDiabetes)
## 'data.frame': 768 obs. of 9 variables:
## $ pregnant: num 6 1 8 1 0 5 3 10 2 8 ...
## $ glucose : num 148 85 183 89 137 116 78 115 197 125 ...
## $ pressure: num 72 66 64 66 40 74 50 0 70 96 ...
## $ triceps : num 35 29 0 23 35 0 32 0 45 0 ...
## $ insulin : num 0 0 0 94 168 0 88 0 543 0 ...
## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
## $ pedigree: num 0.627 0.351 0.672 0.167 2.288 ...
## $ age : num 50 31 32 21 33 30 26 29 53 54 ...
## $ diabetes: num 1 0 1 0 1 0 1 0 1 1 ...

# Per-column five-number summaries; the mean of diabetes is the share of
# positive cases.
summary(PimaIndiansDiabetes)
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 0.0 Min. : 0.00 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 62.00 1st Qu.: 0.00
## Median : 3.000 Median :117.0 Median : 72.00 Median :23.00
## Mean : 3.845 Mean :120.9 Mean : 69.11 Mean :20.54
## 3rd Qu.: 6.000 3rd Qu.:140.2 3rd Qu.: 80.00 3rd Qu.:32.00
## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00
## insulin mass pedigree age
## Min. : 0.0 Min. : 0.00 Min. :0.0780 Min. :21.00
## 1st Qu.: 0.0 1st Qu.:27.30 1st Qu.:0.2437 1st Qu.:24.00
## Median : 30.5 Median :32.00 Median :0.3725 Median :29.00
## Mean : 79.8 Mean :31.99 Mean :0.4719 Mean :33.24
## 3rd Qu.:127.2 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :846.0 Max. :67.10 Max. :2.4200 Max. :81.00
## diabetes
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.349
## 3rd Qu.:1.000
## Max. :1.000
# ---- Split the rows into training (70%) and test (30%) sets ----
# Fixed seed so the partition below is reproducible.
set.seed(123)
trainIndex <- createDataPartition(
  y = PimaIndiansDiabetes$diabetes,
  p = 0.7,
  list = FALSE,
  times = 1
)

# With list = FALSE the indices come back as a one-column matrix.
head(trainIndex)
## Resample1
## [1,] 1
## [2,] 4
## [3,] 5
## [4,] 7
## [5,] 8
## [6,] 10

# Selected rows form the training set; all remaining rows form the test set.
Train <- PimaIndiansDiabetes[trainIndex, ]
Test <- PimaIndiansDiabetes[-trainIndex, ]

# Class counts in each part.
table(Train$diabetes)
##
## 0 1
## 350 188
table(Test$diabetes)
##
## 0 1
## 150 80

# Structure of both parts.
str(Train)
## 'data.frame': 538 obs. of 9 variables:
## $ pregnant: num 6 1 0 3 10 8 4 1 5 7 ...
## $ glucose : num 148 89 137 78 115 125 110 189 166 100 ...
## $ pressure: num 72 66 40 50 0 96 92 60 72 0 ...
## $ triceps : num 35 23 35 32 0 0 0 23 19 0 ...
## $ insulin : num 0 94 168 88 0 0 0 846 175 0 ...
## $ mass : num 33.6 28.1 43.1 31 35.3 0 37.6 30.1 25.8 30 ...
## $ pedigree: num 0.627 0.167 2.288 0.248 0.134 ...
## $ age : num 50 21 33 26 29 54 30 59 51 32 ...
## $ diabetes: num 1 0 1 1 0 1 0 1 1 1 ...
str(Test)
## 'data.frame': 230 obs. of 9 variables:
## $ pregnant: num 1 8 5 2 10 10 0 7 3 7 ...
## $ glucose : num 85 183 116 197 168 139 118 107 126 196 ...
## $ pressure: num 66 64 74 70 74 80 84 74 88 90 ...
## $ triceps : num 29 0 0 45 0 0 47 0 41 0 ...
## $ insulin : num 0 0 0 543 0 0 230 0 235 0 ...
## $ mass : num 26.6 23.3 25.6 30.5 38 27.1 45.8 29.6 39.3 39.8 ...
## $ pedigree: num 0.351 0.672 0.201 0.158 0.537 ...
## $ age : num 31 32 30 53 34 57 31 31 27 41 ...
## $ diabetes: num 0 1 0 1 1 0 1 1 0 1 ...
# ---- Build the model formula: diabetes against every other column ----
n <- names(Train)
n
## [1] "pregnant" "glucose" "pressure" "triceps" "insulin" "mass"
## [7] "pedigree" "age" "diabetes"

# Every column except the outcome becomes an additive term.
form <- reformulate(setdiff(n, "diabetes"), response = "diabetes")
form
## diabetes ~ pregnant + glucose + pressure + triceps + insulin +
## mass + pedigree + age
# ---- Fit, inspect and evaluate the neural network ----
# The run recorded for the raw predictors stopped after 30 steps with a
# cross-entropy error of about 348 and assigned every training and test row
# to class 0. The predictors sit on very different scales (pedigree stays
# below 3 while insulin reaches 846), so they are min-max rescaled before
# fitting. Minima and ranges are taken from the training rows only and reused
# for the test rows, so no test information enters the fit; training values
# land in [0, 1], test values may fall slightly outside that interval.
predictors <- setdiff(names(Train), "diabetes")
trainMin <- vapply(Train[predictors], min, numeric(1))
trainRange <- vapply(Train[predictors], max, numeric(1)) - trainMin
trainRange[trainRange == 0] <- 1  # constant column: avoid dividing by zero

trainScaled <- Train
trainScaled[predictors] <- as.data.frame(
  scale(Train[predictors], center = trainMin, scale = trainRange)
)
testScaled <- Test
testScaled[predictors] <- as.data.frame(
  scale(Test[predictors], center = trainMin, scale = trainRange)
)

# Default architecture (one hidden unit) with a logistic output and
# cross-entropy error, i.e. a binary classifier for the 0/1 outcome.
fit <- neuralnet(
  form,
  data = trainScaled,
  err.fct = "ce",
  linear.output = FALSE
)

# Error, stopping threshold, step count and fitted weights.
# NOTE: the result matrix previously recorded here (error 348.14 after 30
# steps) came from the fit on unscaled predictors and no longer applies;
# re-run the script to regenerate it.
fit$result.matrix

# Network diagram, then generalized-weight plots for two covariates side by
# side.
plot(fit)
par(mfrow = c(1, 2))
gwplot(fit, selected.covariate = "pregnant")
gwplot(fit, selected.covariate = "glucose")

# Both confusion tables use factors with fixed levels so they stay 2 x 2 even
# when one class is never predicted (the recorded tables had a single row).
classLevels <- c(0, 1)

# Training-set predictions, thresholded at 0.5. Predictor columns are picked
# by name and taken from the scaled copy, matching what the network was
# fitted on.
res <- compute(fit, trainScaled[predictors])
predTrain <- res$net.result
predTrain <- ifelse(predTrain >= 0.5, 1, 0)
table(
  predicted = factor(predTrain, levels = classLevels),
  actual = factor(Train$diabetes, levels = classLevels)
)

# Test-set predictions, using the training-derived scaling.
res2 <- compute(fit, testScaled[predictors])
predTest <- res2$net.result
predTest <- ifelse(predTest >= 0.5, 1, 0)
table(
  predicted = factor(predTest, levels = classLevels),
  actual = factor(Test$diabetes, levels = classLevels)
)