Continuous cues: PimaIndiansDiabetes2 data set example
# Load the Pima Indians diabetes data and print per-column summary
# statistics; the summary also reports how many NA values each column holds.
# NOTE(review): the data set presumably ships with the mlbench package --
# confirm that library(mlbench) is attached earlier in the file.
data("PimaIndiansDiabetes2")
summary(PimaIndiansDiabetes2)
## pregnant glucose pressure triceps
## Min. : 0.000 Min. : 44.0 Min. : 24.00 Min. : 7.00
## 1st Qu.: 1.000 1st Qu.: 99.0 1st Qu.: 64.00 1st Qu.:22.00
## Median : 3.000 Median :117.0 Median : 72.00 Median :29.00
## Mean : 3.845 Mean :121.7 Mean : 72.41 Mean :29.15
## 3rd Qu.: 6.000 3rd Qu.:141.0 3rd Qu.: 80.00 3rd Qu.:36.00
## Max. :17.000 Max. :199.0 Max. :122.00 Max. :99.00
## NA's :5 NA's :35 NA's :227
## insulin mass pedigree age
## Min. : 14.00 Min. :18.20 Min. :0.0780 Min. :21.00
## 1st Qu.: 76.25 1st Qu.:27.50 1st Qu.:0.2437 1st Qu.:24.00
## Median :125.00 Median :32.30 Median :0.3725 Median :29.00
## Mean :155.55 Mean :32.46 Mean :0.4719 Mean :33.24
## 3rd Qu.:190.00 3rd Qu.:36.60 3rd Qu.:0.6262 3rd Qu.:41.00
## Max. :846.00 Max. :67.10 Max. :2.4200 Max. :81.00
## NA's :374 NA's :11
## diabetes
## neg:500
## pos:268
##
##
##
##
##
# Compact column-by-column overview of the raw data: one line per variable
# showing its type and the first few values.
glimpse(PimaIndiansDiabetes2)
## Observations: 768
## Variables:
## $ pregnant (dbl) 6, 1, 8, 1, 0, 5, 3, 10, 2, 8, 4, 10, 10, 1, 5, 7, 0,...
## $ glucose (dbl) 148, 85, 183, 89, 137, 116, 78, 115, 197, 125, 110, 1...
## $ pressure (dbl) 72, 66, 64, 66, 40, 74, 50, NA, 70, 96, 92, 74, 80, 6...
## $ triceps (dbl) 35, 29, NA, 23, 35, NA, 32, NA, 45, NA, NA, NA, NA, 2...
## $ insulin (dbl) NA, NA, NA, 94, 168, NA, 88, NA, 543, NA, NA, NA, NA,...
## $ mass (dbl) 33.6, 26.6, 23.3, 28.1, 43.1, 25.6, 31.0, 35.3, 30.5,...
## $ pedigree (dbl) 0.627, 0.351, 0.672, 0.167, 2.288, 0.201, 0.248, 0.13...
## $ age (dbl) 50, 31, 32, 21, 33, 30, 26, 29, 53, 54, 30, 34, 57, 5...
## $ diabetes (fctr) pos, neg, pos, neg, pos, neg, pos, neg, pos, pos, ne...
# Impute missing values, fit a random forest classifier for `diabetes`, and
# rank the predictors by mean decrease in Gini impurity.

# na.roughfix() fills in the NA cells so randomForest() receives complete
# data (rough imputation: column median for numeric columns, most frequent
# level for factors).
dat2 <- tbl_df(na.roughfix(PimaIndiansDiabetes2))

# Fixed seed so the fitted forest is reproducible.
set.seed(1)
fit2 <- randomForest(
  formula = diabetes ~ .,
  data = dat2,
  ntree = 2000
)

# importance() returns a matrix whose row names are the predictor names;
# copy them into a regular column before sorting, most important first.
imp2 <- data.frame(importance(fit2))
imp2 <- mutate(imp2, var.name = row.names(imp2))
imp2 <- arrange(imp2, desc(MeanDecreaseGini))
imp2
## MeanDecreaseGini var.name
## 1 89.51838 glucose
## 2 55.87957 mass
## 3 46.79497 age
## 4 43.33883 pedigree
## 5 30.24436 insulin
## 6 28.74235 pressure
## 7 27.77833 pregnant
## 8 25.37968 triceps
# Draw one panel per predictor on a 3 x 3 grid, in the row order of `imp2`
# (i.e. sorted by decreasing importance).
# NOTE(review): fnc_pd() is not defined in this section; presumably it draws
# a partial-dependence plot for the named variable -- verify against its
# definition.

# par() returns the previous settings, so keep them and restore them
# afterwards instead of assuming the layout was 1 x 1 before.
old_par <- par(mfrow = c(3, 3))

# seq_len() yields an empty sequence when `imp2` has no rows, whereas
# 1:nrow(imp2) would iterate over c(1, 0).
for (i in seq_len(nrow(imp2))) {
  fnc_pd(fit2, dat2, imp2$var.name[i], imp2)
}

par(old_par)
Mixed-type cues: Ionosphere data set example
# Load the Ionosphere data and print per-column summary statistics. The
# first two columns and `Class` are factors; the remaining columns are
# numeric.
# NOTE(review): the data set presumably ships with the mlbench package --
# confirm that library(mlbench) is attached earlier in the file.
data("Ionosphere")
summary(Ionosphere)
## V1 V2 V3 V4 V5
## 0: 38 0:351 Min. :-1.0000 Min. :-1.00000 Min. :-1.0000
## 1:313 1st Qu.: 0.4721 1st Qu.:-0.06474 1st Qu.: 0.4127
## Median : 0.8711 Median : 0.01631 Median : 0.8092
## Mean : 0.6413 Mean : 0.04437 Mean : 0.6011
## 3rd Qu.: 1.0000 3rd Qu.: 0.19418 3rd Qu.: 1.0000
## Max. : 1.0000 Max. : 1.00000 Max. : 1.0000
## V6 V7 V8 V9
## Min. :-1.0000 Min. :-1.0000 Min. :-1.00000 Min. :-1.00000
## 1st Qu.:-0.0248 1st Qu.: 0.2113 1st Qu.:-0.05484 1st Qu.: 0.08711
## Median : 0.0228 Median : 0.7287 Median : 0.01471 Median : 0.68421
## Mean : 0.1159 Mean : 0.5501 Mean : 0.11936 Mean : 0.51185
## 3rd Qu.: 0.3347 3rd Qu.: 0.9692 3rd Qu.: 0.44567 3rd Qu.: 0.95324
## Max. : 1.0000 Max. : 1.0000 Max. : 1.00000 Max. : 1.00000
## V10 V11 V12
## Min. :-1.00000 Min. :-1.00000 Min. :-1.00000
## 1st Qu.:-0.04807 1st Qu.: 0.02112 1st Qu.:-0.06527
## Median : 0.01829 Median : 0.66798 Median : 0.02825
## Mean : 0.18135 Mean : 0.47618 Mean : 0.15504
## 3rd Qu.: 0.53419 3rd Qu.: 0.95790 3rd Qu.: 0.48237
## Max. : 1.00000 Max. : 1.00000 Max. : 1.00000
## V13 V14 V15 V16
## Min. :-1.0000 Min. :-1.00000 Min. :-1.0000 Min. :-1.00000
## 1st Qu.: 0.0000 1st Qu.:-0.07372 1st Qu.: 0.0000 1st Qu.:-0.08170
## Median : 0.6441 Median : 0.03027 Median : 0.6019 Median : 0.00000
## Mean : 0.4008 Mean : 0.09341 Mean : 0.3442 Mean : 0.07113
## 3rd Qu.: 0.9555 3rd Qu.: 0.37486 3rd Qu.: 0.9193 3rd Qu.: 0.30897
## Max. : 1.0000 Max. : 1.00000 Max. : 1.0000 Max. : 1.00000
## V17 V18 V19
## Min. :-1.0000 Min. :-1.000000 Min. :-1.0000
## 1st Qu.: 0.0000 1st Qu.:-0.225690 1st Qu.: 0.0000
## Median : 0.5909 Median : 0.000000 Median : 0.5762
## Mean : 0.3819 Mean :-0.003617 Mean : 0.3594
## 3rd Qu.: 0.9357 3rd Qu.: 0.195285 3rd Qu.: 0.8993
## Max. : 1.0000 Max. : 1.000000 Max. : 1.0000
## V20 V21 V22
## Min. :-1.00000 Min. :-1.0000 Min. :-1.000000
## 1st Qu.:-0.23467 1st Qu.: 0.0000 1st Qu.:-0.243870
## Median : 0.00000 Median : 0.4991 Median : 0.000000
## Mean :-0.02402 Mean : 0.3367 Mean : 0.008296
## 3rd Qu.: 0.13437 3rd Qu.: 0.8949 3rd Qu.: 0.188760
## Max. : 1.00000 Max. : 1.0000 Max. : 1.000000
## V23 V24 V25 V26
## Min. :-1.0000 Min. :-1.00000 Min. :-1.0000 Min. :-1.00000
## 1st Qu.: 0.0000 1st Qu.:-0.36689 1st Qu.: 0.0000 1st Qu.:-0.33239
## Median : 0.5318 Median : 0.00000 Median : 0.5539 Median :-0.01505
## Mean : 0.3625 Mean :-0.05741 Mean : 0.3961 Mean :-0.07119
## 3rd Qu.: 0.9112 3rd Qu.: 0.16463 3rd Qu.: 0.9052 3rd Qu.: 0.15676
## Max. : 1.0000 Max. : 1.00000 Max. : 1.0000 Max. : 1.00000
## V27 V28 V29 V30
## Min. :-1.0000 Min. :-1.00000 Min. :-1.0000 Min. :-1.00000
## 1st Qu.: 0.2864 1st Qu.:-0.44316 1st Qu.: 0.0000 1st Qu.:-0.23689
## Median : 0.7082 Median :-0.01769 Median : 0.4966 Median : 0.00000
## Mean : 0.5416 Mean :-0.06954 Mean : 0.3784 Mean :-0.02791
## 3rd Qu.: 0.9999 3rd Qu.: 0.15354 3rd Qu.: 0.8835 3rd Qu.: 0.15407
## Max. : 1.0000 Max. : 1.00000 Max. : 1.0000 Max. : 1.00000
## V31 V32 V33
## Min. :-1.0000 Min. :-1.000000 Min. :-1.0000
## 1st Qu.: 0.0000 1st Qu.:-0.242595 1st Qu.: 0.0000
## Median : 0.4428 Median : 0.000000 Median : 0.4096
## Mean : 0.3525 Mean :-0.003794 Mean : 0.3494
## 3rd Qu.: 0.8576 3rd Qu.: 0.200120 3rd Qu.: 0.8138
## Max. : 1.0000 Max. : 1.000000 Max. : 1.0000
## V34 Class
## Min. :-1.00000 bad :126
## 1st Qu.:-0.16535 good:225
## Median : 0.00000
## Mean : 0.01448
## 3rd Qu.: 0.17166
## Max. : 1.00000
# Compact column-by-column overview of the raw data: one line per variable
# showing its type and the first few values.
glimpse(Ionosphere)
## Observations: 351
## Variables:
## $ V1 (fctr) 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1...
## $ V2 (fctr) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...
## $ V3 (dbl) 0.99539, 1.00000, 1.00000, 1.00000, 1.00000, 0.02337, 0....
## $ V4 (dbl) -0.05889, -0.18829, -0.03365, -0.45161, -0.02401, -0.005...
## $ V5 (dbl) 0.85243, 0.93035, 1.00000, 1.00000, 0.94140, -0.09924, 0...
## $ V6 (dbl) 0.02306, -0.36156, 0.00485, 1.00000, 0.06531, -0.11949, ...
## $ V7 (dbl) 0.83398, -0.10868, 1.00000, 0.71216, 0.92106, -0.00763, ...
## $ V8 (dbl) -0.37708, -0.93597, -0.12062, -1.00000, -0.23255, -0.118...
## $ V9 (dbl) 1.00000, 1.00000, 0.88965, 0.00000, 0.77152, 0.14706, 0....
## $ V10 (dbl) 0.03760, -0.04549, 0.01198, 0.00000, -0.16399, 0.06637, ...
## $ V11 (dbl) 0.85243, 0.50874, 0.73082, 0.00000, 0.52798, 0.03786, 0....
## $ V12 (dbl) -0.17755, -0.67743, 0.05346, 0.00000, -0.20275, -0.06302...
## $ V13 (dbl) 0.59755, 0.34432, 0.85443, 0.00000, 0.56409, 0.00000, 0....
## $ V14 (dbl) -0.44945, -0.69707, 0.00827, 0.00000, -0.00712, 0.00000,...
## $ V15 (dbl) 0.60536, -0.51685, 0.54591, -1.00000, 0.34395, -0.04572,...
## $ V16 (dbl) -0.38223, -0.97515, 0.00299, 0.14516, -0.27457, -0.15540...
## $ V17 (dbl) 0.84356, 0.05499, 0.83775, 0.54094, 0.52940, -0.00343, 0...
## $ V18 (dbl) -0.38542, -0.62237, -0.13644, -0.39330, -0.21780, -0.101...
## $ V19 (dbl) 0.58212, 0.33109, 0.75535, -1.00000, 0.45107, -0.11575, ...
## $ V20 (dbl) -0.32192, -1.00000, -0.08540, -0.54467, -0.17813, -0.054...
## $ V21 (dbl) 0.56971, -0.13151, 0.70887, -0.69975, 0.05982, 0.01838, ...
## $ V22 (dbl) -0.29674, -0.45300, -0.27502, 1.00000, -0.35575, 0.03669...
## $ V23 (dbl) 0.36946, -0.18056, 0.43385, 0.00000, 0.02309, 0.01519, 0...
## $ V24 (dbl) -0.47357, -0.35734, -0.12062, 0.00000, -0.52879, 0.00888...
## $ V25 (dbl) 0.56811, -0.20332, 0.57528, 1.00000, 0.03286, 0.03513, 0...
## $ V26 (dbl) -0.51171, -0.26569, -0.40220, 0.90695, -0.65158, -0.0153...
## $ V27 (dbl) 0.41078, -0.20468, 0.58984, 0.51613, 0.13290, -0.03240, ...
## $ V28 (dbl) -0.46168, -0.18401, -0.22145, 1.00000, -0.53206, 0.09223...
## $ V29 (dbl) 0.21266, -0.19040, 0.43100, 1.00000, 0.02431, -0.07859, ...
## $ V30 (dbl) -0.34090, -0.11593, -0.17365, -0.20099, -0.62197, 0.0073...
## $ V31 (dbl) 0.42267, -0.16626, 0.60436, 0.25682, -0.05707, 0.00000, ...
## $ V32 (dbl) -0.54487, -0.06288, -0.24180, 1.00000, -0.59573, 0.00000...
## $ V33 (dbl) 0.18641, -0.13738, 0.56045, -0.32382, -0.04608, -0.00039...
## $ V34 (dbl) -0.45300, -0.02447, -0.38238, 1.00000, -0.65697, 0.12011...
## $ Class (fctr) good, bad, good, bad, good, bad, good, bad, good, bad, ...
# Fit a random forest classifier for `Class` on the Ionosphere data and rank
# the predictors by mean decrease in Gini impurity. No imputation step is
# applied here; the data are passed to the forest as loaded.
dat3 <- tbl_df(Ionosphere)

# Fixed seed so the fitted forest is reproducible.
set.seed(1)
fit3 <- randomForest(
  formula = Class ~ .,
  data = dat3,
  ntree = 2000
)

# importance() returns a matrix whose row names are the predictor names;
# copy them into a regular column before sorting, most important first.
imp3 <- data.frame(importance(fit3))
imp3 <- mutate(imp3, var.name = row.names(imp3))
imp3 <- arrange(imp3, desc(MeanDecreaseGini))
imp3
## MeanDecreaseGini var.name
## 1 21.667274 V5
## 2 14.944095 V3
## 3 14.597021 V7
## 4 14.489918 V27
## 5 7.551723 V8
## 6 6.721020 V6
## 7 6.204016 V4
## 8 5.318193 V14
## 9 4.645229 V31
## 10 4.446285 V33
## 11 4.119335 V18
## 12 4.102099 V16
## 13 4.012420 V29
## 14 3.995992 V1
## 15 3.949552 V28
## 16 3.761316 V24
## 17 3.486166 V22
## 18 3.345619 V12
## 19 2.878279 V10
## 20 2.811008 V34
## 21 2.560680 V21
## 22 2.392086 V20
## 23 2.279570 V9
## 24 2.237442 V23
## 25 2.220036 V32
## 26 2.043450 V26
## 27 1.937037 V15
## 28 1.606546 V25
## 29 1.568493 V17
## 30 1.552036 V13
## 31 1.391076 V19
## 32 1.219280 V11
## 33 1.148375 V30
## 34 0.000000 V2
# Draw one panel per predictor on a 9 x 4 grid, in the row order of `imp3`
# (i.e. sorted by decreasing importance).
# NOTE(review): fnc_pd() is not defined in this section; presumably it draws
# a partial-dependence plot for the named variable -- verify against its
# definition.
# NOTE(review): 36 panels on one device need a tall figure; on a small
# device base graphics may stop with "figure margins too large" -- confirm
# the figure size used when rendering.

# par() returns the previous settings, so keep them and restore them
# afterwards instead of assuming the layout was 1 x 1 before.
old_par <- par(mfrow = c(9, 4))

# seq_len() yields an empty sequence when `imp3` has no rows, whereas
# 1:nrow(imp3) would iterate over c(1, 0).
for (i in seq_len(nrow(imp3))) {
  fnc_pd(fit3, dat3, imp3$var.name[i], imp3)
}

par(old_par)