### EXAMPLE OF A GRADIENT BOOSTING MODEL (GBM)
library(mlbench)
library(caret)
## Warning: package 'caret' was built under R version 3.1.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.1.2
## Loading required package: ggplot2
# Load the Sonar data set (made available by mlbench) and check its size.
data(Sonar)
dim(Sonar)
## [1] 208 61
# Data partition: 75% of the rows go to training, the rest to testing.
# The seed is fixed so that the split is reproducible.
set.seed(999)
# list = FALSE asks for the row indices as a matrix rather than a list.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
inTraining <- createDataPartition(Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]
# Resampling controls for model building: 10-fold cross-validation,
# repeated 10 times.
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10) # 1
# Seed the RNG so the resampling folds are reproducible.
set.seed(888)
# Fit a GBM on all predictors using caret's default tuning grid.
# verbose = FALSE (not the reassignable `F`) keeps the fitting output quiet.
gbmFit1 <- train(
  Class ~ .,
  data = training,
  method = "gbm",
  trControl = fitControl,
  verbose = FALSE
)
## Loading required package: gbm
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.1.2
##
## Attaching package: 'survival'
##
## The following object is masked from 'package:caret':
##
## cluster
##
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1
## Loading required package: plyr
# Best result: the row of the resampling table with the highest accuracy.
# The metric column is picked by name, so the lookup does not depend on how
# many tuning-parameter columns precede it, and `$results` is written in full
# instead of relying on partial matching.
gbmFit1$results[which.max(gbmFit1$results$Accuracy), ]
## shrinkage interaction.depth n.trees Accuracy Kappa AccuracySD
## 6 0.1 2 150 0.8465809 0.6892221 0.08671764
## KappaSD
## 6 0.1751646
# Relative influence of the predictors. cBars = 6 limits the bar chart to the
# 6 most influential variables; the printed table still lists every predictor.
summary(gbmFit1, cBars = 6)

## var rel.inf
## V12 V12 10.76581154
## V11 V11 8.93871607
## V4 V4 7.04457262
## V46 V46 4.15939057
## V37 V37 4.15480416
## V5 V5 4.09705416
## V48 V48 3.90348952
## V27 V27 3.52131021
## V31 V31 3.44028118
## V51 V51 3.27516160
## V20 V20 3.21895965
## V16 V16 3.13178016
## V15 V15 3.00755642
## V58 V58 2.34934391
## V34 V34 2.29966901
## V49 V49 2.28875495
## V55 V55 2.28185574
## V52 V52 2.24720152
## V23 V23 1.99182851
## V39 V39 1.85536753
## V36 V36 1.71568845
## V9 V9 1.66564305
## V21 V21 1.64970980
## V6 V6 1.54968168
## V29 V29 1.52968293
## V43 V43 1.43232589
## V45 V45 1.42681465
## V17 V17 1.12469522
## V30 V30 1.09307575
## V54 V54 1.07412083
## V32 V32 0.92930528
## V59 V59 0.74115223
## V28 V28 0.71290973
## V18 V18 0.61773447
## V3 V3 0.57521708
## V25 V25 0.46010124
## V22 V22 0.36268307
## V40 V40 0.35323166
## V47 V47 0.31984023
## V53 V53 0.30671221
## V7 V7 0.29730721
## V44 V44 0.26836496
## V56 V56 0.26338017
## V8 V8 0.25599020
## V14 V14 0.15339614
## V41 V41 0.14576650
## V60 V60 0.14538562
## V24 V24 0.12544091
## V13 V13 0.12377456
## V10 V10 0.10344713
## V42 V42 0.09252498
## V1 V1 0.09136388
## V35 V35 0.08764840
## V19 V19 0.08203083
## V57 V57 0.04902696
## V2 V2 0.03996821
## V33 V33 0.03323450
## V38 V38 0.02871432
## V26 V26 0.00000000
## V50 V50 0.00000000
# Custom tuning grid for demo purposes: interaction depths 1, 5 and 9 crossed
# with 20 to 600 trees (in steps of 20), shrinkage held at 0.1.
# NOTE(review): recent caret releases also expect an `n.minobsinnode` column
# in a gbm tuning grid -- confirm against the installed caret version.
gbmGrid <- expand.grid(
  interaction.depth = c(1, 5, 9),
  n.trees = (1:30) * 20,
  shrinkage = 0.1
)
# Same model and resampling controls as before, evaluated on the custom grid.
gbmFit2 <- train(
  Class ~ .,
  data = training,
  method = "gbm",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = gbmGrid
)
# Best result: the row with the highest accuracy, selected by column name
# rather than by position.
gbmFit2$results[which.max(gbmFit2$results$Accuracy), ]
## shrinkage interaction.depth n.trees Accuracy Kappa AccuracySD
## 84 0.1 9 480 0.8612941 0.7195478 0.08726559
## KappaSD
## 84 0.1769033
# Relative influence of the predictors. cBars = 6 limits the bar chart to the
# 6 most influential variables; the printed table still lists every predictor.
summary(gbmFit2, cBars = 6)

## var rel.inf
## V12 V12 11.301246335
## V11 V11 10.722060777
## V37 V37 6.293422378
## V4 V4 4.527088732
## V9 V9 3.948655051
## V27 V27 2.954502477
## V17 V17 2.886250685
## V31 V31 2.782174588
## V1 V1 2.574681762
## V20 V20 2.499492844
## V19 V19 2.492196837
## V48 V48 2.251168029
## V21 V21 2.243090464
## V16 V16 2.213447048
## V46 V46 1.918062004
## V43 V43 1.840270968
## V54 V54 1.764531529
## V59 V59 1.761211606
## V51 V51 1.756859818
## V24 V24 1.624816268
## V10 V10 1.559264192
## V15 V15 1.527612747
## V8 V8 1.526279107
## V52 V52 1.457424480
## V45 V45 1.423943084
## V38 V38 1.412748237
## V36 V36 1.375036348
## V55 V55 1.262252907
## V29 V29 1.226522627
## V5 V5 1.172081936
## V25 V25 1.105759557
## V34 V34 1.089680062
## V39 V39 1.061883650
## V49 V49 1.032347222
## V56 V56 0.994483405
## V32 V32 0.976768331
## V6 V6 0.951937672
## V23 V23 0.816868838
## V47 V47 0.777308749
## V44 V44 0.624094807
## V2 V2 0.554643609
## V14 V14 0.500335194
## V13 V13 0.498802382
## V58 V58 0.494176024
## V22 V22 0.482377499
## V33 V33 0.439297832
## V28 V28 0.436492926
## V42 V42 0.413184402
## V18 V18 0.409792138
## V3 V3 0.404779009
## V50 V50 0.273984757
## V26 V26 0.264160676
## V57 V57 0.242846391
## V53 V53 0.229515094
## V30 V30 0.218153593
## V40 V40 0.166247531
## V60 V60 0.157180392
## V7 V7 0.063118472
## V41 V41 0.014538301
## V35 V35 0.006845622
# Model 1 and model 2 on one page, stacked in two rows.
trellis.par.set(caretTheme())
# plot() on a train object yields a lattice (trellis) object, and lattice
# ignores par(mfrow = ...). The page is therefore divided with the `split`
# argument of print(): c(column, row, number of columns, number of rows).
# more = TRUE keeps the page open for the second panel.
print(plot(gbmFit1), split = c(1, 1, 1, 2), more = TRUE)
print(plot(gbmFit2), split = c(1, 2, 1, 2))


# Level plot of the Kappa statistic for model 2, x-axis labels rotated by 90 degrees.
plot(gbmFit2, metric = "Kappa", plotType = "level", scales = list(x = list(rot = 90))) #KAPPA

#library(ggplot2)
#ggplot(gbmFit2)
# Seed the RNG so the resampling is reproducible. set.seed() has to be called;
# assigning a value to the name only creates a variable and seeds nothing.
set.seed(777)
# Same repeated 10-fold CV, now with class probabilities and the two-class
# summary so that ROC, sensitivity and specificity are reported.
fitControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 10,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
) #2
# Tune over the custom grid, selecting the final model by area under the ROC curve.
gbmFit3 <- train(
  Class ~ .,
  data = training,
  method = "gbm",
  trControl = fitControl,
  verbose = FALSE,
  tuneGrid = gbmGrid,
  metric = "ROC"
)
# Best result: the row with the highest ROC, selected by column name rather
# than by position.
gbmFit3$results[which.max(gbmFit3$results$ROC), ]
## shrinkage interaction.depth n.trees ROC Sens Spec
## 85 0.1 9 500 0.9431448 0.9013889 0.8225
## ROCSD SensSD SpecSD
## 85 0.0500002 0.09969596 0.1436401
# Relative influence of the predictors. cBars = 6 limits the bar chart to the
# 6 most influential variables; the printed table still lists every predictor.
summary(gbmFit3, cBars = 6)
## var rel.inf
## V11 V11 12.93785604
## V12 V12 6.64730990
## V4 V4 5.39335634
## V31 V31 4.98339761
## V16 V16 4.55580768
## V46 V46 4.20041655
## V49 V49 3.66432516
## V36 V36 3.31739953
## V20 V20 3.22325154
## V10 V10 2.95142744
## V51 V51 2.90678029
## V23 V23 2.75062848
## V37 V37 2.65068758
## V54 V54 2.35277101
## V28 V28 2.32877014
## V27 V27 2.21827933
## V17 V17 2.21827624
## V19 V19 2.08600674
## V43 V43 2.02540996
## V5 V5 2.00847521
## V47 V47 1.66374660
## V52 V52 1.56940085
## V15 V15 1.47165329
## V48 V48 1.35067182
## V9 V9 1.08387263
## V26 V26 1.07909595
## V59 V59 1.06048426
## V44 V44 0.93935669
## V25 V25 0.91831221
## V56 V56 0.89604143
## V13 V13 0.87251798
## V55 V55 0.86949627
## V38 V38 0.86527126
## V34 V34 0.82566031
## V45 V45 0.71420765
## V58 V58 0.67033467
## V14 V14 0.56113836
## V39 V39 0.54275349
## V18 V18 0.51359689
## V42 V42 0.50885743
## V21 V21 0.49311949
## V53 V53 0.46974835
## V24 V24 0.43154037
## V7 V7 0.42222425
## V32 V32 0.39992143
## V2 V2 0.39607765
## V29 V29 0.39353144
## V1 V1 0.37134202
## V33 V33 0.30371420
## V8 V8 0.27790528
## V50 V50 0.24628916
## V6 V6 0.22250176
## V3 V3 0.21684819
## V60 V60 0.20832346
## V35 V35 0.20155122
## V40 V40 0.15287251
## V22 V22 0.12443579
## V57 V57 0.11596734
## V30 V30 0.08548068
## V41 V41 0.06950259
# Index of a candidate model whose ROC lies within a 2% tolerance of the best
# ROC (higher is better), followed by the first six columns of its results row.
which2Pct <- tolerance(
  gbmFit3$results,
  metric = "ROC",
  tol = 2,
  maximize = TRUE
)
gbmFit3$results[which2Pct, seq_len(6)]
## shrinkage interaction.depth n.trees ROC Sens Spec
## 34 0.1 5 80 0.9253819 0.8902778 0.80125
# Class probabilities for the first rows of the test set.
predict(gbmFit3, newdata = head(testing), type = "prob")
## M R
## 1 9.663899e-01 0.03361012
## 2 2.590901e-01 0.74090994
## 3 8.210759e-01 0.17892408
## 4 9.851184e-01 0.01488164
## 5 9.971941e-01 0.00280590
## 6 2.340506e-07 0.99999977
# Predicted class labels for the first rows of the test set.
predict(gbmFit3, newdata = head(testing), type = "raw")
## [1] M R M M M R
## Levels: M R
