### EXAMPLE ON GRADIENT BOOSTING MODEL (GBM)
library(mlbench)
library(caret)
## Warning: package 'caret' was built under R version 3.1.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.1.2
## Loading required package: ggplot2
# Load the Sonar data set (shipped with mlbench) and report its dimensions.
data(Sonar)
dim(Sonar)
## [1] 208  61
# Data partition: hold out 25% of the rows for testing, sampling on the
# class label; list = FALSE returns row indices as a matrix rather than a list.
set.seed(999)
inTraining <- createDataPartition(Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]
# Create the resampling control for model building:
# 10-fold cross-validation, repeated 10 times.

fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10) # 1
# Seed immediately before train() so the resampling folds are reproducible.
set.seed(888)
gbmFit1 <- train(Class ~ ., data = training, method = "gbm",
                 trControl = fitControl, verbose = FALSE)
## Loading required package: gbm
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.1.2
## 
## Attaching package: 'survival'
## 
## The following object is masked from 'package:caret':
## 
##     cluster
## 
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1
## Loading required package: plyr
# Best result: the tuning-grid row with the highest resampled Accuracy.
# The metric is selected by name (not by column position) and `$results` is
# spelled out in full so the lookup does not rely on partial matching.
gbmFit1$results[which.max(gbmFit1$results[["Accuracy"]]), ] # best result
##   shrinkage interaction.depth n.trees  Accuracy     Kappa AccuracySD
## 6       0.1                 2     150 0.8465809 0.6892221 0.08671764
##     KappaSD
## 6 0.1751646
# Relative influence of the predictors. cBars = 6 limits the bar plot to the
# top 6 variables; the printed table still lists every predictor.
summary(gbmFit1, cBars = 6) # Top 6 Variables (plot only)

##     var     rel.inf
## V12 V12 10.76581154
## V11 V11  8.93871607
## V4   V4  7.04457262
## V46 V46  4.15939057
## V37 V37  4.15480416
## V5   V5  4.09705416
## V48 V48  3.90348952
## V27 V27  3.52131021
## V31 V31  3.44028118
## V51 V51  3.27516160
## V20 V20  3.21895965
## V16 V16  3.13178016
## V15 V15  3.00755642
## V58 V58  2.34934391
## V34 V34  2.29966901
## V49 V49  2.28875495
## V55 V55  2.28185574
## V52 V52  2.24720152
## V23 V23  1.99182851
## V39 V39  1.85536753
## V36 V36  1.71568845
## V9   V9  1.66564305
## V21 V21  1.64970980
## V6   V6  1.54968168
## V29 V29  1.52968293
## V43 V43  1.43232589
## V45 V45  1.42681465
## V17 V17  1.12469522
## V30 V30  1.09307575
## V54 V54  1.07412083
## V32 V32  0.92930528
## V59 V59  0.74115223
## V28 V28  0.71290973
## V18 V18  0.61773447
## V3   V3  0.57521708
## V25 V25  0.46010124
## V22 V22  0.36268307
## V40 V40  0.35323166
## V47 V47  0.31984023
## V53 V53  0.30671221
## V7   V7  0.29730721
## V44 V44  0.26836496
## V56 V56  0.26338017
## V8   V8  0.25599020
## V14 V14  0.15339614
## V41 V41  0.14576650
## V60 V60  0.14538562
## V24 V24  0.12544091
## V13 V13  0.12377456
## V10 V10  0.10344713
## V42 V42  0.09252498
## V1   V1  0.09136388
## V35 V35  0.08764840
## V19 V19  0.08203083
## V57 V57  0.04902696
## V2   V2  0.03996821
## V33 V33  0.03323450
## V38 V38  0.02871432
## V26 V26  0.00000000
## V50 V50  0.00000000
# Custom tuning grid for demo purposes: n.trees runs from 20 to 600 in steps
# of 20 (well beyond the 150 trees picked for gbmFit1), crossed with
# interaction depths 1, 5 and 9 at a fixed shrinkage of 0.1.
# NOTE(review): recent caret releases also expect an n.minobsinnode column in
# a gbm tuning grid -- confirm against the installed version.
gbmGrid <- expand.grid(interaction.depth = c(1, 5, 9),
                       n.trees = (1:30) * 20,
                       shrinkage = 0.1)
gbmFit2 <- train(Class ~ ., data = training, method = "gbm",
                 trControl = fitControl, verbose = FALSE, tuneGrid = gbmGrid)
# Best result: select the Accuracy column by name rather than by position.
gbmFit2$results[which.max(gbmFit2$results[["Accuracy"]]), ] # best result
##    shrinkage interaction.depth n.trees  Accuracy     Kappa AccuracySD
## 84       0.1                 9     480 0.8612941 0.7195478 0.08726559
##      KappaSD
## 84 0.1769033
# Relative influence of the predictors. cBars = 6 limits the bar plot to the
# top 6 variables; the printed table still lists every predictor.
summary(gbmFit2, cBars = 6) # Top 6 Variables (plot only)

##     var      rel.inf
## V12 V12 11.301246335
## V11 V11 10.722060777
## V37 V37  6.293422378
## V4   V4  4.527088732
## V9   V9  3.948655051
## V27 V27  2.954502477
## V17 V17  2.886250685
## V31 V31  2.782174588
## V1   V1  2.574681762
## V20 V20  2.499492844
## V19 V19  2.492196837
## V48 V48  2.251168029
## V21 V21  2.243090464
## V16 V16  2.213447048
## V46 V46  1.918062004
## V43 V43  1.840270968
## V54 V54  1.764531529
## V59 V59  1.761211606
## V51 V51  1.756859818
## V24 V24  1.624816268
## V10 V10  1.559264192
## V15 V15  1.527612747
## V8   V8  1.526279107
## V52 V52  1.457424480
## V45 V45  1.423943084
## V38 V38  1.412748237
## V36 V36  1.375036348
## V55 V55  1.262252907
## V29 V29  1.226522627
## V5   V5  1.172081936
## V25 V25  1.105759557
## V34 V34  1.089680062
## V39 V39  1.061883650
## V49 V49  1.032347222
## V56 V56  0.994483405
## V32 V32  0.976768331
## V6   V6  0.951937672
## V23 V23  0.816868838
## V47 V47  0.777308749
## V44 V44  0.624094807
## V2   V2  0.554643609
## V14 V14  0.500335194
## V13 V13  0.498802382
## V58 V58  0.494176024
## V22 V22  0.482377499
## V33 V33  0.439297832
## V28 V28  0.436492926
## V42 V42  0.413184402
## V18 V18  0.409792138
## V3   V3  0.404779009
## V50 V50  0.273984757
## V26 V26  0.264160676
## V57 V57  0.242846391
## V53 V53  0.229515094
## V30 V30  0.218153593
## V40 V40  0.166247531
## V60 V60  0.157180392
## V7   V7  0.063118472
## V41 V41  0.014538301
## V35 V35  0.006845622
# Models 1 and 2 on one page, stacked in two rows.
trellis.par.set(caretTheme())
# plot() on a train object yields a lattice (trellis) object, which ignores
# par(mfrow = ...). Place the panels with print()'s split = c(col, row, ncol,
# nrow) argument instead; more = TRUE keeps the page open for the second plot.
print(plot(gbmFit1), split = c(1, 1, 1, 2), more = TRUE)
print(plot(gbmFit2), split = c(1, 2, 1, 2))

# Level plot of the resampled Kappa over the tuning grid, with rotated x labels.
plot(gbmFit2, metric = "Kappa", plotType = "level",
     scales = list(x = list(rot = 90))) # KAPPA

#library(ggplot2)
#ggplot(gbmFit2)

# Seed the RNG before resampling. set.seed must be *called*: assigning to the
# name only creates a variable and leaves the RNG untouched.
# The output recorded below was produced without this seed taking effect, so
# re-running may give slightly different figures.
set.seed(777)

# Control #2: the same repeated 10-fold CV, but with class probabilities and
# the two-class summary (ROC, Sens, Spec) so the model can be tuned on ROC.
fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 10,
                           classProbs = TRUE,
                           summaryFunction = twoClassSummary) # 2
gbmFit3 <- train(Class ~ ., data = training, method = "gbm",
                 trControl = fitControl, verbose = FALSE, tuneGrid = gbmGrid,
                 metric = "ROC")
# Best result: the grid row with the highest resampled ROC, selected by name.
gbmFit3$results[which.max(gbmFit3$results[["ROC"]]), ] # best result
##    shrinkage interaction.depth n.trees       ROC      Sens   Spec
## 85       0.1                 9     500 0.9431448 0.9013889 0.8225
##        ROCSD     SensSD    SpecSD
## 85 0.0500002 0.09969596 0.1436401
# Relative influence of the predictors. cBars = 6 limits the bar plot to the
# top 6 variables; the printed table still lists every predictor.
summary(gbmFit3, cBars = 6) # Top 6 Variables (plot only)
##     var     rel.inf
## V11 V11 12.93785604
## V12 V12  6.64730990
## V4   V4  5.39335634
## V31 V31  4.98339761
## V16 V16  4.55580768
## V46 V46  4.20041655
## V49 V49  3.66432516
## V36 V36  3.31739953
## V20 V20  3.22325154
## V10 V10  2.95142744
## V51 V51  2.90678029
## V23 V23  2.75062848
## V37 V37  2.65068758
## V54 V54  2.35277101
## V28 V28  2.32877014
## V27 V27  2.21827933
## V17 V17  2.21827624
## V19 V19  2.08600674
## V43 V43  2.02540996
## V5   V5  2.00847521
## V47 V47  1.66374660
## V52 V52  1.56940085
## V15 V15  1.47165329
## V48 V48  1.35067182
## V9   V9  1.08387263
## V26 V26  1.07909595
## V59 V59  1.06048426
## V44 V44  0.93935669
## V25 V25  0.91831221
## V56 V56  0.89604143
## V13 V13  0.87251798
## V55 V55  0.86949627
## V38 V38  0.86527126
## V34 V34  0.82566031
## V45 V45  0.71420765
## V58 V58  0.67033467
## V14 V14  0.56113836
## V39 V39  0.54275349
## V18 V18  0.51359689
## V42 V42  0.50885743
## V21 V21  0.49311949
## V53 V53  0.46974835
## V24 V24  0.43154037
## V7   V7  0.42222425
## V32 V32  0.39992143
## V2   V2  0.39607765
## V29 V29  0.39353144
## V1   V1  0.37134202
## V33 V33  0.30371420
## V8   V8  0.27790528
## V50 V50  0.24628916
## V6   V6  0.22250176
## V3   V3  0.21684819
## V60 V60  0.20832346
## V35 V35  0.20155122
## V40 V40  0.15287251
## V22 V22  0.12443579
## V57 V57  0.11596734
## V30 V30  0.08548068
## V41 V41  0.06950259
# Index of the grid row chosen by caret's tolerance() rule on ROC with a
# 2% tolerance, followed by that row's first six columns.
which2Pct <- tolerance(
  gbmFit3$results,
  metric = "ROC",
  tol = 2,
  maximize = TRUE
)
gbmFit3$results[which2Pct, seq_len(6)]
##    shrinkage interaction.depth n.trees       ROC      Sens    Spec
## 34       0.1                 5      80 0.9253819 0.8902778 0.80125
# Class probabilities (one column per class) for the first six test rows.
predict(gbmFit3, newdata = head(testing, n = 6L), type = "prob")
##              M          R
## 1 9.663899e-01 0.03361012
## 2 2.590901e-01 0.74090994
## 3 8.210759e-01 0.17892408
## 4 9.851184e-01 0.01488164
## 5 9.971941e-01 0.00280590
## 6 2.340506e-07 0.99999977
# Predicted class labels for the first six test rows.
predict(gbmFit3, newdata = head(testing, n = 6L), type = "raw")
## [1] M R M M M R
## Levels: M R