Automated Parameter Tuning
# The caret package provides automated parameter tuning for the ML methods listed below.
names(getModelInfo())
[1] "ada" "AdaBag" "AdaBoost.M1" "adaboost" "amdai" "ANFIS"
[7] "avNNet" "awnb" "awtan" "bag" "bagEarth" "bagEarthGCV"
[13] "bagFDA" "bagFDAGCV" "bam" "bartMachine" "bayesglm" "binda"
[19] "blackboost" "blasso" "blassoAveraged" "bridge" "brnn" "BstLm"
[25] "bstSm" "bstTree" "C5.0" "C5.0Cost" "C5.0Rules" "C5.0Tree"
[31] "cforest" "chaid" "CSimca" "ctree" "ctree2" "cubist"
[37] "dda" "deepboost" "DENFIS" "dnn" "dwdLinear" "dwdPoly"
[43] "dwdRadial" "earth" "elm" "enet" "evtree" "extraTrees"
[49] "fda" "FH.GBML" "FIR.DM" "foba" "FRBCS.CHI" "FRBCS.W"
[55] "FS.HGD" "gam" "gamboost" "gamLoess" "gamSpline" "gaussprLinear"
[61] "gaussprPoly" "gaussprRadial" "gbm_h2o" "gbm" "gcvEarth" "GFS.FR.MOGUL"
[67] "GFS.GCCL" "GFS.LT.RS" "GFS.THRIFT" "glm.nb" "glm" "glmboost"
[73] "glmnet_h2o" "glmnet" "glmStepAIC" "gpls" "hda" "hdda"
[79] "hdrda" "HYFIS" "icr" "J48" "JRip" "kernelpls"
[85] "kknn" "knn" "krlsPoly" "krlsRadial" "lars" "lars2"
[91] "lasso" "lda" "lda2" "leapBackward" "leapForward" "leapSeq"
[97] "Linda" "lm" "lmStepAIC" "LMT" "loclda" "logicBag"
[103] "LogitBoost" "logreg" "lssvmLinear" "lssvmPoly" "lssvmRadial" "lvq"
[109] "M5" "M5Rules" "manb" "mda" "Mlda" "mlp"
[115] "mlpML" "mlpSGD" "mlpWeightDecay" "mlpWeightDecayML" "monmlp" "msaenet"
[121] "multinom" "naive_bayes" "nb" "nbDiscrete" "nbSearch" "neuralnet"
[127] "nnet" "nnls" "nodeHarvest" "oblique.tree" "OneR" "ordinalNet"
[133] "ORFlog" "ORFpls" "ORFridge" "ORFsvm" "ownn" "pam"
[139] "parRF" "PART" "partDSA" "pcaNNet" "pcr" "pda"
[145] "pda2" "penalized" "PenalizedLDA" "plr" "pls" "plsRglm"
[151] "polr" "ppr" "PRIM" "protoclass" "pythonKnnReg" "qda"
[157] "QdaCov" "qrf" "qrnn" "randomGLM" "ranger" "rbf"
[163] "rbfDDA" "Rborist" "rda" "regLogistic" "relaxo" "rf"
[169] "rFerns" "RFlda" "rfRules" "ridge" "rlda" "rlm"
[175] "rmda" "rocc" "rotationForest" "rotationForestCp" "rpart" "rpart1SE"
[181] "rpart2" "rpartCost" "rpartScore" "rqlasso" "rqnc" "RRF"
[187] "RRFglobal" "rrlda" "RSimca" "rvmLinear" "rvmPoly" "rvmRadial"
[193] "SBC" "sda" "sdwd" "simpls" "SLAVE" "slda"
[199] "smda" "snn" "sparseLDA" "spikeslab" "spls" "stepLDA"
[205] "stepQDA" "superpc" "svmBoundrangeString" "svmExpoString" "svmLinear" "svmLinear2"
[211] "svmLinear3" "svmLinearWeights" "svmLinearWeights2" "svmPoly" "svmRadial" "svmRadialCost"
[217] "svmRadialSigma" "svmRadialWeights" "svmSpectrumString" "tan" "tanSearch" "treebag"
[223] "vbmpRadial" "vglmAdjCat" "vglmContRatio" "vglmCumulative" "widekernelpls" "WM"
[229] "wsrf" "xgbLinear" "xgbTree" "xyf"
# Each model's key tuning parameters can be looked up with the function below.
modelLookup("gbm")
# Let's run an example.
# Along with the results for several tuning candidates, the last line of the output recommends the optimal parameter values:
# The final values used for the model were n.trees = 150, interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode = 10.
library(mlbench)
data(Sonar)
library(caret)
set.seed(998)
inTraining <- createDataPartition(Sonar$Class, p = .75, list = FALSE)
training <- Sonar[ inTraining,]
testing <- Sonar[-inTraining,]
fitControl <- trainControl(## 10-fold CV
                           method = "repeatedcv",
                           number = 10,
                           ## repeated ten times
                           repeats = 10)
set.seed(825)
gbmFit1 <- train(Class ~ ., data = training,
                 method = "gbm",
                 metric = "Accuracy",
                 trControl = fitControl,
                 ## This last option is actually one
                 ## for gbm() that passes through
                 verbose = FALSE)
gbmFit1
Stochastic Gradient Boosting
157 samples
60 predictor
2 classes: 'M', 'R'
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 10 times)
Summary of sample sizes: 142, 142, 140, 142, 142, 141, ...
Resampling results across tuning parameters:
  interaction.depth  n.trees  Accuracy   Kappa
  1                   50      0.7609191  0.5163703
  1                  100      0.7934216  0.5817734
  1                  150      0.7977230  0.5897796
  2                   50      0.7858235  0.5667749
  2                  100      0.8188897  0.6316548
  2                  150      0.8194363  0.6329037
  3                   50      0.7895686  0.5726290
  3                  100      0.8130564  0.6195719
  3                  150      0.8221348  0.6383441
Tuning parameter 'shrinkage' was held constant at a value of 0.1
Tuning parameter 'n.minobsinnode' was held constant at a value of 10
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were n.trees = 150, interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode = 10.
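# Once modelLookup() has revealed the tunable parameters, caret's default grid can
# be replaced with a custom one via tuneGrid. A minimal sketch following the caret
# tutorial (the grid values below are illustrative, not from the original run):
gbmGrid <- expand.grid(interaction.depth = c(1, 5, 9),
                       n.trees = (1:10) * 50,
                       shrinkage = 0.1,
                       n.minobsinnode = 10)
set.seed(825)
gbmFit2 <- train(Class ~ ., data = training,
                 method = "gbm",
                 metric = "Accuracy",
                 trControl = fitControl,
                 verbose = FALSE,
                 tuneGrid = gbmGrid)
# The held-out testing set created above is otherwise unused; predict() on the
# train object applies the final tuned model, and confusionMatrix() summarizes it.
confusionMatrix(predict(gbmFit1, newdata = testing), testing$Class)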
# Let's try glm as well.
# ?? This one has no recommended parameter values in its results...
# On reflection, glm modeling focuses on the formula specification (feature selection
# and the relationships between features, i.e. interaction terms).
# Accordingly, as the modelLookup() result below shows, glm has no separate tuning parameters.
modelLookup("glm")
library(foreign)
data <- read.dta("http://data.princeton.edu/wws509/datasets/ceb.dta")
data$y <- round(data$mean * data$n, 0)
# The evaluation metric can be specified.
glm.fit <- train(y ~ ., data = data, method = "glm", metric = "RMSE",
                 trControl = fitControl, family = poisson())
glm.fit
Generalized Linear Model
70 samples
7 predictor
No pre-processing
Resampling: Cross-Validated (10 fold, repeated 10 times)
Summary of sample sizes: 62, 63, 62, 64, 64, 63, ...
Resampling results:
RMSE Rsquared
121.2727 0.9062568
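# A quick sanity check (a sketch, not in the original notebook): the resampled
# RMSE above can be compared with the naive in-sample RMSE of the final model.
sqrt(mean((predict(glm.fit, newdata = data) - data$y)^2))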
# Let's skim the tunable parameters of a few more models.
modelLookup("naive_bayes")
modelLookup("svmLinear")
modelLookup("knn")
modelLookup("glmboost")
modelLookup("C5.0")
modelLookup("PART")
modelLookup("evtree")
# references
# [data set]  http://data.princeton.edu/wws509/datasets
# [package]   http://topepo.github.io/caret/model-training-and-tuning.html
# [package]   http://www.saedsayad.com/docs/gbm2.pdf