library(devtools)
# Flags applied to devtools-driven installs: skip multi-architecture builds and
# skip the post-install test load of the package.
options(devtools.install.args = c("--no-multiarch", "--no-test-load"))
# Install the CatBoost R package from a locally downloaded archive
# (repos = NULL: install from the given file rather than a repository).
# The path is machine-specific; point it at wherever the archive was saved.
install.packages(
  "C:/Users/cj/Downloads/catboost-R-Windows-0.6.1.1.tgz",
  repos = NULL,
  type = "source",
  INSTALL_opts = c("--no-multiarch", "--no-test-load")
)
library(catboost)
library(caret)
library(titanic)
# load data: reproducible random split of iris into training rows (70% of the
# row count, as passed to sample()) and the remaining rows as the test set
set.seed(1)
idx <- sample(seq_len(nrow(iris)), nrow(iris) * 0.7)
train <- iris[idx, ]
test <- iris[-idx, ]
# Resampling configuration handed to caret::train() via `trControl`:
# 3-fold cross-validation with class probabilities requested.
# NOTE(review): `search = "random"` presumably has no effect when an explicit
# tuneGrid is passed to train() -- confirm against the caret documentation.
fit_control <- caret::trainControl(
  method     = "cv",
  number     = 3,
  search     = "random",
  classProbs = TRUE
)
# Candidate hyper-parameter combinations: only `depth` varies (three values);
# every other parameter is a single fixed value, so the grid has three rows.
grid <- expand.grid(
  depth         = c(4, 6, 8),
  learning_rate = 0.1,
  l2_leaf_reg   = 0.1,
  rsm           = 0.95,
  border_count  = 64,
  iterations    = 10
)
# Fit CatBoost on the iris training split through caret, evaluating each row
# of `grid` with the resampling scheme in `fit_control` and keeping the
# candidate with the highest Accuracy.
model <- caret::train(
  x = train[, -5],           # predictors: all columns except the 5th
  y = train[, 5],            # response: the 5th column
  method = catboost.caret,   # caret model definition object (not a string)
  metric = "Accuracy",
  maximize = TRUE,
  preProcess = NULL,         # no pre-processing
  tuneGrid = grid,
  # NOTE(review): tuneLength is presumably unused when tuneGrid is supplied --
  # confirm against the caret documentation.
  tuneLength = 30,
  trControl = fit_control
)
0: learn: -0.9518412 total: 10.4ms remaining: 93.8ms
1: learn: -0.8308367 total: 20.5ms remaining: 82ms
2: learn: -0.7252607 total: 31.6ms remaining: 73.8ms
3: learn: -0.6321418 total: 41.5ms remaining: 62.2ms
4: learn: -0.5554615 total: 53.3ms remaining: 53.3ms
5: learn: -0.5042470 total: 66.1ms remaining: 44ms
6: learn: -0.4545509 total: 78.3ms remaining: 33.6ms
7: learn: -0.4092807 total: 89.3ms remaining: 22.3ms
8: learn: -0.3763194 total: 102ms remaining: 11.3ms
9: learn: -0.3413960 total: 114ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9273887 total: 18.2ms remaining: 164ms
1: learn: -0.8032346 total: 35.1ms remaining: 140ms
2: learn: -0.7002732 total: 52.6ms remaining: 123ms
3: learn: -0.6171879 total: 70.6ms remaining: 106ms
4: learn: -0.5578607 total: 87.2ms remaining: 87.2ms
5: learn: -0.5073589 total: 103ms remaining: 68.4ms
6: learn: -0.4506830 total: 121ms remaining: 52ms
7: learn: -0.4155692 total: 139ms remaining: 34.6ms
8: learn: -0.3725134 total: 158ms remaining: 17.5ms
9: learn: -0.3407886 total: 175ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9214513 total: 15.9ms remaining: 143ms
1: learn: -0.7988420 total: 41.3ms remaining: 165ms
2: learn: -0.7050312 total: 58.6ms remaining: 137ms
3: learn: -0.6199789 total: 84.7ms remaining: 127ms
4: learn: -0.5504949 total: 110ms remaining: 110ms
5: learn: -0.4952155 total: 137ms remaining: 91ms
6: learn: -0.4400647 total: 157ms remaining: 67.3ms
7: learn: -0.3965258 total: 183ms remaining: 45.8ms
8: learn: -0.3602040 total: 196ms remaining: 21.8ms
9: learn: -0.3209184 total: 220ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9278433 total: 11.1ms remaining: 100ms
1: learn: -0.7939372 total: 22.4ms remaining: 89.5ms
2: learn: -0.6923915 total: 35.5ms remaining: 82.8ms
3: learn: -0.6063052 total: 48.4ms remaining: 72.6ms
4: learn: -0.5456267 total: 61.7ms remaining: 61.7ms
5: learn: -0.4881213 total: 74.2ms remaining: 49.5ms
6: learn: -0.4351607 total: 85ms remaining: 36.4ms
7: learn: -0.3932114 total: 96.2ms remaining: 24.1ms
8: learn: -0.3580993 total: 107ms remaining: 11.9ms
9: learn: -0.3248072 total: 119ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9717273 total: 14.8ms remaining: 133ms
1: learn: -0.8326398 total: 32ms remaining: 128ms
2: learn: -0.7269475 total: 49.1ms remaining: 115ms
3: learn: -0.6393229 total: 64.8ms remaining: 97.3ms
4: learn: -0.5647785 total: 79.9ms remaining: 79.9ms
5: learn: -0.5058635 total: 98.3ms remaining: 65.6ms
6: learn: -0.4536749 total: 116ms remaining: 49.7ms
7: learn: -0.4121852 total: 133ms remaining: 33.2ms
8: learn: -0.3723781 total: 145ms remaining: 16.1ms
9: learn: -0.3404291 total: 162ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9471655 total: 24.9ms remaining: 224ms
1: learn: -0.8153682 total: 51.6ms remaining: 207ms
2: learn: -0.7171223 total: 78.3ms remaining: 183ms
3: learn: -0.6227224 total: 104ms remaining: 155ms
4: learn: -0.5453504 total: 130ms remaining: 130ms
5: learn: -0.4857906 total: 155ms remaining: 104ms
6: learn: -0.4309156 total: 173ms remaining: 74.2ms
7: learn: -0.3848566 total: 197ms remaining: 49.3ms
8: learn: -0.3435465 total: 223ms remaining: 24.7ms
9: learn: -0.3084642 total: 247ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9441565 total: 11.9ms remaining: 107ms
1: learn: -0.8111947 total: 24ms remaining: 96ms
2: learn: -0.7072868 total: 35.3ms remaining: 82.5ms
3: learn: -0.6355992 total: 48.4ms remaining: 72.6ms
4: learn: -0.5665463 total: 58.7ms remaining: 58.7ms
5: learn: -0.5091329 total: 70.3ms remaining: 46.9ms
6: learn: -0.4592434 total: 80.9ms remaining: 34.7ms
7: learn: -0.4175227 total: 92.1ms remaining: 23ms
8: learn: -0.3809564 total: 104ms remaining: 11.6ms
9: learn: -0.3480218 total: 115ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9304035 total: 22.2ms remaining: 200ms
1: learn: -0.8074962 total: 45.4ms remaining: 181ms
2: learn: -0.7073692 total: 72.8ms remaining: 170ms
3: learn: -0.6245997 total: 92.8ms remaining: 139ms
4: learn: -0.5527390 total: 127ms remaining: 127ms
5: learn: -0.4926269 total: 149ms remaining: 99.6ms
6: learn: -0.4411370 total: 169ms remaining: 72.6ms
7: learn: -0.3940584 total: 190ms remaining: 47.4ms
8: learn: -0.3563423 total: 210ms remaining: 23.3ms
9: learn: -0.3225233 total: 227ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9332015 total: 27.1ms remaining: 244ms
1: learn: -0.8075783 total: 44.6ms remaining: 178ms
2: learn: -0.7042494 total: 70.9ms remaining: 166ms
3: learn: -0.6211579 total: 91.2ms remaining: 137ms
4: learn: -0.5563990 total: 108ms remaining: 108ms
5: learn: -0.4946306 total: 128ms remaining: 85.1ms
6: learn: -0.4383814 total: 154ms remaining: 66ms
7: learn: -0.3944256 total: 179ms remaining: 44.9ms
8: learn: -0.3575307 total: 191ms remaining: 21.2ms
9: learn: -0.3240041 total: 212ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: -0.9370882 total: 15.9ms remaining: 143ms
1: learn: -0.8114327 total: 31.8ms remaining: 127ms
2: learn: -0.7046127 total: 49.3ms remaining: 115ms
3: learn: -0.6395155 total: 61.1ms remaining: 91.6ms
4: learn: -0.5750939 total: 79.1ms remaining: 79.1ms
5: learn: -0.5118310 total: 96.9ms remaining: 64.6ms
6: learn: -0.4619742 total: 114ms remaining: 48.7ms
7: learn: -0.4155966 total: 130ms remaining: 32.5ms
8: learn: -0.3759455 total: 148ms remaining: 16.4ms
9: learn: -0.3413545 total: 164ms remaining: 0us
# Cross-tabulate the true species (rows) against the model's predictions on
# the held-out test rows (columns).
table(test[["Species"]], predict(model, newdata = test))
setosa versicolor virginica
setosa 15 0 0
versicolor 0 13 0
virginica 0 2 15
타이타닉 예제
# Load the Titanic training data, routing it through as.matrix() and
# stringsAsFactors = TRUE; the code below relies on Age having factor levels.
data <- as.data.frame(
  as.matrix(titanic::titanic_train),
  stringsAsFactors = TRUE
)

# handle missing value: fill NA ages with the most frequent Age level
age_levels <- levels(data[["Age"]])
most_frequent_age <- which.max(table(data[["Age"]]))
data[["Age"]][is.na(data[["Age"]])] <- age_levels[most_frequent_age]

# set x and y: predictors are every column not listed in drop_columns,
# the response is the Survived column
drop_columns <- c("PassengerId", "Survived", "Name", "Ticket", "Cabin")
x <- data[, setdiff(names(data), drop_columns)]
y <- data[["Survived"]]
# use caret for grid search: 3-fold cross-validation with class probabilities
# requested; passed to caret::train() via `trControl`.
# NOTE(review): `search = "random"` presumably has no effect when an explicit
# tuneGrid is passed to train() -- confirm against the caret documentation.
fit_control <- caret::trainControl(
  method     = "cv",
  number     = 3,
  search     = "random",
  classProbs = TRUE
)
# set grid options: three candidate tree depths; all remaining parameters are
# held at a single value, giving a three-row tuning grid
grid <- expand.grid(
  depth         = c(4, 6, 8),
  learning_rate = 0.1,
  l2_leaf_reg   = 0.1,
  rsm           = 0.95,
  border_count  = 64,
  iterations    = 10
)
# train catboost: evaluate each row of `grid` with the resampling scheme in
# `fit_control` and keep the candidate with the highest Accuracy
model <- caret::train(
  x = x,
  # make.names() turns the outcome values into syntactically valid names
  # before they become the factor levels of the response.
  y = as.factor(make.names(y)),
  method = catboost.caret,   # caret model definition object (not a string)
  metric = "Accuracy",
  maximize = TRUE,
  preProcess = NULL,         # no pre-processing
  tuneGrid = grid,
  # NOTE(review): tuneLength is presumably unused when tuneGrid is supplied --
  # confirm against the caret documentation.
  tuneLength = 30,
  trControl = fit_control
)
0: learn: 0.6346957 total: 12.1ms remaining: 108ms
1: learn: 0.5910487 total: 23.3ms remaining: 93.1ms
2: learn: 0.5567809 total: 34.8ms remaining: 81.3ms
3: learn: 0.5341848 total: 46.6ms remaining: 69.9ms
4: learn: 0.5145478 total: 61.6ms remaining: 61.6ms
5: learn: 0.4989001 total: 79.2ms remaining: 52.8ms
6: learn: 0.4870707 total: 93.3ms remaining: 40ms
7: learn: 0.4769847 total: 107ms remaining: 26.8ms
8: learn: 0.4672956 total: 120ms remaining: 13.3ms
9: learn: 0.4635520 total: 130ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6367950 total: 17.4ms remaining: 157ms
1: learn: 0.5925227 total: 34.9ms remaining: 140ms
2: learn: 0.5509672 total: 60.5ms remaining: 141ms
3: learn: 0.5257444 total: 84.5ms remaining: 127ms
4: learn: 0.5007908 total: 105ms remaining: 105ms
5: learn: 0.4877500 total: 128ms remaining: 85.6ms
6: learn: 0.4799771 total: 142ms remaining: 61ms
7: learn: 0.4635848 total: 164ms remaining: 40.9ms
8: learn: 0.4492596 total: 187ms remaining: 20.8ms
9: learn: 0.4470119 total: 199ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6384635 total: 16.7ms remaining: 151ms
1: learn: 0.5943211 total: 30.1ms remaining: 121ms
2: learn: 0.5679090 total: 44.3ms remaining: 103ms
3: learn: 0.5328308 total: 70.7ms remaining: 106ms
4: learn: 0.5197326 total: 80.5ms remaining: 80.5ms
5: learn: 0.5093211 total: 88.7ms remaining: 59.1ms
6: learn: 0.4955954 total: 101ms remaining: 43.1ms
7: learn: 0.4693909 total: 139ms remaining: 34.8ms
8: learn: 0.4645680 total: 156ms remaining: 17.3ms
9: learn: 0.4579124 total: 171ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6486576 total: 11.6ms remaining: 105ms
1: learn: 0.6086620 total: 24.2ms remaining: 96.8ms
2: learn: 0.5764514 total: 37.5ms remaining: 87.5ms
3: learn: 0.5482845 total: 54.1ms remaining: 81.1ms
4: learn: 0.5299003 total: 71.4ms remaining: 71.4ms
5: learn: 0.5143942 total: 84.4ms remaining: 56.3ms
6: learn: 0.5028232 total: 101ms remaining: 43.2ms
7: learn: 0.4919621 total: 118ms remaining: 29.6ms
8: learn: 0.4886284 total: 129ms remaining: 14.3ms
9: learn: 0.4789809 total: 141ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6372978 total: 22.6ms remaining: 203ms
1: learn: 0.5975944 total: 40.7ms remaining: 163ms
2: learn: 0.5785881 total: 48.4ms remaining: 113ms
3: learn: 0.5634497 total: 58.9ms remaining: 88.4ms
4: learn: 0.5513877 total: 69.1ms remaining: 69.1ms
5: learn: 0.5288537 total: 90.7ms remaining: 60.5ms
6: learn: 0.5178184 total: 112ms remaining: 48.1ms
7: learn: 0.4978918 total: 134ms remaining: 33.5ms
8: learn: 0.4826115 total: 161ms remaining: 17.9ms
9: learn: 0.4693493 total: 183ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6370030 total: 37.8ms remaining: 341ms
1: learn: 0.5985622 total: 55.5ms remaining: 222ms
2: learn: 0.5611682 total: 84.7ms remaining: 198ms
3: learn: 0.5384854 total: 99ms remaining: 148ms
4: learn: 0.5179361 total: 119ms remaining: 119ms
5: learn: 0.4955434 total: 157ms remaining: 105ms
6: learn: 0.4847584 total: 173ms remaining: 74.2ms
7: learn: 0.4787672 total: 184ms remaining: 46ms
8: learn: 0.4747042 total: 193ms remaining: 21.5ms
9: learn: 0.4697536 total: 215ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6530461 total: 16.5ms remaining: 149ms
1: learn: 0.6199469 total: 30.9ms remaining: 124ms
2: learn: 0.5858906 total: 46.2ms remaining: 108ms
3: learn: 0.5555686 total: 57.3ms remaining: 86ms
4: learn: 0.5385673 total: 73.3ms remaining: 73.3ms
5: learn: 0.5259882 total: 87.3ms remaining: 58.2ms
6: learn: 0.5066419 total: 99.1ms remaining: 42.5ms
7: learn: 0.4997568 total: 107ms remaining: 26.7ms
8: learn: 0.4882376 total: 120ms remaining: 13.3ms
9: learn: 0.4793070 total: 137ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6343795 total: 22.1ms remaining: 199ms
1: learn: 0.5900849 total: 43.7ms remaining: 175ms
2: learn: 0.5560702 total: 61.5ms remaining: 143ms
3: learn: 0.5335539 total: 87.3ms remaining: 131ms
4: learn: 0.5154358 total: 107ms remaining: 107ms
5: learn: 0.5094437 total: 117ms remaining: 78.2ms
6: learn: 0.4880100 total: 140ms remaining: 60.2ms
7: learn: 0.4803744 total: 147ms remaining: 36.8ms
8: learn: 0.4685923 total: 167ms remaining: 18.6ms
9: learn: 0.4638362 total: 181ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6542323 total: 9.75ms remaining: 87.8ms
1: learn: 0.6134058 total: 28ms remaining: 112ms
2: learn: 0.5889196 total: 39.6ms remaining: 92.5ms
3: learn: 0.5346948 total: 76.8ms remaining: 115ms
4: learn: 0.5117146 total: 90.3ms remaining: 90.3ms
5: learn: 0.5023318 total: 107ms remaining: 71.5ms
6: learn: 0.4989807 total: 118ms remaining: 50.4ms
7: learn: 0.4731496 total: 151ms remaining: 37.7ms
8: learn: 0.4693314 total: 164ms remaining: 18.2ms
9: learn: 0.4546198 total: 184ms remaining: 0us
the condition has length > 1 and only the first element will be used
0: learn: 0.6379153 total: 20.8ms remaining: 187ms
1: learn: 0.5927106 total: 33.4ms remaining: 134ms
2: learn: 0.5597715 total: 56.1ms remaining: 131ms
3: learn: 0.5434711 total: 62.9ms remaining: 94.4ms
4: learn: 0.5305048 total: 70.3ms remaining: 70.3ms
5: learn: 0.5090016 total: 93.6ms remaining: 62.4ms
6: learn: 0.5010914 total: 100ms remaining: 42.9ms
7: learn: 0.4854103 total: 121ms remaining: 30.2ms
8: learn: 0.4818154 total: 130ms remaining: 14.5ms
9: learn: 0.4678419 total: 152ms remaining: 0us
# Print the fitted caret model object (resampling results per grid row and the
# selected hyper-parameters).
print(model)
Catboost
891 samples
7 predictor
2 classes: 'X0', 'X1'
No pre-processing
Resampling: Cross-Validated (3 fold)
Summary of sample sizes: 594, 594, 594
Resampling results across tuning parameters:
depth Accuracy Kappa
4 0.7979798 0.5545028
6 0.8013468 0.5635580
8 0.7979798 0.5554245
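Tuning parameter 'learning_rate' was held constant at a value of 0.1
Tuning parameter 'iterations' was held constant at a value of 10
Tuning parameter 'l2_leaf_reg' was held constant at a value of 0.1
Tuning parameter 'rsm' was held constant at a value of 0.95
Tuning parameter 'border_count' was held constant at a value of 64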
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were depth = 6, learning_rate = 0.1, iterations =
10, l2_leaf_reg = 0.1, rsm = 0.95 and border_count = 64.
# variable importance of the tuned model, left unscaled (scale = FALSE)
importance <- caret::varImp(model, scale = FALSE)
print(importance)
custom variable importance
# Plot the variable-importance object computed above.
plot(importance)