library(catboost)
library(caret)
library(titanic)
set.seed(12345)
# Convert every column (including the numeric ones) to a factor so that all
# features are passed to CatBoost as categorical.
data <- as.data.frame(as.matrix(titanic_train), stringsAsFactors = TRUE)

# Impute missing Age values with the most frequent Age level.
age_levels <- levels(data$Age)
most_frequent_age <- which.max(table(data$Age))
data$Age[is.na(data$Age)] <- age_levels[most_frequent_age]

# Split into features and target, dropping identifier and free-text columns.
drop_columns <- c("PassengerId", "Survived", "Name", "Ticket", "Cabin")
x <- data[, !(names(data) %in% drop_columns)]
y <- data[, "Survived"]

# 5-fold cross-validation with class probabilities enabled.
fit_control <- trainControl(method = "cv",
                            number = 5,
                            classProbs = TRUE)

# Tuning grid: only the tree depth varies; the remaining parameters are held constant.
grid <- expand.grid(depth = c(4, 6, 8),
                    learning_rate = 0.1,
                    iterations = 100,
                    l2_leaf_reg = 0.1,
                    rsm = 0.95,
                    border_count = 64)

# Train CatBoost through caret's custom-model interface; make.names() turns
# the 0/1 labels into valid factor level names ('X0', 'X1').
model <- train(x, as.factor(make.names(y)),
               method = catboost.caret,
               logging_level = 'Silent', preProc = NULL,
               tuneGrid = grid, trControl = fit_control)
print(model)
## Catboost
##
## 891 samples
## 7 predictor
## 2 classes: 'X0', 'X1'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 713, 713, 712, 713, 713
## Resampling results across tuning parameters:
##
## depth Accuracy Kappa
## 4 0.8057937 0.5725966
## 6 0.8103258 0.5835593
## 8 0.8080723 0.5778741
##
## Tuning parameter 'learning_rate' was held constant at a value of 0.1
##
## Tuning parameter 'rsm' was held constant at a value of 0.95
## Tuning
## parameter 'border_count' was held constant at a value of 64
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were depth = 6, learning_rate =
## 0.1, iterations = 100, l2_leaf_reg = 0.1, rsm = 0.95 and border_count = 64.
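The tuned object is a regular caret `train` fit, so the selected hyperparameters and the full resampling table can also be read off programmatically. A minimal sketch using caret's standard fields (no CatBoost-specific accessors assumed):

# Best hyperparameter combination chosen by cross-validation (depth = 6 above).
model$bestTune

# Accuracy and Kappa for every grid point, as summarised in the printout.
model$results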
# Feature importances from the fitted CatBoost model (unscaled, so they sum to 100).
importance <- varImp(model, scale = FALSE)
print(importance)
## custom variable importance
##
## Overall
## Sex 33.212
## Fare 16.739
## Pclass 16.294
## Age 11.997
## Parch 8.247
## SibSp 8.030
## Embarked 5.482
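Predictions go through caret's usual `predict()` interface. A minimal sketch, evaluated on the training features for brevity; class probabilities are available because the model was trained with `classProbs = TRUE`:

# Predicted classes ('X0'/'X1') on the training features; for a proper
# evaluation, apply the same preprocessing to titanic_test instead.
pred_class <- predict(model, newdata = x)

# Per-class probabilities (one column per factor level).
pred_prob <- predict(model, newdata = x, type = "prob")

# Agreement with the observed labels.
confusionMatrix(pred_class, as.factor(make.names(y)))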