Loading the libraries for the various regression-tree-based methods.

library(tidyverse)    # because you want it pretty
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   1.0.0
## ✓ tidyr   1.1.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(rsample)      # data splitting 
library(rpart)        # performing regression trees
library(rpart.plot)   # plotting regression trees
library(ipred)        # fitting a bagged tree
library(randomForest) # basic implementation
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(ranger)       # a faster implementation of randomForest
## 
## Attaching package: 'ranger'
## The following object is masked from 'package:randomForest':
## 
##     importance
library(caret)        # an aggregator package for performing many machine learning models
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(h2o)          # an extremely fast java-based platform
## 
## ----------------------------------------------------------------------
## 
## Your next step is to start H2O:
##     > h2o.init()
## 
## For H2O package documentation, ask for help:
##     > ??h2o
## 
## After starting H2O, you can use the Web UI at http://localhost:54321
## For more information visit http://docs.h2o.ai
## 
## ----------------------------------------------------------------------
## 
## Attaching package: 'h2o'
## The following objects are masked from 'package:stats':
## 
##     cor, sd, var
## The following objects are masked from 'package:base':
## 
##     &&, %*%, %in%, ||, apply, as.factor, as.numeric, colnames,
##     colnames<-, ifelse, is.character, is.factor, is.numeric, log,
##     log10, log1p, log2, round, signif, trunc
library(e1071)        # caret uses it for cross-validation
library(gbm)          # fitting a boosted tree
## Loaded gbm 2.1.5
library(TDboost)      # fitting a Tweedie boosted tree
## Loaded TDboost 1.2
## 
## Attaching package: 'TDboost'
## The following object is masked from 'package:gbm':
## 
##     relative.influence
library(vip)          # plotting variable importance
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
library(readxl)

Preparing the data and creating the training and test sets.

# preparing the data

Telematics <- read_excel("FullDrivers Performance Report for R.xlsx")
teletrck <- Telematics %>%
  select(Gender, Marital_Status, Age, Status, Phone, Pol_Pfx, Distance, Score) %>%
  filter(!is.na(Score)) %>%
  filter(Distance > 100) %>%
  filter(Distance < 30000) %>%
  mutate(Pol_Pfx = fct_recode(Pol_Pfx,
                              'Annual'   = 'EAA',
                              'Bravo'    = 'EAB',
                              'Enhanced' = 'EAL',
                              'Select'   = 'EAS'
  )) %>%
  mutate(LSP = 100 - Score) %>%                             # Lost Score Points (LSP) model
  select(-Score)

teletrck$Gender <- as.factor(teletrck$Gender)
teletrck$Marital_Status <- as.factor(teletrck$Marital_Status)
teletrck$Status <- as.factor(teletrck$Status)
teletrck$Phone <- as.factor(teletrck$Phone)
teletrck$Pol_Pfx <- as.factor(teletrck$Pol_Pfx)

# Create training (70%) and test (30%) sets

set.seed(123)
ttrk_split <- initial_split(teletrck, prop = .7)
ttrk_train <- training(ttrk_split)
ttrk_test  <- testing(ttrk_split)
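
A quick optional check (a small sketch, not part of the original workflow): confirm the split sizes and compare the LSP distribution in the two sets.

nrow(ttrk_train)
nrow(ttrk_test)
summary(ttrk_train$LSP)
summary(ttrk_test$LSP)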

Fitting a basic tree

ttrk_trees <- rpart(LSP ~ ., ttrk_train, method = 'anova')
ttrk_trees
## n= 1312 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
## 1) root 1312 13953.750 8.063262  
##   2) Phone=android 701  6490.833 7.196862  
##     4) Age>=36.5 443  3658.248 6.693002 *
##     5) Age< 36.5 258  2527.008 8.062016 *
##   3) Phone=iPhone 611  6332.995 9.057283  
##     6) Age>=38.5 256  2551.527 8.457031 *
##     7) Age< 38.5 355  3622.715 9.490141 *
rpart.plot(ttrk_trees)

plotcp(ttrk_trees)
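
The cp plot shows how the cross-validated error changes with the complexity parameter. As an optional sketch (not in the original code; best_cp and pruned_tree are new names introduced here), the tree can be pruned at the cp with the lowest cross-validated error using rpart::prune().

# prune at the cp with the lowest cross-validated error
best_cp <- ttrk_trees$cptable[which.min(ttrk_trees$cptable[, "xerror"]), "CP"]
pruned_tree <- prune(ttrk_trees, cp = best_cp)
rpart.plot(pruned_tree)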

Tuning the tree

# Hyperparameters Grid 1
hyper_grid <- expand.grid(
  minsplit = seq(2, 10, 1),
  maxdepth = seq(2, 5, 1)
)

models <- list()

for (i in 1:nrow(hyper_grid)) {
  
  # get minsplit, maxdepth values at row i
  minsplit <- hyper_grid$minsplit[i]
  maxdepth <- hyper_grid$maxdepth[i]
  
  # train a model and store in the list
  models[[i]] <- rpart(
    formula = LSP ~ .,
    data    = ttrk_train,
    method  = "anova",
    control = list(minsplit = minsplit, maxdepth = maxdepth)
  )
}

# function to get the cp associated with the lowest cross-validated error
get_cp <- function(x) {
  min <- which.min(x$cptable[, "xerror"])
  x$cptable[min, "CP"]
}

# function to get the minimum cross-validated error
get_min_error <- function(x) {
  min <- which.min(x$cptable[, "xerror"])
  x$cptable[min, "xerror"]
}

hgo <- hyper_grid %>%
  mutate(
    cp    = map_dbl(models, get_cp),
    error = map_dbl(models, get_min_error)
    ) %>%
  arrange(error) %>%
  head()
hgo
## # A tibble: 6 x 4
##   minsplit maxdepth    cp error
##      <dbl>    <dbl> <dbl> <dbl>
## 1        8        2  0.01 0.902
## 2       10        3  0.01 0.904
## 3        4        5  0.01 0.904
## 4        5        3  0.01 0.905
## 5        3        2  0.01 0.905
## 6        3        3  0.01 0.905
optimal_tree <- rpart(
  formula = LSP ~ .,
  data    = ttrk_train,
  method  = "anova",
  control = list(minsplit = 8, maxdepth = 2, cp = 0.01)
)

optimal_tree
## n= 1312 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
## 1) root 1312 13953.750 8.063262  
##   2) Phone=android 701  6490.833 7.196862  
##     4) Age>=36.5 443  3658.248 6.693002 *
##     5) Age< 36.5 258  2527.008 8.062016 *
##   3) Phone=iPhone 611  6332.995 9.057283  
##     6) Age>=38.5 256  2551.527 8.457031 *
##     7) Age< 38.5 355  3622.715 9.490141 *
rpart.plot(optimal_tree)

plotcp(optimal_tree)

pred <- predict(optimal_tree, newdata = ttrk_test)
RMSE(pred, ttrk_test$LSP)
## [1] 3.11548
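
For context (a small addition, not in the original), the naive baseline that predicts the mean training LSP for every test record can be computed with the same RMSE helper from caret.

# naive baseline: predict the mean training LSP for every test observation
RMSE(rep(mean(ttrk_train$LSP), nrow(ttrk_test)), ttrk_test$LSP)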

Bagging

# make bootstrapping reproducible
set.seed(123)

# train bagged model with ipred
bagged_m1 <- bagging(
  formula = LSP ~ .,
  data    = ttrk_train,
  coob    = TRUE,
  nbagg      = 100  # default is 25
)

bagged_m1
## 
## Bagging regression trees with 100 bootstrap replications 
## 
## Call: bagging.data.frame(formula = LSP ~ ., data = ttrk_train, coob = TRUE, 
##     nbagg = 100)
## 
## Out-of-bag estimate of root mean squared error:  3.1007
# assess 10-100 bagged trees
ntree <- 10:100

# create empty vector to store OOB RMSE values
rmse <- vector(mode = "numeric", length = length(ntree))

for (i in seq_along(ntree)) {
  # reproducibility
  set.seed(123)
  
  # fit a bagged model with ntree[i] bootstrap replications
  model <- bagging(
    formula = LSP ~ .,
    data    = ttrk_train,
    coob    = TRUE,
    nbagg   = ntree[i]
  )
  
  # get the OOB error
  rmse[i] <- model$err
}

plot(ntree, rmse, type = 'l', lwd = 2)
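
To read the best point off the curve programmatically (a small sketch, not in the original), report the number of bagged trees with the lowest OOB RMSE in this sweep.

# number of bagged trees with the lowest OOB RMSE, and that RMSE
ntree[which.min(rmse)]
min(rmse)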

# cross-validation and variable importance with caret
# Specify 10-fold cross validation

ctrl <- trainControl(method = "cv",  number = 10) 

# CV bagged model
bagged_cv <- train(
  LSP ~ .,
  data      = ttrk_train,
  method    = "treebag",
  trControl = ctrl,
  importance = TRUE
  )
bagged_cv
## Bagged CART 
## 
## 1312 samples
##    8 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 1182, 1181, 1180, 1182, 1181, 1180, ... 
## Resampling results:
## 
##   RMSE      Rsquared    MAE     
##   3.102777  0.09860987  2.450765
# plot most important variables
plot(varImp(bagged_cv))  
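
As with the single tree, the cross-validated bagged model can be scored on the held-out test set (a sketch added for comparability; pred_bag is a new name introduced here).

# test-set RMSE for the cross-validated bagged model
pred_bag <- predict(bagged_cv, newdata = ttrk_test)
RMSE(pred_bag, ttrk_test$LSP)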

Random Forests

# for reproducibility
set.seed(123)

# default RF model
RF1 <- randomForest(
  formula = LSP ~ .,
  data    = ttrk_train
)
RF1
## 
## Call:
##  randomForest(formula = LSP ~ ., data = ttrk_train) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##           Mean of squared residuals: 9.799114
##                     % Var explained: 7.86
plot(RF1)

# number of trees with lowest MSE
which.min(RF1$mse)
## [1] 69
# RMSE of this optimal random forest
sqrt(RF1$mse[which.min(RF1$mse)])
## [1] 3.125713
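
A sketch (not in the original; pred_rf is a new name introduced here) scoring the default random forest on the test set, for comparison with the OOB estimate above.

# test-set RMSE for the default random forest
pred_rf <- predict(RF1, newdata = ttrk_test)
RMSE(pred_rf, ttrk_test$LSP)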
# create training and validation data 
set.seed(123)
valid_split <- initial_split(ttrk_train, .8)

# training data
ttrk_train_v2 <- analysis(valid_split)

# validation data
ttrk_valid <- assessment(valid_split)
x_test <- ttrk_valid[setdiff(names(ttrk_valid), "LSP")]
y_test <- ttrk_valid$LSP

rf_oob_comp <- randomForest(
  formula = LSP ~ .,
  data    = ttrk_train_v2,
  xtest   = x_test,
  ytest   = y_test
)

# extract OOB & validation errors
oob <- sqrt(rf_oob_comp$mse)
validation <- sqrt(rf_oob_comp$test$mse)

# compare error rates
RF_plot <- tibble(
  `Out of Bag Error` = oob,
  `Test error` = validation,
  ntrees = 1:rf_oob_comp$ntree
) %>%
  gather(Metric, RMSE, -ntrees) %>%
  ggplot(aes(ntrees, RMSE, color = Metric)) +
  geom_line() +
  xlab("Number of trees")
RF_plot

# tuning the forest
features <- setdiff(names(ttrk_train), "LSP")

set.seed(123)

m2 <- tuneRF(
  x          = ttrk_train[features],
  y          = ttrk_train$LSP,
  ntreeTry   = 500,
  mtryStart  = 5,
  stepFactor = 1.5,
  improve    = 0.01,
  trace      = FALSE      # to not show real-time progress 
) %>%
  head()
## 0.006923681 0.01 
## -0.01292261 0.01

# hyperparameter ranger grid search
hyper_grid_rf <- expand.grid(
  mtry       = seq(2, 8, by = 1),
  node_size  = seq(3, 9, by = 2),
  sampe_size = c(.55, .632, .70, .80),
  OOB_RMSE   = 0
)

for(i in 1:nrow(hyper_grid_rf)) {
  
  # train model
  model_rf <- ranger(
    formula         = LSP ~ ., 
    data            = ttrk_train, 
    num.trees       = 500,
    mtry            = hyper_grid_rf$mtry[i],
    min.node.size   = hyper_grid_rf$node_size[i],
    sample.fraction = hyper_grid_rf$sampe_size[i],
    seed            = 123
  )
  
  # add OOB error to grid
  hyper_grid_rf$OOB_RMSE[i] <- sqrt(model_rf$prediction.error)
}

hgf <- hyper_grid_rf %>% 
  arrange(OOB_RMSE) %>%
  head()
hgf
## # A tibble: 6 x 4
##    mtry node_size sampe_size OOB_RMSE
##   <dbl>     <dbl>      <dbl>    <dbl>
## 1     2         9      0.55      3.11
## 2     2         7      0.55      3.11
## 3     2         7      0.7       3.12
## 4     2         9      0.7       3.12
## 5     2         9      0.632     3.12
## 6     2         9      0.8       3.12
# optimal ranger

optimal_rf <- ranger(
    formula         = LSP ~ ., 
    data            = ttrk_train, 
    num.trees       = 500,
    mtry            = 2,
    min.node.size   = 9,
    sample.fraction = 0.55,
    seed            = 123,
    importance      = 'impurity'
)
optimal_rf
## Ranger result
## 
## Call:
##  ranger(formula = LSP ~ ., data = ttrk_train, num.trees = 500,      mtry = 2, min.node.size = 9, sample.fraction = 0.55, seed = 123,      importance = "impurity") 
## 
## Type:                             Regression 
## Number of trees:                  500 
## Sample size:                      1312 
## Number of independent variables:  8 
## Mtry:                             2 
## Target node size:                 9 
## Variable importance mode:         impurity 
## Splitrule:                        variance 
## OOB prediction error (MSE):       9.676832 
## R squared (OOB):                  0.09083027
imp_ranger <- as.data.frame(optimal_rf$variable.importance) %>%
  rownames_to_column('Variable') %>%
  as_tibble()
colnames(imp_ranger)[2] <- 'Impurity'
imp_ranger <- imp_ranger %>%
  arrange(desc(Impurity))
imp_plot <- imp_ranger %>%
  ggplot(aes(reorder(Variable, Impurity), Impurity)) +
    geom_col() +
    coord_flip() +
    xlab('Variable') +
    ylab('Impurity')
imp_plot
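
For comparability with the other models, the tuned ranger fit can also be scored on the test set (a minimal sketch, not in the original; ranger returns predictions in the $predictions element, and pred_ranger is a new name).

# test-set RMSE for the tuned ranger model
pred_ranger <- predict(optimal_rf, data = ttrk_test)$predictions
RMSE(pred_ranger, ttrk_test$LSP)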

Gradient Boosting Machines

ttrk_boosted <- train(LSP ~ .,
                data = ttrk_train,
                method = 'gbm',
                preProcess = c('scale', 'center'),
                trControl = trainControl(method = 'repeatedcv', 
                                         number = 5, 
                                         repeats = 3, 
                                         verboseIter = FALSE),
                verbose = 0)
ttrk_boosted
## Stochastic Gradient Boosting 
## 
## 1312 samples
##    8 predictor
## 
## Pre-processing: scaled (12), centered (12) 
## Resampling: Cross-Validated (5 fold, repeated 3 times) 
## Summary of sample sizes: 1050, 1049, 1050, 1050, 1049, 1050, ... 
## Resampling results across tuning parameters:
## 
##   interaction.depth  n.trees  RMSE      Rsquared    MAE     
##   1                   50      3.076759  0.11411868  2.413394
##   1                  100      3.082923  0.11110649  2.415808
##   1                  150      3.087277  0.10924428  2.420023
##   2                   50      3.095434  0.10249101  2.424033
##   2                  100      3.117028  0.09431837  2.442313
##   2                  150      3.127816  0.09187910  2.454448
##   3                   50      3.112207  0.09492203  2.435534
##   3                  100      3.130910  0.08912133  2.446676
##   3                  150      3.159073  0.07942147  2.468635
## 
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
## 
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were n.trees = 50, interaction.depth =
##  1, shrinkage = 0.1 and n.minobsinnode = 10.
ttrk_prdcted <- predict(ttrk_boosted, ttrk_test)
ttrk_test$Score_Obs <- 100 - ttrk_test$LSP
ttrk_test$Score_Pre <- 100 - ttrk_prdcted

ttrk_test %>% ggplot(aes(Score_Obs, Score_Pre)) +
              geom_point() +
              xlab('Observed Score') +
              ylab('Predicted Score') +
              xlim(75, 100) +
              ylim(75, 100) +
              geom_smooth(method = 'glm')
## `geom_smooth()` using formula 'y ~ x'
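
Beyond the scatterplot, the test-set RMSE of the caret GBM follows directly from the predictions computed above (a small addition, not in the original).

# test-set RMSE for the caret GBM
RMSE(ttrk_prdcted, ttrk_test$LSP)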

Tweedie Boosting

TDboost1 <- TDboost(LSP ~ Phone + Age + Status + Pol_Pfx + Gender + Distance,
                    data = ttrk_train,
                    var.monotone = c(0,0,0,0,0,0),
                    distribution = list(name="EDM", alpha=1.5),
                    n.trees = 3000,
                    shrinkage = 0.005,
                    interaction.depth=3,
                    bag.fraction = 0.5,
                    train.fraction = 0.5,
                    n.minobsinnode = 10,
                    cv.folds = 5,
                    keep.data=TRUE,
                    verbose=FALSE)

# print out the optimal iteration number M
best.iter <- TDboost.perf(TDboost1,method="test")
print(best.iter)
## [1] 317
# check performance using 5-fold cross-validation
best.iter <- TDboost.perf(TDboost1,method="cv")

print(best.iter)
## [1] 410
summary(TDboost1,n.trees=1)

## # A tibble: 6 x 2
##   var      rel.inf
##   <chr>      <dbl>
## 1 Phone       61.5
## 2 Age         38.5
## 3 Status       0  
## 4 Pol_Pfx      0  
## 5 Gender       0  
## 6 Distance     0
summary(TDboost1,n.trees=best.iter) # at the best iteration

## # A tibble: 6 x 2
##   var      rel.inf
##   <chr>      <dbl>
## 1 Age        34.2 
## 2 Phone      29.7 
## 3 Distance   19.1 
## 4 Status      8.15
## 5 Pol_Pfx     6.47
## 6 Gender      2.35
# making predictions on the test set
f.predict <- predict.TDboost(TDboost1, ttrk_test, best.iter)

# least squares error on the test set
print(sum((ttrk_test$LSP - f.predict)^2))
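
For comparability with the other models, the same RMSE metric can be applied to these predictions (a small addition; this assumes, as the least-squares line above does, that the predictions are on the response scale).

# test-set RMSE for the Tweedie boosted model at the best iteration
RMSE(f.predict, ttrk_test$LSP)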
# plot the effect of the first variable (Phone) at the best iteration
plot.TDboost(TDboost1,1,best.iter)

# joint plot of variables 1 and 2 (Phone and Age) at the best iteration
plot.TDboost(TDboost1,c(1,2),best.iter)

pretty.gbm.tree(TDboost1, i.tree = 337)
## # A tibble: 10 x 8
##    SplitVar SplitCodePred LeftNode RightNode MissingNode ErrorReduction Weight
##       <int>         <dbl>    <int>     <int>       <int>          <dbl>  <dbl>
##  1        1    54.5              1         8           9           7.59    328
##  2        1    22.5              2         3           7           4.57    299
##  3       -1    -0.000346        -1        -1          -1           0        43
##  4        1    36.5              4         5           6           7.99    256
##  5       -1     0.000549        -1        -1          -1           0       122
##  6       -1    -0.0000227       -1        -1          -1           0       134
##  7       -1     0.000250        -1        -1          -1           0       256
##  8       -1     0.000164        -1        -1          -1           0       299
##  9       -1    -0.000894        -1        -1          -1           0        29
## 10       -1     0.0000707       -1        -1          -1           0       328
## # … with 1 more variable: Prediction <dbl>
# create hyperparameter grid
hyper_grid_gbm <- expand.grid(
  shrinkage = c(0.01, 0.1, 0.3),
  interaction.depth = c(1, 3, 5),
  n.minobsinnode = c(5, 10, 15),
  bag.fraction = c(0.65, 0.8, 1), 
  optimal_trees = 0,
  min_RMSE = 0
)

# total number of combinations
nrow(hyper_grid_gbm)
## [1] 81
# randomize data
random_index <- sample(1:nrow(ttrk_train), nrow(ttrk_train))
random_ttrk_train <- ttrk_train[random_index, ]

# grid search 
for(i in 1:nrow(hyper_grid_gbm)) {
  
  # reproducibility
  set.seed(123)
  
  # train model
  gbm.tune <- gbm(
    formula = LSP ~ .,
    distribution = "gaussian",
    data = random_ttrk_train,
    n.trees = 5000,
    interaction.depth = hyper_grid_gbm$interaction.depth[i],
    shrinkage = hyper_grid_gbm$shrinkage[i],
    n.minobsinnode = hyper_grid_gbm$n.minobsinnode[i],
    bag.fraction = hyper_grid_gbm$bag.fraction[i],
    train.fraction = .75,
    n.cores = NULL, # will use all cores by default
    verbose = FALSE
  )
  
  # add the minimum validation error and corresponding tree count to the grid
  hyper_grid_gbm$optimal_trees[i] <- which.min(gbm.tune$valid.error)
  hyper_grid_gbm$min_RMSE[i] <- sqrt(min(gbm.tune$valid.error))
}

hyper_grid_gbm %>% 
  arrange(min_RMSE) %>%
  tibble()
## # A tibble: 81 x 6
##    shrinkage interaction.dep… n.minobsinnode bag.fraction optimal_trees min_RMSE
##        <dbl>            <dbl>          <dbl>        <dbl>         <dbl>    <dbl>
##  1      0.1                 1              5          1              31     2.92
##  2      0.1                 1             10          1              31     2.92
##  3      0.1                 1             15          1              31     2.92
##  4      0.01                1             15          1             386     2.92
##  5      0.01                1              5          1             377     2.92
##  6      0.01                1             10          1             377     2.92
##  7      0.01                1             15          0.8           310     2.92
##  8      0.01                1             10          0.8           310     2.92
##  9      0.3                 1              5          1              12     2.92
## 10      0.3                 1             10          1              12     2.92
## # … with 71 more rows
# for reproducibility
set.seed(123)

# train GBM model
gbm.fit.final <- gbm(
  formula = LSP ~ .,
  distribution = "gaussian",
  data = ttrk_train,
  n.trees = 157,
  interaction.depth = 5,
  shrinkage = 0.01,
  n.minobsinnode = 10,
  bag.fraction = 1, 
  train.fraction = 1,
  n.cores = NULL, # will use all cores by default
  verbose = FALSE
)  

par(mar = c(5, 8, 1, 1))
summary(
  gbm.fit.final,
  method = relative.influence, # also can use permutation.test.gbm
  las = 2
)

## # A tibble: 8 x 2
##   var            rel.inf
##   <chr>            <dbl>
## 1 Phone           55.8  
## 2 Age             29.9  
## 3 Distance         7.08 
## 4 Status           3.51 
## 5 Pol_Pfx          3.07 
## 6 Gender           0.567
## 7 Marital_Status   0    
## 8 pol_Pfx          0
vip(gbm.fit.final)
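
A sketch (not in the original; pred_gbm is a new name introduced here) scoring the final gbm model on the test set.

# test-set RMSE for the final gbm model
pred_gbm <- predict(gbm.fit.final, newdata = ttrk_test, n.trees = gbm.fit.final$n.trees)
RMSE(pred_gbm, ttrk_test$LSP)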

H2O Boosting

h2o.no_progress()
h2o.init(max_mem_size = "1g")
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     /tmp/RtmpUvzMZD/file19e65872a56/h2o_rstudio_user_started_from_r.out
##     /tmp/RtmpUvzMZD/file19e25431b3/h2o_rstudio_user_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: .. Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         4 seconds 28 milliseconds 
##     H2O cluster timezone:       Etc/UTC 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.30.0.1 
##     H2O cluster version age:    6 months and 19 days !!! 
##     H2O cluster name:           H2O_started_from_R_rstudio-user_jzy813 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   0.97 GB 
##     H2O cluster total cores:    1 
##     H2O cluster allowed cores:  1 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     H2O API Extensions:         Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 
##     R Version:                  R version 4.0.2 (2020-06-22)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is too old (6 months and 19 days)!
## Please download and install the latest version from http://h2o.ai/download/
y <- 'LSP'
x <- setdiff(names(ttrk_train), y)

# turn training set into h2o object
train.h2o <- as.h2o(ttrk_train)
## Warning in use.package("data.table"): data.table cannot be used without R
## package bit64 version 0.9.7 or higher. Please upgrade to take advangage of
## data.table speedups.
# training basic GBM model with defaults
h2o.fit1 <- h2o.gbm(
  x = x,
  y = y,
  training_frame = train.h2o,
  nfolds = 5
)

h2o.fit1
## Model Details:
## ==============
## 
## H2ORegressionModel: gbm
## Model ID:  GBM_model_R_1603409901012_1 
## Model Summary: 
##   number_of_trees number_of_internal_trees model_size_in_bytes min_depth
## 1              50                       50               16579         5
##   max_depth mean_depth min_leaves max_leaves mean_leaves
## 1         5    5.00000          9         32    21.66000
## 
## 
## H2ORegressionMetrics: gbm
## ** Reported on training data. **
## 
## MSE:  6.767943
## RMSE:  2.601527
## MAE:  2.037768
## RMSLE:  0.3047606
## Mean Residual Deviance :  6.767943
## 
## 
## 
## H2ORegressionMetrics: gbm
## ** Reported on cross-validation data. **
## ** 5-fold cross-validation on training data (Metrics computed for combined holdout predictions) **
## 
## MSE:  10.16471
## RMSE:  3.188215
## MAE:  2.498621
## RMSLE:  0.3676748
## Mean Residual Deviance :  10.16471
## 
## 
## Cross-Validation Metrics Summary: 
##                              mean          sd  cv_1_valid   cv_2_valid
## mae                     2.4988215  0.02977455   2.5020099    2.4707112
## mean_residual_deviance  10.168217  0.54227483    10.01449    10.224343
## mse                     10.168217  0.54227483    10.01449    10.224343
## r2                      0.0365747 0.046777964 0.012430682 -0.026870534
## residual_deviance       10.168217  0.54227483    10.01449    10.224343
## rmse                     3.187869  0.08446447    3.164568    3.1975527
## rmsle                  0.36724392 0.011001318  0.34939647    0.3716534
##                        cv_3_valid cv_4_valid  cv_5_valid
## mae                     2.5476723  2.4808135    2.492901
## mean_residual_deviance  11.021231   9.529255   10.051765
## mse                     11.021231   9.529255   10.051765
## r2                     0.07128285 0.03525179 0.090778716
## residual_deviance       11.021231   9.529255   10.051765
## rmse                     3.319824   3.086949   3.1704519
## rmsle                  0.37691563 0.37389708    0.364357
h2o.fit2 <- h2o.gbm(
  x = x,
  y = y,
  training_frame = train.h2o,
  nfolds = 5,
  ntrees = 5000,
  stopping_rounds = 10,
  stopping_tolerance = 0,
  seed = 123
)

# model stopped after 25 trees
h2o.fit2@parameters$ntrees
## [1] 25
# cross validated RMSE
h2o.rmse(h2o.fit2, xval = TRUE)
## [1] 3.18596
h2o.varimp_plot(h2o.fit2)
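
To finish, the tuned H2O model can be evaluated on the held-out test set and the cluster shut down (a minimal sketch, not in the original; h2o.shutdown is left commented out so it is not run by accident).

# score the tuned H2O GBM on the test set
test.h2o <- as.h2o(ttrk_test)
h2o.performance(h2o.fit2, newdata = test.h2o)

# shut down the cluster when finished
# h2o.shutdown(prompt = FALSE)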