# ---- Packages ----
# One call per line: R cannot parse several calls on a single line unless
# they are separated by newlines or `;`.
library(tidyverse)
library(tidymodels)
library(h2o)
# ---- Train/test split ----
# 80/20 split of `attrition_raw_tbl`, stratified on the Attrition column so
# both partitions keep a similar class balance.
set.seed(123) # for reproducibility
split <- initial_split(attrition_raw_tbl, prop = 0.8, strata = "Attrition")
train_data <- training(split)
test_data <- testing(split)
# ---- Preprocessing recipe ----
# Attrition is the outcome; every other column of `train_data` is a predictor.
attrition_recipe <- recipe(Attrition ~ ., data = train_data) %>%
  # Remove columns with zero variance
  step_rm(Over18, EmployeeCount, StandardHours) %>%
  # One-hot encode the nominal predictors only. The outcome is excluded on
  # purpose: encoding it would create an `Attrition_No` column that leaks the
  # target into the predictors, and would leave a numeric 0/1 response that
  # AutoML would model as a regression problem.
  step_dummy(all_nominal(), -all_outcomes(), one_hot = TRUE) %>%
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes())

# Estimate the preprocessing parameters on the training data only.
attrition_recipe_prep <- prep(attrition_recipe, training = train_data)

# Apply the trained recipe to both partitions.
train_data_processed <- bake(attrition_recipe_prep, new_data = train_data)
test_data_processed <- bake(attrition_recipe_prep, new_data = test_data)

# ---- H2O setup ----
h2o.init()

train_h2o <- as.h2o(train_data_processed)
test_h2o <- as.h2o(test_data_processed)

# The response is the original (un-encoded) Attrition column; everything else
# in the processed data is a predictor.
response <- "Attrition"
predictors <- setdiff(names(train_data_processed), response)

# Make sure the response is categorical in both frames so that AutoML treats
# the task as classification.
train_h2o[, response] <- h2o.asfactor(train_h2o[, response])
test_h2o[, response] <- h2o.asfactor(test_h2o[, response])
# ---- AutoML training ----
# Train on the H2O training frame using the `predictors` / `response` column
# names defined earlier in the script.
automl <- h2o.automl(
  x = predictors,
  y = response,
  training_frame = train_h2o,
  max_runtime_secs = 30, # Maximum runtime in seconds
  seed = 123
)
# ---- Evaluate the leading model on the held-out test frame ----
# Keep only the `predict` column of the prediction output.
predictions <- h2o.predict(automl@leader, newdata = test_h2o)$predict

# Performance metrics of the leader computed on the test frame.
performance <- h2o.performance(automl@leader, newdata = test_h2o)
print(performance)

# Summary of the leading model.
summary(automl@leader)
# Shut the H2O cluster down without the interactive Y/N confirmation, so the
# script also terminates the cluster when it is run non-interactively.
h2o.shutdown(prompt = FALSE)