Package importing

pacman::p_load(dplyr, broom, caTools, 
               ggplot2, gridExtra, 
               caret, readxl,
               h2o, forecast)

Data preprocessing

# Importing
dataset = read_excel('Folds5x2_pp.xlsx')

# Partitioning
# Split before scaling so that the scaling statistics are computed from the
# training rows only; scaling the full dataset first would leak test-set
# information into training.
set.seed(123)
split = sample.split(dataset$PE, SplitRatio = 0.8)
training.set = subset(dataset, split == TRUE)
test.set = subset(dataset, split == FALSE)

# Scaling
# Every column except the last one is standardised; the last column is
# assumed to be the response (PE) and is left on its original scale.
feature.cols = -ncol(dataset)
training.scaled = scale(training.set[, feature.cols])
training.set[, feature.cols] = training.scaled
# Reuse the training means and standard deviations for the test rows.
train.center = attr(training.scaled, 'scaled:center')
train.scale = attr(training.scaled, 'scaled:scale')
test.set[, feature.cols] = scale(test.set[, feature.cols],
                                 center = train.center,
                                 scale = train.scale)

ANN model connection

# Start (or connect to) a local H2O instance. Per the H2O documentation,
# nthreads = -1 lets H2O use all available cores.
h2o.init(nthreads = -1)
## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     C:\Users\andy\AppData\Local\Temp\RtmpuENeEl\file25543b0835f2/h2o_andy_started_from_r.out
##     C:\Users\andy\AppData\Local\Temp\RtmpuENeEl\file255412c14492/h2o_andy_started_from_r.err
## 
## 
## Starting H2O JVM and connecting:  Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 607 milliseconds 
##     H2O cluster timezone:       America/Chicago 
##     H2O data parsing timezone:  UTC 
##     H2O cluster version:        3.30.0.1 
##     H2O cluster version age:    6 months and 30 days !!! 
##     H2O cluster name:           H2O_started_from_R_andy_ppf172 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   3.95 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     H2O API Extensions:         Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4 
##     R Version:                  R version 4.0.2 (2020-06-22)
## Warning in h2o.clusterInfo(): 
## Your H2O cluster version is too old (6 months and 30 days)!
## Please download and install the latest version from http://h2o.ai/download/

ANN model fitting

# Fit a deep-learning regressor for PE on the training rows. No `x` is
# given, so the predictor set is left to h2o.deeplearning's default.
mod = h2o.deeplearning(
  training_frame = as.h2o(training.set),
  y = 'PE',
  # Two hidden layers of six units each, with rectifier activation.
  hidden = rep(6, 2),
  activation = 'Rectifier',
  epochs = 100,
  # Per the H2O documentation, -2 selects automatic tuning of this value.
  train_samples_per_iteration = -2
)
## Warning in use.package("data.table"): data.table cannot be used without R
## package bit64 version 0.9.7 or higher. Please upgrade to take advangage of
## data.table speedups.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%

ANN model prediction

# Predicting
# Score the test rows with the fitted model; the last column of test.set is
# dropped before the data is sent to H2O.
y.pred = h2o.predict(
  object = mod,
  newdata = as.h2o(test.set[, seq_len(ncol(test.set) - 1)])
)
## Warning in use.package("data.table"): data.table cannot be used without R
## package bit64 version 0.9.7 or higher. Please upgrade to take advangage of
## data.table speedups.
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======================================================================| 100%
# Convert the predictions returned by h2o.predict() into a plain R vector.
y.pred = as.vector(y.pred)

# Evaluating
y.acul = as.vector(test.set$PE)
# Side-by-side table of actual and predicted values, kept for inspection.
comp = data.frame(y.acul = y.acul,
                  y.pred = y.pred)
# Predictions are passed first, observed values second.
eval = accuracy(y.pred, y.acul)
# `digits` is spelled out in full rather than relying on partial argument
# matching of `digit`.
paste0('The RMSE is ', format(eval[, 'RMSE'], digits = 4))
## [1] "The RMSE is 4.187"

ANN model disconnection

# Stop the H2O instance without asking for confirmation: the default Y/N
# prompt cannot be answered when the script runs unattended, which would
# leave the instance running.
h2o.shutdown(prompt = FALSE)
## Are you sure you want to shutdown the H2O instance running at http://localhost:54321/ (Y/N)?