Package importing
pacman::p_load(dplyr, broom, caTools,
ggplot2, gridExtra,
caret, readxl,
h2o, forecast)
Data preprocessing
# Importing
dataset <- read_excel("Folds5x2_pp.xlsx")
# Partitioning
# The split is done BEFORE scaling so that test rows never contribute to the
# scaling statistics (no train/test leakage). The split itself only depends on
# the seed and the PE column, so the same rows land in each partition.
set.seed(123)
split <- sample.split(dataset$PE, SplitRatio = 0.8)
training.set <- subset(dataset, split == TRUE)
test.set <- subset(dataset, split == FALSE)
# Scaling
# Standardise every column except the last one (presumably the response, PE).
# The centre and scale are estimated on the training partition only and then
# re-used for the test partition. Note that `dataset` itself stays unscaled.
train.scaled <- scale(training.set[, -ncol(training.set)])
test.set[, -ncol(test.set)] <- scale(
  test.set[, -ncol(test.set)],
  center = attr(train.scaled, "scaled:center"),
  scale = attr(train.scaled, "scaled:scale")
)
training.set[, -ncol(training.set)] <- train.scaled
H2O cluster connection
# Start a local H2O instance (or attach to one that is already running);
# nthreads = -1 lets H2O use every available core.
h2o.init(nthreads = -1)
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## C:\Users\andy\AppData\Local\Temp\RtmpuENeEl\file25543b0835f2/h2o_andy_started_from_r.out
## C:\Users\andy\AppData\Local\Temp\RtmpuENeEl\file255412c14492/h2o_andy_started_from_r.err
##
##
## Starting H2O JVM and connecting: Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 607 milliseconds
## H2O cluster timezone: America/Chicago
## H2O data parsing timezone: UTC
## H2O cluster version: 3.30.0.1
## H2O cluster version age: 6 months and 30 days !!!
## H2O cluster name: H2O_started_from_R_andy_ppf172
## H2O cluster total nodes: 1
## H2O cluster total memory: 3.95 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## H2O API Extensions: Amazon S3, Algos, AutoML, Core V3, TargetEncoder, Core V4
## R Version: R version 4.0.2 (2020-06-22)
## Warning in h2o.clusterInfo():
## Your H2O cluster version is too old (6 months and 30 days)!
## Please download and install the latest version from http://h2o.ai/download/
ANN model fitting
# Fit a feed-forward network on the training partition with PE as the
# response: two hidden layers of 6 rectifier units each, trained for 100
# epochs. train_samples_per_iteration = -2 leaves that setting to H2O's
# automatic tuning.
mod <- h2o.deeplearning(
  training_frame = as.h2o(training.set),
  y = "PE",
  hidden = c(6, 6),
  activation = "Rectifier",
  epochs = 100,
  train_samples_per_iteration = -2
)
## Warning in use.package("data.table"): data.table cannot be used without R
## package bit64 version 0.9.7 or higher. Please upgrade to take advangage of
## data.table speedups.
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|=================================== | 50%
|
|======================================================================| 100%
ANN model prediction
# Predicting
# Score the test partition after dropping its last column, so only the
# predictor columns are uploaded to H2O.
y.pred <- h2o.predict(
  mod,
  newdata = as.h2o(test.set[, -ncol(test.set)])
)
## Warning in use.package("data.table"): data.table cannot be used without R
## package bit64 version 0.9.7 or higher. Please upgrade to take advangage of
## data.table speedups.
##
|
| | 0%
|
|======================================================================| 100%
##
|
| | 0%
|
|======================================================================| 100%
# Pull the predictions out of the H2O frame into a plain R vector.
y.pred <- as.vector(y.pred)
# Evaluating
# Actual response values of the test partition.
y.acul <- as.vector(test.set$PE)
# Side-by-side table of actual and predicted values for inspection.
comp <- data.frame(y.acul = y.acul,
                   y.pred = y.pred)
# Accuracy measures of the predictions (first argument) against the actuals.
eval <- accuracy(y.pred, y.acul)
# `digits` is spelled out in full instead of relying on partial argument
# matching of `digit`.
paste0("The RMSE is ", format(eval[, "RMSE"], digits = 4))
## [1] "The RMSE is 4.187"
H2O cluster shutdown
# Shut the H2O instance down without the interactive Y/N confirmation, which a
# non-interactive run (e.g. knitting this document) has no way to answer.
h2o.shutdown(prompt = FALSE)
## Are you sure you want to shutdown the H2O instance running at http://localhost:54321/ (Y/N)?