suppressWarnings(suppressMessages(library(h2o)))
suppressWarnings(suppressMessages(library(timetk)))
suppressWarnings(suppressMessages(library(tidyquant)))
suppressWarnings(suppressMessages(library(tidyr)))
suppressWarnings(suppressMessages(library(plotly)))
# Ticker symbol handed to tq_get() below.
ocbc_symbols <- "O39.SI"

# Download the stock prices for the 2014-01-01 to 2017-12-31 window.
ocbc_tbl <- tq_get(
  ocbc_symbols,
  get  = "stock.prices",
  from = "2014-01-01",
  to   = "2017-12-31"
)

# Organise data: discard rows that contain any NA, then keep only the
# `date` and `close` columns.
ocbc_tbl <- select(drop_na(ocbc_tbl), date, close)
# Starting point
#ocbc_tbl %>% glimpse()
# Augment: tk_augment_timeseries_signature() adds data frame columns to the
# price table (the `year` column used for the split further down comes from
# this step).
ocbc_tbl_aug <- tk_augment_timeseries_signature(ocbc_tbl)
#ocbc_tbl_aug %>% glimpse()

# Prep data for h2o:
#   1. drop Date columns,
#   2. drop every column that contains at least one NA,
#   3. convert ordered factors to plain factors (via character).
ocbc_tbl_clean <- ocbc_tbl_aug %>%
  select_if(function(col) !is.Date(col)) %>%
  select_if(function(col) !any(is.na(col))) %>%
  mutate_if(is.ordered, function(col) as.factor(as.character(col)))
#ocbc_tbl_clean %>% glimpse()
# Split into training, validation and test sets by the `year` column:
#   training   - 2014 and earlier
#   validation - 2015
#   test       - 2016 onwards
train_tbl <- filter(ocbc_tbl_clean, year <= 2014)
valid_tbl <- filter(ocbc_tbl_clean, year == 2015)
test_tbl  <- filter(ocbc_tbl_clean, year >= 2016)
# Connect R to an H2O cluster. Per the captured console output that follows,
# a local H2O JVM is started when none is running yet.
h2o.init()
##
## H2O is not running yet, starting it now...
##
## Note: In case of errors look at the following log files:
## C:\Users\jkkli\AppData\Local\Temp\RtmpInOHa7/h2o_jkkli_started_from_r.out
## C:\Users\jkkli\AppData\Local\Temp\RtmpInOHa7/h2o_jkkli_started_from_r.err
##
##
## Starting H2O JVM and connecting: . Connection successful!
##
## R is connected to the H2O cluster:
## H2O cluster uptime: 2 seconds 270 milliseconds
## H2O cluster version: 3.14.0.3
## H2O cluster version age: 1 month and 12 days
## H2O cluster name: H2O_started_from_R_jkkli_ief229
## H2O cluster total nodes: 1
## H2O cluster total memory: 7.09 GB
## H2O cluster total cores: 8
## H2O cluster allowed cores: 8
## H2O cluster healthy: TRUE
## H2O Connection ip: localhost
## H2O Connection port: 54321
## H2O Connection proxy: NA
## H2O Internal Security: FALSE
## H2O API Extensions: Algos, AutoML, Core V3, Core V4
## R Version: R version 3.4.2 (2017-09-28)
# Turn off H2O progress output for the calls that follow.
h2o.no_progress()
# Convert the training, validation and test tables to H2OFrame objects
train_h2o <- as.h2o(train_tbl)
valid_h2o <- as.h2o(valid_tbl)
test_h2o <- as.h2o(test_tbl)
# Names that h2o will use as the target and predictor variables: the target
# is the `close` column, and every other column of the training frame is a
# predictor.
y <- "close"
x <- setdiff(names(train_h2o), y)

# Run AutoML with a 60-second runtime budget. The 2014 frame is used for
# training, the 2015 frame for validation, and the 2016+ frame for the
# leaderboard.
automl_models_h2o <- h2o.automl(
  x                 = x,
  y                 = y,
  training_frame    = train_h2o,
  validation_frame  = valid_h2o,
  leaderboard_frame = test_h2o,
  max_runtime_secs  = 60,
  stopping_metric   = "deviance"
)
# Extract the leader model from the AutoML result.
automl_leader <- automl_models_h2o@leader

# Score the test frame with the leader model.
pred_h2o <- automl_leader %>% h2o.predict(newdata = test_h2o)

# Report the leader's performance metrics on the test frame (auto-printed).
automl_leader %>% h2o.performance(newdata = test_h2o)
## H2ORegressionMetrics: gbm
##
## MSE: 1.189396
## RMSE: 1.090594
## MAE: 0.9566183
## RMSLE: 0.1038165
## Mean Residual Deviance : 1.189396
# Build the error table for the test period (2016 onwards): one row per date
# with the actual close, the model prediction, the error (actual - pred) and
# the error as a fraction of the actual value.
# The predictions are attached by position, so the filtered price table and
# the prediction frame must have the same number of rows in the same order.
error_tbl <- ocbc_tbl %>%
  filter(lubridate::year(date) >= 2016) %>%
  # as_tibble() replaces the deprecated as.tibble().
  add_column(pred = pred_h2o %>% as_tibble() %>% pull(predict)) %>%
  rename(actual = close) %>%
  mutate(
    error     = actual - pred,
    error_pct = error / actual
  )
# Summarise forecast accuracy over the whole error table and print it:
#   me   - mean error
#   rmse - root mean squared error
#   mae  - mean absolute error
#   mape - mean absolute percentage error (as a fraction)
#   mpe  - mean percentage error (as a fraction)
glimpse(
  summarise(
    error_tbl,
    me   = mean(error),
    rmse = mean(error^2)^0.5,
    mae  = mean(abs(error)),
    mape = mean(abs(error_pct)),
    mpe  = mean(error_pct)
  )
)
## Observations: 1
## Variables: 5
## $ me <dbl> -0.1975174
## $ rmse <dbl> 1.090594
## $ mae <dbl> 0.9566183
## $ mape <dbl> 0.1028626
## $ mpe <dbl> -0.03382029
# Shut down the H2O cluster; prompt = FALSE skips the confirmation prompt.
h2o.shutdown(prompt = FALSE)
## [1] TRUE
# See the predicted prices for the last few dates
tail(error_tbl)
## # A tibble: 6 x 5
## date actual pred error error_pct
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2017-10-27 11.80 9.806380 1.993620 0.1689509
## 2 2017-10-30 11.94 9.806132 2.133868 0.1787159
## 3 2017-10-31 11.90 9.806132 2.093868 0.1759553
## 4 2017-11-01 11.94 9.829033 2.110967 0.1767979
## 5 2017-11-02 11.77 9.828786 1.941214 0.1649290
## 6 2017-11-03 11.81 9.829033 1.980967 0.1677364
# See the graph of actual vs. predicted prices
# Plot actual vs. predicted prices for the rows of error_tbl, using the row
# index as the x axis.
trace_0 <- error_tbl$actual
trace_2 <- error_tbl$pred
# seq_len() yields an empty index for a zero-row table, whereas 1:nrow()
# would yield c(1, 0) and make data.frame() fail on mismatched lengths.
x <- seq_len(nrow(error_tbl))
data <- data.frame(x, trace_0, trace_2)
# error_tbl only holds rows from 2016 onwards, so both traces cover the same
# period; the "actual" legend entry is labelled accordingly.
plot_ly(
  data,
  x = ~x,
  y = ~trace_0,
  name = "Actual Price for year 2016 onwards",
  type = "scatter",
  mode = "lines"
) %>%
  add_trace(
    y = ~trace_2,
    name = "Predicted Price for year 2016 onwards",
    type = "scatter",
    mode = "lines"
  )