suppressWarnings(suppressMessages(library(h2o)))
suppressWarnings(suppressMessages(library(timetk)))
suppressWarnings(suppressMessages(library(tidyquant)))
suppressWarnings(suppressMessages(library(tidyr)))
suppressWarnings(suppressMessages(library(plotly)))

# Ticker symbol passed to tq_get() for the price download.
ocbc_symbols <- "O39.SI"

# Download daily stock prices for the 2014-2017 window.
ocbc_tbl <- tq_get(
  ocbc_symbols,
  get  = "stock.prices",
  from = "2014-01-01",
  to   = "2017-12-31"
)

# Drop rows containing missing values, then keep only the date and the close.
ocbc_tbl <- select(drop_na(ocbc_tbl), date, close)

# Starting point
#ocbc_tbl %>% glimpse()

# Expand the date column into time-series signature columns
# (the result is the input table with extra data frame columns appended).
ocbc_tbl_aug <- tk_augment_timeseries_signature(ocbc_tbl)
# glimpse(ocbc_tbl_aug)

# Prepare the augmented table for H2O:
#   1. drop Date columns,
#   2. drop every column that contains at least one NA,
#   3. turn ordered factors into plain (unordered) factors.
ocbc_tbl_clean <- ocbc_tbl_aug %>%
  select_if(function(col) !is.Date(col)) %>%
  select_if(function(col) !any(is.na(col))) %>%
  mutate_if(is.ordered, function(col) as.factor(as.character(col)))

# glimpse(ocbc_tbl_clean)

# Partition by calendar year: up to 2014 for training, 2015 for validation,
# 2016 onwards for testing.
train_tbl <- filter(ocbc_tbl_clean, year <= 2014)
valid_tbl <- filter(ocbc_tbl_clean, year == 2015)
test_tbl  <- filter(ocbc_tbl_clean, year >= 2016)


# Connect to an H2O cluster on localhost, starting one if none is running yet
# (see the startup log printed below).
h2o.init()

## 
## H2O is not running yet, starting it now...
## 
## Note:  In case of errors look at the following log files:
##     C:\Users\jkkli\AppData\Local\Temp\RtmpInOHa7/h2o_jkkli_started_from_r.out
##     C:\Users\jkkli\AppData\Local\Temp\RtmpInOHa7/h2o_jkkli_started_from_r.err
## 
## 
## Starting H2O JVM and connecting: . Connection successful!
## 
## R is connected to the H2O cluster: 
##     H2O cluster uptime:         2 seconds 270 milliseconds 
##     H2O cluster version:        3.14.0.3 
##     H2O cluster version age:    1 month and 12 days  
##     H2O cluster name:           H2O_started_from_R_jkkli_ief229 
##     H2O cluster total nodes:    1 
##     H2O cluster total memory:   7.09 GB 
##     H2O cluster total cores:    8 
##     H2O cluster allowed cores:  8 
##     H2O cluster healthy:        TRUE 
##     H2O Connection ip:          localhost 
##     H2O Connection port:        54321 
##     H2O Connection proxy:       NA 
##     H2O Internal Security:      FALSE 
##     H2O API Extensions:         Algos, AutoML, Core V3, Core V4 
##     R Version:                  R version 3.4.2 (2017-09-28)

# Turn off H2O progress bars so they do not clutter the rendered output.
h2o.no_progress()  

# Convert to H2OFrame objects
# NOTE(review): as.h2o() appears to derive the server-side frame key from the
# argument expression, so keep these as direct calls on the named tables
# rather than piping -- confirm against the h2o documentation.
train_h2o <- as.h2o(train_tbl)
valid_h2o <- as.h2o(valid_tbl)
test_h2o  <- as.h2o(test_tbl)

# Target column and predictor columns for H2O: every column of the training
# frame except the target is used as a predictor.
y <- "close"
x <- names(train_h2o) %>% setdiff(y)

# Run AutoML for at most 60 seconds.
# NOTE(review): the leaderboard is scored on test_h2o, and the leader's
# performance is later reported on that same frame, so the reported test
# metrics may be optimistic -- confirm this is intended.
automl_models_h2o <- h2o.automl(
  x                 = x,
  y                 = y,
  training_frame    = train_h2o,
  validation_frame  = valid_h2o,
  leaderboard_frame = test_h2o,
  max_runtime_secs  = 60,
  stopping_metric   = "deviance"
)

# Take the top-ranked model from the AutoML run.
automl_leader <- slot(automl_models_h2o, "leader")

# Predict on the test frame with the leader.
pred_h2o <- h2o.predict(automl_leader, newdata = test_h2o)

# Print the leader's metrics on the test frame.
h2o.performance(automl_leader, newdata = test_h2o)

## H2ORegressionMetrics: gbm
## 
## MSE:  1.189396
## RMSE:  1.090594
## MAE:  0.9566183
## RMSLE:  0.1038165
## Mean Residual Deviance :  1.189396

# Compare actual and predicted closing prices over the test period (2016 on).
# The predictions were made on the year >= 2016 rows of ocbc_tbl_clean, which
# is derived from ocbc_tbl by column operations only, so the prediction vector
# is attached positionally to the same rows of ocbc_tbl.
#   error     = actual - pred
#   error_pct = error relative to the actual price
error_tbl <- ocbc_tbl %>%
  filter(lubridate::year(date) >= 2016) %>%
  # as_tibble() replaces the deprecated as.tibble()
  add_column(pred = pred_h2o %>% as_tibble() %>% pull(predict)) %>%
  rename(actual = close) %>%
  mutate(
    error     = actual - pred,
    error_pct = error / actual
  )


# Summarise forecast accuracy over the test period:
#   me   - mean error
#   rmse - root mean squared error
#   mae  - mean absolute error
#   mape - mean absolute percentage error
#   mpe  - mean percentage error
glimpse(
  summarise(
    error_tbl,
    me   = mean(error),
    rmse = mean(error^2)^0.5,
    mae  = mean(abs(error)),
    mape = mean(abs(error_pct)),
    mpe  = mean(error_pct)
  )
)

## Observations: 1
## Variables: 5
## $ me   <dbl> -0.1975174
## $ rmse <dbl> 1.090594
## $ mae  <dbl> 0.9566183
## $ mape <dbl> 0.1028626
## $ mpe  <dbl> -0.03382029

# Shut down the H2O cluster without an interactive confirmation prompt.
# error_tbl is a regular tibble held in R, so the steps below do not need H2O.
h2o.shutdown(prompt = FALSE)

## [1] TRUE

See the predicted prices for the last few trading days

# Show the final rows of the actual-vs-predicted table.
error_tbl %>% tail()

## # A tibble: 6 x 5
##         date actual     pred    error error_pct
##       <date>  <dbl>    <dbl>    <dbl>     <dbl>
## 1 2017-10-27  11.80 9.806380 1.993620 0.1689509
## 2 2017-10-30  11.94 9.806132 2.133868 0.1787159
## 3 2017-10-31  11.90 9.806132 2.093868 0.1759553
## 4 2017-11-01  11.94 9.829033 2.110967 0.1767979
## 5 2017-11-02  11.77 9.828786 1.941214 0.1649290
## 6 2017-11-03  11.81 9.829033 1.980967 0.1677364

See the graph of actual vs. predicted prices

# Line chart of actual vs. predicted closing prices over the test period.
# Both series come from error_tbl, which only holds rows from 2016 onwards.
trace_0 <- error_tbl$actual
trace_2 <- error_tbl$pred

# Observation index for the x axis. seq_len() yields an empty vector for a
# zero-row table, whereas 1:nrow() would yield c(1, 0).
x <- seq_len(nrow(error_tbl))
data <- data.frame(x, trace_0, trace_2)

plot_ly(
  data,
  x = ~x,
  y = ~trace_0,
  # Label corrected: the plotted actual prices cover 2016 onwards only.
  name = "Actual Price for year 2016 onwards",
  type = "scatter",
  mode = "lines"
) %>%
  add_trace(
    y = ~trace_2,
    name = "Predicted Price for year 2016 onwards",
    type = "scatter",
    mode = "lines"
  )

Predict_Stock_Price

James Lim

03 November, 2017

See the predicted prices for the last few trading days

See the graph of actual vs. predicted prices