# Clear workspace: 
rm(list = ls())

# Import packages for data manipulation: 
library(tidyverse)
library(magrittr)

# A function for black theme: 

my_theme <- function(...) {
  theme(
    axis.line = element_blank(),  
    axis.text.x = element_text(color = "white", lineheight = 0.9),  
    axis.text.y = element_text(color = "white", lineheight = 0.9),  
    axis.ticks = element_line(color = "white", size  =  0.2),  
    axis.title.x = element_text(color = "white", margin = margin(0, 10, 0, 0)),  
    axis.title.y = element_text(color = "white", angle = 90, margin = margin(0, 10, 0, 0)),  
    axis.ticks.length = unit(0.3, "lines"),   
    legend.background = element_rect(color = NA, fill = "gray10"),  
    legend.key = element_rect(color = "white",  fill = "gray10"),  
    legend.key.size = unit(1.2, "lines"),  
    legend.key.height = NULL,  
    legend.key.width = NULL,      
    legend.text = element_text(color = "white"),  
    legend.title = element_text(face = "bold", hjust = 0, color = "white"),  
    legend.text.align = NULL,  
    legend.title.align = NULL,  
    legend.direction = "vertical",  
    legend.box = NULL, 
    panel.background = element_rect(fill = "gray10", color  =  NA),  
    panel.border = element_blank(),
    panel.grid.major = element_line(color = "grey35"),  
    panel.grid.minor = element_line(color = "grey20"),  
    panel.spacing = unit(0.5, "lines"),   
    strip.background = element_rect(fill = "grey30", color = "grey10"),  
    strip.text.x = element_text(color = "white"),  
    strip.text.y = element_text(color = "white", angle = -90),  
    plot.background = element_rect(color = "gray10", fill = "gray10"),  
    plot.title = element_text(color = "white", hjust = 0, lineheight = 1.25, margin = margin(2, 2, 2, 2)),  
    plot.subtitle = element_text(color = "white", hjust = 0, margin = margin(2, 2, 2, 2)),  
    plot.caption = element_text(color = "white", hjust = 0),  
    plot.margin = unit(rep(1, 4), "lines"))
  
}

# Inspect data path and load data: 
my_path <- dir("/home/chidung/Desktop/atm_cash_data", full.names = TRUE)
my_case <- read_csv(my_path[1])

# Create lag 1 variable: 
my_case_lag <- my_case %>% 
  mutate(lag1 = lag(daily_cash, 1)) %>% 
  na.omit()

# Scale 0-1 for variables: 

my_case_lag_scaled <- my_case_lag %>% 
  mutate_if(is.numeric, function(x) {(x - min(x)) / (max(x) - min(x))})


# Split data: 
N <- nrow(my_case_lag_scaled)
train_df <- my_case_lag_scaled %>% slice(1:450)
test_df <- my_case_lag_scaled %>% slice(451:N)

# Specify features and output: 

y_train <- train_df %>% pull(2)
x_train <- train_df %>% pull(3)

y_test <- test_df %>% pull(2)
x_test <- test_df %>% pull(3)


#======================================================
#  Long Short Term Memory (LSTM) Networks (Version 1)
#======================================================

# Load package keras: 
library(keras)

# Reshape the input to 3-dim: 
dim(x_train) <- c(length(x_train), 1, 1)

# specify required arguments: 
X_shape2 <- dim(x_train)[2]
X_shape3 <- dim(x_train)[3]
batch_size <- 1                
units <- 1                     

# Specify conditions for LSTM Model: 

model <- keras_model_sequential() %>%
  layer_lstm(units, batch_input_shape = c(batch_size, X_shape2, X_shape3), stateful = TRUE) %>%
  layer_dense(units = 1) %>% 
  compile(loss = "mean_squared_error",
          optimizer = optimizer_adam(lr = 0.005, decay = 1e-10),  
          metrics = c("accuracy"))

# Train / fit LSTM Model: 

Epochs <- 10   
for(i in 1:Epochs) {
  model %>% 
    fit(x_train, 
        y_train, 
        epochs = 1,
        batch_size = batch_size, 
        verbose = 1, 
        shuffle = FALSE)
  model %>% reset_states()
}

# A function for forecasting: 

my_predict_from_lstm <- function(k) {
  x <- x_test[k]
  dim(x) <- c(1, 1, 1)
  y_hat <- model %>% predict(x, batch_size = 1)
  return(y_hat)
}

# Make predictions: 
sapply(1:length(x_test), my_predict_from_lstm) ->> pred_scaled

# A function covert to origin: 

convert_to_origin <- function(x) {
  x_base <- my_case_lag$daily_cash
  y <- x*(max(x_base) - min(x_base)) + min(x_base)
  return(y)
}

# Compare actuals vs predictions: 
test_df %>% 
  rename(Actual = daily_cash) %>% 
  mutate(Predicted = convert_to_origin(pred_scaled), Actual = convert_to_origin(Actual)) %>% 
  select(-lag1) -> lstm_results

lstm_results %>% 
  gather(a, b, -DAYID) %>% 
  ggplot(aes(DAYID, b, color = a)) + 
  geom_line() + 
  scale_color_manual(values = c("purple", "orange"), name = "Series:") + 
  my_theme() + 
  labs(x = NULL, y = NULL, subtitle = "Approach Used: Long Short Term Memory (LSTM) Networks", 
       title = "One-step Ahead Prediction for Cash Withdrawals (120 Consecutive Days)")

#===================
#  ARIMA Approach
#===================

library(fpp2)

predict_from_arima <- function(k) {
  
  n_ahead <- 1
  train_arima <- my_case_lag %>% slice((1 + k):(450 + k)) %>% select(1:2)
  test_arima  <- my_case_lag %>% slice(450 + k + n_ahead) %>% select(1:2)
  
  # Automatic ARIMA model as proposed by Hyndman and Khandakar (2008): 
  my_arima <- auto.arima(train_arima[, 2] %>% ts(start = 1))
  
  # Use the model for forecasting: 
  predicted_arima <- forecast(my_arima, h = 1)$mean %>% as.vector()
  
  actual_predicted_df_test <- test_arima %>% 
    mutate(predicted = predicted_arima) 
  
  return(actual_predicted_df_test)
  
}

# Use this function: 

system.time(lapply(0:119, predict_from_arima) ->> arima_results)
##    user  system elapsed 
##  32.355  58.790  14.956
arima_results <- do.call("bind_rows", arima_results)

# Compare cash demands predicted by ARIMA approach and actuals: 

arima_results %<>%  
  rename(Actual = daily_cash, Predicted = predicted) 

arima_results %>% 
  gather(a, b, -DAYID) %>% 
  ggplot(aes(DAYID, b, color = a)) + 
  geom_line() + 
  scale_color_manual(values = c("purple", "orange"), name = "Series:") + 
  my_theme() + 
  labs(x = NULL, y = NULL, subtitle = "Approach Used: Auto ARIMA", 
       title = "One-step Ahead Prediction for Cash Withdrawals (120 Consecutive Days)")

# Function calculate some accuracy measures: 
get_accuracy_measures <- function(your_result_df) {

  act <- your_result_df %>% pull(Actual)
  pred <- your_result_df %>% pull(Predicted)
  err <- act - pred 
  per_err <- abs(err / act)
  
  # Mean Absolute Error (MAE): 
  mae <- err %>% abs() %>% mean()
  
  # Mean Squared Error (MSE): 
  mse <- mean(err^2)
  
  # Mean Absolute Percentage Error (MAPE): 
  mape <- 100*mean(per_err)
  
  # Return results: 
  return(data.frame(MAE = mae, MSE = mse, MAPE = mape, N = length(act)))
}

# Use above function for comparing: 

bind_rows(lstm_results %>% get_accuracy_measures(), arima_results %>% get_accuracy_measures) %>% 
  mutate(Approach = c("LSTM", "ARIMA")) %>% 
  select(Approach, everything()) %>% 
  mutate_if(is.numeric, function(x) {round(x, 2)}) %>% 
  knitr::kable()
Approach MAE MSE MAPE N
LSTM 46.03 7726.99 138.52 120
ARIMA 41.22 6465.79 110.74 120
---
title: "Forecasting Daily Cash Demand: Long Short Term Memory vs ARIMA Aprroach" 
subtitle: "R for Fun"
author: "Nguyen Chi Dung"
output:
  html_document: 
    code_download: true
    code_folding: hide
    highlight: pygments
    # number_sections: yes
    theme: "flatly"
    toc: TRUE
    toc_float: TRUE
---

```{r setup,include=FALSE}
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, warning = FALSE, message = FALSE)
```

```{r}
# Clear workspace: 
rm(list = ls())

# Import packages for data manipulation: 
library(tidyverse)
library(magrittr)

# A function for black theme: 

my_theme <- function(...) {
  theme(
    axis.line = element_blank(),  
    axis.text.x = element_text(color = "white", lineheight = 0.9),  
    axis.text.y = element_text(color = "white", lineheight = 0.9),  
    axis.ticks = element_line(color = "white", size  =  0.2),  
    axis.title.x = element_text(color = "white", margin = margin(0, 10, 0, 0)),  
    axis.title.y = element_text(color = "white", angle = 90, margin = margin(0, 10, 0, 0)),  
    axis.ticks.length = unit(0.3, "lines"),   
    legend.background = element_rect(color = NA, fill = "gray10"),  
    legend.key = element_rect(color = "white",  fill = "gray10"),  
    legend.key.size = unit(1.2, "lines"),  
    legend.key.height = NULL,  
    legend.key.width = NULL,      
    legend.text = element_text(color = "white"),  
    legend.title = element_text(face = "bold", hjust = 0, color = "white"),  
    legend.text.align = NULL,  
    legend.title.align = NULL,  
    legend.direction = "vertical",  
    legend.box = NULL, 
    panel.background = element_rect(fill = "gray10", color  =  NA),  
    panel.border = element_blank(),
    panel.grid.major = element_line(color = "grey35"),  
    panel.grid.minor = element_line(color = "grey20"),  
    panel.spacing = unit(0.5, "lines"),   
    strip.background = element_rect(fill = "grey30", color = "grey10"),  
    strip.text.x = element_text(color = "white"),  
    strip.text.y = element_text(color = "white", angle = -90),  
    plot.background = element_rect(color = "gray10", fill = "gray10"),  
    plot.title = element_text(color = "white", hjust = 0, lineheight = 1.25, margin = margin(2, 2, 2, 2)),  
    plot.subtitle = element_text(color = "white", hjust = 0, margin = margin(2, 2, 2, 2)),  
    plot.caption = element_text(color = "white", hjust = 0),  
    plot.margin = unit(rep(1, 4), "lines"))
  
}

# Inspect data path and load data: 
my_path <- dir("/home/chidung/Desktop/atm_cash_data", full.names = TRUE)
my_case <- read_csv(my_path[1])

# Create lag 1 variable: 
my_case_lag <- my_case %>% 
  mutate(lag1 = lag(daily_cash, 1)) %>% 
  na.omit()

# Scale 0-1 for variables: 

my_case_lag_scaled <- my_case_lag %>% 
  mutate_if(is.numeric, function(x) {(x - min(x)) / (max(x) - min(x))})


# Split data: 
N <- nrow(my_case_lag_scaled)
train_df <- my_case_lag_scaled %>% slice(1:450)
test_df <- my_case_lag_scaled %>% slice(451:N)

# Specify features and output: 

y_train <- train_df %>% pull(2)
x_train <- train_df %>% pull(3)

y_test <- test_df %>% pull(2)
x_test <- test_df %>% pull(3)


#======================================================
#  Long Short Term Memory (LSTM) Networks (Version 1)
#======================================================

# Load package keras: 
library(keras)

# Reshape the input to 3-dim: 
dim(x_train) <- c(length(x_train), 1, 1)

# specify required arguments: 
X_shape2 <- dim(x_train)[2]
X_shape3 <- dim(x_train)[3]
batch_size <- 1                
units <- 1                     

# Specify conditions for LSTM Model: 

model <- keras_model_sequential() %>%
  layer_lstm(units, batch_input_shape = c(batch_size, X_shape2, X_shape3), stateful = TRUE) %>%
  layer_dense(units = 1) %>% 
  compile(loss = "mean_squared_error",
          optimizer = optimizer_adam(lr = 0.005, decay = 1e-10),  
          metrics = c("accuracy"))

# Train / fit LSTM Model: 

Epochs <- 10   
for(i in 1:Epochs) {
  model %>% 
    fit(x_train, 
        y_train, 
        epochs = 1,
        batch_size = batch_size, 
        verbose = 1, 
        shuffle = FALSE)
  model %>% reset_states()
}

# A function for forecasting: 

my_predict_from_lstm <- function(k) {
  x <- x_test[k]
  dim(x) <- c(1, 1, 1)
  y_hat <- model %>% predict(x, batch_size = 1)
  return(y_hat)
}

# Make predictions: 
sapply(1:length(x_test), my_predict_from_lstm) ->> pred_scaled

# A function covert to origin: 

convert_to_origin <- function(x) {
  x_base <- my_case_lag$daily_cash
  y <- x*(max(x_base) - min(x_base)) + min(x_base)
  return(y)
}

# Compare actuals vs predictions: 
test_df %>% 
  rename(Actual = daily_cash) %>% 
  mutate(Predicted = convert_to_origin(pred_scaled), Actual = convert_to_origin(Actual)) %>% 
  select(-lag1) -> lstm_results

lstm_results %>% 
  gather(a, b, -DAYID) %>% 
  ggplot(aes(DAYID, b, color = a)) + 
  geom_line() + 
  scale_color_manual(values = c("purple", "orange"), name = "Series:") + 
  my_theme() + 
  labs(x = NULL, y = NULL, subtitle = "Approach Used: Long Short Term Memory (LSTM) Networks", 
       title = "One-step Ahead Prediction for Cash Withdrawals (120 Consecutive Days)")

#===================
#  ARIMA Approach
#===================

library(fpp2)

predict_from_arima <- function(k) {
  
  n_ahead <- 1
  train_arima <- my_case_lag %>% slice((1 + k):(450 + k)) %>% select(1:2)
  test_arima  <- my_case_lag %>% slice(450 + k + n_ahead) %>% select(1:2)
  
  # Automatic ARIMA model as proposed by Hyndman and Khandakar (2008): 
  my_arima <- auto.arima(train_arima[, 2] %>% ts(start = 1))
  
  # Use the model for forecasting: 
  predicted_arima <- forecast(my_arima, h = 1)$mean %>% as.vector()
  
  actual_predicted_df_test <- test_arima %>% 
    mutate(predicted = predicted_arima) 
  
  return(actual_predicted_df_test)
  
}

# Use this function: 

system.time(lapply(0:119, predict_from_arima) ->> arima_results)
arima_results <- do.call("bind_rows", arima_results)

# Compare cash demands predicted by ARIMA approach and actuals: 

arima_results %<>%  
  rename(Actual = daily_cash, Predicted = predicted) 

arima_results %>% 
  gather(a, b, -DAYID) %>% 
  ggplot(aes(DAYID, b, color = a)) + 
  geom_line() + 
  scale_color_manual(values = c("purple", "orange"), name = "Series:") + 
  my_theme() + 
  labs(x = NULL, y = NULL, subtitle = "Approach Used: Auto ARIMA", 
       title = "One-step Ahead Prediction for Cash Withdrawals (120 Consecutive Days)")

# Function calculate some accuracy measures: 
get_accuracy_measures <- function(your_result_df) {

  act <- your_result_df %>% pull(Actual)
  pred <- your_result_df %>% pull(Predicted)
  err <- act - pred 
  per_err <- abs(err / act)
  
  # Mean Absolute Error (MAE): 
  mae <- err %>% abs() %>% mean()
  
  # Mean Squared Error (MSE): 
  mse <- mean(err^2)
  
  # Mean Absolute Percentage Error (MAPE): 
  mape <- 100*mean(per_err)
  
  # Return results: 
  return(data.frame(MAE = mae, MSE = mse, MAPE = mape, N = length(act)))
}

# Use above function for comparing: 

bind_rows(lstm_results %>% get_accuracy_measures(), arima_results %>% get_accuracy_measures) %>% 
  mutate(Approach = c("LSTM", "ARIMA")) %>% 
  select(Approach, everything()) %>% 
  mutate_if(is.numeric, function(x) {round(x, 2)}) %>% 
  knitr::kable()
```

