Hello All,
Below are the steps I took to generate neural networks. Overall, I applied three different types of networks to the data - one, a autoregessive neural network, and two, a long short term memory recursive nerual network. The data below is the quarterly Australian private final consumption expenditure. The data range is from September of 1959 to March 1991.
Best, Jordan
require(fpp)
require(forecast)
data <- read.csv("C:/Users/Jharrop/Desktop/ADEC7460 Forecasting/Discussions/Discussion 8/quarterly-australian-private-fin.csv")
data.ts <- ts(data$Exp, frequency = 4, start = c(1959, 4))
tsdisplay(data.ts)
lam <- BoxCox.lambda(data.ts); print(lam)
## [1] 0.1035234
data.clean.ts <- BoxCox(data.ts, lambda = lam)
plot(data.clean.ts)
train <- ts(data.clean.ts[1:110], frequency = 4, start = c(1959, 4))
test <- ts(data.clean.ts[111:127], frequency = 4, start = c(1987, 2))
fit1 <- nnetar(train)
fcst <- forecast(fit1,h=16)
plot(fcst)
accuracy(fcst, test)
## ME RMSE MAE MPE MAPE
## Training set 0.0000287655 0.04334237 0.03434864 -0.0004952831 0.1979628
## Test set 0.0750759352 0.10945610 0.08952331 0.3921824773 0.4695194
## MASE ACF1 Theil's U
## Training set 0.3216174 0.6063114 NA
## Test set 0.8382356 0.5887714 0.7218129
plot(test, type="o", ylab="Red Wine Sales",
flwd=1, plot.conf=FALSE)
lines(window(test, start = c(1987,2)),type="o")
lines(fcst$mean,col=2)
legend("topleft", lty=1, pch=1, col=1:2,
c("Data","Neural Net"))
I borrowed heavely from the below site - it was published only about a week ago and is a great resource for Keras in R (A lot of Keras tutorials are for python).
http://www.business-science.io/timeseries-analysis/2018/04/18/keras-lstm-sunspots-time-series-prediction.html
I do not think this will work well with quarterly data.
# Core Tidyverse
library(tidyverse)
library(glue)
library(forcats)
# Time Series
library(timetk)
library(tidyquant)
library(tibbletime)
# Visualization
library(cowplot)
# Preprocessing
library(recipes)
# Sampling / Accuracy
library(rsample)
library(yardstick)
# Modeling
library(keras)
# Date/Time
library(zoo)
# If this is your first time using Tensor Flow - Keras you will need
# to run install.packages("keras", dependencies = TRUE) then run
# "libary(keras); install_keras()"
df_trn <- data[1:100,]
colnames(df_trn) <- c("index","value")
df_trn$index = gsub("Q", "", df_trn$index)
df_trn$index <- as.yearqtr(format(df_trn$index), "%Y%q")
df_tst <- data[101:120,]
colnames(df_tst) <- c("index","value")
df_tst$index = gsub("Q", "", df_tst$index)
df_tst$index <- as.yearqtr(format(df_tst$index), "%Y%q")
df <- bind_rows(
df_trn %>% add_column(key = "training"),
df_tst %>% add_column(key = "testing"))
rec_obj <- recipe(value ~ ., df) %>%
step_sqrt(value) %>%
step_center(value) %>%
step_scale(value) %>%
prep()
df_processed_tbl <- bake(rec_obj, df)
df_processed_tbl
## # A tibble: 120 x 3
## index value key
## <dbl> <dbl> <fct>
## 1 1960. -1.74 training
## 2 1960 -1.52 training
## 3 1960. -1.67 training
## 4 1960. -1.58 training
## 5 1961. -1.62 training
## 6 1961 -1.44 training
## 7 1961. -1.67 training
## 8 1962. -1.61 training
## 9 1962. -1.63 training
## 10 1962 -1.43 training
## # ... with 110 more rows
center_history <- rec_obj$steps[[2]]$means["value"]
scale_history <- rec_obj$steps[[3]]$sds["value"]
c("center" = center_history, "scale" = scale_history)
## center.value scale.value
## 150.6341 24.5523
# Model inputs
lag_setting <- nrow(df_tst)
batch_size <- 1
train_length <- 100
tsteps <- 1
epochs <- 15
# Training Set
lag_train_tbl <- df_processed_tbl %>%
mutate(value_lag = lag(value, n = lag_setting)) %>%
filter(!is.na(value_lag)) %>%
filter(key == "training") %>%
tail(train_length)
x_train_vec <- lag_train_tbl$value_lag
x_train_arr <- array(data = x_train_vec, dim = c(length(x_train_vec), 1, 1))
y_train_vec <- lag_train_tbl$value
y_train_arr <- array(data = y_train_vec, dim = c(length(y_train_vec), 1))
# Testing Set
lag_test_tbl <- df_processed_tbl %>%
mutate(
value_lag = lag(value, n = lag_setting)
) %>%
filter(!is.na(value_lag)) %>%
filter(key == "testing")
x_test_vec <- lag_test_tbl$value_lag
x_test_arr <- array(data = x_test_vec, dim = c(length(x_test_vec), 1, 1))
y_test_vec <- lag_test_tbl$value
y_test_arr <- array(data = y_test_vec, dim = c(length(y_test_vec), 1))
model <- keras_model_sequential()
model %>%
layer_lstm(units = 50,
input_shape = c(tsteps, 1),
batch_size = batch_size,
return_sequences = TRUE,
stateful = TRUE) %>%
layer_lstm(units = 50,
return_sequences = FALSE,
stateful = TRUE) %>%
layer_dense(units = 1)
model %>%
compile(loss = 'mae', optimizer = 'adam')
model
## Model
## ___________________________________________________________________________
## Layer (type) Output Shape Param #
## ===========================================================================
## lstm_1 (LSTM) (1, 1, 50) 10400
## ___________________________________________________________________________
## lstm_2 (LSTM) (1, 50) 20200
## ___________________________________________________________________________
## dense_1 (Dense) (1, 1) 51
## ===========================================================================
## Total params: 30,651
## Trainable params: 30,651
## Non-trainable params: 0
## ___________________________________________________________________________
for (i in 1:epochs) {
model %>% fit(x = x_train_arr,
y = y_train_arr,
batch_size = batch_size,
epochs = 1,
verbose = 1,
shuffle = FALSE)
model %>% reset_states()
cat("Epoch: ", i)
}
## Epoch: 1Epoch: 2Epoch: 3Epoch: 4Epoch: 5Epoch: 6Epoch: 7Epoch: 8Epoch: 9Epoch: 10Epoch: 11Epoch: 12Epoch: 13Epoch: 14Epoch: 15
# Make Predictions
pred_out <- model %>%
predict(x_test_arr, batch_size = batch_size) %>%
.[,1]
# Retransform values
pred_tbl <- tibble(
index = as.yearqtr(lag_test_tbl$index),
value = (pred_out * scale_history + center_history)^2
)
# Combine actual data with predictions
tbl_1 <- df_trn %>%
add_column(key = "actual")
tbl_2 <- df_tst %>%
add_column(key = "actual")
tbl_3 <- pred_tbl %>%
add_column(key = "predict")
# Create time_bind_rows() to solve dplyr issue
time_bind_rows <- function(data_1, data_2, index) {
index_expr <- enquo(index)
bind_rows(data_1, data_2) %>%
as_tbl_time(index = !! index_expr)
}
# ret <- list(tbl_1, tbl_2, tbl_3) %>%
# reduce(time_bind_rows, index = index) %>%
# arrange(key, index) %>%
# mutate(key = as_factor(key))
ret <- rbind(tbl_1, tbl_2, tbl_3)
head(ret)
## index value key
## 1 1959 Q4 11664 actual
## 2 1960 Q1 12859 actual
## 3 1960 Q2 12002 actual
## 4 1960 Q3 12485 actual
## 5 1960 Q4 12316 actual
## 6 1961 Q1 13269 actual
calc_rmse <- function(prediction_tbl) {
rmse_calculation <- function(data) {
data %>%
spread(key = key, value = value) %>%
select(-index) %>%
filter(!is.na(predict)) %>%
rename(
truth = actual,
estimate = predict
) %>%
rmse(truth, estimate)
}
safe_rmse <- possibly(rmse_calculation, otherwise = NA)
safe_rmse(prediction_tbl)
}
# Setup single plot function
plot_prediction <- function(data, id, alpha = 1, size = 2, base_size = 14) {
rmse_val <- calc_rmse(data)
g <- data %>%
ggplot(aes(index, value, color = key)) +
geom_point(alpha = alpha, size = size) +
theme_tq(base_size = base_size) +
scale_color_tq() +
theme(legend.position = "none") +
labs(
title = glue("{id}, RMSE: {round(rmse_val, digits = 1)}"),
x = "", y = ""
)
return(g)
}
ret %>%
plot_prediction(id = "LSTM Test", alpha = 0.65) +
theme(legend.position = "bottom")