suppressMessages(suppressWarnings(library(keras)))
suppressMessages(suppressWarnings(library(tidyverse)))
suppressMessages(suppressWarnings(library(MASS)))
suppressMessages(suppressWarnings(library(gridExtra)))
suppressMessages(suppressWarnings(library(forecast)))

One of the main goals of this report is to build a neural network to generate time series forecasts.

Getting started

We will use data on the Brazilian exchange rate.

brazil <- read_rds("/opt/datasets/brazil.rds")

For a better analysis, we will keep only the observations from the year 2000 onward.

serie <- brazil %>%
  filter(Subject == "Currency exchange rates, monthly average",
         TIME >= "2000-01-01")

Plotting the series:

serie %>%
  dplyr::select(Time = TIME, Value) %>%
  ggplot() + geom_point(aes(x = Time, y = Value), alpha = .5) +
  geom_line(aes(x = Time, y = Value), color = "pink", alpha = .5) + 
  theme_minimal()

As in all previous assignments, we will split the series into training and test sets.

# Training set size
N <- round(.7*nrow(serie))

# Removing the trend (first difference)
serie_diff <- serie %>%
  mutate(Diff = Value - lag(Value)) %>%
  filter(row_number() >= 2) %>%
  mutate(
    Base = if_else(row_number() >= N, "test", "train")
  )

# Resulting data set
serie_diff %>%
  dplyr::select(TIME, Value, Diff, Base)
## # A tibble: 235 x 4
##    TIME       Value    Diff Base 
##    <date>     <dbl>   <dbl> <chr>
##  1 2000-02-01  1.77 -0.0284 train
##  2 2000-03-01  1.74 -0.0333 train
##  3 2000-04-01  1.77  0.0261 train
##  4 2000-05-01  1.83  0.0598 train
##  5 2000-06-01  1.81 -0.0196 train
##  6 2000-07-01  1.80 -0.0105 train
##  7 2000-08-01  1.81  0.0114 train
##  8 2000-09-01  1.84  0.0300 train
##  9 2000-10-01  1.88  0.0404 train
## 10 2000-11-01  1.95  0.0684 train
## # … with 225 more rows
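
For instance, the first row's Diff of -0.0284 is the February 2000 value minus the January 2000 observation, which was dropped by the filter above (only its difference remains).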

As can be seen, we now have a new variable, which will be used to split the data set. Next, the differenced series is rescaled to the interval [-1, 1], using the minimum and maximum observed in the training set.

scales <- serie_diff %>%
  filter(Base == "train") %>%
  summarise(min = min(Diff), max = max(Diff), Range = max - min)

serie_norm <- serie_diff %>%
  dplyr::select(Time = TIME, Value, Diff, Base) %>%
  mutate(DiffPadrao = 2*((Diff - scales$min)/scales$Range - .5))

serie_norm %>% ggplot(aes(x = Time, y = DiffPadrao, color = Base)) +
  geom_point(alpha = .5) +
  geom_line(alpha = .5) + 
  theme_minimal() + 
  scale_color_manual(values = c("pink","ivory4"))
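
As a quick sanity check (a small sketch using the objects defined above), the rescaled series should span exactly the interval [-1, 1] on the training rows, since the minimum and maximum came from the training set:

# on the training set the scaled differences should range from -1 to 1
serie_norm %>%
  filter(Base == "train") %>%
  summarise(min = min(DiffPadrao), max = max(DiffPadrao))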

Building the model

# Prepare the modeling data: Y is the scaled difference and X1 is its first lag
serie_norm2 <- serie_norm %>%
  transmute(Time, Y = DiffPadrao, X1 = lag(Y), Base) %>%
  filter(row_number() >= 2)

x_train <- serie_norm2 %>%
  filter(Base == "train") %>%
  dplyr::select(X1) %>%
  as.matrix()
y_train <- serie_norm2 %>%
  filter(Base == "train") %>%
  dplyr::select(Y) %>%
  as.matrix()
x_test <- serie_norm2 %>%
  filter(Base == "test") %>%
  dplyr::select(X1) %>%
  as.matrix()
y_test <- serie_norm2 %>%
  filter(Base == "test") %>%
  dplyr::select(Y) %>%
  as.matrix()

Parameters for fitting the model.

# Reshape the input to 3-dim
dim(x_train) <- c(length(x_train), 1, 1)

# specify required arguments
X_shape2 = dim(x_train)[2]
X_shape3 = dim(x_train)[3]
batch_size = 1 # must evenly divide both the training and test sample sizes
units = 5 # number of LSTM units; can be adjusted during model tuning
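
As a small check (a sketch using the objects defined above), batch_size must evenly divide both sample sizes, since the model below is stateful:

# both counts must be multiples of batch_size for a stateful Keras model
stopifnot(nrow(y_train) %% batch_size == 0,
          nrow(y_test)  %% batch_size == 0)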

Estimating the weights.

model <- keras_model_sequential()
model %>%
  layer_lstm(units,
             batch_input_shape = c(batch_size, X_shape2, X_shape3),
             stateful = TRUE) %>%
  layer_dense(units = 1)


model %>% compile(
  loss = 'mean_squared_error',
  optimizer = optimizer_adam(lr = 0.02, decay = 1e-6),
  metrics = c('mse')
)
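
If desired, the network structure can be inspected before training (a quick optional check; output omitted here):

# prints layer output shapes and parameter counts
summary(model)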

Epochs = 200
for(i in 1:Epochs ){
  model %>% fit(x_train, y_train, epochs = 1,
                batch_size = batch_size,
                verbose = 1,
                shuffle = FALSE)
  cat("Processing",i,"of 100\n")
  model %>% reset_states()
}
## Processing 1 of 200
## Processing 2 of 200
## Processing 3 of 200
## ...
## Processing 199 of 200
## Processing 200 of 200

Prediction.

# Recursive (multi-step) prediction over the test horizon
L <- length(x_test)
predictions <- numeric(L)

for (i in 1:L) {
  # the first step uses the last observed lag; later steps feed the previous prediction back in
  X <- if (i == 1) x_test[1] else predictions[i - 1]
  dim(X) <- c(1, 1, 1)
  yhat <- model %>% predict(X, batch_size = batch_size)
  # invert the [-1, 1] scaling back to the differenced scale
  yhat <- (yhat/2 + .5)*scales$Range + scales$min
  # store
  predictions[i] <- yhat
}

predict_df <- serie_norm %>% filter(row_number() >= N) %>%
  mutate(yhat = predictions)


predict_df %>%
  mutate(Vhat = yhat + lag(Value)) %>%
  dplyr::select(Time, Value, Vhat) %>%
  pivot_longer(cols = -Time,
               names_to = "Serie",
               values_to = "Valor") %>%
  ggplot(aes(x = Time, y = Valor, color = Serie)) +
  geom_point() +
  geom_line() +
  theme_minimal() + 
  scale_color_manual(values = c("pink","ivory4"))
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_path).

Looking at the plot, the model only drifts away from the observed values at the last point; the remaining points follow the same behavior as the series.

erro_lstm <- predict_df %>%
  mutate(Vhat = yhat + lag(Value)) %>%
  summarise(erro = mean((Value - Vhat)^2,na.rm = TRUE))

erro_lstm
## # A tibble: 1 x 1
##     erro
##    <dbl>
## 1 0.0155
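
To put this on the scale of the original series, the root mean squared error is simply the square root of this value (a quick sketch):

# RMSE in the units of the exchange rate series; roughly 0.12 given the MSE above
sqrt(erro_lstm$erro)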

Looking at the mean squared error, we see that the model achieves a low value.

For comparison, we will fit the more classical models (ARIMA and exponential smoothing).

mod_arima <- auto.arima(y_train)
mod_arima
## Series: y_train 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.3286  -0.1644
## s.e.  0.0738   0.0268
## 
## sigma^2 estimated as 0.05389:  log likelihood=7.71
## AIC=-9.42   AICc=-9.26   BIC=-0.13
prev_arima <- forecast(mod_arima, h = 71)

prev_arima %>% 
  autoplot() + 
  theme_minimal()

mod_ets <- ets(as.vector(y_train))

mod_ets
## ETS(A,N,N) 
## 
## Call:
##  ets(y = as.vector(y_train)) 
## 
##   Smoothing parameters:
##     alpha = 0.3123 
## 
##   Initial states:
##     l = -0.1553 
## 
##   sigma:  0.2471
## 
##      AIC     AICc      BIC 
## 378.5056 378.6565 387.7868
prev_ets <- forecast(mod_ets, h = 71)

prev_ets %>% 
  autoplot() + 
  theme_minimal()

predict_df %>% 
  mutate(Vhat = yhat + lag(Value)) %>%
  # invert the [-1, 1] scaling of the ARIMA and ETS forecasts
  mutate(yhat_ar = (prev_arima$mean/2 + 0.5)*scales$Range + scales$min,
         yhat_ets = (prev_ets$mean/2 + 0.5)*scales$Range + scales$min) %>% 
  summarise(erro_lstm = mean((Value - Vhat)^2, na.rm = TRUE),
            erro_ar = mean((Value - yhat_ar)^2, na.rm = TRUE),
            erro_ets = mean((Value - yhat_ets)^2, na.rm = TRUE))
## # A tibble: 1 x 3
##   erro_lstm erro_ar erro_ets
##       <dbl>   <dbl>    <dbl>
## 1    0.0155    10.8     10.6

Compared with the classical models, the neural network model achieved a much lower error and therefore a considerably better performance.