Загрузка необходимых пакетов:

library(tidyverse)
library(readxl)
library(ggplot2)
library(forecast)
library(tseries)

Загрузка набора данных:

stock_data <- read_excel("stockk1.xlsx")
names(stock_data) <- c("IMEX", "USD", "SP", "Oil", "P", "IQ")

Преобразование фрейма данных в вид фрейма временных рядов:

stock_data_ts<-ts(stock_data,start = c(2000, 3), frequency = 12)
class(stock_data_ts)
[1] "mts"    "ts"     "matrix"
head(stock_data_ts)
           IMEX   USD      SP   Oil      P    IQ
Mar 2000 198.99 28.65 1366.42 29.04 101.04 110.8
Apr 2000 257.30 28.60 1498.58 24.15 100.64 109.0
May 2000 244.88 28.40 1452.43 23.33 100.89 112.0
Jun 2000 221.10 28.23 1420.60 29.03 101.75 110.8
Jul 2000 199.13 28.05 1454.60 31.30 102.55 110.9
Aug 2000 209.47 27.82 1430.83 25.43 101.79 113.5
plot(stock_data_ts)

Извлечение временного ряда Московской биржи (IMEX):

IMEX <- stock_data_ts[,1]
#This is the start of the time series
start(IMEX)
[1] 2000    3
#This is the end of the time series
end(IMEX)
[1] 2018    3
summary(IMEX)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  135.7   570.8  1386.9  1179.8  1663.1  2292.2 
plot(IMEX)
#This will fit in a line
abline(reg=lm(IMEX~time(IMEX)))

#This will print the cycle across years.
cycle(IMEX)
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2000           3   4   5   6   7   8   9  10  11  12
2001   1   2   3   4   5   6   7   8   9  10  11  12
2002   1   2   3   4   5   6   7   8   9  10  11  12
2003   1   2   3   4   5   6   7   8   9  10  11  12
2004   1   2   3   4   5   6   7   8   9  10  11  12
2005   1   2   3   4   5   6   7   8   9  10  11  12
2006   1   2   3   4   5   6   7   8   9  10  11  12
2007   1   2   3   4   5   6   7   8   9  10  11  12
2008   1   2   3   4   5   6   7   8   9  10  11  12
2009   1   2   3   4   5   6   7   8   9  10  11  12
2010   1   2   3   4   5   6   7   8   9  10  11  12
2011   1   2   3   4   5   6   7   8   9  10  11  12
2012   1   2   3   4   5   6   7   8   9  10  11  12
2013   1   2   3   4   5   6   7   8   9  10  11  12
2014   1   2   3   4   5   6   7   8   9  10  11  12
2015   1   2   3   4   5   6   7   8   9  10  11  12
2016   1   2   3   4   5   6   7   8   9  10  11  12
2017   1   2   3   4   5   6   7   8   9  10  11  12
2018   1   2   3                                    
#Box plot across months will give us a sense on seasonal effect
boxplot(IMEX~cycle(IMEX))

Вывод:

стационарность ряда.

IMEX(t) = (mean + trend * t) + error

=> Differencing method: Для исключения нестацинарности ряда

Применеия ARIMA для прогнозирования индекса Московской биржи:

ARIMA имеет следуюшие параметры:

=> IMEX(t) – IMEX(t-1) = ARMA (p , q)

Деление на train and test set:

train_imex <- window(IMEX, start = c(2000, 3), end = c(2014, 12))
test_imex <- window(IMEX, start = 2015)
length(test_imex)
[1] 39

Применение модели по autoarima:

fit1 <- auto.arima(train_imex)
pred1 <- forecast(fit1, h = length(test_imex))
plot(pred1)
lines(test_imex, col = "red")
legend('bottomright',c("Actual","Forecasted"),lty=c(1,1),lwd=c(1.5,1.5),col=c('red','blue'))

Через экспоненту:

fit2 <- auto.arima(log(train_imex))
pred2 <- forecast(fit2, h = length(test_imex))
plot(pred2)
lines(log(test_imex), col = "red")
legend('bottomright',c("Actual","Forecasted"),lty=c(1,1),lwd=c(1.5,1.5),col=c('red','blue'))

Добавление BoxCox преобразования:

L <- BoxCox.lambda(IMEX)
fit3 <- auto.arima(train_imex, lambda = L)
pred3 <- forecast(fit3, h = length(test_imex), lambda = L)
plot(pred3)
lines(test_imex, col = "red")
legend('bottomright',c("Actual","Forecasted"),lty=c(1,1),lwd=c(1.5,1.5),col=c('red','blue'))

Ручной подбор модели:

Проверка стационарности ряда:

adf.test(diff(log(IMEX)), alternative="stationary", k = 0)
p-value smaller than printed p-value

    Augmented Dickey-Fuller Test

data:  diff(log(IMEX))
Dickey-Fuller = -12.097, Lag order = 0, p-value = 0.01
alternative hypothesis: stationary

Ряд стационарный после 1 difference => d = 1. Когда мы указываем d = 1 в модели ARIMA то ARIMA сама получит стационарный ряд в разности логарифмов

ACF plots:

acf(log(IMEX))

Распад диаграммы ACF происходит очень медленно => ряд нестационарный (выше уже обсудили)

View ACF and PACF after regressing on the difference:

acf(diff(log(IMEX)))

pacf(diff(log(IMEX)))

fit4 <- arima(log(train_imex), c(0, 1, 1),seasonal = list(order = c(0, 1, 1), period = 12))
pred4 <- forecast(fit4, h = length(test_imex))
accuracy(log(test_imex), pred4$mean)
                ME      RMSE        MAE      MPE     MAPE      ACF1 Theil's U
Test set 0.0947091 0.1177479 0.09982258 1.230632 1.298106 0.7568122  5.179196
plot(pred4)
lines(log(test_imex), col = "red")
legend('bottomright',c("Actual","Forecasted"),lty=c(1,1),lwd=c(1.5,1.5),col=c('red','blue'))

Возврат прогоза через экспоненту к прогнозу исходных значений индекса МБ

imex_pred <- exp(pred4$mean)
accuracy(imex_pred, test_imex)
                ME     RMSE      MAE       MPE    MAPE      ACF1 Theil's U
Test set -195.5966 249.9426 205.8375 -10.20452 10.7077 0.8229259  2.626428
plot(imex_pred)
lines(test_imex, col = "red")

df <- data.frame(actual = test_imex, forecast = imex_pred)
df
LS0tDQp0aXRsZTogItCf0YDQvtCz0L3QvtC3INC40L3QtNC10LrRgdCwINCc0L7RgdC60L7QstGB0LrQvtC5INCx0LjRgNC20LggLSBBUklNQSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCioq0JfQsNCz0YDRg9C30LrQsCDQvdC10L7QsdGF0L7QtNC40LzRi9GFINC/0LDQutC10YLQvtCyOioqDQoNCmBgYHtyfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHJlYWR4bCkNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkoZm9yZWNhc3QpDQpsaWJyYXJ5KHRzZXJpZXMpDQpgYGANCg0KKirQl9Cw0LPRgNGD0LfQutCwINC90LDQsdC+0YDQsCDQtNCw0L3QvdGL0YU6KioNCg0KYGBge3J9DQpzdG9ja19kYXRhIDwtIHJlYWRfZXhjZWwoInN0b2NrazEueGxzeCIpDQpuYW1lcyhzdG9ja19kYXRhKSA8LSBjKCJJTUVYIiwgIlVTRCIsICJTUCIsICJPaWwiLCAiUCIsICJJUSIpDQpgYGANCg0KKirQn9GA0LXQvtCx0YDQsNC30L7QstCw0L3QuNC1INGE0YDQtdC50LzQsCDQtNCw0L3QvdGL0YUg0LIg0LLQuNC0INGE0YDQtdC50LzQsCDQstGA0LXQvNC10L3QvdGL0YUg0YDRj9C00L7QsjoqKg0KDQpgYGB7cn0NCnN0b2NrX2RhdGFfdHM8LXRzKHN0b2NrX2RhdGEsc3RhcnQgPSBjKDIwMDAsIDMpLCBmcmVxdWVuY3kgPSAxMikNCmNsYXNzKHN0b2NrX2RhdGFfdHMpDQpoZWFkKHN0b2NrX2RhdGFfdHMpDQpwbG90KHN0b2NrX2RhdGFfdHMpDQpgYGANCg0KKirQmNC30LLQu9C10YfQtdC90LjQtSDQstGA0LXQvNC10L3QvdC+0LPQviDRgNGP0LTQsCDQnNC+0YHQutC+0LLRgdC60L7QuSDQsdC40YDQttC4IChJTUVYKToqKg0KDQpgYGB7cn0NCklNRVggPC0gc3RvY2tfZGF0YV90c1ssMV0NCiNUaGlzIGlzIHRoZSBzdGFydCBvZiB0aGUgdGltZSBzZXJpZXMNCnN0YXJ0KElNRVgpDQojVGhpcyBpcyB0aGUgZW5kIG9mIHRoZSB0aW1lIHNlcmllcw0KZW5kKElNRVgpDQpzdW1tYXJ5KElNRVgpDQpwbG90KElNRVgpDQojVGhpcyB3aWxsIGZpdCBpbiBhIGxpbmUNCmFibGluZShyZWc9bG0oSU1FWH50aW1lKElNRVgpKSkNCiNUaGlzIHdpbGwgcHJpbnQgdGhlIGN5Y2xlIGFjcm9zcyB5ZWFycy4NCmN5Y2xlKElNRVgpDQojQm94IHBsb3QgYWNyb3NzIG1vbnRocyB3aWxsIGdpdmUgdXMgYSBzZW5zZSBvbiBzZWFzb25hbCBlZmZlY3QNCmJveHBsb3QoSU1FWH5jeWNsZShJTUVYKSkNCmBgYA0KDQoqKtCS0YvQstC+0LQ6KioNCg0KLSDQotGA0LXQvdC0INC/0L7QutCw0LfRi9Cy0LDQtdGCLCDRh9GC0L4gSU1FWHMg0YPQstC10LvQuNGH0LjQstCw0Y7RgtGB0Y8g0LPQvtC0INC30LAg0LPQvtC00L7QvCA9PiDQstGA0LXQvNC10L3QvtC5INGA0Y/QtCBJTUVYINC90LXRgdGC0LDRhtC+0L3QsNGA0L3Ri9C5DQotINGB0YDQtdC00L3QtdC1INC30L3QsNGH0LXQvdC40LUg0LIg0LrQsNC20LTQvtC8INC80LXRgdGP0YbQtSDQvdC1INGB0LjQu9GM0L3QviDQvtGC0LvQuNGH0LDQtdGC0YHRjyA9PiDRgdC10LfQvtC90L3Ri9C5INGN0YTRhNC10LrRgiDRgdC70LDQsdGL0LkNCg0KKirRgdGC0LDRhtC40L7QvdCw0YDQvdC+0YHRgtGMINGA0Y/QtNCwLioqDQoNCklNRVgodCkgPSAobWVhbiArIHRyZW5kICogdCkgKyBlcnJvcg0KDQo9PiAqKkRpZmZlcmVuY2luZyBtZXRob2QqKjog0JTQu9GPINC40YHQutC70Y7Rh9C10L3QuNGPINC90LXRgdGC0LDRhtC40L3QsNGA0L3QvtGB0YLQuCDRgNGP0LTQsA0KDQoqKtCf0YDQuNC80LXQvdC10LjRjyBBUklNQSDQtNC70Y8g0L/RgNC+0LPQvdC+0LfQuNGA0L7QstCw0L3QuNGPINC40L3QtNC10LrRgdCwINCc0L7RgdC60L7QstGB0LrQvtC5INCx0LjRgNC20Lg6ICoqIA0KDQpBUklNQSDQuNC80LXQtdGCINGB0LvQtdC00YPRjtGI0LjQtSDQv9Cw0YDQsNC80LXRgtGA0Ys6DQoNCi0gQVI6IHANCg0KLSBJOiBkICjQutC+0LvQuNGH0LXRgdGC0LLQviAqKiJkaWZmZXJlbmNlIioqINC00LvRjyDQv9GA0LjQstC10LTQtdC90LjRjyDQuNGB0YXQvtC00L3QvtCz0L4g0L3QtdGB0YLQsNGG0LjQvtC90LDRgNC90L7Qs9C+INGA0Y/QtNCwINCyINGB0YLQsNGG0LjQvtC90LDRgNC90YvQuSkNCg0KLSBNQTogcQ0KDQo9PiBJTUVYKHQpIOKAkyBJTUVYKHQtMSkgPSBBUk1BIChwICwgIHEpDQoNCg0KKirQlNC10LvQtdC90LjQtSDQvdCwIHRyYWluIGFuZCB0ZXN0IHNldDoqKg0KDQpgYGB7cn0NCnRyYWluX2ltZXggPC0gd2luZG93KElNRVgsIHN0YXJ0ID0gYygyMDAwLCAzKSwgZW5kID0gYygyMDE0LCAxMikpDQp0ZXN0X2ltZXggPC0gd2luZG93KElNRVgsIHN0YXJ0ID0gMjAxNSkNCg0KYGBgDQoNCioq0J/RgNC40LzQtdC90LXQvdC40LUg0LzQvtC00LXQu9C4INC/0L4gYXV0b2FyaW1hOioqDQpgYGB7cn0NCmZpdDEgPC0gYXV0by5hcmltYSh0cmFpbl9pbWV4KQ0KcHJlZDEgPC0gZm9yZWNhc3QoZml0MSwgaCA9IGxlbmd0aCh0ZXN0X2ltZXgpKQ0KcGxvdChwcmVkMSkNCmxpbmVzKHRlc3RfaW1leCwgY29sID0gInJlZCIpDQpsZWdlbmQoJ2JvdHRvbXJpZ2h0JyxjKCJBY3R1YWwiLCJGb3JlY2FzdGVkIiksbHR5PWMoMSwxKSxsd2Q9YygxLjUsMS41KSxjb2w9YygncmVkJywnYmx1ZScpKQ0KYGBgDQoNCioq0KfQtdGA0LXQtyDRjdC60YHQv9C+0L3QtdC90YLRgzogKioNCg0KYGBge3J9DQpmaXQyIDwtIGF1dG8uYXJpbWEobG9nKHRyYWluX2ltZXgpKQ0KcHJlZDIgPC0gZm9yZWNhc3QoZml0MiwgaCA9IGxlbmd0aCh0ZXN0X2ltZXgpKQ0KcGxvdChwcmVkMikNCmxpbmVzKGxvZyh0ZXN0X2ltZXgpLCBjb2wgPSAicmVkIikNCmxlZ2VuZCgnYm90dG9tcmlnaHQnLGMoIkFjdHVhbCIsIkZvcmVjYXN0ZWQiKSxsdHk9YygxLDEpLGx3ZD1jKDEuNSwxLjUpLGNvbD1jKCdyZWQnLCdibHVlJykpDQpgYGANCg0KKirQlNC+0LHQsNCy0LvQtdC90LjQtSBCb3hDb3gg0L/RgNC10L7QsdGA0LDQt9C+0LLQsNC90LjRjzoqKg0KYGBge3IsIHdhcm5pbmc9RkFMU0V9DQpMIDwtIEJveENveC5sYW1iZGEoSU1FWCkNCmZpdDMgPC0gYXV0by5hcmltYSh0cmFpbl9pbWV4LCBsYW1iZGEgPSBMKQ0KcHJlZDMgPC0gZm9yZWNhc3QoZml0MywgaCA9IGxlbmd0aCh0ZXN0X2ltZXgpLCBsYW1iZGEgPSBMKQ0KcGxvdChwcmVkMykNCmxpbmVzKHRlc3RfaW1leCwgY29sID0gInJlZCIpDQpsZWdlbmQoJ2JvdHRvbXJpZ2h0JyxjKCJBY3R1YWwiLCJGb3JlY2FzdGVkIiksbHR5PWMoMSwxKSxsd2Q9YygxLjUsMS41KSxjb2w9YygncmVkJywnYmx1ZScpKQ0KYGBgDQoNCiMjIyMg0KDRg9GH0L3QvtC5INC/0L7QtNCx0L7RgCDQvNC+0LTQtdC70Lg6DQoNCioq0J/RgNC+0LLQtdGA0LrQsCDRgdGC0LDRhtC40L7QvdCw0YDQvdC+0YHRgtC4INGA0Y/QtNCwOioqDQoNCmBgYHtyfQ0KYWRmLnRlc3QoZGlmZihsb2coSU1FWCkpLCBhbHRlcm5hdGl2ZT0ic3RhdGlvbmFyeSIsIGsgPSAwKQ0KYGBgDQoNCioq0KDRj9C0INGB0YLQsNGG0LjQvtC90LDRgNC90YvQuSDQv9C+0YHQu9C1IDEgZGlmZmVyZW5jZSoqID0+IGQgPSAxLiDQmtC+0LPQtNCwINC80Ysg0YPQutCw0LfRi9Cy0LDQtdC8IGQgPSAxINCyINC80L7QtNC10LvQuCBBUklNQSDRgtC+IEFSSU1BINGB0LDQvNCwINC/0L7Qu9GD0YfQuNGCINGB0YLQsNGG0LjQvtC90LDRgNC90YvQuSDRgNGP0LQg0LIg0YDQsNC30L3QvtGB0YLQuCDQu9C+0LPQsNGA0LjRhNC80L7QsiANCg0KKipBQ0YgcGxvdHM6KioNCmBgYHtyfQ0KYWNmKGxvZyhJTUVYKSkNCmBgYA0KDQrQoNCw0YHQv9Cw0LQg0LTQuNCw0LPRgNCw0LzQvNGLIEFDRiDQv9GA0L7QuNGB0YXQvtC00LjRgiDQvtGH0LXQvdGMINC80LXQtNC70LXQvdC90L4gPT4g0YDRj9C0INC90LXRgdGC0LDRhtC40L7QvdCw0YDQvdGL0LkgKNCy0YvRiNC1INGD0LbQtSDQvtCx0YHRg9C00LjQu9C4KQ0KDQoqKlZpZXcgQUNGIGFuZCBQQUNGIGFmdGVyIHJlZ3Jlc3Npbmcgb24gdGhlIGRpZmZlcmVuY2U6KioNCg0KYGBge3J9DQphY2YoZGlmZihsb2coSU1FWCkpKQ0KcGFjZihkaWZmKGxvZyhJTUVYKSkpDQpgYGANCg0KDQpgYGB7cn0NCmZpdDQgPC0gYXJpbWEobG9nKHRyYWluX2ltZXgpLCBjKDAsIDEsIDEpLHNlYXNvbmFsID0gbGlzdChvcmRlciA9IGMoMCwgMSwgMSksIHBlcmlvZCA9IDEyKSkNCnByZWQ0IDwtIGZvcmVjYXN0KGZpdDQsIGggPSBsZW5ndGgodGVzdF9pbWV4KSkNCmFjY3VyYWN5KGxvZyh0ZXN0X2ltZXgpLCBwcmVkNCRtZWFuKQ0KcGxvdChwcmVkNCkNCmxpbmVzKGxvZyh0ZXN0X2ltZXgpLCBjb2wgPSAicmVkIikNCmxlZ2VuZCgnYm90dG9tcmlnaHQnLGMoIkFjdHVhbCIsIkZvcmVjYXN0ZWQiKSxsdHk9YygxLDEpLGx3ZD1jKDEuNSwxLjUpLGNvbD1jKCdyZWQnLCdibHVlJykpDQpgYGANCg0KKirQktC+0LfQstGA0LDRgiDQv9GA0L7Qs9C+0LfQsCDRh9C10YDQtdC3INGN0LrRgdC/0L7QvdC10L3RgtGDINC6INC/0YDQvtCz0L3QvtC30YMg0LjRgdGF0L7QtNC90YvRhSDQt9C90LDRh9C10L3QuNC5INC40L3QtNC10LrRgdCwINCc0JEqKg0KYGBge3J9DQppbWV4X3ByZWQgPC0gZXhwKHByZWQ0JG1lYW4pDQphY2N1cmFjeShpbWV4X3ByZWQsIHRlc3RfaW1leCkNCnBsb3QoaW1leF9wcmVkKQ0KbGluZXModGVzdF9pbWV4LCBjb2wgPSAicmVkIikNCmRmIDwtIGRhdGEuZnJhbWUoYWN0dWFsID0gdGVzdF9pbWV4LCBmb3JlY2FzdCA9IGltZXhfcHJlZCkNCmRmDQpgYGANCg0K