library(Quandl)     # to access data from Quandl, see Quandl.com or run ?Quandl() for details
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries)    # a variety of tools for TSE
library(stargazer)  # regression outputs (or other tables) formatting, see ?stargazer for details
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(foreach)    # conventilnal looping, see ?foreach() for details
library(forecast)
df <- Quandl("FRED/CP0213ATM086NEST", api_key="JJzFyVSwQ7zs9zdLeyLo", type='xts')

#nrow(df)

#id <- c(1:(nrow(df)/3)*3)

#df = df[id]

summary(df)    # a quick summary of the variables in the set
##      Index            df        
##  Min.   :1996   Min.   : 71.00  
##  1st Qu.:2001   1st Qu.: 74.51  
##  Median :2006   Median : 77.75  
##  Mean   :2006   Mean   : 82.80  
##  3rd Qu.:2012   3rd Qu.: 90.16  
##  Max.   :2017   Max.   :102.02
head(df, 5)    # first five rows of df
##           [,1]
## Jan 1996 73.11
## Feb 1996 72.89
## Mar 1996 73.40
## Apr 1996 73.04
## May 1996 72.75
tail(df, 5)    # last five rows of df
##            [,1]
## May 2016 101.49
## Jun 2016 101.15
## Jul 2016 100.61
## Aug 2016 102.02
## Sep 2016 101.72
plot(df)       # a fast plot of raw data

plot(df, main = 'CPI for Beer in Austria')

diff_1   <- diff(df)       # taking the first diffs
diff_log_1 <- diff(log(df))  # taking the log-diffs

#plotting the raw and transformed data together
plot.new()
layout(matrix(c(1,2,3), 3, 1))
plot(df,     main = '[A] CPI for Beer in Austria')
plot(diff_1,    main = '[B] First diffs')
plot(diff_log_1,  main = '[C] Log diffs')

adf.test(df)   #ADF test, mind the warnings!
## 
##  Augmented Dickey-Fuller Test
## 
## data:  df
## Dickey-Fuller = -1.4379, Lag order = 6, p-value = 0.8124
## alternative hypothesis: stationary
kpss.test(df)  #KPSS test, mind the warnings!
## 
##  KPSS Test for Level Stationarity
## 
## data:  df
## KPSS Level = 5.8854, Truncation lag parameter = 3, p-value = 0.01
adf.test(diff_1[-1])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_1[-1]
## Dickey-Fuller = -7.1298, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff_1[-1])
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_1[-1]
## KPSS Level = 0.38788, Truncation lag parameter = 3, p-value =
## 0.08238
adf.test(diff_log_1[-1])
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_log_1[-1]
## Dickey-Fuller = -7.0795, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff_log_1[-1])
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_log_1[-1]
## KPSS Level = 0.29144, Truncation lag parameter = 3, p-value = 0.1
plot.new() #reset the plotting device (clear the layout)
acf(diff_log_1[-1])

pacf(diff_log_1[-1])

acf(diff_1[-1])

pacf(diff_1[-1])

plot.new()
layout(matrix(c(1,1,2,2,3,4), 3, 2, byrow = TRUE),
       widths=c(1,1), heights=c(2,2,3))
plot(df, main='[A] CPI for Beer in Austria')
plot(diff_1, main='[B] the log diffs')
acf(diff_log_1[-1], main='[C] log diffs ACF')
pacf(diff_log_1[-1], main='[D] log diffs pACF')

Тут я попытался сделать автопредсказание

plot.new()
autopilot <- forecast::auto.arima(diff_log_1, max.order = 10)
arima.prediction <- forecast(autopilot, h = 10)
plot(arima.prediction)

### ESTIMATING and ANALYZING SEVERAL SPECIFICATIONS

## Set the parameters for the procedure

# setting the caps for the specifications orders
phat <- 8
qhat <- 8

# setting critical level(s)
alpha <- 0.1

# other shortcuts
N    <- length(df)-1 # number of obs in the transformed series
pq   <- expand.grid(0:phat,0:qhat) # all combinations of (p,q)
M    <- nrow(pq) # number of specifications
Lhat <- 5 # max lags to use in residual ACF analysis


## Estiamte all the ARMA and collect the results

SKIPm <- c(0) # the specs to skip (if there are errors or something)

#loop over all the combinations of (p,q) and estimate the ARMAs
ARMAs <- foreach(m=seq(1:M)[!(1:M %in% SKIPm)], .combine=rbind) %do% {
  
  # the orders
  p_ <- pq[m,1]
  q_ <- pq[m,2]
  
  # the estimates
  armapq <- arima(diff_1, c(p_,0,q_))
  
  # the AR part coefficients (only valid for p>0)
  if(p_>0) {arcoefpq <- armapq$coef[1:p_]}
  
  # stationarity of AR part (only valid for p>0, TRUE for p=0)
  if(p_>0) {
    statAR <- sum(abs(polyroot(c(1,-arcoefpq)))>1)==p_
  } else {
    statAR <- TRUE
  }
  
  # the residuals
  respq <- armapq$residuals
  
  # the LB test results
  LBpvalpq <- Box.test(respq[-1], lag = Lhat, type = c("Ljung-Box"), fitdf=p_+q_)$p.value
  
  # info criteria
  aicpq <- AIC(armapq)
  bicpq <- AIC(armapq, k=log(N))
  
  # combine the results together
  data.frame(m    = m,         # spec number
             p    = p_,        # AR order
             q    = q_,        # MA order
             stat = statAR,    # AR part stationarity check
             LB   = LBpvalpq,  # LB test p-value
             AIC  = aicpq,     # AIC
             SBIC = bicpq)     # SBIC
}

Все модельки

# all the results
ARMAs
##     m p q stat         LB      AIC     SBIC
## 1   1 0 0 TRUE 0.01150044 532.2467 539.2735
## 2   2 1 0 TRUE 0.46545272 522.9351 533.4754
## 3   3 2 0 TRUE 0.33143671 524.5658 538.6195
## 4   4 3 0 TRUE 0.39924985 524.9438 542.5110
## 5   5 4 0 TRUE 0.18198604 526.9133 547.9939
## 6   6 5 0 TRUE 0.00000000 527.3055 551.8995
## 7   7 6 0 TRUE        NaN 529.0162 557.1237
## 8   8 7 0 TRUE        NaN 531.0028 562.6236
## 9   9 8 0 TRUE        NaN 532.9189 568.0532
## 10 10 0 1 TRUE 0.54108782 522.1757 532.7160
## 11 11 1 1 TRUE 0.39148091 523.8400 537.8937
## 12 12 2 1 TRUE 0.38991594 525.1170 542.6842
## 13 13 3 1 TRUE 0.16975991 526.9713 548.0519
## 14 14 4 1 TRUE 0.00000000 528.0148 552.6088
## 15 15 5 1 TRUE        NaN 529.0249 557.1323
## 16 16 6 1 TRUE        NaN 531.0083 562.6292
## 17 17 7 1 TRUE        NaN 533.0072 568.1415
## 18 18 8 1 TRUE        NaN 534.0297 572.6774
## 19 19 0 2 TRUE 0.37786242 524.0427 538.0964
## 20 20 1 2 TRUE 0.33285187 525.3279 542.8951
## 21 21 2 2 TRUE 0.53183219 525.3472 546.4278
## 22 22 3 2 TRUE 0.00000000 527.3395 551.9335
## 23 23 4 2 TRUE        NaN 529.3316 557.4391
## 24 24 5 2 TRUE        NaN 531.0120 562.6328
## 25 25 6 2 TRUE        NaN 533.0082 568.1425
## 26 26 7 2 TRUE        NaN 522.8701 561.5179
## 27 27 8 2 TRUE        NaN 535.0243 577.1854
## 28 28 0 3 TRUE 0.40987806 524.8469 542.4141
## 29 29 1 3 TRUE 0.38009877 525.7544 546.8350
## 30 30 2 3 TRUE 0.00000000 527.3393 551.9333
## 31 31 3 3 TRUE        NaN 529.3464 557.4538
## 32 32 4 3 TRUE        NaN 531.3393 562.9602
## 33 33 5 3 TRUE        NaN 523.9282 559.0624
## 34 34 6 3 TRUE        NaN 525.8224 564.4701
## 35 35 7 3 TRUE        NaN 527.3633 569.5245
## 36 36 8 3 TRUE        NaN 536.9429 582.6175
## 37 37 0 4 TRUE 0.18564768 526.8186 547.8992
## 38 38 1 4 TRUE 0.00000000 527.5982 552.1922
## 39 39 2 4 TRUE        NaN 529.3361 557.4435
## 40 40 3 4 TRUE        NaN 526.6549 558.2758
## 41 41 4 4 TRUE        NaN 524.1757 559.3100
## 42 42 5 4 TRUE        NaN 525.7407 564.3884
## 43 43 6 4 TRUE        NaN 533.9275 576.0886
## 44 44 7 4 TRUE        NaN 528.7889 574.4635
## 45 45 8 4 TRUE        NaN 530.3946 579.5826
## 46 46 0 5 TRUE 0.00000000 527.5264 552.1204
## 47 47 1 5 TRUE        NaN 529.4693 557.5768
## 48 48 2 5 TRUE        NaN 522.4020 554.0228
## 49 49 3 5 TRUE        NaN 530.5625 565.6968
## 50 50 4 5 TRUE        NaN 532.2486 570.8963
## 51 51 5 5 TRUE        NaN 529.2550 571.4161
## 52 52 6 5 TRUE        NaN 520.0307 565.7052
## 53 53 7 5 TRUE        NaN 522.0038 571.1918
## 54 54 8 5 TRUE        NaN 527.8977 580.5991
## 55 55 0 6 TRUE        NaN 529.4255 557.5329
## 56 56 1 6 TRUE        NaN 529.1671 560.7879
## 57 57 2 6 TRUE        NaN 524.0070 559.1413
## 58 58 3 6 TRUE        NaN 525.8921 564.5398
## 59 59 4 6 TRUE        NaN 534.2405 576.4016
## 60 60 5 6 TRUE        NaN 521.1590 566.8336
## 61 61 6 6 TRUE        NaN 521.9514 571.1394
## 62 62 7 6 TRUE        NaN 515.0196 567.7211
## 63 63 8 6 TRUE        NaN 529.7517 585.9666
## 64 64 0 7 TRUE        NaN 531.1778 562.7986
## 65 65 1 7 TRUE        NaN 530.9219 566.0562
## 66 66 2 7 TRUE        NaN 527.5364 566.1841
## 67 67 3 7 TRUE        NaN 527.3295 569.4907
## 68 68 4 7 TRUE        NaN 528.8165 574.4911
## 69 69 5 7 TRUE        NaN 526.1725 575.3605
## 70 70 6 7 TRUE        NaN 528.1627 580.8641
## 71 71 7 7 TRUE        NaN 530.0083 586.2231
## 72 72 8 7 TRUE        NaN 522.1049 581.8332
## 73 73 0 8 TRUE        NaN 532.8273 567.9616
## 74 74 1 8 TRUE        NaN 533.6812 572.3289
## 75 75 2 8 TRUE        NaN 533.9697 576.1308
## 76 76 3 8 TRUE        NaN 535.0069 580.6815
## 77 77 4 8 TRUE        NaN 528.9082 578.0962
## 78 78 5 8 TRUE        NaN 528.9796 581.6811
## 79 79 6 8 TRUE        NaN 521.6035 577.8184
## 80 80 7 8 TRUE        NaN 519.0809 578.8092
## 81 81 8 8 TRUE        NaN 519.6439 582.8856

Топ 5 моделек

# remove invalid LB test results
ARMAs <- ARMAs[!is.nan(ARMAs$LB),]

# keep only stationary & without residual correlation 
ARMAs <- ARMAs[ARMAs$stat == 1 & ARMAs$LB >= alpha,] 

# sort by SBIC (keep top-5)
ARMAs[order(ARMAs$SBIC)[1:5],]
##     m p q stat        LB      AIC     SBIC
## 10 10 0 1 TRUE 0.5410878 522.1757 532.7160
## 2   2 1 0 TRUE 0.4654527 522.9351 533.4754
## 11 11 1 1 TRUE 0.3914809 523.8400 537.8937
## 19 19 0 2 TRUE 0.3778624 524.0427 538.0964
## 3   3 2 0 TRUE 0.3314367 524.5658 538.6195
# estimate the top-5 specs again and check the outputs
ARMAs2 <- foreach(m=ARMAs[order(ARMAs$SBIC)[1:5],1]) %do% {
  arima(diff_1, c(pq[m,1],0,pq[m,2]))
}


## PLOT the BEST MODEL RESULTS

#create a new time series object
best_arma <- data.frame(index       = index(df),
                        diff_1         = as.numeric(diff_1),
                        d1fitted    = as.numeric(diff_log_1-ARMAs2[[1]]$residuals),
                        d1residuals = as.numeric(ARMAs2[[1]]$residuals),
                        df          = as.numeric(df),
                        dffitted    = as.numeric(df-ARMAs2[[1]]$residuals))
best_arma <- xts(best_arma[,2:6], order.by = best_arma$index)
colnames(best_arma) <- c('Log diffs', 'Fitted log diffs', 'Residuals', 'Raw data in levels', 'Fitted levels')


plot(best_arma,
     main='Fitted MA(1) for the log differences results')#,     legend.loc='topright')

addLegend("topright", on=1, 
          lty = c(1, 1, 1, 1, 1),
          lwd = c(2, 2, 2, 2, 2))