TS

library(Quandl)     # to access data from Quandl, see Quandl.com or run ?Quandl() for details

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(tseries)    # a variety of tools for TSE
library(stargazer)  # regression outputs (or other tables) formatting, see ?stargazer for details

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(foreach)    # conventilnal looping, see ?foreach() for details
library(forecast)

df <- Quandl("FRED/CP0213ATM086NEST", api_key="JJzFyVSwQ7zs9zdLeyLo", type='xts')

nrow(df)

## [1] 249

id <- c(1:(nrow(df)/3)*3)

df = df[id]

summary(df)    # a quick summary of the variables in the set

##      Index            df        
##  Min.   :1996   Min.   : 71.87  
##  1st Qu.:2001   1st Qu.: 74.69  
##  Median :2006   Median : 77.68  
##  Mean   :2006   Mean   : 82.89  
##  3rd Qu.:2012   3rd Qu.: 90.10  
##  Max.   :2017   Max.   :101.72

head(df, 5)    # first five rows of df

##           [,1]
## Mar 1996 73.40
## Jun 1996 73.04
## Sep 1996 73.11
## Dec 1996 72.45
## Mar 1997 73.40

tail(df, 5)    # last five rows of df

##            [,1]
## Sep 2015 100.24
## Dec 2015  99.46
## Mar 2016 101.04
## Jun 2016 101.15
## Sep 2016 101.72

plot(df)       # a fast plot of raw data

plot(df, main = 'CPI for Beer in Austria')

diff_1   <- diff(df)       # taking the first diffs
diff_log_1 <- diff(log(df))  # taking the log-diffs

#plotting the raw and transformed data together
plot.new()
layout(matrix(c(1,2,3), 3, 1))
plot(df,     main = '[A] CPI for Beer in Austria')
plot(diff_1,    main = '[B] First diffs')
plot(diff_log_1,  main = '[C] Log diffs')

adf.test(df)   #ADF test, mind the warnings!

## 
##  Augmented Dickey-Fuller Test
## 
## data:  df
## Dickey-Fuller = -1.6602, Lag order = 4, p-value = 0.7151
## alternative hypothesis: stationary

kpss.test(df)  #KPSS test, mind the warnings!

## 
##  KPSS Test for Level Stationarity
## 
## data:  df
## KPSS Level = 2.6554, Truncation lag parameter = 2, p-value = 0.01

adf.test(diff_1[-1])

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_1[-1]
## Dickey-Fuller = -3.7059, Lag order = 4, p-value = 0.02941
## alternative hypothesis: stationary

kpss.test(diff_1[-1])

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_1[-1]
## KPSS Level = 0.45351, Truncation lag parameter = 2, p-value =
## 0.05409

adf.test(diff_log_1[-1])

## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff_log_1[-1]
## Dickey-Fuller = -3.7114, Lag order = 4, p-value = 0.02893
## alternative hypothesis: stationary

kpss.test(diff_log_1[-1])

## 
##  KPSS Test for Level Stationarity
## 
## data:  diff_log_1[-1]
## KPSS Level = 0.36181, Truncation lag parameter = 2, p-value =
## 0.09362

plot.new() #reset the plotting device (clear the layout)
acf(diff_log_1[-1])

pacf(diff_log_1[-1])

acf(diff_1[-1])

pacf(diff_1[-1])

plot.new()
layout(matrix(c(1,1,2,2,3,4), 3, 2, byrow = TRUE),
       widths=c(1,1), heights=c(2,2,3))
plot(df, main='[A] CPI for Beer in Austria')
plot(diff_1, main='[B] the log diffs')
acf(diff_log_1[-1], main='[C] log diffs ACF')
pacf(diff_log_1[-1], main='[D] log diffs pACF')

Тут я попытался сделать автопредсказание

plot.new()
autopilot <- forecast::auto.arima(diff_log_1, max.order = 10)
arima.prediction <- forecast(autopilot, h = 10)
plot(arima.prediction)

### ESTIMATING and ANALYZING SEVERAL SPECIFICATIONS

## Set the parameters for the procedure

# setting the caps for the specifications orders
phat <- 8
qhat <- 8

# setting critical level(s)
alpha <- 0.1

# other shortcuts
N    <- length(df)-1 # number of obs in the transformed series
pq   <- expand.grid(0:phat,0:qhat) # all combinations of (p,q)
M    <- nrow(pq) # number of specifications
Lhat <- 5 # max lags to use in residual ACF analysis


## Estiamte all the ARMA and collect the results

SKIPm <- c(0) # the specs to skip (if there are errors or something)

#loop over all the combinations of (p,q) and estimate the ARMAs
ARMAs <- foreach(m=seq(1:M)[!(1:M %in% SKIPm)], .combine=rbind) %do% {
  
  # the orders
  p_ <- pq[m,1]
  q_ <- pq[m,2]
  
  # the estimates
  armapq <- arima(diff_1, c(p_,0,q_))
  
  # the AR part coefficients (only valid for p>0)
  if(p_>0) {arcoefpq <- armapq$coef[1:p_]}
  
  # stationarity of AR part (only valid for p>0, TRUE for p=0)
  if(p_>0) {
    statAR <- sum(abs(polyroot(c(1,-arcoefpq)))>1)==p_
  } else {
    statAR <- TRUE
  }
  
  # the residuals
  respq <- armapq$residuals
  
  # the LB test results
  LBpvalpq <- Box.test(respq[-1], lag = Lhat, type = c("Ljung-Box"), fitdf=p_+q_)$p.value
  
  # info criteria
  aicpq <- AIC(armapq)
  bicpq <- AIC(armapq, k=log(N))
  
  # combine the results together
  data.frame(m    = m,         # spec number
             p    = p_,        # AR order
             q    = q_,        # MA order
             stat = statAR,    # AR part stationarity check
             LB   = LBpvalpq,  # LB test p-value
             AIC  = aicpq,     # AIC
             SBIC = bicpq)     # SBIC
}

Все модельки

# all the results
ARMAs

##     m p q stat         LB      AIC     SBIC
## 1   1 0 0 TRUE 0.22567570 245.0298 249.8432
## 2   2 1 0 TRUE 0.33407691 244.8582 252.0784
## 3   3 2 0 TRUE 0.22801799 246.7749 256.4018
## 4   4 3 0 TRUE 0.11964190 248.3033 260.3369
## 5   5 4 0 TRUE 0.56518616 246.7510 261.1913
## 6   6 5 0 TRUE 0.00000000 248.4547 265.3017
## 7   7 6 0 TRUE        NaN 250.0038 269.2576
## 8   8 7 0 TRUE        NaN 250.7975 272.4579
## 9   9 8 0 TRUE        NaN 252.6731 276.7403
## 10 10 0 1 TRUE 0.36216054 244.7482 251.9684
## 11 11 1 1 TRUE 0.22833435 246.7436 256.3705
## 12 12 2 1 TRUE 0.11617958 248.7397 260.7733
## 13 13 3 1 TRUE 0.07719603 249.3962 263.8365
## 14 14 4 1 TRUE 0.00000000 248.5598 265.4069
## 15 15 5 1 TRUE        NaN 249.4367 268.6905
## 16 16 6 1 TRUE        NaN 251.1224 272.7829
## 17 17 7 1 TRUE        NaN 252.6430 276.7102
## 18 18 8 1 TRUE        NaN 254.5377 281.0116
## 19 19 0 2 TRUE 0.22858234 246.7431 256.3700
## 20 20 1 2 TRUE 0.11514690 248.7431 260.7767
## 21 21 2 2 TRUE 0.04389670 250.4268 264.8671
## 22 22 3 2 TRUE 0.00000000 248.1659 265.0130
## 23 23 4 2 TRUE        NaN 249.2552 268.5090
## 24 24 5 2 TRUE        NaN 250.1735 271.8339
## 25 25 6 2 TRUE        NaN 251.2766 275.3438
## 26 26 7 2 TRUE        NaN 253.2502 279.7241
## 27 27 8 2 TRUE        NaN 254.3134 283.1940
## 28 28 0 3 TRUE 0.11725146 248.7410 260.7746
## 29 29 1 3 TRUE 0.05947607 249.6324 264.0727
## 30 30 2 3 TRUE 0.00000000 248.2352 265.0822
## 31 31 3 3 TRUE        NaN 249.1318 268.3856
## 32 32 4 3 TRUE        NaN 251.9200 273.5804
## 33 33 5 3 TRUE        NaN 251.4572 275.5244
## 34 34 6 3 TRUE        NaN 253.2349 279.7088
## 35 35 7 3 TRUE        NaN 253.5142 282.3949
## 36 36 8 3 TRUE        NaN 255.6238 286.9111
## 37 37 0 4 TRUE 0.70315838 245.4139 259.8543
## 38 38 1 4 TRUE 0.00000000 247.1908 264.0378
## 39 39 2 4 TRUE        NaN 249.0638 268.3175
## 40 40 3 4 TRUE        NaN 250.4964 272.1569
## 41 41 4 4 TRUE        NaN 252.2893 276.3565
## 42 42 5 4 TRUE        NaN 253.3115 279.7854
## 43 43 6 4 TRUE        NaN 254.6914 283.5721
## 44 44 7 4 TRUE        NaN 255.4246 286.7120
## 45 45 8 4 TRUE        NaN 256.8903 290.5844
## 46 46 0 5 TRUE 0.00000000 247.1801 264.0271
## 47 47 1 5 TRUE        NaN 249.1740 268.4278
## 48 48 2 5 TRUE        NaN 250.0418 271.7022
## 49 49 3 5 TRUE        NaN 251.6649 275.7320
## 50 50 4 5 TRUE        NaN 253.5073 279.9812
## 51 51 5 5 TRUE        NaN 251.8211 280.7018
## 52 52 6 5 TRUE        NaN 256.6831 287.9705
## 53 53 7 5 TRUE        NaN 257.0474 290.7414
## 54 54 8 5 TRUE        NaN 258.3707 294.4715
## 55 55 0 6 TRUE        NaN 249.1565 268.4102
## 56 56 1 6 TRUE        NaN 251.0801 272.7406
## 57 57 2 6 TRUE        NaN 249.3798 273.4470
## 58 58 3 6 TRUE        NaN 252.9817 279.4556
## 59 59 4 6 TRUE        NaN 255.0609 283.9415
## 60 60 5 6 TRUE        NaN 257.2174 288.5047
## 61 61 6 6 TRUE        NaN 253.4441 287.1382
## 62 62 7 6 TRUE        NaN 255.4015 291.5022
## 63 63 8 6 TRUE        NaN 257.3537 295.8612
## 64 64 0 7 TRUE        NaN 250.4645 272.1250
## 65 65 1 7 TRUE        NaN 251.9881 276.0553
## 66 66 2 7 TRUE        NaN 253.9873 280.4613
## 67 67 3 7 TRUE        NaN 253.1436 282.0242
## 68 68 4 7 TRUE        NaN 255.5472 286.8346
## 69 69 5 7 TRUE        NaN 255.4467 289.1408
## 70 70 6 7 TRUE        NaN 255.3983 291.4991
## 71 71 7 7 TRUE        NaN 257.3901 295.8976
## 72 72 8 7 TRUE        NaN 259.3368 300.2510
## 73 73 0 8 TRUE        NaN 252.1410 276.2082
## 74 74 1 8 TRUE        NaN 253.9876 280.4615
## 75 75 2 8 TRUE        NaN 255.9476 284.8282
## 76 76 3 8 TRUE        NaN 256.3011 287.5884
## 77 77 4 8 TRUE        NaN 257.3049 290.9990
## 78 78 5 8 TRUE        NaN 259.5819 295.6827
## 79 79 6 8 TRUE        NaN 257.3662 295.8737
## 80 80 7 8 TRUE        NaN 259.3520 300.2662
## 81 81 8 8 TRUE        NaN 260.4098 303.7307

Топ 5 моделек

# remove invalid LB test results
ARMAs <- ARMAs[!is.nan(ARMAs$LB),]

# keep only stationary & without residual correlation 
ARMAs <- ARMAs[ARMAs$stat == 1 & ARMAs$LB >= alpha,] 

# sort by SBIC (keep top-5)
ARMAs[order(ARMAs$SBIC)[1:5],]

##     m p q stat        LB      AIC     SBIC
## 1   1 0 0 TRUE 0.2256757 245.0298 249.8432
## 10 10 0 1 TRUE 0.3621605 244.7482 251.9684
## 2   2 1 0 TRUE 0.3340769 244.8582 252.0784
## 19 19 0 2 TRUE 0.2285823 246.7431 256.3700
## 11 11 1 1 TRUE 0.2283344 246.7436 256.3705

# estimate the top-5 specs again and check the outputs
ARMAs2 <- foreach(m=ARMAs[order(ARMAs$SBIC)[1:5],1]) %do% {
  arima(diff_1, c(pq[m,1],0,pq[m,2]))
}


## PLOT the BEST MODEL RESULTS

#create a new time series object
best_arma <- data.frame(index       = index(df),
                        diff_1         = as.numeric(diff_1),
                        d1fitted    = as.numeric(diff_log_1-ARMAs2[[1]]$residuals),
                        d1residuals = as.numeric(ARMAs2[[1]]$residuals),
                        df          = as.numeric(df),
                        dffitted    = as.numeric(df-ARMAs2[[1]]$residuals))
best_arma <- xts(best_arma[,2:6], order.by = best_arma$index)
colnames(best_arma) <- c('Log diffs', 'Fitted log diffs', 'Residuals', 'Raw data in levels', 'Fitted levels')


plot(best_arma,
     main='Fitted MA(1) for the log differences results')#,     legend.loc='topright')

addLegend("topright", on=1, 
          lty = c(1, 1, 1, 1, 1),
          lwd = c(2, 2, 2, 2, 2))