library(Quandl) # to access data from Quandl, see Quandl.com or run ?Quandl() for details
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) # a variety of tools for TSE
library(stargazer) # regression outputs (or other tables) formatting, see ?stargazer for details
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(foreach) # conventilnal looping, see ?foreach() for details
library(forecast)
df <- Quandl("FRED/CP0213ATM086NEST", api_key="JJzFyVSwQ7zs9zdLeyLo", type='xts')
#nrow(df)
#id <- c(1:(nrow(df)/3)*3)
#df = df[id]
summary(df) # a quick summary of the variables in the set
## Index df
## Min. :1996 Min. : 71.00
## 1st Qu.:2001 1st Qu.: 74.51
## Median :2006 Median : 77.75
## Mean :2006 Mean : 82.80
## 3rd Qu.:2012 3rd Qu.: 90.16
## Max. :2017 Max. :102.02
head(df, 5) # first five rows of df
## [,1]
## Jan 1996 73.11
## Feb 1996 72.89
## Mar 1996 73.40
## Apr 1996 73.04
## May 1996 72.75
tail(df, 5) # last five rows of df
## [,1]
## May 2016 101.49
## Jun 2016 101.15
## Jul 2016 100.61
## Aug 2016 102.02
## Sep 2016 101.72
plot(df) # a fast plot of raw data
plot(df, main = 'CPI for Beer in Austria')
diff_1 <- diff(df) # taking the first diffs
diff_log_1 <- diff(log(df)) # taking the log-diffs
#plotting the raw and transformed data together
plot.new()
layout(matrix(c(1,2,3), 3, 1))
plot(df, main = '[A] CPI for Beer in Austria')
plot(diff_1, main = '[B] First diffs')
plot(diff_log_1, main = '[C] Log diffs')
adf.test(df) #ADF test, mind the warnings!
##
## Augmented Dickey-Fuller Test
##
## data: df
## Dickey-Fuller = -1.4379, Lag order = 6, p-value = 0.8124
## alternative hypothesis: stationary
kpss.test(df) #KPSS test, mind the warnings!
##
## KPSS Test for Level Stationarity
##
## data: df
## KPSS Level = 5.8854, Truncation lag parameter = 3, p-value = 0.01
adf.test(diff_1[-1])
##
## Augmented Dickey-Fuller Test
##
## data: diff_1[-1]
## Dickey-Fuller = -7.1298, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff_1[-1])
##
## KPSS Test for Level Stationarity
##
## data: diff_1[-1]
## KPSS Level = 0.38788, Truncation lag parameter = 3, p-value =
## 0.08238
adf.test(diff_log_1[-1])
##
## Augmented Dickey-Fuller Test
##
## data: diff_log_1[-1]
## Dickey-Fuller = -7.0795, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff_log_1[-1])
##
## KPSS Test for Level Stationarity
##
## data: diff_log_1[-1]
## KPSS Level = 0.29144, Truncation lag parameter = 3, p-value = 0.1
plot.new() #reset the plotting device (clear the layout)
acf(diff_log_1[-1])
pacf(diff_log_1[-1])
acf(diff_1[-1])
pacf(diff_1[-1])
plot.new()
layout(matrix(c(1,1,2,2,3,4), 3, 2, byrow = TRUE),
widths=c(1,1), heights=c(2,2,3))
plot(df, main='[A] CPI for Beer in Austria')
plot(diff_1, main='[B] the log diffs')
acf(diff_log_1[-1], main='[C] log diffs ACF')
pacf(diff_log_1[-1], main='[D] log diffs pACF')
Тут я попытался сделать автопредсказание
plot.new()
autopilot <- forecast::auto.arima(diff_log_1, max.order = 10)
arima.prediction <- forecast(autopilot, h = 10)
plot(arima.prediction)
### ESTIMATING and ANALYZING SEVERAL SPECIFICATIONS
## Set the parameters for the procedure
# setting the caps for the specifications orders
phat <- 8
qhat <- 8
# setting critical level(s)
alpha <- 0.1
# other shortcuts
N <- length(df)-1 # number of obs in the transformed series
pq <- expand.grid(0:phat,0:qhat) # all combinations of (p,q)
M <- nrow(pq) # number of specifications
Lhat <- 5 # max lags to use in residual ACF analysis
## Estiamte all the ARMA and collect the results
SKIPm <- c(0) # the specs to skip (if there are errors or something)
#loop over all the combinations of (p,q) and estimate the ARMAs
ARMAs <- foreach(m=seq(1:M)[!(1:M %in% SKIPm)], .combine=rbind) %do% {
# the orders
p_ <- pq[m,1]
q_ <- pq[m,2]
# the estimates
armapq <- arima(diff_1, c(p_,0,q_))
# the AR part coefficients (only valid for p>0)
if(p_>0) {arcoefpq <- armapq$coef[1:p_]}
# stationarity of AR part (only valid for p>0, TRUE for p=0)
if(p_>0) {
statAR <- sum(abs(polyroot(c(1,-arcoefpq)))>1)==p_
} else {
statAR <- TRUE
}
# the residuals
respq <- armapq$residuals
# the LB test results
LBpvalpq <- Box.test(respq[-1], lag = Lhat, type = c("Ljung-Box"), fitdf=p_+q_)$p.value
# info criteria
aicpq <- AIC(armapq)
bicpq <- AIC(armapq, k=log(N))
# combine the results together
data.frame(m = m, # spec number
p = p_, # AR order
q = q_, # MA order
stat = statAR, # AR part stationarity check
LB = LBpvalpq, # LB test p-value
AIC = aicpq, # AIC
SBIC = bicpq) # SBIC
}
Все модельки
# all the results
ARMAs
## m p q stat LB AIC SBIC
## 1 1 0 0 TRUE 0.01150044 532.2467 539.2735
## 2 2 1 0 TRUE 0.46545272 522.9351 533.4754
## 3 3 2 0 TRUE 0.33143671 524.5658 538.6195
## 4 4 3 0 TRUE 0.39924985 524.9438 542.5110
## 5 5 4 0 TRUE 0.18198604 526.9133 547.9939
## 6 6 5 0 TRUE 0.00000000 527.3055 551.8995
## 7 7 6 0 TRUE NaN 529.0162 557.1237
## 8 8 7 0 TRUE NaN 531.0028 562.6236
## 9 9 8 0 TRUE NaN 532.9189 568.0532
## 10 10 0 1 TRUE 0.54108782 522.1757 532.7160
## 11 11 1 1 TRUE 0.39148091 523.8400 537.8937
## 12 12 2 1 TRUE 0.38991594 525.1170 542.6842
## 13 13 3 1 TRUE 0.16975991 526.9713 548.0519
## 14 14 4 1 TRUE 0.00000000 528.0148 552.6088
## 15 15 5 1 TRUE NaN 529.0249 557.1323
## 16 16 6 1 TRUE NaN 531.0083 562.6292
## 17 17 7 1 TRUE NaN 533.0072 568.1415
## 18 18 8 1 TRUE NaN 534.0297 572.6774
## 19 19 0 2 TRUE 0.37786242 524.0427 538.0964
## 20 20 1 2 TRUE 0.33285187 525.3279 542.8951
## 21 21 2 2 TRUE 0.53183219 525.3472 546.4278
## 22 22 3 2 TRUE 0.00000000 527.3395 551.9335
## 23 23 4 2 TRUE NaN 529.3316 557.4391
## 24 24 5 2 TRUE NaN 531.0120 562.6328
## 25 25 6 2 TRUE NaN 533.0082 568.1425
## 26 26 7 2 TRUE NaN 522.8701 561.5179
## 27 27 8 2 TRUE NaN 535.0243 577.1854
## 28 28 0 3 TRUE 0.40987806 524.8469 542.4141
## 29 29 1 3 TRUE 0.38009877 525.7544 546.8350
## 30 30 2 3 TRUE 0.00000000 527.3393 551.9333
## 31 31 3 3 TRUE NaN 529.3464 557.4538
## 32 32 4 3 TRUE NaN 531.3393 562.9602
## 33 33 5 3 TRUE NaN 523.9282 559.0624
## 34 34 6 3 TRUE NaN 525.8224 564.4701
## 35 35 7 3 TRUE NaN 527.3633 569.5245
## 36 36 8 3 TRUE NaN 536.9429 582.6175
## 37 37 0 4 TRUE 0.18564768 526.8186 547.8992
## 38 38 1 4 TRUE 0.00000000 527.5982 552.1922
## 39 39 2 4 TRUE NaN 529.3361 557.4435
## 40 40 3 4 TRUE NaN 526.6549 558.2758
## 41 41 4 4 TRUE NaN 524.1757 559.3100
## 42 42 5 4 TRUE NaN 525.7407 564.3884
## 43 43 6 4 TRUE NaN 533.9275 576.0886
## 44 44 7 4 TRUE NaN 528.7889 574.4635
## 45 45 8 4 TRUE NaN 530.3946 579.5826
## 46 46 0 5 TRUE 0.00000000 527.5264 552.1204
## 47 47 1 5 TRUE NaN 529.4693 557.5768
## 48 48 2 5 TRUE NaN 522.4020 554.0228
## 49 49 3 5 TRUE NaN 530.5625 565.6968
## 50 50 4 5 TRUE NaN 532.2486 570.8963
## 51 51 5 5 TRUE NaN 529.2550 571.4161
## 52 52 6 5 TRUE NaN 520.0307 565.7052
## 53 53 7 5 TRUE NaN 522.0038 571.1918
## 54 54 8 5 TRUE NaN 527.8977 580.5991
## 55 55 0 6 TRUE NaN 529.4255 557.5329
## 56 56 1 6 TRUE NaN 529.1671 560.7879
## 57 57 2 6 TRUE NaN 524.0070 559.1413
## 58 58 3 6 TRUE NaN 525.8921 564.5398
## 59 59 4 6 TRUE NaN 534.2405 576.4016
## 60 60 5 6 TRUE NaN 521.1590 566.8336
## 61 61 6 6 TRUE NaN 521.9514 571.1394
## 62 62 7 6 TRUE NaN 515.0196 567.7211
## 63 63 8 6 TRUE NaN 529.7517 585.9666
## 64 64 0 7 TRUE NaN 531.1778 562.7986
## 65 65 1 7 TRUE NaN 530.9219 566.0562
## 66 66 2 7 TRUE NaN 527.5364 566.1841
## 67 67 3 7 TRUE NaN 527.3295 569.4907
## 68 68 4 7 TRUE NaN 528.8165 574.4911
## 69 69 5 7 TRUE NaN 526.1725 575.3605
## 70 70 6 7 TRUE NaN 528.1627 580.8641
## 71 71 7 7 TRUE NaN 530.0083 586.2231
## 72 72 8 7 TRUE NaN 522.1049 581.8332
## 73 73 0 8 TRUE NaN 532.8273 567.9616
## 74 74 1 8 TRUE NaN 533.6812 572.3289
## 75 75 2 8 TRUE NaN 533.9697 576.1308
## 76 76 3 8 TRUE NaN 535.0069 580.6815
## 77 77 4 8 TRUE NaN 528.9082 578.0962
## 78 78 5 8 TRUE NaN 528.9796 581.6811
## 79 79 6 8 TRUE NaN 521.6035 577.8184
## 80 80 7 8 TRUE NaN 519.0809 578.8092
## 81 81 8 8 TRUE NaN 519.6439 582.8856
Топ 5 моделек
# remove invalid LB test results
ARMAs <- ARMAs[!is.nan(ARMAs$LB),]
# keep only stationary & without residual correlation
ARMAs <- ARMAs[ARMAs$stat == 1 & ARMAs$LB >= alpha,]
# sort by SBIC (keep top-5)
ARMAs[order(ARMAs$SBIC)[1:5],]
## m p q stat LB AIC SBIC
## 10 10 0 1 TRUE 0.5410878 522.1757 532.7160
## 2 2 1 0 TRUE 0.4654527 522.9351 533.4754
## 11 11 1 1 TRUE 0.3914809 523.8400 537.8937
## 19 19 0 2 TRUE 0.3778624 524.0427 538.0964
## 3 3 2 0 TRUE 0.3314367 524.5658 538.6195
# estimate the top-5 specs again and check the outputs
ARMAs2 <- foreach(m=ARMAs[order(ARMAs$SBIC)[1:5],1]) %do% {
arima(diff_1, c(pq[m,1],0,pq[m,2]))
}
## PLOT the BEST MODEL RESULTS
#create a new time series object
best_arma <- data.frame(index = index(df),
diff_1 = as.numeric(diff_1),
d1fitted = as.numeric(diff_log_1-ARMAs2[[1]]$residuals),
d1residuals = as.numeric(ARMAs2[[1]]$residuals),
df = as.numeric(df),
dffitted = as.numeric(df-ARMAs2[[1]]$residuals))
best_arma <- xts(best_arma[,2:6], order.by = best_arma$index)
colnames(best_arma) <- c('Log diffs', 'Fitted log diffs', 'Residuals', 'Raw data in levels', 'Fitted levels')
plot(best_arma,
main='Fitted MA(1) for the log differences results')#, legend.loc='topright')
addLegend("topright", on=1,
lty = c(1, 1, 1, 1, 1),
lwd = c(2, 2, 2, 2, 2))