library(Quandl) # to access data from Quandl, see Quandl.com or run ?Quandl() for details
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) # a variety of tools for TSE
library(stargazer) # regression outputs (or other tables) formatting, see ?stargazer for details
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(foreach) # conventilnal looping, see ?foreach() for details
library(forecast)
df <- Quandl("FRED/CP0213ATM086NEST", api_key="JJzFyVSwQ7zs9zdLeyLo", type='xts')
nrow(df)
## [1] 249
id <- c(1:(nrow(df)/3)*3)
df = df[id]
summary(df) # a quick summary of the variables in the set
## Index df
## Min. :1996 Min. : 71.87
## 1st Qu.:2001 1st Qu.: 74.69
## Median :2006 Median : 77.68
## Mean :2006 Mean : 82.89
## 3rd Qu.:2012 3rd Qu.: 90.10
## Max. :2017 Max. :101.72
head(df, 5) # first five rows of df
## [,1]
## Mar 1996 73.40
## Jun 1996 73.04
## Sep 1996 73.11
## Dec 1996 72.45
## Mar 1997 73.40
tail(df, 5) # last five rows of df
## [,1]
## Sep 2015 100.24
## Dec 2015 99.46
## Mar 2016 101.04
## Jun 2016 101.15
## Sep 2016 101.72
plot(df) # a fast plot of raw data
plot(df, main = 'CPI for Beer in Austria')
diff_1 <- diff(df) # taking the first diffs
diff_log_1 <- diff(log(df)) # taking the log-diffs
#plotting the raw and transformed data together
plot.new()
layout(matrix(c(1,2,3), 3, 1))
plot(df, main = '[A] CPI for Beer in Austria')
plot(diff_1, main = '[B] First diffs')
plot(diff_log_1, main = '[C] Log diffs')
adf.test(df) #ADF test, mind the warnings!
##
## Augmented Dickey-Fuller Test
##
## data: df
## Dickey-Fuller = -1.6602, Lag order = 4, p-value = 0.7151
## alternative hypothesis: stationary
kpss.test(df) #KPSS test, mind the warnings!
##
## KPSS Test for Level Stationarity
##
## data: df
## KPSS Level = 2.6554, Truncation lag parameter = 2, p-value = 0.01
adf.test(diff_1[-1])
##
## Augmented Dickey-Fuller Test
##
## data: diff_1[-1]
## Dickey-Fuller = -3.7059, Lag order = 4, p-value = 0.02941
## alternative hypothesis: stationary
kpss.test(diff_1[-1])
##
## KPSS Test for Level Stationarity
##
## data: diff_1[-1]
## KPSS Level = 0.45351, Truncation lag parameter = 2, p-value =
## 0.05409
adf.test(diff_log_1[-1])
##
## Augmented Dickey-Fuller Test
##
## data: diff_log_1[-1]
## Dickey-Fuller = -3.7114, Lag order = 4, p-value = 0.02893
## alternative hypothesis: stationary
kpss.test(diff_log_1[-1])
##
## KPSS Test for Level Stationarity
##
## data: diff_log_1[-1]
## KPSS Level = 0.36181, Truncation lag parameter = 2, p-value =
## 0.09362
plot.new() #reset the plotting device (clear the layout)
acf(diff_log_1[-1])
pacf(diff_log_1[-1])
acf(diff_1[-1])
pacf(diff_1[-1])
plot.new()
layout(matrix(c(1,1,2,2,3,4), 3, 2, byrow = TRUE),
widths=c(1,1), heights=c(2,2,3))
plot(df, main='[A] CPI for Beer in Austria')
plot(diff_1, main='[B] the log diffs')
acf(diff_log_1[-1], main='[C] log diffs ACF')
pacf(diff_log_1[-1], main='[D] log diffs pACF')
Тут я попытался сделать автопредсказание
plot.new()
autopilot <- forecast::auto.arima(diff_log_1, max.order = 10)
arima.prediction <- forecast(autopilot, h = 10)
plot(arima.prediction)
### ESTIMATING and ANALYZING SEVERAL SPECIFICATIONS
## Set the parameters for the procedure
# setting the caps for the specifications orders
phat <- 8
qhat <- 8
# setting critical level(s)
alpha <- 0.1
# other shortcuts
N <- length(df)-1 # number of obs in the transformed series
pq <- expand.grid(0:phat,0:qhat) # all combinations of (p,q)
M <- nrow(pq) # number of specifications
Lhat <- 5 # max lags to use in residual ACF analysis
## Estiamte all the ARMA and collect the results
SKIPm <- c(0) # the specs to skip (if there are errors or something)
#loop over all the combinations of (p,q) and estimate the ARMAs
ARMAs <- foreach(m=seq(1:M)[!(1:M %in% SKIPm)], .combine=rbind) %do% {
# the orders
p_ <- pq[m,1]
q_ <- pq[m,2]
# the estimates
armapq <- arima(diff_1, c(p_,0,q_))
# the AR part coefficients (only valid for p>0)
if(p_>0) {arcoefpq <- armapq$coef[1:p_]}
# stationarity of AR part (only valid for p>0, TRUE for p=0)
if(p_>0) {
statAR <- sum(abs(polyroot(c(1,-arcoefpq)))>1)==p_
} else {
statAR <- TRUE
}
# the residuals
respq <- armapq$residuals
# the LB test results
LBpvalpq <- Box.test(respq[-1], lag = Lhat, type = c("Ljung-Box"), fitdf=p_+q_)$p.value
# info criteria
aicpq <- AIC(armapq)
bicpq <- AIC(armapq, k=log(N))
# combine the results together
data.frame(m = m, # spec number
p = p_, # AR order
q = q_, # MA order
stat = statAR, # AR part stationarity check
LB = LBpvalpq, # LB test p-value
AIC = aicpq, # AIC
SBIC = bicpq) # SBIC
}
Все модельки
# all the results
ARMAs
## m p q stat LB AIC SBIC
## 1 1 0 0 TRUE 0.22567570 245.0298 249.8432
## 2 2 1 0 TRUE 0.33407691 244.8582 252.0784
## 3 3 2 0 TRUE 0.22801799 246.7749 256.4018
## 4 4 3 0 TRUE 0.11964190 248.3033 260.3369
## 5 5 4 0 TRUE 0.56518616 246.7510 261.1913
## 6 6 5 0 TRUE 0.00000000 248.4547 265.3017
## 7 7 6 0 TRUE NaN 250.0038 269.2576
## 8 8 7 0 TRUE NaN 250.7975 272.4579
## 9 9 8 0 TRUE NaN 252.6731 276.7403
## 10 10 0 1 TRUE 0.36216054 244.7482 251.9684
## 11 11 1 1 TRUE 0.22833435 246.7436 256.3705
## 12 12 2 1 TRUE 0.11617958 248.7397 260.7733
## 13 13 3 1 TRUE 0.07719603 249.3962 263.8365
## 14 14 4 1 TRUE 0.00000000 248.5598 265.4069
## 15 15 5 1 TRUE NaN 249.4367 268.6905
## 16 16 6 1 TRUE NaN 251.1224 272.7829
## 17 17 7 1 TRUE NaN 252.6430 276.7102
## 18 18 8 1 TRUE NaN 254.5377 281.0116
## 19 19 0 2 TRUE 0.22858234 246.7431 256.3700
## 20 20 1 2 TRUE 0.11514690 248.7431 260.7767
## 21 21 2 2 TRUE 0.04389670 250.4268 264.8671
## 22 22 3 2 TRUE 0.00000000 248.1659 265.0130
## 23 23 4 2 TRUE NaN 249.2552 268.5090
## 24 24 5 2 TRUE NaN 250.1735 271.8339
## 25 25 6 2 TRUE NaN 251.2766 275.3438
## 26 26 7 2 TRUE NaN 253.2502 279.7241
## 27 27 8 2 TRUE NaN 254.3134 283.1940
## 28 28 0 3 TRUE 0.11725146 248.7410 260.7746
## 29 29 1 3 TRUE 0.05947607 249.6324 264.0727
## 30 30 2 3 TRUE 0.00000000 248.2352 265.0822
## 31 31 3 3 TRUE NaN 249.1318 268.3856
## 32 32 4 3 TRUE NaN 251.9200 273.5804
## 33 33 5 3 TRUE NaN 251.4572 275.5244
## 34 34 6 3 TRUE NaN 253.2349 279.7088
## 35 35 7 3 TRUE NaN 253.5142 282.3949
## 36 36 8 3 TRUE NaN 255.6238 286.9111
## 37 37 0 4 TRUE 0.70315838 245.4139 259.8543
## 38 38 1 4 TRUE 0.00000000 247.1908 264.0378
## 39 39 2 4 TRUE NaN 249.0638 268.3175
## 40 40 3 4 TRUE NaN 250.4964 272.1569
## 41 41 4 4 TRUE NaN 252.2893 276.3565
## 42 42 5 4 TRUE NaN 253.3115 279.7854
## 43 43 6 4 TRUE NaN 254.6914 283.5721
## 44 44 7 4 TRUE NaN 255.4246 286.7120
## 45 45 8 4 TRUE NaN 256.8903 290.5844
## 46 46 0 5 TRUE 0.00000000 247.1801 264.0271
## 47 47 1 5 TRUE NaN 249.1740 268.4278
## 48 48 2 5 TRUE NaN 250.0418 271.7022
## 49 49 3 5 TRUE NaN 251.6649 275.7320
## 50 50 4 5 TRUE NaN 253.5073 279.9812
## 51 51 5 5 TRUE NaN 251.8211 280.7018
## 52 52 6 5 TRUE NaN 256.6831 287.9705
## 53 53 7 5 TRUE NaN 257.0474 290.7414
## 54 54 8 5 TRUE NaN 258.3707 294.4715
## 55 55 0 6 TRUE NaN 249.1565 268.4102
## 56 56 1 6 TRUE NaN 251.0801 272.7406
## 57 57 2 6 TRUE NaN 249.3798 273.4470
## 58 58 3 6 TRUE NaN 252.9817 279.4556
## 59 59 4 6 TRUE NaN 255.0609 283.9415
## 60 60 5 6 TRUE NaN 257.2174 288.5047
## 61 61 6 6 TRUE NaN 253.4441 287.1382
## 62 62 7 6 TRUE NaN 255.4015 291.5022
## 63 63 8 6 TRUE NaN 257.3537 295.8612
## 64 64 0 7 TRUE NaN 250.4645 272.1250
## 65 65 1 7 TRUE NaN 251.9881 276.0553
## 66 66 2 7 TRUE NaN 253.9873 280.4613
## 67 67 3 7 TRUE NaN 253.1436 282.0242
## 68 68 4 7 TRUE NaN 255.5472 286.8346
## 69 69 5 7 TRUE NaN 255.4467 289.1408
## 70 70 6 7 TRUE NaN 255.3983 291.4991
## 71 71 7 7 TRUE NaN 257.3901 295.8976
## 72 72 8 7 TRUE NaN 259.3368 300.2510
## 73 73 0 8 TRUE NaN 252.1410 276.2082
## 74 74 1 8 TRUE NaN 253.9876 280.4615
## 75 75 2 8 TRUE NaN 255.9476 284.8282
## 76 76 3 8 TRUE NaN 256.3011 287.5884
## 77 77 4 8 TRUE NaN 257.3049 290.9990
## 78 78 5 8 TRUE NaN 259.5819 295.6827
## 79 79 6 8 TRUE NaN 257.3662 295.8737
## 80 80 7 8 TRUE NaN 259.3520 300.2662
## 81 81 8 8 TRUE NaN 260.4098 303.7307
Топ 5 моделек
# remove invalid LB test results
ARMAs <- ARMAs[!is.nan(ARMAs$LB),]
# keep only stationary & without residual correlation
ARMAs <- ARMAs[ARMAs$stat == 1 & ARMAs$LB >= alpha,]
# sort by SBIC (keep top-5)
ARMAs[order(ARMAs$SBIC)[1:5],]
## m p q stat LB AIC SBIC
## 1 1 0 0 TRUE 0.2256757 245.0298 249.8432
## 10 10 0 1 TRUE 0.3621605 244.7482 251.9684
## 2 2 1 0 TRUE 0.3340769 244.8582 252.0784
## 19 19 0 2 TRUE 0.2285823 246.7431 256.3700
## 11 11 1 1 TRUE 0.2283344 246.7436 256.3705
# estimate the top-5 specs again and check the outputs
ARMAs2 <- foreach(m=ARMAs[order(ARMAs$SBIC)[1:5],1]) %do% {
arima(diff_1, c(pq[m,1],0,pq[m,2]))
}
## PLOT the BEST MODEL RESULTS
#create a new time series object
best_arma <- data.frame(index = index(df),
diff_1 = as.numeric(diff_1),
d1fitted = as.numeric(diff_log_1-ARMAs2[[1]]$residuals),
d1residuals = as.numeric(ARMAs2[[1]]$residuals),
df = as.numeric(df),
dffitted = as.numeric(df-ARMAs2[[1]]$residuals))
best_arma <- xts(best_arma[,2:6], order.by = best_arma$index)
colnames(best_arma) <- c('Log diffs', 'Fitted log diffs', 'Residuals', 'Raw data in levels', 'Fitted levels')
plot(best_arma,
main='Fitted MA(1) for the log differences results')#, legend.loc='topright')
addLegend("topright", on=1,
lty = c(1, 1, 1, 1, 1),
lwd = c(2, 2, 2, 2, 2))