library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
library(zoo)
cpi_1<-Quandl("FRED/CPILFENS", api_key="5NyVfHfxdScCYsskDP59", type= "zoo")
cpi<- ts(cpi_1[,2], start=c(1957,1), frequency = 12)
tsdisplay(cpi,xlab="Monthly Time period (1959-2014)", ylab= "CPI", main="Consumer Price Index (Monthly)")
lcpi<-log(cpi)
Based on the plot, we can say that there is exponential increase in the data.Based on the ACF(very slow decay) and PACF, the level data doesn’t look stationary. Now, let’s check the stationarity of the log transformed data based on ADF and KPSS tests.
adf.test(lcpi)
##
## Augmented Dickey-Fuller Test
##
## data: lcpi
## Dickey-Fuller = -0.50029, Lag order = 8, p-value = 0.9818
## alternative hypothesis: stationary
kpss.test(lcpi, null="Trend")
## Warning in kpss.test(lcpi, null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: lcpi
## KPSS Trend = 1.9296, Truncation lag parameter = 6, p-value = 0.01
ADF and KPSS test suggests that the data is not stationary on the level which supports the ACF and PACF plot on the level.
Based on the ADF and KPSS test, if we difference log transformed data and run the ADF and KPSS tests again, we get the following results:
dcpi<-diff(lcpi)
tsdisplay(dcpi,xlab="Monthly Time period (1959-2014)", ylab= "Differenced CPI", main="Differenced CPI(Monthly)")
adf.test(dcpi)
##
## Augmented Dickey-Fuller Test
##
## data: dcpi
## Dickey-Fuller = -3.8707, Lag order = 8, p-value = 0.01546
## alternative hypothesis: stationary
kpss.test(dcpi, null="Trend")
## Warning in kpss.test(dcpi, null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: dcpi
## KPSS Trend = 1.2229, Truncation lag parameter = 6, p-value = 0.01
Here, based on the ADF and KPSS test, we can’t reject the hypothesis of non-stationarity. Therefore, the data is still non-stationary after 1st differencing.
ddcpi<-diff(diff(cpi), frequency=12)
tsdisplay(ddcpi,xlab="Monthly Time period (1948-2014)", ylab= "Double Differenced CPI", main="Double Differenced CPI (Monthly)")
adf.test(ddcpi)
## Warning in adf.test(ddcpi): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ddcpi
## Dickey-Fuller = -12.45, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(ddcpi, null="Trend")
## Warning in kpss.test(ddcpi, null = "Trend"): p-value greater than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: ddcpi
## KPSS Trend = 0.0071678, Truncation lag parameter = 6, p-value =
## 0.1
With double differencing, the log transformed data is Stationary by ADF and KPSS test.
As it has been mentioned in the question that we need to split the data, for model estimation and forecasting, data had been taken upto the end of 2014.
ddcpi1 <- window(ddcpi, end=c(2014,12))
ddcpi2 <- window(ddcpi, start=c(2015,1))
Model Estimation:
ar6ma2_1<- arima(ddcpi1, order=c(6,0,2),seasonal=list(order=c(0,0,1),period=12))
ar6ma2_1
##
## Call:
## arima(x = ddcpi1, order = c(6, 0, 2), seasonal = list(order = c(0, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## 0.6573 -0.2743 -0.0499 -0.0113 0.1772 0.0509 -1.2369 0.2596
## s.e. 0.2036 0.0926 0.0586 0.0519 0.0479 0.0592 0.2011 0.1927
## sma1 intercept
## 0.3519 3e-04
## s.e. 0.0302 5e-04
##
## sigma^2 estimated as 0.04042: log likelihood = 126.59, aic = -231.19
tsdiag(ar6ma2_1, gof.lag=24)
ar6ma3_2<- arima(ddcpi1, order=c(6,0,3),seasonal=list(order=c(0,0,2),period=12))
ar6ma3_2
##
## Call:
## arima(x = ddcpi1, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 2), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## -0.0721 0.5321 -0.2879 -0.0477 0.2067 0.1633 -0.5352 -0.9013
## s.e. 0.1471 0.0844 0.0564 0.0542 0.0395 0.0468 0.1471 0.0500
## ma3 sma1 sma2 intercept
## 0.4555 0.3976 0.3295 4e-04
## s.e. 0.1277 0.0391 0.0342 5e-04
##
## sigma^2 estimated as 0.03584: log likelihood = 167.5, aic = -309
tsdiag(ar6ma3_2, gof.lag=24)
ar6ma3_3<- arima(ddcpi1, order=c(6,0,3),seasonal=list(order=c(0,0,3),period=12))
ar6ma3_3
##
## Call:
## arima(x = ddcpi1, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 3), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## -0.0584 0.5835 -0.2717 -0.0363 0.2047 0.1601 -0.5571 -0.9209
## s.e. 0.1319 0.0787 0.0576 0.0501 0.0387 0.0468 0.1312 0.0394
## ma3 sma1 sma2 sma3 intercept
## 0.4916 0.4103 0.3409 0.1919 4e-04
## s.e. 0.1180 0.0423 0.0365 0.0357 5e-04
##
## sigma^2 estimated as 0.03442: log likelihood = 181.45, aic = -334.91
tsdiag(ar6ma3_3, gof.lag=24)
ar6ma3_4<- arima(ddcpi, order=c(6,0,3),seasonal=list(order=c(0,0,4),period=12))
ar6ma3_4
##
## Call:
## arima(x = ddcpi, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 4), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## 0.0105 0.6758 -0.2663 -0.0498 0.2060 0.1706 -0.6341 -0.9365
## s.e. 0.1044 0.0630 0.0583 0.0476 0.0382 0.0474 0.1036 0.0351
## ma3 sma1 sma2 sma3 sma4 intercept
## 0.5706 0.3771 0.3671 0.2536 0.1895 4e-04
## s.e. 0.0951 0.0424 0.0428 0.0365 0.0352 2e-04
##
## sigma^2 estimated as 0.03294: log likelihood = 198.33, aic = -366.67
tsdiag(ar6ma3_4, gof.lag=24)
Based on AIC, BIC,LJung-Box and Standardized residuals, ARIMA(6,1,3)(0,1,3) and ARIMA(6,1,3)(0,1,4) can be suggested for forecasting.
ar6ma3_3.fcast <- forecast(ar6ma3_3, h=24)
plot(ar6ma3_3.fcast, xlim=c(2010,2016))
ar6ma3_4.fcast <- forecast(ar6ma3_4, h=24)
plot(ar6ma3_4.fcast, xlim=c(2010,2016))