library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
library(zoo)
cpi_1<-Quandl("FRED/CPILFENS", api_key="5NyVfHfxdScCYsskDP59", type= "zoo")
cpi<- ts(cpi_1[,2], start=c(1957,1), frequency = 12)
tsdisplay(cpi,xlab="Monthly Time period (1959-2014)", ylab= "CPI", main="Consumer Price Index (Monthly)")

lcpi<-log(cpi)

Based on the plot, we can say that there is exponential increase in the data.Based on the ACF(very slow decay) and PACF, the level data doesn’t look stationary. Now, let’s check the stationarity of the log transformed data based on ADF and KPSS tests.

ADF and KPSS Test:

1. Level Data:

adf.test(lcpi)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  lcpi
## Dickey-Fuller = -0.50029, Lag order = 8, p-value = 0.9818
## alternative hypothesis: stationary
kpss.test(lcpi, null="Trend")
## Warning in kpss.test(lcpi, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  lcpi
## KPSS Trend = 1.9296, Truncation lag parameter = 6, p-value = 0.01

ADF and KPSS test suggests that the data is not stationary on the level which supports the ACF and PACF plot on the level.

2. On differenced data:

(i) On 1st differenced data:

Based on the ADF and KPSS test, if we difference log transformed data and run the ADF and KPSS tests again, we get the following results:

dcpi<-diff(lcpi)
tsdisplay(dcpi,xlab="Monthly Time period (1959-2014)", ylab= "Differenced CPI", main="Differenced CPI(Monthly)")

adf.test(dcpi)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dcpi
## Dickey-Fuller = -3.8707, Lag order = 8, p-value = 0.01546
## alternative hypothesis: stationary
kpss.test(dcpi, null="Trend")
## Warning in kpss.test(dcpi, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  dcpi
## KPSS Trend = 1.2229, Truncation lag parameter = 6, p-value = 0.01

Here, based on the ADF and KPSS test, we can’t reject the hypothesis of non-stationarity. Therefore, the data is still non-stationary after 1st differencing.

(ii) On 2nd differenced (Seasonal Difference) data:
ddcpi<-diff(diff(cpi), frequency=12)
tsdisplay(ddcpi,xlab="Monthly Time period (1948-2014)", ylab= "Double Differenced CPI", main="Double Differenced CPI (Monthly)")

adf.test(ddcpi)
## Warning in adf.test(ddcpi): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ddcpi
## Dickey-Fuller = -12.45, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(ddcpi, null="Trend")
## Warning in kpss.test(ddcpi, null = "Trend"): p-value greater than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ddcpi
## KPSS Trend = 0.0071678, Truncation lag parameter = 6, p-value =
## 0.1

With double differencing, the log transformed data is Stationary by ADF and KPSS test.

As it has been mentioned in the question that we need to split the data, for model estimation and forecasting, data had been taken upto the end of 2014.

ddcpi1 <- window(ddcpi, end=c(2014,12))
ddcpi2 <- window(ddcpi, start=c(2015,1))

Model Estimation:

ar6ma2_1<- arima(ddcpi1, order=c(6,0,2),seasonal=list(order=c(0,0,1),period=12))
ar6ma2_1
## 
## Call:
## arima(x = ddcpi1, order = c(6, 0, 2), seasonal = list(order = c(0, 0, 1), period = 12))
## 
## Coefficients:
##          ar1      ar2      ar3      ar4     ar5     ar6      ma1     ma2
##       0.6573  -0.2743  -0.0499  -0.0113  0.1772  0.0509  -1.2369  0.2596
## s.e.  0.2036   0.0926   0.0586   0.0519  0.0479  0.0592   0.2011  0.1927
##         sma1  intercept
##       0.3519      3e-04
## s.e.  0.0302      5e-04
## 
## sigma^2 estimated as 0.04042:  log likelihood = 126.59,  aic = -231.19
tsdiag(ar6ma2_1, gof.lag=24)

ar6ma3_2<- arima(ddcpi1, order=c(6,0,3),seasonal=list(order=c(0,0,2),period=12))
ar6ma3_2
## 
## Call:
## arima(x = ddcpi1, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 2), period = 12))
## 
## Coefficients:
##           ar1     ar2      ar3      ar4     ar5     ar6      ma1      ma2
##       -0.0721  0.5321  -0.2879  -0.0477  0.2067  0.1633  -0.5352  -0.9013
## s.e.   0.1471  0.0844   0.0564   0.0542  0.0395  0.0468   0.1471   0.0500
##          ma3    sma1    sma2  intercept
##       0.4555  0.3976  0.3295      4e-04
## s.e.  0.1277  0.0391  0.0342      5e-04
## 
## sigma^2 estimated as 0.03584:  log likelihood = 167.5,  aic = -309
tsdiag(ar6ma3_2, gof.lag=24)

ar6ma3_3<- arima(ddcpi1, order=c(6,0,3),seasonal=list(order=c(0,0,3),period=12))
ar6ma3_3
## 
## Call:
## arima(x = ddcpi1, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 3), period = 12))
## 
## Coefficients:
##           ar1     ar2      ar3      ar4     ar5     ar6      ma1      ma2
##       -0.0584  0.5835  -0.2717  -0.0363  0.2047  0.1601  -0.5571  -0.9209
## s.e.   0.1319  0.0787   0.0576   0.0501  0.0387  0.0468   0.1312   0.0394
##          ma3    sma1    sma2    sma3  intercept
##       0.4916  0.4103  0.3409  0.1919      4e-04
## s.e.  0.1180  0.0423  0.0365  0.0357      5e-04
## 
## sigma^2 estimated as 0.03442:  log likelihood = 181.45,  aic = -334.91
tsdiag(ar6ma3_3, gof.lag=24)

ar6ma3_4<- arima(ddcpi, order=c(6,0,3),seasonal=list(order=c(0,0,4),period=12))
ar6ma3_4
## 
## Call:
## arima(x = ddcpi, order = c(6, 0, 3), seasonal = list(order = c(0, 0, 4), period = 12))
## 
## Coefficients:
##          ar1     ar2      ar3      ar4     ar5     ar6      ma1      ma2
##       0.0105  0.6758  -0.2663  -0.0498  0.2060  0.1706  -0.6341  -0.9365
## s.e.  0.1044  0.0630   0.0583   0.0476  0.0382  0.0474   0.1036   0.0351
##          ma3    sma1    sma2    sma3    sma4  intercept
##       0.5706  0.3771  0.3671  0.2536  0.1895      4e-04
## s.e.  0.0951  0.0424  0.0428  0.0365  0.0352      2e-04
## 
## sigma^2 estimated as 0.03294:  log likelihood = 198.33,  aic = -366.67
tsdiag(ar6ma3_4, gof.lag=24)

Based on AIC, BIC,LJung-Box and Standardized residuals, ARIMA(6,1,3)(0,1,3) and ARIMA(6,1,3)(0,1,4) can be suggested for forecasting.

Forecasting:

ar6ma3_3.fcast <- forecast(ar6ma3_3, h=24)
plot(ar6ma3_3.fcast, xlim=c(2010,2016))

ar6ma3_4.fcast <- forecast(ar6ma3_4, h=24)
plot(ar6ma3_4.fcast, xlim=c(2010,2016))