library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 6.2
library(zoo)
library(tseries)
#Problem 3
  #Consider the monthly time series for the Consumer Price Index for All Urban Consumers and:
  #Test it for nonstationarity using the ADF and KPSS tests.
  #Build a time series model through the end of 2014 following the Box-Jenkins methodology.
  # Forecast until the end of 2016 once you have checked the model adequacy.

METHODOLOGY

cpi_n <- read.csv(file="C:\\Users\\t420\\Desktop\\PowerHouse.csv", header=TRUE, sep=",")

cpi <- ts(cpi_n[,2],start=c(1957,1), frequency=12)

str(cpi)
##  Time-Series [1:707] from 1957 to 2016: 244 244 243 243 242 ...
head(cpi)
## [1] 244.075 243.985 243.359 242.651 242.436 242.354
tail(cpi)
## [1] 28.9 28.8 28.8 28.7 28.5 28.5
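One thing worth flagging before plotting: head() returns values around 244 while tail() returns values around 28, which suggests the CSV is stored newest-first, so the ts object built above would run backwards in time. If that is the case, a minimal fix (a sketch, assuming the same file and column as above) is to reverse the column before constructing the series:

# Only needed if the CSV really is ordered newest-first, as the head()/tail() output above suggests
cpi <- ts(rev(cpi_n[, 2]), start = c(1957, 1), frequency = 12)
head(cpi)  # should now show the late-1950s values (around 28)
tail(cpi)  # should now show the most recent values (around 244)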
tsdisplay(cpi, lag.max = 250, xlab="Years 1957-2015", ylab="Consumer Price Index", main="Consumer Price Index, monthly")

## The plots above show a slow decay in the ACF, which indicates nonstationarity. Let's take a first difference and check stationarity with the ADF and KPSS tests.
dcpi_12 <- diff(cpi)
tsdisplay(dcpi_12, lag.max = 250, xlab="Years 1957-2015", ylab="CPI", main="First-Differenced Monthly Consumer Price Index")

adf.test(dcpi_12)
## Warning in adf.test(dcpi_12): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dcpi_12
## Dickey-Fuller = -5.3541, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(dcpi_12,null="Trend")
## Warning in kpss.test(dcpi_12, null = "Trend"): p-value smaller than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  dcpi_12
## KPSS Trend = 1.2552, Truncation lag parameter = 6, p-value = 0.01

The ADF test rejects its unit-root null (p = 0.01), pointing to stationarity, while the KPSS test rejects its trend-stationarity null (p = 0.01), pointing to nonstationarity. Since the two tests disagree on the first-differenced series, let's take a second difference and test again.
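As a cross-check on the differencing order (not part of the original output), forecast::ndiffs() reports how many differences a given test suggests; the calls below are a sketch applied to the same cpi series.

# Number of differences suggested by the ADF and KPSS tests, respectively (sketch, not original output)
ndiffs(cpi, test = "adf")
ndiffs(cpi, test = "kpss")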

cp <- diff(diff(cpi))
tsdisplay(cp, lag.max = 100, xlab=" Years", ylab="CPI", main="Double Differenced CPI")

adf.test(cp)
## Warning in adf.test(cp): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cp
## Dickey-Fuller = -12.662, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(cp)
## Warning in kpss.test(cp): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  cp
## KPSS Level = 0.014023, Truncation lag parameter = 6, p-value = 0.1

Now the tests agree: the ADF test rejects the unit-root null (p = 0.01) and the KPSS test fails to reject level stationarity (p = 0.1), so the double-differenced series can be treated as stationary.

cp_all <- cp
cp_2014 <- window(cp_all, end=c(2014,12))
cp_2015 <- window(cp_all, start=c(2015,1))
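Note that fitting an ARMA model to cp (the twice-differenced series) is, apart from the intercept, equivalent to fitting an ARIMA(p,2,q) model to the CPI itself, which keeps forecasts on the original index scale. A minimal sketch, assuming the same training window through December 2014:

# Equivalent formulation with d = 2 on the original series (sketch, not original output)
cpi_2014 <- window(cpi, end = c(2014, 12))
arm_level <- arima(cpi_2014, order = c(6, 2, 2), seasonal = list(order = c(0, 0, 1), period = 12))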

Let's fit candidate ARMA models to the training window, choosing the orders from the significant spikes in the ACF and PACF, and check each model's adequacy.

arm <- arima(cp_2014, order=c(5,0,1), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm))

arm
## 
## Call:
## arima(x = cp_2014, order = c(5, 0, 1), seasonal = list(order = c(0, 0, 1), period = 12))
## 
## Coefficients:
##          ar1      ar2      ar3      ar4     ar5      ma1    sma1
##       0.3837  -0.1949  -0.1232  -0.0773  0.1719  -0.9463  0.3586
## s.e.  0.0467   0.0442   0.0442   0.0433  0.0445   0.0258  0.0306
##       intercept
##           5e-04
## s.e.      7e-04
## 
## sigma^2 estimated as 0.04135:  log likelihood = 118.76,  aic = -219.51
tsdiag(arm,gof.lag=36)

arm_1 <- arima(cp_2014, order=c(6,0,2), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm_1))

arm_1
## 
## Call:
## arima(x = cp_2014, order = c(6, 0, 2), seasonal = list(order = c(0, 0, 1), period = 12))
## 
## Coefficients:
##          ar1      ar2      ar3      ar4     ar5     ar6      ma1     ma2
##       0.6613  -0.2844  -0.0472  -0.0155  0.1722  0.0456  -1.2406  0.2637
## s.e.  0.2162   0.0966   0.0612   0.0525  0.0484  0.0595   0.2139  0.2047
##        sma1  intercept
##       0.357      5e-04
## s.e.  0.030      5e-04
## 
## sigma^2 estimated as 0.04098:  log likelihood = 121.75,  aic = -221.5
tsdiag(arm_1,gof.lag=36)

arm_2 <- arima(cp_2014, order=c(7,0,1), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm_2))

arm_2
## 
## Call:
## arima(x = cp_2014, order = c(7, 0, 1), seasonal = list(order = c(0, 0, 1), period = 12))
## 
## Coefficients:
##          ar1      ar2      ar3      ar4     ar5     ar6     ar7      ma1
##       0.3898  -0.1854  -0.0903  -0.0352  0.1744  0.0632  0.0781  -0.9710
## s.e.  0.0402   0.0421   0.0423   0.0426  0.0422  0.0411  0.0392   0.0127
##         sma1  intercept
##       0.3527      5e-04
## s.e.  0.0302      5e-04
## 
## sigma^2 estimated as 0.04084:  log likelihood = 123.02,  aic = -224.03
tsdiag(arm_2,gof.lag=36)
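Beyond the tsdiag() plots, the three candidates can be compared side by side on AIC, and an explicit Ljung-Box test can be run on the residuals. The snippet below is a sketch using the fitted objects above; fitdf is set to the number of estimated ARMA and seasonal coefficients (9 for arm_1).

# Information criteria for the three candidates (sketch, not original output)
AIC(arm, arm_1, arm_2)
# Ljung-Box test on the residuals of the selected model
Box.test(residuals(arm_1), lag = 36, type = "Ljung-Box", fitdf = 9)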

Among these models, the ARIMA(6,0,2)(0,0,1)[12] fit (arm_1) is selected for forecasting, although the ARIMA(7,0,1)(0,0,1)[12] fit has a slightly lower AIC (-224.03 vs. -221.5).

ARMA.fcast1 <- forecast(arm_1, h=24)
plot(ARMA.fcast1, xlim=c(2010,2017))
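Because the model was fit to the twice-differenced series, these forecasts are on the differenced scale rather than in index points. The 2015 holdout created earlier can still be used to gauge forecast accuracy over the months where actual data exist; a sketch, not part of the original output:

# Forecast accuracy against the 2015 holdout of the differenced series (sketch)
accuracy(ARMA.fcast1, cp_2015)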