library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 6.2
library(zoo)
library(tseries)
#Problem 3
#Consider the monthly time series for Consumer Price Index for All Urban Consumers and do;
#Test it for nonstationarity using ADF and KPPS tests.
#Build a Time Series Until the End of 2014 following the Box Jenkings Methodology
# Forecast until the end of 2016 once you have checked the model adequacy.
cpi_n <- read.csv(file="C:\\Users\\t420\\Desktop\\PowerHouse.csv", header=TRUE, sep=",")
cpi <- ts(cpi_n[,2],start=c(1957,1), frequency=12)
str(cpi)
## Time-Series [1:707] from 1957 to 2016: 244 244 243 243 242 ...
head(cpi)
## [1] 244.075 243.985 243.359 242.651 242.436 242.354
tail(cpi)
## [1] 28.9 28.8 28.8 28.7 28.5 28.5
tsdisplay(cpi, lag.max = 250, xlab=" Years 1959-2015", ylab="Consumer Price Index", main="Consumer Price Index, monthly")
## The plots above shows there is slow decay of data. Let's check the stationarity of data using ADF and KPSS test
dcpi_12<-diff(cpi)
tsdisplay(dcpi_12, lag.max = 250, xlab=" Years 1959-2015", ylab="CPI", main="Monthly Consumer Price Index from 1959 to 2015")
adf.test(dcpi_12)
## Warning in adf.test(dcpi_12): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dcpi_12
## Dickey-Fuller = -5.3541, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(dcpi_12,null="Trend")
## Warning in kpss.test(dcpi_12, null = "Trend"): p-value smaller than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: dcpi_12
## KPSS Trend = 1.2552, Truncation lag parameter = 6, p-value = 0.01
The p-value in ADF test is 0.01 and p-value in KPSS test is 0.01. Therefore, we reject the null hypothesis in both the cases. Let’s check the second difference test.
cp <- diff(diff(cpi))
tsdisplay(cp, lag.max = 100, xlab=" Years", ylab="CPI", main="Double Differenced CPI")
adf.test(cp)
## Warning in adf.test(cp): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: cp
## Dickey-Fuller = -12.662, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(cp)
## Warning in kpss.test(cp): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: cp
## KPSS Level = 0.014023, Truncation lag parameter = 6, p-value = 0.1
Both suggest the stationarity of data.
cp_all <- cp
cp_2014 <- window(cp_all, end=c(2014,12))
cp_2015 <- window(cp_all, start=c(2015,1))
Let’s Check the Adequacy of the model considering the number of spikes in ACF and PACF model.
arm <- arima(cp_2014, order=c(5,0,1), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm))
arm
##
## Call:
## arima(x = cp_2014, order = c(5, 0, 1), seasonal = list(order = c(0, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 sma1
## 0.3837 -0.1949 -0.1232 -0.0773 0.1719 -0.9463 0.3586
## s.e. 0.0467 0.0442 0.0442 0.0433 0.0445 0.0258 0.0306
## intercept
## 5e-04
## s.e. 7e-04
##
## sigma^2 estimated as 0.04135: log likelihood = 118.76, aic = -219.51
tsdiag(arm,gof.lag=36)
arm_1 <- arima(cp_2014, order=c(6,0,2), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm_1))
arm_1
##
## Call:
## arima(x = cp_2014, order = c(6, 0, 2), seasonal = list(order = c(0, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## 0.6613 -0.2844 -0.0472 -0.0155 0.1722 0.0456 -1.2406 0.2637
## s.e. 0.2162 0.0966 0.0612 0.0525 0.0484 0.0595 0.2139 0.2047
## sma1 intercept
## 0.357 5e-04
## s.e. 0.030 5e-04
##
## sigma^2 estimated as 0.04098: log likelihood = 121.75, aic = -221.5
tsdiag(arm_1,gof.lag=36)
arm_2 <- arima(cp_2014, order=c(7,0,1), seasonal=list(order=c(0,0,1),period=12))
tsdisplay(residuals(arm_2))
arm_2
##
## Call:
## arima(x = cp_2014, order = c(7, 0, 1), seasonal = list(order = c(0, 0, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ma1
## 0.3898 -0.1854 -0.0903 -0.0352 0.1744 0.0632 0.0781 -0.9710
## s.e. 0.0402 0.0421 0.0423 0.0426 0.0422 0.0411 0.0392 0.0127
## sma1 intercept
## 0.3527 5e-04
## s.e. 0.0302 5e-04
##
## sigma^2 estimated as 0.04084: log likelihood = 123.02, aic = -224.03
tsdiag(arm_2,gof.lag=36)
Among all these models, ARIMA(6,0,2)(0,0,1) has the highest forecasting power.
ARMA.fcast1 <- forecast(arm_1, h=24)
plot(ARMA.fcast1, xlim=c(2010,2015))