library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
hs<-Quandl("FRED/HOUSTNSA", type="zoo")
head(hs)
## Jan 1959 Feb 1959 Mar 1959 Apr 1959 May 1959 Jun 1959
## 96.2 99.0 127.7 150.8 152.5 147.8
tail(hs)
## Aug 2015 Sep 2015 Oct 2015 Nov 2015 Dec 2015 Jan 2016
## 99.2 111.6 90.9 89.9 77.5 73.6
As it is indicated that, the forecast should be done based on the data up to the end of 2013.Therefore, the data had been split into two parts.
hs1 <- window(hs, end = as.yearmon(2013,"Dec"))
hs2<- window(hs, start=as.yearmon(2014, "Jan"))
plot(hs1, xlab="Monthly Time period (Jan 1959- Dec 2013)", ylab= "Housing starts")
acf(hs1)
pacf(hs1)
Checking the ACF, PACF we can say that the data has seasonality and the data in level is non-stationary.
dhs1<- diff(hs1)
plot(dhs1)
acf(dhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter
pacf(dhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter
From the plot,it looks like the data has become stationary after differencing. But as we need to adjust the seasonality as well, we need to do a seasonal differing.
sdhs1<-diff(diff(hs1),12)
plot(sdhs1)
adf.test(sdhs1)
## Warning in adf.test(sdhs1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: sdhs1
## Dickey-Fuller = -8.0486, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
acf(sdhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter
pacf(sdhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter
Based on the ADF test, it can be stated that the data is stationary. Now, based on the acf,pacf and AIC,BIC, we need to do select the ARIMA model.
sar2<-arima(sdhs1,order=c(2,0,0),seasonal=list(order=c(2,0,0),period=12))
sar2
##
## Call:
## arima(x = sdhs1, order = c(2, 0, 0), seasonal = list(order = c(2, 0, 0), period = 12))
##
## Coefficients:
## ar1 ar2 sar1 sar2 intercept
## -0.3624 -0.0973 -0.5094 -0.2381 0.0313
## s.e. 0.0398 0.0397 0.0389 0.0387 0.1869
##
## sigma^2 estimated as 141.2: log likelihood = -2478.59, aic = 4969.17
tsdiag(sar2, gof.lag=24)
sar3<-arima(hs1,order=c(2,1,0),seasonal=list(order=c(3,1,0),period=12))
sar3
##
## Call:
## arima(x = hs1, order = c(2, 1, 0), seasonal = list(order = c(3, 1, 0), period = 12))
##
## Coefficients:
## ar1 ar2 sar1 sar2 sar3
## -0.3377 -0.0971 -0.5774 -0.3698 -0.2611
## s.e. 0.0401 0.0396 0.0389 0.0421 0.0385
##
## sigma^2 estimated as 131.3: log likelihood = -2456.65, aic = 4925.29
tsdiag(sar3, gof.lag=24)
sma1<-arima(hs1,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))
sma1
##
## Call:
## arima(x = hs1, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
##
## Coefficients:
## ma1 sma1
## -0.3013 -0.8593
## s.e. 0.0367 0.0247
##
## sigma^2 estimated as 117.5: log likelihood = -2426.29, aic = 4858.57
tsdiag(sma1, gof.lag=24)
sma1ar3<-arima(hs1,order=c(2,1,1),seasonal=list(order=c(3,1,1),period=12))
sma1ar3
##
## Call:
## arima(x = hs1, order = c(2, 1, 1), seasonal = list(order = c(3, 1, 1), period = 12))
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sar3 sma1
## -0.3985 -0.1197 0.0694 0.1118 -0.0235 -0.1101 -0.8588
## s.e. 0.3674 0.1142 0.3699 0.0494 0.0445 0.0447 0.0323
##
## sigma^2 estimated as 114: log likelihood = -2416.97, aic = 4849.94
tsdiag(sma1ar3, gof.lag=24)
Based on AIC, BIC, LJung-box and Standardized residuals, we can suggest that ARIMA(0,1,1)(0,1,1), ARIMA(2,1,1)(3,1,1) and ARIMA(2,1,0)(3,1,0) can be the good models for forecasting.
sma1.fcast <- forecast(sma1, h=36)
plot(sma1.fcast, xlim=c(2010,2016))
sma1ar3.fcast <- forecast(sma1ar3, h=36)
plot(sma1ar3.fcast, xlim=c(2010,2016))
sar3.fcast <- forecast(sar3, h=36)
plot(sar3.fcast, xlim=c(2010,2016))