library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
hs<-Quandl("FRED/HOUSTNSA", type="zoo")
head(hs)
## Jan 1959 Feb 1959 Mar 1959 Apr 1959 May 1959 Jun 1959 
##     96.2     99.0    127.7    150.8    152.5    147.8
tail(hs)
## Aug 2015 Sep 2015 Oct 2015 Nov 2015 Dec 2015 Jan 2016 
##     99.2    111.6     90.9     89.9     77.5     73.6

As it is indicated that, the forecast should be done based on the data up to the end of 2013.Therefore, the data had been split into two parts.

hs1 <- window(hs, end = as.yearmon(2013,"Dec"))
hs2<- window(hs, start=as.yearmon(2014, "Jan"))
plot(hs1, xlab="Monthly Time period (Jan 1959- Dec 2013)", ylab= "Housing starts")

Model Identification, Esmitation and Diagnosis

acf(hs1)

pacf(hs1)

Checking the ACF, PACF we can say that the data has seasonality and the data in level is non-stationary.

dhs1<- diff(hs1)
plot(dhs1)

acf(dhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter

pacf(dhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter

From the plot,it looks like the data has become stationary after differencing. But as we need to adjust the seasonality as well, we need to do a seasonal differing.

sdhs1<-diff(diff(hs1),12)
plot(sdhs1)

adf.test(sdhs1)
## Warning in adf.test(sdhs1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  sdhs1
## Dickey-Fuller = -8.0486, Lag order = 8, p-value = 0.01
## alternative hypothesis: stationary
acf(sdhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter

pacf(sdhs1, lags=24)
## Warning in plot.window(...): "lags" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "lags" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter

## Warning in axis(side = side, at = at, labels = labels, ...): "lags" is not
## a graphical parameter
## Warning in box(...): "lags" is not a graphical parameter
## Warning in title(...): "lags" is not a graphical parameter

Based on the ADF test, it can be stated that the data is stationary. Now, based on the acf,pacf and AIC,BIC, we need to do select the ARIMA model.

sar2<-arima(sdhs1,order=c(2,0,0),seasonal=list(order=c(2,0,0),period=12))
sar2
## 
## Call:
## arima(x = sdhs1, order = c(2, 0, 0), seasonal = list(order = c(2, 0, 0), period = 12))
## 
## Coefficients:
##           ar1      ar2     sar1     sar2  intercept
##       -0.3624  -0.0973  -0.5094  -0.2381     0.0313
## s.e.   0.0398   0.0397   0.0389   0.0387     0.1869
## 
## sigma^2 estimated as 141.2:  log likelihood = -2478.59,  aic = 4969.17
tsdiag(sar2, gof.lag=24)

sar3<-arima(hs1,order=c(2,1,0),seasonal=list(order=c(3,1,0),period=12))
sar3
## 
## Call:
## arima(x = hs1, order = c(2, 1, 0), seasonal = list(order = c(3, 1, 0), period = 12))
## 
## Coefficients:
##           ar1      ar2     sar1     sar2     sar3
##       -0.3377  -0.0971  -0.5774  -0.3698  -0.2611
## s.e.   0.0401   0.0396   0.0389   0.0421   0.0385
## 
## sigma^2 estimated as 131.3:  log likelihood = -2456.65,  aic = 4925.29
tsdiag(sar3, gof.lag=24)

sma1<-arima(hs1,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12))
sma1
## 
## Call:
## arima(x = hs1, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1), period = 12))
## 
## Coefficients:
##           ma1     sma1
##       -0.3013  -0.8593
## s.e.   0.0367   0.0247
## 
## sigma^2 estimated as 117.5:  log likelihood = -2426.29,  aic = 4858.57
tsdiag(sma1, gof.lag=24)

sma1ar3<-arima(hs1,order=c(2,1,1),seasonal=list(order=c(3,1,1),period=12))
sma1ar3
## 
## Call:
## arima(x = hs1, order = c(2, 1, 1), seasonal = list(order = c(3, 1, 1), period = 12))
## 
## Coefficients:
##           ar1      ar2     ma1    sar1     sar2     sar3     sma1
##       -0.3985  -0.1197  0.0694  0.1118  -0.0235  -0.1101  -0.8588
## s.e.   0.3674   0.1142  0.3699  0.0494   0.0445   0.0447   0.0323
## 
## sigma^2 estimated as 114:  log likelihood = -2416.97,  aic = 4849.94
tsdiag(sma1ar3, gof.lag=24)

Based on AIC, BIC, LJung-box and Standardized residuals, we can suggest that ARIMA(0,1,1)(0,1,1), ARIMA(2,1,1)(3,1,1) and ARIMA(2,1,0)(3,1,0) can be the good models for forecasting.

Forecast

sma1.fcast <- forecast(sma1, h=36)
plot(sma1.fcast, xlim=c(2010,2016))

sma1ar3.fcast <- forecast(sma1ar3, h=36)
plot(sma1ar3.fcast, xlim=c(2010,2016))

sar3.fcast <- forecast(sar3, h=36)
plot(sar3.fcast, xlim=c(2010,2016))