In this exercise we will forecast based on time series given for Housing Starts (New Privately Owned Housing Units). Let’s download the data that is not seasonally adjusted.

library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
HS <- Quandl("FRED/HOUSTNSA", type="zoo")
str(HS)
## 'zooreg' series from Jan 1959 to Jan 2016
##   Data: num [1:685] 96.2 99 127.7 150.8 152.5 ...
##   Index: Class 'yearmon'  num [1:685] 1959 1959 1959 1959 1959 ...
##   Frequency: 12
head(HS)
## Jan 1959 Feb 1959 Mar 1959 Apr 1959 May 1959 Jun 1959 
##     96.2     99.0    127.7    150.8    152.5    147.8
tail(HS)
## Aug 2015 Sep 2015 Oct 2015 Nov 2015 Dec 2015 Jan 2016 
##     99.2    111.6     90.9     89.9     77.5     73.6
plot(HS, xlab=" Years ", ylab="Thousands of Units", main="Housing Starts: Total: New Privately Owned Housing Units Started")

Now we split sample into two parts - estimation sample (from Jan 1959 to Jan 2016) and prediction sample (from Feb 2016 to Dec 2016)

HS1 <- window(HS, end=c(2013,12))
## Warning in `<=.default`(all.indexes, end): longer object length is not a
## multiple of shorter object length
HS2 <- window(HS, start=c(2014,1))
## Warning in `>=.default`(all.indexes, start): longer object length is not a
## multiple of shorter object length

Since the data is unadjusted we will transform it based on log, log-change, seasonal log change:

log_HS <- log(HS)
dlog_HS1 <- diff(log_HS)
dlog_HS12 <- diff(log_HS,12)
dlog_HS12_1 <- diff(diff(log_HS),12)

Now we plot the graphs of the transformed data:

par(mfrow=c(2,2))
plot(log_HS, main=expression(log(HS)))
plot(dlog_HS1, main=expression(paste(Delta, "log(HS)")))
plot(dlog_HS12, main=expression(paste(Delta[12], "log(HS)")))
plot(dlog_HS12_1, main=expression(paste(Delta, Delta[12], "log(HS)")))

ACF/PACF of the adjusted data will be:

par(mfrow=c(2,4))
acf(as.data.frame(log_HS),type='correlation',lag=24, main=expression(log(HS)))
acf(as.data.frame(dlog_HS1),type='correlation',lag=24, main=expression(paste(Delta, "log(HS)")))
acf(as.data.frame(dlog_HS12),type='correlation',lag=24, main=expression(paste(Delta[12], "log(HS)")))
acf(as.data.frame(dlog_HS12_1),type='correlation',lag=24, main=expression(paste(Delta, Delta[12], "log(HS)")))
acf(as.data.frame(log_HS),type='partial',lag=24, main=expression(log(HS)))
acf(as.data.frame(dlog_HS1),type='partial',lag=24, main=expression(paste(Delta, "log(HS)")))
acf(as.data.frame(dlog_HS12),type='partial',lag=24, main=expression(paste(Delta[12], "log(HS)")))
acf(as.data.frame(dlog_HS12_1),type='partial',lag=24, main=expression(paste(Delta, Delta[12], "log(HS)")))

based on analysis of ACF/PACF we can \(Delta, Delta[12], "log(HS)"))\) dlog_HS12_1 might be the most appropriate one. So we estimate model with twice differenced data:

m1 <- arima(dlog_HS12_1,order=c(2,1,1),seasonal=list(order=c(1,1,1),period=12))
m1
## 
## Call:
## arima(x = dlog_HS12_1, order = c(2, 1, 1), seasonal = list(order = c(1, 1, 1), 
##     period = 12))
## 
## Coefficients:
##           ar1      ar2      ma1     sar1     sma1
##       -0.3931  -0.1482  -1.0000  -0.4508  -1.0000
## s.e.   0.0386   0.0386   0.0074   0.0353   0.0146
## 
## sigma^2 estimated as 0.01087:  log likelihood = 517.96,  aic = -1023.91

Examine of residuals

tsdiag(m1,gof.lag=36)

Forecasting

library(forecast)
## Loading required package: timeDate
## This is forecast 6.2
m1.fcast <- forecast(m1, h=24)
plot(m1.fcast, xlim=c(2000, 2016))