In this exercise we build forecasts from the Housing Starts time series (New Privately Owned Housing Units Started). Let's download the data that is not seasonally adjusted from FRED via Quandl.
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
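If the download below fails with an authentication or rate-limit error, a free Quandl API key can be registered first with Quandl.api_key() (available in recent versions of the Quandl package; the key string here is only a placeholder):
# Optional: register a Quandl API key; "YOUR_API_KEY" is a placeholder.
Quandl.api_key("YOUR_API_KEY")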
HS <- Quandl("FRED/HOUSTNSA", type="zoo")
str(HS)
## 'zooreg' series from Jan 1959 to Jan 2016
## Data: num [1:685] 96.2 99 127.7 150.8 152.5 ...
## Index: Class 'yearmon' num [1:685] 1959 1959 1959 1959 1959 ...
## Frequency: 12
head(HS)
## Jan 1959 Feb 1959 Mar 1959 Apr 1959 May 1959 Jun 1959
## 96.2 99.0 127.7 150.8 152.5 147.8
tail(HS)
## Aug 2015 Sep 2015 Oct 2015 Nov 2015 Dec 2015 Jan 2016
## 99.2 111.6 90.9 89.9 77.5 73.6
plot(HS, xlab=" Years ", ylab="Thousands of Units", main="Housing Starts: Total: New Privately Owned Housing Units Started")
Now we split the sample into two parts: an estimation sample (Jan 1959 to Dec 2013) and a prediction sample (Jan 2014 to Jan 2016).
HS1 <- window(HS, end=c(2013,12))
## Warning in `<=.default`(all.indexes, end): longer object length is not a
## multiple of shorter object length
HS2 <- window(HS, start=c(2014,1))
## Warning in `>=.default`(all.indexes, start): longer object length is not a
## multiple of shorter object length
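The warnings appear because window() recycles the length-2 vector c(year, month) against the full yearmon index. With a zoo series it is safer to pass yearmon values directly; a sketch of a warning-free version of the same split:
# Split using yearmon values, which match the index class of HS.
HS1 <- window(HS, end   = as.yearmon("2013-12"))   # estimation sample: Jan 1959 - Dec 2013
HS2 <- window(HS, start = as.yearmon("2014-01"))   # prediction sample: Jan 2014 - Jan 2016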
Since the data is not seasonally adjusted, we work with several transformations: the log level, the monthly log change, the seasonal (12-month) log change, and the combined monthly and seasonal log change:
log_HS <- log(HS)
dlog_HS1 <- diff(log_HS)
dlog_HS12 <- diff(log_HS,12)
dlog_HS12_1 <- diff(diff(log_HS),12)
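The last transformation applies the regular and the seasonal difference in sequence; the two operators commute, so the order does not matter. A quick sanity check (not part of the original exercise, using zoo::coredata):
# Regular and seasonal differencing commute: both orderings give the same values.
all.equal(coredata(dlog_HS12_1), coredata(diff(diff(log_HS, 12))))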
Now we plot the transformed series:
par(mfrow=c(2,2))
plot(log_HS, main=expression(log(HS)))
plot(dlog_HS1, main=expression(paste(Delta, "log(HS)")))
plot(dlog_HS12, main=expression(paste(Delta[12], "log(HS)")))
plot(dlog_HS12_1, main=expression(paste(Delta, Delta[12], "log(HS)")))
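An optional way to inspect the seasonal pattern more directly is a seasonal subseries plot (this uses stats::monthplot on a ts conversion and is not part of the original exercise):
par(mfrow=c(1,1))   # reset the plotting layout
# One subseries per calendar month; strong seasonality shows up as large level differences.
monthplot(as.ts(log_HS), ylab = "log(HS)", main = "Seasonal subseries of log(HS)")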
The ACF/PACF of the transformed series are:
par(mfrow=c(2,4))
acf(as.data.frame(log_HS), type='correlation', lag.max=24, main=expression(log(HS)))
acf(as.data.frame(dlog_HS1), type='correlation', lag.max=24, main=expression(paste(Delta, "log(HS)")))
acf(as.data.frame(dlog_HS12), type='correlation', lag.max=24, main=expression(paste(Delta[12], "log(HS)")))
acf(as.data.frame(dlog_HS12_1), type='correlation', lag.max=24, main=expression(paste(Delta, Delta[12], "log(HS)")))
acf(as.data.frame(log_HS), type='partial', lag.max=24, main=expression(log(HS)))
acf(as.data.frame(dlog_HS1), type='partial', lag.max=24, main=expression(paste(Delta, "log(HS)")))
acf(as.data.frame(dlog_HS12), type='partial', lag.max=24, main=expression(paste(Delta[12], "log(HS)")))
acf(as.data.frame(dlog_HS12_1), type='partial', lag.max=24, main=expression(paste(Delta, Delta[12], "log(HS)")))
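As a cross-check on the choice of differencing (an optional step using ndiffs() and nsdiffs() from the forecast package, which is loaded later anyway), unit-root based tests can also suggest how many regular and seasonal differences are needed:
library(forecast)              # for ndiffs() and nsdiffs()
log_HS_ts <- as.ts(log_HS)     # convert the zoo series to a monthly ts
ndiffs(log_HS_ts)              # suggested number of regular differences
nsdiffs(log_HS_ts)             # suggested number of seasonal differences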
Based on the ACF/PACF analysis, the combined regular and seasonal log difference ΔΔ12 log(HS) (dlog_HS12_1) appears to be the most appropriate transformation, so we estimate a model on the twice-differenced data:
m1 <- arima(dlog_HS12_1,order=c(2,1,1),seasonal=list(order=c(1,1,1),period=12))
m1
##
## Call:
## arima(x = dlog_HS12_1, order = c(2, 1, 1), seasonal = list(order = c(1, 1, 1),
## period = 12))
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1
## -0.3931 -0.1482 -1.0000 -0.4508 -1.0000
## s.e. 0.0386 0.0386 0.0074 0.0353 0.0146
##
## sigma^2 estimated as 0.01087: log likelihood = 517.96, aic = -1023.91
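The chosen orders are not the only reasonable specification; as an illustrative comparison (the alternative order below is arbitrary), neighbouring models can be ranked by AIC:
# Fit an alternative specification and compare information criteria.
m2 <- arima(dlog_HS12_1, order=c(1,1,1), seasonal=list(order=c(1,1,1), period=12))
AIC(m1, m2)   # lower AIC indicates the preferred specification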
Examine the residuals:
tsdiag(m1,gof.lag=36)
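tsdiag() already plots Ljung-Box p-values; the same test can also be run directly on the residuals, adjusting the degrees of freedom for the five estimated coefficients (fitdf = 5 is our assumption here):
# Ljung-Box test at lag 36; fitdf = 5 accounts for ar1, ar2, ma1, sar1 and sma1.
Box.test(residuals(m1), lag = 36, type = "Ljung-Box", fitdf = 5)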
Forecasting
library(forecast)
## Loading required package: timeDate
## This is forecast 6.2
m1.fcast <- forecast(m1, h=24)
plot(m1.fcast, xlim=c(2000, 2018))
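Since the forecasts are for the transformed series, they can be checked against the transformed values observed over the prediction sample. A sketch of such an out-of-sample check, re-estimating the same specification on the estimation sample only (the variable names below are our own):
# Transform the estimation sample and extract the corresponding hold-out values.
dlog_est  <- diff(diff(log(HS1)), 12)                             # estimation sample, transformed
dlog_pred <- window(dlog_HS12_1, start = as.yearmon("2014-01"))   # hold-out values, transformed
# Re-fit the same specification on the estimation sample and forecast the hold-out period.
m1_est <- arima(dlog_est, order=c(2,1,1), seasonal=list(order=c(1,1,1), period=12))
fc_est <- forecast(m1_est, h = length(dlog_pred))
accuracy(fc_est, as.numeric(dlog_pred))   # test-set RMSE, MAE, etc.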