Total Private Residential Construction Spending

library(Quandl)
## Warning: package 'Quandl' was built under R version 3.3.3
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)
## Loading required package: timeDate
## This is forecast 7.3
library(urca)
Quandl.api_key('rxVQhZ8_nxxeo2yy4Uz4')
y <- Quandl("FRED/PRRESCON", type="ts") 

We plot the original and log-transformed Total Private Residential Construction Spending.

ly <- log(y)
plot(y, xlab=" Years 1993-2016", ylab="", main="Total Private Residential Construction Spending" )

plot(ly, xlab="Years 1993-2016", ylab="", main="Log Total Private Residential Construction Spending" )

The time series shows an exponential trend (with a possible structural break) as well as a seasonal pattern is present.For further analysis we will use log-transformed data untill the end of 2013 to build an estimation model.

endof2013 <- 2013 + (11/12)
ly.est <- window(ly, end = endof2013)
ly.pred <- window(ly, start = endof2013 + (1/12))
dl.y <- diff(ly.est,lag=1)
dl.y12 <- diff(ly.est,12)
dl.y12_1 <- diff(diff(ly.est,12), 1)
par(mfrow=c(2,2))
plot(ly.est, xlab="", ylab="", main=expression(log(y)))
plot(dl.y, xlab="", ylab="", main=expression(paste(Delta, "log(y)")))
plot(dl.y12, xlab="", ylab="", main=expression(paste(Delta[12], "log(y)")))
plot(dl.y12_1, xlab="", ylab="", main=expression(paste(Delta, Delta[12], "log(y)")))

The time series in differences and seasonal differences is the only that looks weakly stationary. To verify this is true, we look at ACFs and PACFs.

library(forecast)

maxlag <-24
par(mfrow=c(2,4))
Acf(ly.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(y)")))
Acf(dl.y, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(y)")))
Acf(dl.y12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(y)")))
Acf(ly, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(y)")))
Acf(dl.y, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(y)")))
Acf(dl.y12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12], "log(y)")))

So the last time series is the only weekly stationary according to ACFs and PACFs.

Tests for presence of a unit root

ADF test

library(tseries)
adf.test(ly)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ly
## Dickey-Fuller = -1.709, Lag order = 6, p-value = 0.6988
## alternative hypothesis: stationary

By ADF test we cannot reject the \(H_0\) hypothesis that the time series has a unit root.

KPSS test

library(tseries)
kpss.test(ly, null="Trend")
## Warning in kpss.test(ly, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ly
## KPSS Trend = 0.8985, Truncation lag parameter = 3, p-value = 0.01
library(urca)
ly.urkpss <- ur.kpss(ly, type="tau", lags="short")
summary(ly.urkpss)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: tau with 5 lags. 
## 
## Value of test-statistic is: 0.6327 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.119 0.146  0.176 0.216

By KPSS test we reject the \(H_0\) hypothesis that the time series is stationary.

Building the model

We use autoarima function to find the best specification for our model.

M12.bic <- auto.arima(ly.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12]                    
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2
##       0.0462  0.4704  0.5356  1.1878  0.7469
## s.e.  0.1233  0.1064  0.0736  0.0660  0.0490
## 
## sigma^2 estimated as 0.0009834:  log likelihood=501.49
## AIC=-990.98   AICc=-990.64   BIC=-969.83
M12.aic <- auto.arima(ly.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38
M12.aicc <- auto.arima(ly.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38

ARIMA(1,1,0)(1,0,0)[12] is the best specification suggested by Autoarima using BIC, AIC, AICc criterias.

Forecast

At first, we construct the multiplestep forecast.

library(forecast)
M12.bic.f.h <- forecast(M12.bic, length(ly.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model Multistep Forecast - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(ly, type="o", pch=16, lty="dashed")

Rolling scheme forecast

library(forecast)
M12.bic.f.rol <- zoo()
for(i in 1:length(ly.pred))
{M12.bic.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, ly.pred)
##                     ME       RMSE        MAE           MPE      MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
##                 ACF1 Theil's U
## Test set -0.04410045  0.442365
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

Now we compare our model to the restricted Seasonal AR (9,1,0)(1,0,0)[12], which was offered in the class.

ARIMA(1,1,0)(1,0,0)[12]

class.arima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima
## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
## 
## Coefficients:
##          ar1    ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4704  0.282  -0.1059  -0.0256  0.0536  -0.0065  -0.1653  0.1691
## s.e.  0.0624  0.068   0.0705   0.0701  0.0702   0.0693   0.0702  0.0680
##           ar9    sar1
##       -0.1576  0.9751
## s.e.   0.0623  0.0087
## 
## sigma^2 estimated as 0.0003045:  log likelihood = 641.66,  aic = -1261.32

Restricted ARIMA(1,1,0)(1,0,0)[12]

class.restrarima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA),  transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima 
## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA), 
##     transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
## 
## Coefficients:
##          ar1     ar2  ar3  ar4  ar5  ar6      ar7     ar8      ar9    sar1
##       0.4446  0.2377    0    0    0    0  -0.1620  0.1649  -0.1615  0.9773
## s.e.  0.0599  0.0601    0    0    0    0   0.0617  0.0654   0.0623  0.0080
## 
## sigma^2 estimated as 0.0003067:  log likelihood = 640.23,  aic = -1266.46

We compare error accuracy between our model and restricted arima.

library(forecast)
par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, ly.pred)
##                     ME       RMSE        MAE           MPE      MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
##                 ACF1 Theil's U
## Test set -0.04410045  0.442365
accuracy(class.restrarima)
##                        ME      RMSE        MAE         MPE      MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
##                   MASE       ACF1
## Training set 0.1995947 0.01831891

Restricted ARIMA rolling scheme

library(forecast)
class.restrarima.f.rol <- zoo()
for(i in 1:length(ly.pred))
{
class.restrarima.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, ly.pred)
##                    ME       RMSE       MAE         MPE      MAPE
## Test set -0.001900123 0.02202464 0.0163457 -0.01841198 0.1562592
##                ACF1 Theil's U
## Test set -0.1134615 0.2864665

We compare our model and the restricted ARIMA

plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA(9,1,0)(1,0,0) vs. ARIMA(1,1,2)(0,0,2) Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

Conclusion

If we visually compare two rolling scheme forecasts for both ARIMA(9,1,0)(1,0,0) and ARIMA(1,1,2)(0,0,2), we can barely tell which one is a better forecast. However, discussed in class model seems to be preferable as it has lower ME, RMSE, MAE, Mape and only MPE is lower for our suggested model.