Introduction

we will analyze thedata for Total Private Residential Construction Spending in the United States FRED/PRRESCON. We use the three step Box-Jenkins methodology over the estimation sub-sample and performs two types of forecasting schemes over the prediction sub-sample.

Import data:

Use the monthly data for Total Private Residential Construction Spending. We split the data into an estimation sample and a prediction sample.

library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)
library(urca)
Quandl.api_key('3fnnc4EE2uzFb1a44mAQ')
y <- Quandl("FRED/PRRESCON", type="ts")
ly <- log(y)
plot(y, xlab=" Years 1993-2016", ylab="", main="Total Private Residential Construction Spending" )

plot(ly, xlab="Years 1993-2016", ylab="", main="Log Total Private Residential Construction Spending" )

endof2013 <- 2013 + (11/12)
ly.est <- window(ly, end = endof2013)
ly.pred <- window(ly, start = endof2013 + (1/12))
dl.y <- diff(ly.est,lag=1)
dl.y12 <- diff(ly.est,12)
dl.y12_1 <- diff(diff(ly.est,12), 1)
par(mfrow=c(2,2))
plot(ly.est, xlab="", ylab="", main=expression(log(y)))
plot(dl.y, xlab="", ylab="", main=expression(paste(Delta, "log(y)")))
plot(dl.y12, xlab="", ylab="", main=expression(paste(Delta[12], "log(y)")))
plot(dl.y12_1, xlab="", ylab="", main=expression(paste(Delta, Delta[12], "log(y)")))

library(forecast)
maxlag <-24
par(mfrow=c(2,4))
Acf(ly.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(y)")))
Acf(dl.y, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(y)")))
Acf(dl.y12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(y)")))
Acf(ly, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(y)")))
Acf(dl.y, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(y)")))
Acf(dl.y12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12],"log(y)")))

According to ACFs and PACFs, only one that weekly stationary (the last one).

ADF test

library(tseries)
adf.test(ly)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ly
## Dickey-Fuller = -1.6426, Lag order = 6, p-value = 0.7268
## alternative hypothesis: stationary

Cannot rejectthe the hypothesis that the time series has a unit root.

KPSS

kpss.test(ly, null="Trend")
## Warning in kpss.test(ly, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ly
## KPSS Trend = 0.89259, Truncation lag parameter = 3, p-value = 0.01
ly.urkpss <- ur.kpss(ly, type="tau", lags="short")
summary(ly.urkpss)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: tau with 5 lags. 
## 
## Value of test-statistic is: 0.6284 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.119 0.146  0.176 0.216

Here, reject the hypothesisthe that time series is stationary.

To find the best specification for our model we use autoarima function.

M12.bic <- auto.arima(ly.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12]                    
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2
##       0.0462  0.4704  0.5356  1.1878  0.7469
## s.e.  0.1233  0.1064  0.0736  0.0660  0.0490
## 
## sigma^2 estimated as 0.0009834:  log likelihood=501.49
## AIC=-990.98   AICc=-990.64   BIC=-969.83
M12.aic <- auto.arima(ly.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38
M12.aicc <- auto.arima(ly.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc
## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38

Among all models, ARIMA(9,1,0)(1,0,0)[12] is the best model.

M12.bic.f.h <- forecast(M12.bic, length(ly.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model Multistep Forecast - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(ly, type="o", pch=16, lty="dashed")

M12.bic.f.rol <- zoo()
for(i in 1:length(ly.pred))
{M12.bic.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, ly.pred)
##                   ME       RMSE        MAE        MPE      MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
##                 ACF1 Theil's U
## Test set -0.03243961 0.4182325
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

class.arima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima
## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
## 
## Coefficients:
##          ar1    ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4704  0.282  -0.1059  -0.0256  0.0536  -0.0065  -0.1653  0.1691
## s.e.  0.0624  0.068   0.0705   0.0701  0.0702   0.0693   0.0702  0.0680
##           ar9    sar1
##       -0.1576  0.9751
## s.e.   0.0623  0.0087
## 
## sigma^2 estimated as 0.0003045:  log likelihood = 641.66,  aic = -1261.32
class.restrarima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA),  transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima
## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA), 
##     transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
## 
## Coefficients:
##          ar1     ar2  ar3  ar4  ar5  ar6      ar7     ar8      ar9    sar1
##       0.4446  0.2377    0    0    0    0  -0.1620  0.1649  -0.1615  0.9773
## s.e.  0.0599  0.0601    0    0    0    0   0.0617  0.0654   0.0623  0.0080
## 
## sigma^2 estimated as 0.0003067:  log likelihood = 640.23,  aic = -1266.46
library(forecast)
par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, ly.pred)
##                   ME       RMSE        MAE        MPE      MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
##                 ACF1 Theil's U
## Test set -0.03243961 0.4182325
accuracy(class.restrarima)
##                        ME      RMSE        MAE         MPE      MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
##                   MASE       ACF1
## Training set 0.1995947 0.01831891
class.restrarima.f.rol <- zoo()
for(i in 1:length(ly.pred))
{
class.restrarima.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, ly.pred)
##                    ME       RMSE        MAE         MPE      MAPE
## Test set -0.001658742 0.02113642 0.01584025 -0.01602349 0.1513248
##                ACF1 Theil's U
## Test set -0.1104002 0.2621738
plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA(9,1,0)(1,0,0) vs. ARIMA(1,1,2)(0,0,2) Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

Conclusion

ARIMA(9,1,0)(3,0,0) model best produces data that looks like the actual data we see in the TPRCS of the United States out of all of the models tested. We also find that using the rolling forecasting scheme produces much more accurate forecasts of TPRCS than the multi-step forecasting scheme.