Total Private Residential Construction Spending monthly data

library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)
library(urca)
Quandl.api_key('CEFP3eWxEJwr_uUP9a2D')
x <- Quandl("FRED/PRRESCON", type="ts") 

Plots

Original

plot(x, xlab=" Years", ylab="", main="TPRCS" )

## Log transformed ts

lx <- log(x)
plot(lx, xlab="Years", ylab="", main="Log TPRCS" )

This ta shows an exponential trend with a possible structural break also a seasonal pattern is presented.

endof2013 <- 2013 + (11/12)
lx.est <- window(lx, end = endof2013)
lx.pred <- window(lx, start = endof2013 + (1/12))
dl.x <- diff(lx.est,lag=1)
dl.x12 <- diff(lx.est,12)
dl.x12_1 <- diff(diff(lx.est,12), 1)
par(mfrow=c(2,2))
plot(lx.est, xlab="years", ylab="value", main=expression(log(x)))
plot(dl.x, xlab="years", ylab="value", main=expression(paste(Delta, "log(x)")))
plot(dl.x12, xlab="years", ylab="value", main=expression(paste(Delta[12], "log(x)")))
plot(dl.x12_1, xlab="years", ylab="value", main=expression(paste(Delta, Delta[12], "log(x)")))

The ts in diff and seasonal diff is the what looks stationary but not very solid. Now we have to look at ACFs and PACFs analysis.

maxlag <-24
par(mfrow=c(2,4))
Acf(lx.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(x)")))
Acf(dl.x, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(x)")))
Acf(dl.x12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(x)")))
Acf(dl.x12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(x)")))
Acf(lx, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(x)")))
Acf(dl.x, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(x)")))
Acf(dl.x12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(x)")))
Acf(dl.x12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12], "log(x)")))

Unit root tests

ADF test

library(tseries)
adf.test(lx)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  lx
## Dickey-Fuller = -1.6426, Lag order = 6, p-value = 0.7268
## alternative hypothesis: stationary

ADF test shows that we cannot reject the null hypothesis that the time series has a unit root.

KPSS test

kpss.test(lx, null="Trend")
## Warning in kpss.test(lx, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  lx
## KPSS Trend = 0.89259, Truncation lag parameter = 3, p-value = 0.01
lx.urkpss <- ur.kpss(lx, type="tau", lags="short")
summary(lx.urkpss)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: tau with 5 lags. 
## 
## Value of test-statistic is: 0.6284 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.119 0.146  0.176 0.216

KPSS test shows that we reject the null hypothesis that the time series is stationary.

The model

to find the best model for out forecast we use the Auto-Arima fn.

M12.bic <- auto.arima(lx.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic
## Series: lx.est 
## ARIMA(1,1,2)(0,0,2)[12]                    
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2
##       0.0462  0.4704  0.5356  1.1878  0.7469
## s.e.  0.1233  0.1064  0.0736  0.0660  0.0490
## 
## sigma^2 estimated as 0.0009834:  log likelihood=501.49
## AIC=-990.98   AICc=-990.64   BIC=-969.83
M12.aic <- auto.arima(lx.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic
## Series: lx.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38
M12.aicc <- auto.arima(lx.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc
## Series: lx.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38

We find that ARIMA(1,1,0)(1,0,0)[12] is the best fit forour forecast model.

Forecast

multiple forecast

M12.bic.f.h <- forecast(M12.bic, length(lx.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model (Multistep Forecast) - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(lx, type="o", pch=16, lty="dashed")

Rolling scheme forecast

M12.bic.f.rol <- zoo()
for(i in 1:length(lx.pred))
{M12.bic.rsc <- window( lx, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, lx.pred)
##                   ME       RMSE        MAE        MPE      MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
##                 ACF1 Theil's U
## Test set -0.03243961 0.4182325
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(lx, type="o", pch=16, lty="dashed")

comparison of our model ARIMA(1,1,0)(1,0,0)[12] to the restricted Seasonal AR (9,1,0)(1,0,0)[12].

class.arima <- arima(lx.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima
## 
## Call:
## arima(x = lx.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
## 
## Coefficients:
##          ar1    ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4704  0.282  -0.1059  -0.0256  0.0536  -0.0065  -0.1653  0.1691
## s.e.  0.0624  0.068   0.0705   0.0701  0.0702   0.0693   0.0702  0.0680
##           ar9    sar1
##       -0.1576  0.9751
## s.e.   0.0623  0.0087
## 
## sigma^2 estimated as 0.0003045:  log likelihood = 641.66,  aic = -1261.32

Restricted ARIMA(1,1,0)(1,0,0)[12]

class.restrarima <- arima(lx.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA),  transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima 
## 
## Call:
## arima(x = lx.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA), 
##     transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
## 
## Coefficients:
##          ar1     ar2  ar3  ar4  ar5  ar6      ar7     ar8      ar9    sar1
##       0.4446  0.2377    0    0    0    0  -0.1620  0.1649  -0.1615  0.9773
## s.e.  0.0599  0.0601    0    0    0    0   0.0617  0.0654   0.0623  0.0080
## 
## sigma^2 estimated as 0.0003067:  log likelihood = 640.23,  aic = -1266.46

Accuracy issue

par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, lx.pred)
##                   ME       RMSE        MAE        MPE      MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
##                 ACF1 Theil's U
## Test set -0.03243961 0.4182325
accuracy(class.restrarima)
##                        ME      RMSE        MAE         MPE      MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
##                   MASE       ACF1
## Training set 0.1995947 0.01831891

Restricted ARIMA rolling scheme

class.restrarima.f.rol <- zoo()
for(i in 1:length(lx.pred))
{
class.restrarima.rsc <- window( lx, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, lx.pred)
##                    ME       RMSE        MAE         MPE      MAPE
## Test set -0.001658742 0.02113642 0.01584025 -0.01602349 0.1513248
##                ACF1 Theil's U
## Test set -0.1104002 0.2621738
plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA Model vs. restricted ARIMA Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(lx, type="o", pch=16, lty="dashed")

Conclusion

We cannot definitly say which one is a better forecast between ARIMA (9,1,0)(1,0,0) and restricted ARIMA.