Total Private Residential Construction Spending

library(Quandl)

## Warning: package 'Quandl' was built under R version 3.3.3

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)

## Loading required package: timeDate

## This is forecast 7.3

library(urca)
Quandl.api_key('rxVQhZ8_nxxeo2yy4Uz4')
y <- Quandl("FRED/PRRESCON", type="ts")

We plot the original and log-transformed Total Private Residential Construction Spending.

ly <- log(y)
plot(y, xlab=" Years 1993-2016", ylab="", main="Total Private Residential Construction Spending" )

plot(ly, xlab="Years 1993-2016", ylab="", main="Log Total Private Residential Construction Spending" )

The time series shows an exponential trend (with a possible structural break) as well as a seasonal pattern is present.For further analysis we will use log-transformed data untill the end of 2013 to build an estimation model.

endof2013 <- 2013 + (11/12)
ly.est <- window(ly, end = endof2013)
ly.pred <- window(ly, start = endof2013 + (1/12))

dl.y <- diff(ly.est,lag=1)
dl.y12 <- diff(ly.est,12)
dl.y12_1 <- diff(diff(ly.est,12), 1)
par(mfrow=c(2,2))
plot(ly.est, xlab="", ylab="", main=expression(log(y)))
plot(dl.y, xlab="", ylab="", main=expression(paste(Delta, "log(y)")))
plot(dl.y12, xlab="", ylab="", main=expression(paste(Delta[12], "log(y)")))
plot(dl.y12_1, xlab="", ylab="", main=expression(paste(Delta, Delta[12], "log(y)")))

The time series in differences and seasonal differences is the only that looks weakly stationary. To verify this is true, we look at ACFs and PACFs.

library(forecast)

maxlag <-24
par(mfrow=c(2,4))
Acf(ly.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(y)")))
Acf(dl.y, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(y)")))
Acf(dl.y12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(y)")))
Acf(ly, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(y)")))
Acf(dl.y, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(y)")))
Acf(dl.y12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12], "log(y)")))

So the last time series is the only weekly stationary according to ACFs and PACFs.

Tests for presence of a unit root

ADF test

library(tseries)
adf.test(ly)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  ly
## Dickey-Fuller = -1.709, Lag order = 6, p-value = 0.6988
## alternative hypothesis: stationary

By ADF test we cannot reject the \(H_0\) hypothesis that the time series has a unit root.

KPSS test

library(tseries)
kpss.test(ly, null="Trend")

## Warning in kpss.test(ly, null = "Trend"): p-value smaller than printed p-
## value

## 
##  KPSS Test for Trend Stationarity
## 
## data:  ly
## KPSS Trend = 0.8985, Truncation lag parameter = 3, p-value = 0.01

library(urca)
ly.urkpss <- ur.kpss(ly, type="tau", lags="short")
summary(ly.urkpss)

## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: tau with 5 lags. 
## 
## Value of test-statistic is: 0.6327 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.119 0.146  0.176 0.216

By KPSS test we reject the \(H_0\) hypothesis that the time series is stationary.

Building the model

We use autoarima function to find the best specification for our model.

M12.bic <- auto.arima(ly.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic

## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12]                    
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2
##       0.0462  0.4704  0.5356  1.1878  0.7469
## s.e.  0.1233  0.1064  0.0736  0.0660  0.0490
## 
## sigma^2 estimated as 0.0009834:  log likelihood=501.49
## AIC=-990.98   AICc=-990.64   BIC=-969.83

M12.aic <- auto.arima(ly.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic

## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38

M12.aicc <- auto.arima(ly.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc

## Series: ly.est 
## ARIMA(1,1,2)(0,0,2)[12] with drift         
## 
## Coefficients:
##          ar1     ma1     ma2    sma1    sma2   drift
##       0.0452  0.4709  0.5358  1.1873  0.7468  0.0032
## s.e.  0.1232  0.1062  0.0735  0.0660  0.0489  0.0116
## 
## sigma^2 estimated as 0.0009872:  log likelihood=501.53
## AIC=-989.06   AICc=-988.59   BIC=-964.38

ARIMA(1,1,0)(1,0,0)[12] is the best specification suggested by Autoarima using BIC, AIC, AICc criterias.

Forecast

At first, we construct the multiplestep forecast.

library(forecast)
M12.bic.f.h <- forecast(M12.bic, length(ly.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model Multistep Forecast - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(ly, type="o", pch=16, lty="dashed")

Rolling scheme forecast

library(forecast)
M12.bic.f.rol <- zoo()
for(i in 1:length(ly.pred))
{M12.bic.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, ly.pred)

##                     ME       RMSE        MAE           MPE      MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
##                 ACF1 Theil's U
## Test set -0.04410045  0.442365

plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

Now we compare our model to the restricted Seasonal AR (9,1,0)(1,0,0)[12], which was offered in the class.

ARIMA(1,1,0)(1,0,0)[12]

class.arima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima

## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
## 
## Coefficients:
##          ar1    ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4704  0.282  -0.1059  -0.0256  0.0536  -0.0065  -0.1653  0.1691
## s.e.  0.0624  0.068   0.0705   0.0701  0.0702   0.0693   0.0702  0.0680
##           ar9    sar1
##       -0.1576  0.9751
## s.e.   0.0623  0.0087
## 
## sigma^2 estimated as 0.0003045:  log likelihood = 641.66,  aic = -1261.32

Restricted ARIMA(1,1,0)(1,0,0)[12]

class.restrarima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA),  transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima

## 
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA), 
##     transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
## 
## Coefficients:
##          ar1     ar2  ar3  ar4  ar5  ar6      ar7     ar8      ar9    sar1
##       0.4446  0.2377    0    0    0    0  -0.1620  0.1649  -0.1615  0.9773
## s.e.  0.0599  0.0601    0    0    0    0   0.0617  0.0654   0.0623  0.0080
## 
## sigma^2 estimated as 0.0003067:  log likelihood = 640.23,  aic = -1266.46

We compare error accuracy between our model and restricted arima.

library(forecast)
par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, ly.pred)

##                     ME       RMSE        MAE           MPE      MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
##                 ACF1 Theil's U
## Test set -0.04410045  0.442365

accuracy(class.restrarima)

##                        ME      RMSE        MAE         MPE      MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
##                   MASE       ACF1
## Training set 0.1995947 0.01831891

Restricted ARIMA rolling scheme

library(forecast)
class.restrarima.f.rol <- zoo()
for(i in 1:length(ly.pred))
{
class.restrarima.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, ly.pred)

##                    ME       RMSE       MAE         MPE      MAPE
## Test set -0.001900123 0.02202464 0.0163457 -0.01841198 0.1562592
##                ACF1 Theil's U
## Test set -0.1134615 0.2864665

We compare our model and the restricted ARIMA

plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA(9,1,0)(1,0,0) vs. ARIMA(1,1,2)(0,0,2) Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")

Conclusion

If we visually compare two rolling scheme forecasts for both ARIMA(9,1,0)(1,0,0) and ARIMA(1,1,2)(0,0,2), we can barely tell which one is a better forecast. However, discussed in class model seems to be preferable as it has lower ME, RMSE, MAE, Mape and only MPE is lower for our suggested model.

Homework 5, Problem 1

Andrii Baryshpolets

6 April 2017

Total Private Residential Construction Spending

Tests for presence of a unit root

Building the model

Forecast

Conclusion