we will analyze thedata for Total Private Residential Construction Spending in the United States FRED/PRRESCON. We use the three step Box-Jenkins methodology over the estimation sub-sample and performs two types of forecasting schemes over the prediction sub-sample.
Use the monthly data for Total Private Residential Construction Spending. We split the data into an estimation sample and a prediction sample.
library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)
library(urca)
Quandl.api_key('3fnnc4EE2uzFb1a44mAQ')
y <- Quandl("FRED/PRRESCON", type="ts")
ly <- log(y)
plot(y, xlab=" Years 1993-2016", ylab="", main="Total Private Residential Construction Spending" )
plot(ly, xlab="Years 1993-2016", ylab="", main="Log Total Private Residential Construction Spending" )
endof2013 <- 2013 + (11/12)
ly.est <- window(ly, end = endof2013)
ly.pred <- window(ly, start = endof2013 + (1/12))
dl.y <- diff(ly.est,lag=1)
dl.y12 <- diff(ly.est,12)
dl.y12_1 <- diff(diff(ly.est,12), 1)
par(mfrow=c(2,2))
plot(ly.est, xlab="", ylab="", main=expression(log(y)))
plot(dl.y, xlab="", ylab="", main=expression(paste(Delta, "log(y)")))
plot(dl.y12, xlab="", ylab="", main=expression(paste(Delta[12], "log(y)")))
plot(dl.y12_1, xlab="", ylab="", main=expression(paste(Delta, Delta[12], "log(y)")))
library(forecast)
maxlag <-24
par(mfrow=c(2,4))
Acf(ly.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(y)")))
Acf(dl.y, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(y)")))
Acf(dl.y12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(y)")))
Acf(ly, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(y)")))
Acf(dl.y, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(y)")))
Acf(dl.y12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12],"log(y)")))
According to ACFs and PACFs, only one that weekly stationary (the last one).
library(tseries)
adf.test(ly)
##
## Augmented Dickey-Fuller Test
##
## data: ly
## Dickey-Fuller = -1.6426, Lag order = 6, p-value = 0.7268
## alternative hypothesis: stationary
Cannot rejectthe the hypothesis that the time series has a unit root.
kpss.test(ly, null="Trend")
## Warning in kpss.test(ly, null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: ly
## KPSS Trend = 0.89259, Truncation lag parameter = 3, p-value = 0.01
ly.urkpss <- ur.kpss(ly, type="tau", lags="short")
summary(ly.urkpss)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: tau with 5 lags.
##
## Value of test-statistic is: 0.6284
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.119 0.146 0.176 0.216
Here, reject the hypothesisthe that time series is stationary.
M12.bic <- auto.arima(ly.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2
## 0.0462 0.4704 0.5356 1.1878 0.7469
## s.e. 0.1233 0.1064 0.0736 0.0660 0.0490
##
## sigma^2 estimated as 0.0009834: log likelihood=501.49
## AIC=-990.98 AICc=-990.64 BIC=-969.83
M12.aic <- auto.arima(ly.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 drift
## 0.0452 0.4709 0.5358 1.1873 0.7468 0.0032
## s.e. 0.1232 0.1062 0.0735 0.0660 0.0489 0.0116
##
## sigma^2 estimated as 0.0009872: log likelihood=501.53
## AIC=-989.06 AICc=-988.59 BIC=-964.38
M12.aicc <- auto.arima(ly.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 drift
## 0.0452 0.4709 0.5358 1.1873 0.7468 0.0032
## s.e. 0.1232 0.1062 0.0735 0.0660 0.0489 0.0116
##
## sigma^2 estimated as 0.0009872: log likelihood=501.53
## AIC=-989.06 AICc=-988.59 BIC=-964.38
Among all models, ARIMA(9,1,0)(1,0,0)[12] is the best model.
M12.bic.f.h <- forecast(M12.bic, length(ly.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model Multistep Forecast - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(ly, type="o", pch=16, lty="dashed")
M12.bic.f.rol <- zoo()
for(i in 1:length(ly.pred))
{M12.bic.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
## ACF1 Theil's U
## Test set -0.03243961 0.4182325
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")
class.arima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima
##
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4704 0.282 -0.1059 -0.0256 0.0536 -0.0065 -0.1653 0.1691
## s.e. 0.0624 0.068 0.0705 0.0701 0.0702 0.0693 0.0702 0.0680
## ar9 sar1
## -0.1576 0.9751
## s.e. 0.0623 0.0087
##
## sigma^2 estimated as 0.0003045: log likelihood = 641.66, aic = -1261.32
class.restrarima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima
##
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA),
## transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 sar1
## 0.4446 0.2377 0 0 0 0 -0.1620 0.1649 -0.1615 0.9773
## s.e. 0.0599 0.0601 0 0 0 0 0.0617 0.0654 0.0623 0.0080
##
## sigma^2 estimated as 0.0003067: log likelihood = 640.23, aic = -1266.46
library(forecast)
par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set 0.001441019 0.03322324 0.02706904 0.01389008 0.2590543
## ACF1 Theil's U
## Test set -0.03243961 0.4182325
accuracy(class.restrarima)
## ME RMSE MAE MPE MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
## MASE ACF1
## Training set 0.1995947 0.01831891
class.restrarima.f.rol <- zoo()
for(i in 1:length(ly.pred))
{
class.restrarima.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set -0.001658742 0.02113642 0.01584025 -0.01602349 0.1513248
## ACF1 Theil's U
## Test set -0.1104002 0.2621738
plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA(9,1,0)(1,0,0) vs. ARIMA(1,1,2)(0,0,2) Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")
ARIMA(9,1,0)(3,0,0) model best produces data that looks like the actual data we see in the TPRCS of the United States out of all of the models tested. We also find that using the rolling forecasting scheme produces much more accurate forecasts of TPRCS than the multi-step forecasting scheme.