library(Quandl)
## Warning: package 'Quandl' was built under R version 3.3.3
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(zoo)
library(xts)
library(dygraphs)
library(knitr)
library(forecast)
## Loading required package: timeDate
## This is forecast 7.3
library(urca)
Quandl.api_key('rxVQhZ8_nxxeo2yy4Uz4')
y <- Quandl("FRED/PRRESCON", type="ts")
We plot the original and log-transformed Total Private Residential Construction Spending.
ly <- log(y)
plot(y, xlab=" Years 1993-2016", ylab="", main="Total Private Residential Construction Spending" )
plot(ly, xlab="Years 1993-2016", ylab="", main="Log Total Private Residential Construction Spending" )
The time series shows an exponential trend (with a possible structural break) as well as a seasonal pattern is present.For further analysis we will use log-transformed data untill the end of 2013 to build an estimation model.
endof2013 <- 2013 + (11/12)
ly.est <- window(ly, end = endof2013)
ly.pred <- window(ly, start = endof2013 + (1/12))
dl.y <- diff(ly.est,lag=1)
dl.y12 <- diff(ly.est,12)
dl.y12_1 <- diff(diff(ly.est,12), 1)
par(mfrow=c(2,2))
plot(ly.est, xlab="", ylab="", main=expression(log(y)))
plot(dl.y, xlab="", ylab="", main=expression(paste(Delta, "log(y)")))
plot(dl.y12, xlab="", ylab="", main=expression(paste(Delta[12], "log(y)")))
plot(dl.y12_1, xlab="", ylab="", main=expression(paste(Delta, Delta[12], "log(y)")))
The time series in differences and seasonal differences is the only that looks weakly stationary. To verify this is true, we look at ACFs and PACFs.
library(forecast)
maxlag <-24
par(mfrow=c(2,4))
Acf(ly.est, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for log(y)")))
Acf(dl.y, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta,"log(y)")))
Acf(dl.y12, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='correlation', lag=maxlag, ylab="", main=expression(paste("ACF for ", Delta, Delta[12], "log(y)")))
Acf(ly, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for log(y)")))
Acf(dl.y, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta, "log(y)")))
Acf(dl.y12, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta[12], "log(y)")))
Acf(dl.y12_1, type='partial', lag=maxlag, ylab="", main=expression(paste("PACF for ", Delta,Delta[12], "log(y)")))
So the last time series is the only weekly stationary according to ACFs and PACFs.
ADF test
library(tseries)
adf.test(ly)
##
## Augmented Dickey-Fuller Test
##
## data: ly
## Dickey-Fuller = -1.709, Lag order = 6, p-value = 0.6988
## alternative hypothesis: stationary
By ADF test we cannot reject the \(H_0\) hypothesis that the time series has a unit root.
KPSS test
library(tseries)
kpss.test(ly, null="Trend")
## Warning in kpss.test(ly, null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: ly
## KPSS Trend = 0.8985, Truncation lag parameter = 3, p-value = 0.01
library(urca)
ly.urkpss <- ur.kpss(ly, type="tau", lags="short")
summary(ly.urkpss)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: tau with 5 lags.
##
## Value of test-statistic is: 0.6327
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.119 0.146 0.176 0.216
By KPSS test we reject the \(H_0\) hypothesis that the time series is stationary.
We use autoarima function to find the best specification for our model.
M12.bic <- auto.arima(ly.est, ic="bic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.bic
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12]
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2
## 0.0462 0.4704 0.5356 1.1878 0.7469
## s.e. 0.1233 0.1064 0.0736 0.0660 0.0490
##
## sigma^2 estimated as 0.0009834: log likelihood=501.49
## AIC=-990.98 AICc=-990.64 BIC=-969.83
M12.aic <- auto.arima(ly.est, ic="aic", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aic
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 drift
## 0.0452 0.4709 0.5358 1.1873 0.7468 0.0032
## s.e. 0.1232 0.1062 0.0735 0.0660 0.0489 0.0116
##
## sigma^2 estimated as 0.0009872: log likelihood=501.53
## AIC=-989.06 AICc=-988.59 BIC=-964.38
M12.aicc <- auto.arima(ly.est, ic="aicc", seasonal=TRUE, stationary=FALSE, stepwise=FALSE)
M12.aicc
## Series: ly.est
## ARIMA(1,1,2)(0,0,2)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 sma2 drift
## 0.0452 0.4709 0.5358 1.1873 0.7468 0.0032
## s.e. 0.1232 0.1062 0.0735 0.0660 0.0489 0.0116
##
## sigma^2 estimated as 0.0009872: log likelihood=501.53
## AIC=-989.06 AICc=-988.59 BIC=-964.38
ARIMA(1,1,0)(1,0,0)[12] is the best specification suggested by Autoarima using BIC, AIC, AICc criterias.
At first, we construct the multiplestep forecast.
library(forecast)
M12.bic.f.h <- forecast(M12.bic, length(ly.pred))
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="ARIMA Model Multistep Forecast - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(ly, type="o", pch=16, lty="dashed")
Rolling scheme forecast
library(forecast)
M12.bic.f.rol <- zoo()
for(i in 1:length(ly.pred))
{M12.bic.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
M12.bic.updt <- arima(M12.bic.rsc, order=c(1,1,2), seasonal=list(order=c(0,0,2),period=NA))
M12.bic.f.rol <- c(M12.bic.f.rol, forecast(M12.bic.updt, 1)$mean)
}
M12.bic.f.rol <- as.ts(M12.bic.f.rol)
accuracy(M12.bic.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
## ACF1 Theil's U
## Test set -0.04410045 0.442365
plot(M12.bic.f.h, type="o", pch=16, xlim=c(2012,2017), ylim=c(9.5,11),
main="Multistep vs. Rolling Scheme forecasts - TPRCS")
lines(M12.bic.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")
Now we compare our model to the restricted Seasonal AR (9,1,0)(1,0,0)[12], which was offered in the class.
ARIMA(1,1,0)(1,0,0)[12]
class.arima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA))
class.arima
##
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4704 0.282 -0.1059 -0.0256 0.0536 -0.0065 -0.1653 0.1691
## s.e. 0.0624 0.068 0.0705 0.0701 0.0702 0.0693 0.0702 0.0680
## ar9 sar1
## -0.1576 0.9751
## s.e. 0.0623 0.0087
##
## sigma^2 estimated as 0.0003045: log likelihood = 641.66, aic = -1261.32
Restricted ARIMA(1,1,0)(1,0,0)[12]
class.restrarima <- arima(ly.est, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima
##
## Call:
## arima(x = ly.est, order = c(9, 1, 0), seasonal = list(order = c(1, 0, 0), period = NA),
## transform.pars = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, NA, NA, NA, NA))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 sar1
## 0.4446 0.2377 0 0 0 0 -0.1620 0.1649 -0.1615 0.9773
## s.e. 0.0599 0.0601 0 0 0 0 0.0617 0.0654 0.0623 0.0080
##
## sigma^2 estimated as 0.0003067: log likelihood = 640.23, aic = -1266.46
We compare error accuracy between our model and restricted arima.
library(forecast)
par(mfrow=c(2,1))
accuracy(M12.bic.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set -3.099263e-05 0.03353639 0.02724083 -0.0001054584 0.2608801
## ACF1 Theil's U
## Test set -0.04410045 0.442365
accuracy(class.restrarima)
## ME RMSE MAE MPE MAPE
## Training set 0.0002291197 0.0174892 0.01367225 0.003050552 0.1341786
## MASE ACF1
## Training set 0.1995947 0.01831891
Restricted ARIMA rolling scheme
library(forecast)
class.restrarima.f.rol <- zoo()
for(i in 1:length(ly.pred))
{
class.restrarima.rsc <- window( ly, start=1993+(i-1)/12, end=endof2013+(i-1)/12 )
class.restrarima.updt <- arima(class.restrarima.rsc, order=c(9,1,0), seasonal=list(order=c(1,0,0),period=NA), transform.pars = FALSE, fixed = c(NA,NA, 0, 0, 0, 0, NA, NA, NA, NA))
class.restrarima.f.rol <- c(class.restrarima.f.rol, forecast(class.restrarima.updt, 1)$mean)
}
class.restrarima.f.rol <- as.ts(class.restrarima.f.rol)
accuracy(class.restrarima.f.rol, ly.pred)
## ME RMSE MAE MPE MAPE
## Test set -0.001900123 0.02202464 0.0163457 -0.01841198 0.1562592
## ACF1 Theil's U
## Test set -0.1134615 0.2864665
We compare our model and the restricted ARIMA
plot(M12.bic.f.rol, type="o", pch=16, xlim=c(2013,2017), ylim=c(9.8,10.8),
main="ARIMA(9,1,0)(1,0,0) vs. ARIMA(1,1,2)(0,0,2) Rolling Scheme forecasts - TPRCS")
lines(class.restrarima.f.rol, type="p", pch=16, lty="dashed", col="blue")
lines(M12.bic.f.rol, type="p", pch=16, lty="dashed", col="red")
lines(ly, type="o", pch=16, lty="dashed")
If we visually compare two rolling scheme forecasts for both ARIMA(9,1,0)(1,0,0) and ARIMA(1,1,2)(0,0,2), we can barely tell which one is a better forecast. However, discussed in class model seems to be preferable as it has lower ME, RMSE, MAE, Mape and only MPE is lower for our suggested model.