Obtain the monthly data for Total Private Residential Construction Spending (FRED/PRRESCON). Follow the Box-Jenkins methodology to build a time series model based on the data until the end of 2013. Use the estimated model to produce and plot a multistep forecast and a sequence of one-month-ahead forecasts until the end of 2016. Comment on the accuracy of the two forecasts.
library(Quandl)
Quandl.api_key("KmxULt3z1Vz1neVxGioB")
# Download the monthly Total Private Residential Construction Spending series as a ts object
yall <- Quandl("FRED/PRRESCON", type = "ts")
library(forecast)
We use the data until the end of 2013.
# Estimation sample ends in December 2013
startM <- 1993
endM <- 2013 + (11/12)
y <- window(yall, end = endM)
maxlag <- 60  # maximum lag for the ACF/PACF plots
We first look at the log transformation of the data. The logged series looks closer to stationary in variance than the raw series, so we work with the logged data.
ly <- log(y)
par(mfrow=c(2,1))
plot(y)
plot(ly)
dly1 <- diff(ly)                      # regular (first) difference
dly12 <- diff(ly, lag = 12)           # seasonal difference
d2ly1_12 <- diff(diff(ly), lag = 12)  # regular and seasonal difference
par(mfrow=c(3,4))
plot(ly, ylab='', xlab='year', main='log(TPCS: Residential)')
plot(dly1, ylab='', xlab='year', main=expression(paste(Delta, "log(TPCS: Residential)")))
plot(dly12, ylab='', xlab='year', main=expression(paste(Delta[12], "log(TPCS: Residential)")))
plot(d2ly1_12, ylab='', xlab='year', main=expression(paste(Delta,Delta[12], "log(TPCS: Residential)")))
Acf(ly, type="correlation", lag.max=maxlag, main=expression(paste("ACF for log(TPCS: Residential)")))
Acf(dly1, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta, "log(TPCS: Residential)")))
Acf(dly12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta[12], "log(TPCS: Residential)")))
Acf(d2ly1_12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta,Delta[12], "log(TPCS: Residential)")))
Acf(ly, type="partial", lag.max=maxlag, main=expression(paste("PACF for log(TPCS: Residential)")))
Acf(dly1, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta, "log(TPCS: Residential)")))
Acf(dly12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta[12], "log(TPCS: Residential)")))
Acf(d2ly1_12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta,Delta[12], "log(TPCS: Residential)")))
* Based on these results (the PACF in the second column, for the regularly differenced log series), we assume 12-period seasonality and consider the candidate models listed below; the PACF shows significant spikes around AR lags 3, 6, and 9. We do not use the first and third columns because their ACFs indicate a unit root, and the fourth column is not used because its time series plot does not look stationary.
\[ARIMA(3,1,0)(1,0,0)_{12},\quad ARIMA(3,1,0)(2,0,0)_{12},\quad ARIMA(3,1,0)(3,0,0)_{12}\]
\[ARIMA(6,1,0)(1,0,0)_{12},\quad ARIMA(6,1,0)(2,0,0)_{12},\quad ARIMA(6,1,0)(3,0,0)_{12}\]
\[ARIMA(9,1,0)(1,0,0)_{12},\quad ARIMA(9,1,0)(2,0,0)_{12},\quad ARIMA(9,1,0)(3,0,0)_{12}\]
m1 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(1,0,0), period = 12))
m2 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(2,0,0), period = 12))
m3 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(3,0,0), period = 12))
m4 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(1,0,0), period = 12))
m5 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(2,0,0), period = 12))
m6 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(3,0,0), period = 12))
m7 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(1,0,0), period = 12))
m8 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,0,0), period = 12))
m9 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12))
m1
## Series: ly
## ARIMA(3,1,0)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 sar1
## 0.4469 0.3149 -0.1304 0.9705
## s.e. 0.0625 0.0659 0.0631 0.0098
##
## sigma^2 estimated as 0.0003338: log likelihood=633.29
## AIC=-1256.57 AICc=-1256.33 BIC=-1238.94
m2
## Series: ly
## ARIMA(3,1,0)(2,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2
## 0.4248 0.3305 -0.1286 0.8934 0.0796
## s.e. 0.0652 0.0663 0.0632 0.0668 0.0681
##
## sigma^2 estimated as 0.000333: log likelihood=633.96
## AIC=-1255.92 AICc=-1255.58 BIC=-1234.77
m3
## Series: ly
## ARIMA(3,1,0)(3,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 sar1 sar2 sar3
## 0.4474 0.3196 -0.1343 0.9148 0.2180 -0.1652
## s.e. 0.0652 0.0671 0.0631 0.0649 0.0867 0.0667
##
## sigma^2 estimated as 0.0003255: log likelihood=636.97
## AIC=-1259.95 AICc=-1259.49 BIC=-1235.27
m4
## Series: ly
## ARIMA(6,1,0)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 sar1
## 0.4403 0.3238 -0.1089 -0.0154 -0.0075 -0.0345 0.9704
## s.e. 0.0630 0.0693 0.0730 0.0723 0.0690 0.0631 0.0099
##
## sigma^2 estimated as 0.0003369: log likelihood=633.67
## AIC=-1251.35 AICc=-1250.75 BIC=-1223.15
m5
## Series: ly
## ARIMA(6,1,0)(2,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 sar1 sar2
## 0.4221 0.3343 -0.1125 -0.0029 -0.0127 -0.0295 0.8998 0.0730
## s.e. 0.0653 0.0694 0.0728 0.0730 0.0687 0.0633 0.0681 0.0696
##
## sigma^2 estimated as 0.0003365: log likelihood=634.22
## AIC=-1250.44 AICc=-1249.69 BIC=-1218.71
m6
## Series: ly
## ARIMA(6,1,0)(3,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 sar1 sar2
## 0.4401 0.331 -0.0961 -0.0327 -0.0205 -0.0379 0.9272 0.2233
## s.e. 0.0650 0.070 0.0735 0.0739 0.0695 0.0634 0.0656 0.0876
## sar3
## -0.1839
## s.e. 0.0682
##
## sigma^2 estimated as 0.0003272: log likelihood=637.77
## AIC=-1255.54 AICc=-1254.62 BIC=-1220.28
m7
## Series: ly
## ARIMA(9,1,0)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4704 0.282 -0.1059 -0.0256 0.0536 -0.0065 -0.1653 0.1691
## s.e. 0.0624 0.068 0.0705 0.0701 0.0702 0.0693 0.0702 0.0680
## ar9 sar1
## -0.1576 0.9751
## s.e. 0.0623 0.0087
##
## sigma^2 estimated as 0.0003175: log likelihood=641.66
## AIC=-1261.32 AICc=-1260.22 BIC=-1222.54
m8
## Series: ly
## ARIMA(9,1,0)(2,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4592 0.2897 -0.1074 -0.0186 0.0514 -0.0072 -0.1623 0.1676
## s.e. 0.0646 0.0687 0.0704 0.0708 0.0702 0.0692 0.0702 0.0678
## ar9 sar1 sar2
## -0.1537 0.9300 0.0463
## s.e. 0.0626 0.0685 0.0697
##
## sigma^2 estimated as 0.0003181: log likelihood=641.88
## AIC=-1259.76 AICc=-1258.45 BIC=-1217.46
m9
## Series: ly
## ARIMA(9,1,0)(3,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4728 0.2888 -0.0975 -0.0425 0.0428 -0.0068 -0.1831 0.1657
## s.e. 0.0645 0.0693 0.0709 0.0714 0.0708 0.0696 0.0710 0.0684
## ar9 sar1 sar2 sar3
## -0.1297 0.9498 0.1997 -0.1785
## s.e. 0.0634 0.0655 0.0897 0.0689
##
## sigma^2 estimated as 0.0003102: log likelihood=645.16
## AIC=-1264.31 AICc=-1262.78 BIC=-1218.48
library(lmtest)
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4469279 0.0624528 7.1563 8.291e-13 ***
## ar2 0.3149367 0.0659130 4.7781 1.770e-06 ***
## ar3 -0.1303912 0.0630687 -2.0674 0.03869 *
## sar1 0.9704849 0.0097798 99.2337 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.424813 0.065192 6.5163 7.206e-11 ***
## ar2 0.330488 0.066270 4.9870 6.132e-07 ***
## ar3 -0.128631 0.063165 -2.0364 0.04171 *
## sar1 0.893412 0.066792 13.3760 < 2.2e-16 ***
## sar2 0.079569 0.068117 1.1681 0.24276
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m3)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.447409 0.065190 6.8632 6.735e-12 ***
## ar2 0.319623 0.067116 4.7623 1.914e-06 ***
## ar3 -0.134264 0.063126 -2.1269 0.03343 *
## sar1 0.914789 0.064910 14.0932 < 2.2e-16 ***
## sar2 0.218006 0.086664 2.5155 0.01189 *
## sar3 -0.165155 0.066671 -2.4772 0.01324 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m4)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4402846 0.0630097 6.9876 2.797e-12 ***
## ar2 0.3237536 0.0692747 4.6735 2.961e-06 ***
## ar3 -0.1089202 0.0730093 -1.4919 0.1357
## ar4 -0.0153860 0.0722843 -0.2129 0.8314
## ar5 -0.0074968 0.0689623 -0.1087 0.9134
## ar6 -0.0344728 0.0630853 -0.5464 0.5848
## sar1 0.9704100 0.0098721 98.2986 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m5)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4220792 0.0652767 6.4660 1.006e-10 ***
## ar2 0.3342769 0.0694220 4.8151 1.471e-06 ***
## ar3 -0.1124978 0.0727596 -1.5462 0.1221
## ar4 -0.0029161 0.0729756 -0.0400 0.9681
## ar5 -0.0127194 0.0687297 -0.1851 0.8532
## ar6 -0.0295185 0.0632769 -0.4665 0.6409
## sar1 0.8997814 0.0681264 13.2075 < 2.2e-16 ***
## sar2 0.0730367 0.0696164 1.0491 0.2941
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m6)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.440142 0.065037 6.7676 1.309e-11 ***
## ar2 0.331043 0.069978 4.7307 2.238e-06 ***
## ar3 -0.096061 0.073522 -1.3066 0.191363
## ar4 -0.032680 0.073857 -0.4425 0.658149
## ar5 -0.020523 0.069535 -0.2951 0.767882
## ar6 -0.037888 0.063354 -0.5980 0.549816
## sar1 0.927166 0.065628 14.1277 < 2.2e-16 ***
## sar2 0.223257 0.087626 2.5478 0.010840 *
## sar3 -0.183919 0.068235 -2.6954 0.007031 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m7)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4703773 0.0623996 7.5382 4.767e-14 ***
## ar2 0.2820121 0.0680209 4.1460 3.384e-05 ***
## ar3 -0.1058885 0.0705028 -1.5019 0.13312
## ar4 -0.0256217 0.0701460 -0.3653 0.71492
## ar5 0.0536097 0.0701798 0.7639 0.44493
## ar6 -0.0065123 0.0692519 -0.0940 0.92508
## ar7 -0.1652794 0.0701934 -2.3546 0.01854 *
## ar8 0.1691307 0.0680060 2.4870 0.01288 *
## ar9 -0.1575900 0.0623307 -2.5283 0.01146 *
## sar1 0.9751227 0.0087154 111.8848 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m8)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4591894 0.0646098 7.1071 1.185e-12 ***
## ar2 0.2897294 0.0686781 4.2187 2.458e-05 ***
## ar3 -0.1073921 0.0704296 -1.5248 0.12731
## ar4 -0.0185966 0.0708335 -0.2625 0.79291
## ar5 0.0513942 0.0701596 0.7325 0.46384
## ar6 -0.0072167 0.0691903 -0.1043 0.91693
## ar7 -0.1622641 0.0702475 -2.3099 0.02089 *
## ar8 0.1676495 0.0678018 2.4726 0.01341 *
## ar9 -0.1537284 0.0626132 -2.4552 0.01408 *
## sar1 0.9300006 0.0685434 13.5681 < 2.2e-16 ***
## sar2 0.0462903 0.0697002 0.6641 0.50660
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(m9)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.4728226 0.0644808 7.3328 2.255e-13 ***
## ar2 0.2888245 0.0693256 4.1662 3.097e-05 ***
## ar3 -0.0975103 0.0709124 -1.3751 0.169106
## ar4 -0.0424683 0.0714265 -0.5946 0.552128
## ar5 0.0428145 0.0707961 0.6048 0.545340
## ar6 -0.0067584 0.0695897 -0.0971 0.922633
## ar7 -0.1830725 0.0709697 -2.5796 0.009892 **
## ar8 0.1656991 0.0683534 2.4242 0.015344 *
## ar9 -0.1297447 0.0633500 -2.0481 0.040554 *
## sar1 0.9497588 0.0655228 14.4951 < 2.2e-16 ***
## sar2 0.1996688 0.0897112 2.2257 0.026035 *
## sar3 -0.1785334 0.0688674 -2.5924 0.009530 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Restricted version of m9: entries of 'fixed' set to 0 constrain that coefficient to zero,
# while NA leaves it free to be estimated
m9.fixed <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12),
                  fixed = c(NA, 0, NA, NA, 0, 0, NA, 0, 0, NA, NA, NA))
m9.fixed
## Series: ly
## ARIMA(9,1,0)(3,0,0)[12]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8 ar9 sar1
## 0.5636 0 -0.0060 0.0577 0 0 -0.1648 0 0 0.9855
## s.e. 0.0601 0 0.0696 0.0667 0 0 0.0540 0 0 0.0648
## sar2 sar3
## 0.1858 -0.2037
## s.e. 0.0898 0.0677
##
## sigma^2 estimated as 0.000348: log likelihood=630.93
## AIC=-1245.86 AICc=-1245.27 BIC=-1217.66
The results indicate that model 9, \[ARIMA(9,1,0)(3,0,0)_{12}\], has the lowest AIC of all the candidates, followed by models 7 and 3, while model 1 has the lowest BIC. The restricted model m9.fixed fits worse than m9 on both criteria.
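For easier comparison, the information criteria can be collected into a single table. This is a small helper sketch using the model objects fitted above (assuming, consistent with the printed output, that each object stores aic, aicc, and bic components as forecast::Arima fits do):
# Gather AIC, AICc, and BIC for every candidate model and sort by AIC (lowest first)
models <- list(m1 = m1, m2 = m2, m3 = m3, m4 = m4, m5 = m5,
               m6 = m6, m7 = m7, m8 = m8, m9 = m9, m9.fixed = m9.fixed)
ic <- t(sapply(models, function(m) c(AIC = m$aic, AICc = m$aicc, BIC = m$bic)))
round(ic[order(ic[, "AIC"]), ], 2)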
Now, we examine the adequacy of the models using residual diagnostics.
tsdiag(m1, gof.lag = 60)
tsdiag(m3, gof.lag = 60)
tsdiag(m9, gof.lag = 60)
tsdiag(m9.fixed, gof.lag = 60)
tsdiag(m1, gof.lag = 36)
* However, according to the residual diagnostics, the Ljung-Box p-values for m9.fixed are small, indicating autocorrelation remaining in its residuals. In contrast, m9 shows large (non-significant) Ljung-Box p-values across many lags and no significant spikes in the residual ACF. We therefore conclude that m9 (\[ARIMA(9,1,0)(3,0,0)_{12}\]), which also has the lowest AIC among the candidates, is the best model.
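The Ljung-Box statistics shown in the tsdiag panels can also be computed directly. A minimal sketch for m9 and m9.fixed; the lag choices and the fitdf adjustments (12 = 9 regular + 3 seasonal AR coefficients for m9, 7 freely estimated coefficients for m9.fixed) are our own assumptions, and tsdiag itself does not adjust the degrees of freedom:
# Ljung-Box p-values for the residuals at a few lags, adjusting df for estimated coefficients
sapply(c(24, 36, 48), function(L)
  Box.test(residuals(m9), lag = L, type = "Ljung-Box", fitdf = 12)$p.value)
# Same check for the restricted model (7 freely estimated coefficients)
sapply(c(24, 36, 48), function(L)
  Box.test(residuals(m9.fixed), lag = L, type = "Ljung-Box", fitdf = 7)$p.value)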
# Hold-out period: everything after December 2013
end <- 2013 + (11/12)
forecasting.length <- window(yall, start = end + (1/12))
# Multistep forecast over the whole hold-out period
m.f.h <- forecast(m9.fixed, length(forecasting.length))
# Set-up for the recursive one-month-ahead forecasts over the same period
fstM <- 1993
lstM <- 2013 + (11/12)
y2 <- window(yall, start = lstM + 1/12)
# Recursive (expanding-window) one-month-ahead forecasts:
# re-estimate the model each month and forecast one step ahead
m.f.rol <- list()
for (i in 1:(length(y2) + 1)) {
  yy <- window(yall, start = fstM, end = lstM + (i - 1)/12)
  m.updt <- Arima(log(yy), order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12))
  m.updt.f.1 <- forecast(m.updt, 1)
  m.f.rol$mean <- rbind(m.f.rol$mean, as.zoo(m.updt.f.1$mean))
  m.f.rol$lower <- rbind(m.f.rol$lower, m.updt.f.1$lower)
  m.f.rol$upper <- rbind(m.f.rol$upper, m.updt.f.1$upper)
}
# Assemble the pieces into a forecast-like object so that plot() can be used on it
m.f.rol$mean <- as.ts(m.f.rol$mean)
m.f.rol$level <- m.updt.f.1$level
m.f.rol$x <- window(m.updt.f.1$x, end = lstM)
class(m.f.rol) <- class(m.f.h)
par(mfrow=c(2,1))
plot(m.f.h, xlim=c(2012,2017), main="Multistep Ahead Forecasts")
lines(m.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(log(yall), pch=16, lty="dashed")
plot(m.f.rol, xlim=c(2012,2017), main = "1-step Ahead Recursive Forecasts")
lines(m.f.rol$mean, type="p", pch=16, lty="dashed", col="red")
lines(log(yall), pch=16, lty="dashed")
par(mfrow=c(2,1))
plot(log(y2) - m.f.h$mean, main="Forecast Error: Multistep Ahead Forecasts")
abline(h=0, lty="dashed")
plot(log(y2)-m.f.rol$mean, main="Forecast Error: 1-Month Ahead Recursive Forecasts")
abline(h=0, lty="dashed")
accuracy(m.f.h$mean, log(y2))
## ME RMSE MAE MPE MAPE ACF1
## Test set -0.1997252 0.2317089 0.1997252 -1.902745 1.902745 0.9219684
## Theil's U
## Test set 2.894997
accuracy(m.f.rol$mean, log(y2))
## ME RMSE MAE MPE MAPE
## Test set -0.001849487 0.02179941 0.01644549 -0.01785468 0.1571361
## ACF1 Theil's U
## Test set -0.1046747 0.2696915
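Since the model is estimated in logs, the same comparison can also be made on the original (level) scale of the series by exponentiating the point forecasts. A quick sketch, using a plain exp() back-transformation with no bias correction:
# Forecast accuracy of both schemes on the level scale
accuracy(exp(m.f.h$mean), y2)
accuracy(exp(m.f.rol$mean), y2)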
From the above analysis, we conclude that \[ARIMA(9,1,0)(3,0,0)_{12}\] is the best model for Total Private Residential Construction Spending of the United States. We also find that the one-month-ahead recursive forecasts perform much better than the multistep forecast (RMSE of about 0.022 versus 0.232 on the log scale). This is expected: each recursive forecast incorporates the most recent observations, whereas the multistep forecast relies only on information available at the end of 2013.