INTRODUCTION

Obtain the monthly data for Total Private Residential Construction Spending FRED/PRRESCON. Follow the Box-Jenkins methodology to build a time series model based on the data until the end of 2013. Use the estimated model to produce and plot a multistep forecast and a sequence of one month ahead forecasts until the end of 2016. Comment on the accuracy of the two forecasts.

Import data

library(Quandl)
Quandl.api_key("KmxULt3z1Vz1neVxGioB")
yall <- Quandl("FRED/PRRESCON", type = "ts")
library(forecast)

Split data

We use the data until the end of 2013

startM = 1993
endM= 2013 + (11/12)
y <- window(yall, end= endM)
maxlag <- 60

Box Jenkins Methodology

We first look at the log transformation of the data. We can see that the Logged data looks slightly stationary. Therefore, we use logged data.

ly <- log(y)
par(mfrow=c(2,1))
plot(y)
plot(ly)

Now, we use Box-Jenkins Methodology to find good fitted ARIMA model.

dly1 <-diff(ly)
dly12 <- diff(ly, lag=12)
d2ly1_12 <- diff(diff(ly), lag=12)

par(mfrow=c(3,4))
plot(ly, ylab='', xlab='year', main='log(TPCS: Residential)')
plot(dly1, ylab='', xlab='year', main=expression(paste(Delta, "log(TPCS: Residential)")))
plot(dly12, ylab='', xlab='year', main=expression(paste(Delta[12], "log(TPCS: Residential)")))
plot(d2ly1_12, ylab='', xlab='year', main=expression(paste(Delta,Delta[12], "log(TPCS: Residential)")))

Acf(ly, type="correlation", lag.max=maxlag, main=expression(paste("ACF for log(TPCS: Residential)")))
Acf(dly1, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta, "log(TPCS: Residential)")))
Acf(dly12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta[12], "log(TPCS: Residential)")))
Acf(d2ly1_12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta,Delta[12], "log(TPCS: Residential)")))

Acf(ly, type="partial", lag.max=maxlag, main=expression(paste("PACF for log(TPCS: Residential)")))
Acf(dly1, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta, "log(TPCS: Residential)")))
Acf(dly12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta[12], "log(TPCS: Residential)")))
Acf(d2ly1_12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta,Delta[12], "log(TPCS: Residential)")))

*By using the results(PACF in second column), we can assume that there is 12-period seasonality and some possible models in the following(AR = 3,6,9 which is significant). We don’t use first column and third column because ACF show that an unit root exists. Forth column is not used because time series plot is not stationary.

\[ARIMA(3,1,0)(1,0,0)_{12}\] \[ARIMA(3,1,0)(2,0,0)_{12}\] \[ARIMA(3,1,0)(3,0,0)_{12}\] \[ARIMA(6,1,0)(1,0,0)_{12}\] \[ARIMA(6,1,0)(2,0,0)_{12}\] \[ARIMA(6,1,0)(3,0,0)_{12}\] \[ARIMA(9,1,0)(1,0,0)_{12}\] \[ARIMA(9,1,0)(2,0,0)_{12}\] \[ARIMA(9,1,0)(3,0,0)_{12}\]

Now, we examine each ARIMA model.

m1 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(1,0,0), period = 12))
m2 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(2,0,0), period = 12))
m3 <- Arima(ly, order = c(3,1,0), seasonal = list(order = c(3,0,0), period = 12))
m4 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(1,0,0), period = 12))
m5 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(2,0,0), period = 12))
m6 <- Arima(ly, order = c(6,1,0), seasonal = list(order = c(3,0,0), period = 12))
m7 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(1,0,0), period = 12))
m8 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,0,0), period = 12))
m9 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12))
m1

## Series: ly 
## ARIMA(3,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3    sar1
##       0.4469  0.3149  -0.1304  0.9705
## s.e.  0.0625  0.0659   0.0631  0.0098
## 
## sigma^2 estimated as 0.0003338:  log likelihood=633.29
## AIC=-1256.57   AICc=-1256.33   BIC=-1238.94

m2

## Series: ly 
## ARIMA(3,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3    sar1    sar2
##       0.4248  0.3305  -0.1286  0.8934  0.0796
## s.e.  0.0652  0.0663   0.0632  0.0668  0.0681
## 
## sigma^2 estimated as 0.000333:  log likelihood=633.96
## AIC=-1255.92   AICc=-1255.58   BIC=-1234.77

m3

## Series: ly 
## ARIMA(3,1,0)(3,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3    sar1    sar2     sar3
##       0.4474  0.3196  -0.1343  0.9148  0.2180  -0.1652
## s.e.  0.0652  0.0671   0.0631  0.0649  0.0867   0.0667
## 
## sigma^2 estimated as 0.0003255:  log likelihood=636.97
## AIC=-1259.95   AICc=-1259.49   BIC=-1235.27

m4

## Series: ly 
## ARIMA(6,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3      ar4      ar5      ar6    sar1
##       0.4403  0.3238  -0.1089  -0.0154  -0.0075  -0.0345  0.9704
## s.e.  0.0630  0.0693   0.0730   0.0723   0.0690   0.0631  0.0099
## 
## sigma^2 estimated as 0.0003369:  log likelihood=633.67
## AIC=-1251.35   AICc=-1250.75   BIC=-1223.15

m5

## Series: ly 
## ARIMA(6,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3      ar4      ar5      ar6    sar1    sar2
##       0.4221  0.3343  -0.1125  -0.0029  -0.0127  -0.0295  0.8998  0.0730
## s.e.  0.0653  0.0694   0.0728   0.0730   0.0687   0.0633  0.0681  0.0696
## 
## sigma^2 estimated as 0.0003365:  log likelihood=634.22
## AIC=-1250.44   AICc=-1249.69   BIC=-1218.71

m6

## Series: ly 
## ARIMA(6,1,0)(3,0,0)[12]                    
## 
## Coefficients:
##          ar1    ar2      ar3      ar4      ar5      ar6    sar1    sar2
##       0.4401  0.331  -0.0961  -0.0327  -0.0205  -0.0379  0.9272  0.2233
## s.e.  0.0650  0.070   0.0735   0.0739   0.0695   0.0634  0.0656  0.0876
##          sar3
##       -0.1839
## s.e.   0.0682
## 
## sigma^2 estimated as 0.0003272:  log likelihood=637.77
## AIC=-1255.54   AICc=-1254.62   BIC=-1220.28

m7

## Series: ly 
## ARIMA(9,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##          ar1    ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4704  0.282  -0.1059  -0.0256  0.0536  -0.0065  -0.1653  0.1691
## s.e.  0.0624  0.068   0.0705   0.0701  0.0702   0.0693   0.0702  0.0680
##           ar9    sar1
##       -0.1576  0.9751
## s.e.   0.0623  0.0087
## 
## sigma^2 estimated as 0.0003175:  log likelihood=641.66
## AIC=-1261.32   AICc=-1260.22   BIC=-1222.54

m8

## Series: ly 
## ARIMA(9,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4592  0.2897  -0.1074  -0.0186  0.0514  -0.0072  -0.1623  0.1676
## s.e.  0.0646  0.0687   0.0704   0.0708  0.0702   0.0692   0.0702  0.0678
##           ar9    sar1    sar2
##       -0.1537  0.9300  0.0463
## s.e.   0.0626  0.0685  0.0697
## 
## sigma^2 estimated as 0.0003181:  log likelihood=641.88
## AIC=-1259.76   AICc=-1258.45   BIC=-1217.46

m9

## Series: ly 
## ARIMA(9,1,0)(3,0,0)[12]                    
## 
## Coefficients:
##          ar1     ar2      ar3      ar4     ar5      ar6      ar7     ar8
##       0.4728  0.2888  -0.0975  -0.0425  0.0428  -0.0068  -0.1831  0.1657
## s.e.  0.0645  0.0693   0.0709   0.0714  0.0708   0.0696   0.0710  0.0684
##           ar9    sar1    sar2     sar3
##       -0.1297  0.9498  0.1997  -0.1785
## s.e.   0.0634  0.0655  0.0897   0.0689
## 
## sigma^2 estimated as 0.0003102:  log likelihood=645.16
## AIC=-1264.31   AICc=-1262.78   BIC=-1218.48

According to AIC, model 9(ARIMA(9,1,0)(3,0,0)[12]) is the best model. Model 7,3 also have smaller AIC.

According to BIC, model 1(ARIMA(3,1,0)(1,0,0)[12]) is the best model. Model 3,2 also have smaller BIC.

library(lmtest)
coeftest(m1)

## 
## z test of coefficients:
## 
##        Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.4469279  0.0624528  7.1563 8.291e-13 ***
## ar2   0.3149367  0.0659130  4.7781 1.770e-06 ***
## ar3  -0.1303912  0.0630687 -2.0674   0.03869 *  
## sar1  0.9704849  0.0097798 99.2337 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m2)

## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.424813   0.065192  6.5163 7.206e-11 ***
## ar2   0.330488   0.066270  4.9870 6.132e-07 ***
## ar3  -0.128631   0.063165 -2.0364   0.04171 *  
## sar1  0.893412   0.066792 13.3760 < 2.2e-16 ***
## sar2  0.079569   0.068117  1.1681   0.24276    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m3)

## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.447409   0.065190  6.8632 6.735e-12 ***
## ar2   0.319623   0.067116  4.7623 1.914e-06 ***
## ar3  -0.134264   0.063126 -2.1269   0.03343 *  
## sar1  0.914789   0.064910 14.0932 < 2.2e-16 ***
## sar2  0.218006   0.086664  2.5155   0.01189 *  
## sar3 -0.165155   0.066671 -2.4772   0.01324 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m4)

## 
## z test of coefficients:
## 
##        Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.4402846  0.0630097  6.9876 2.797e-12 ***
## ar2   0.3237536  0.0692747  4.6735 2.961e-06 ***
## ar3  -0.1089202  0.0730093 -1.4919    0.1357    
## ar4  -0.0153860  0.0722843 -0.2129    0.8314    
## ar5  -0.0074968  0.0689623 -0.1087    0.9134    
## ar6  -0.0344728  0.0630853 -0.5464    0.5848    
## sar1  0.9704100  0.0098721 98.2986 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m5)

## 
## z test of coefficients:
## 
##        Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.4220792  0.0652767  6.4660 1.006e-10 ***
## ar2   0.3342769  0.0694220  4.8151 1.471e-06 ***
## ar3  -0.1124978  0.0727596 -1.5462    0.1221    
## ar4  -0.0029161  0.0729756 -0.0400    0.9681    
## ar5  -0.0127194  0.0687297 -0.1851    0.8532    
## ar6  -0.0295185  0.0632769 -0.4665    0.6409    
## sar1  0.8997814  0.0681264 13.2075 < 2.2e-16 ***
## sar2  0.0730367  0.0696164  1.0491    0.2941    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m6)

## 
## z test of coefficients:
## 
##       Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.440142   0.065037  6.7676 1.309e-11 ***
## ar2   0.331043   0.069978  4.7307 2.238e-06 ***
## ar3  -0.096061   0.073522 -1.3066  0.191363    
## ar4  -0.032680   0.073857 -0.4425  0.658149    
## ar5  -0.020523   0.069535 -0.2951  0.767882    
## ar6  -0.037888   0.063354 -0.5980  0.549816    
## sar1  0.927166   0.065628 14.1277 < 2.2e-16 ***
## sar2  0.223257   0.087626  2.5478  0.010840 *  
## sar3 -0.183919   0.068235 -2.6954  0.007031 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m7)

## 
## z test of coefficients:
## 
##        Estimate Std. Error  z value  Pr(>|z|)    
## ar1   0.4703773  0.0623996   7.5382 4.767e-14 ***
## ar2   0.2820121  0.0680209   4.1460 3.384e-05 ***
## ar3  -0.1058885  0.0705028  -1.5019   0.13312    
## ar4  -0.0256217  0.0701460  -0.3653   0.71492    
## ar5   0.0536097  0.0701798   0.7639   0.44493    
## ar6  -0.0065123  0.0692519  -0.0940   0.92508    
## ar7  -0.1652794  0.0701934  -2.3546   0.01854 *  
## ar8   0.1691307  0.0680060   2.4870   0.01288 *  
## ar9  -0.1575900  0.0623307  -2.5283   0.01146 *  
## sar1  0.9751227  0.0087154 111.8848 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m8)

## 
## z test of coefficients:
## 
##        Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.4591894  0.0646098  7.1071 1.185e-12 ***
## ar2   0.2897294  0.0686781  4.2187 2.458e-05 ***
## ar3  -0.1073921  0.0704296 -1.5248   0.12731    
## ar4  -0.0185966  0.0708335 -0.2625   0.79291    
## ar5   0.0513942  0.0701596  0.7325   0.46384    
## ar6  -0.0072167  0.0691903 -0.1043   0.91693    
## ar7  -0.1622641  0.0702475 -2.3099   0.02089 *  
## ar8   0.1676495  0.0678018  2.4726   0.01341 *  
## ar9  -0.1537284  0.0626132 -2.4552   0.01408 *  
## sar1  0.9300006  0.0685434 13.5681 < 2.2e-16 ***
## sar2  0.0462903  0.0697002  0.6641   0.50660    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m9)

## 
## z test of coefficients:
## 
##        Estimate Std. Error z value  Pr(>|z|)    
## ar1   0.4728226  0.0644808  7.3328 2.255e-13 ***
## ar2   0.2888245  0.0693256  4.1662 3.097e-05 ***
## ar3  -0.0975103  0.0709124 -1.3751  0.169106    
## ar4  -0.0424683  0.0714265 -0.5946  0.552128    
## ar5   0.0428145  0.0707961  0.6048  0.545340    
## ar6  -0.0067584  0.0695897 -0.0971  0.922633    
## ar7  -0.1830725  0.0709697 -2.5796  0.009892 ** 
## ar8   0.1656991  0.0683534  2.4242  0.015344 *  
## ar9  -0.1297447  0.0633500 -2.0481  0.040554 *  
## sar1  0.9497588  0.0655228 14.4951 < 2.2e-16 ***
## sar2  0.1996688  0.0897112  2.2257  0.026035 *  
## sar3 -0.1785334  0.0688674 -2.5924  0.009530 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

However, m9 has insignificant coefficients(ar3,ar4,ar5,ar6), m3 and m1 don’t has insignificant coefficients Therefore, we check restricted models.

m9.fixed <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12), fixed = c(NA, 0, NA, NA, 0, 0, NA, 0, 0, NA, NA, NA))

m9.fixed

## Series: ly 
## ARIMA(9,1,0)(3,0,0)[12]                    
## 
## Coefficients:
##          ar1  ar2      ar3     ar4  ar5  ar6      ar7  ar8  ar9    sar1
##       0.5636    0  -0.0060  0.0577    0    0  -0.1648    0    0  0.9855
## s.e.  0.0601    0   0.0696  0.0667    0    0   0.0540    0    0  0.0648
##         sar2     sar3
##       0.1858  -0.2037
## s.e.  0.0898   0.0677
## 
## sigma^2 estimated as 0.000348:  log likelihood=630.93
## AIC=-1245.86   AICc=-1245.27   BIC=-1217.66

The results indicate that the restricted model 9ARIMA(9,1,0)(3,0,0)_{12}) has the lowest AIC and the second lowest BIC. The model 1 and model 3 are following.
Now, we examine the adquacy of models.

tsdiag(m1, gof.lag = 60)

tsdiag(m3, gof.lag = 60)

tsdiag(m9, gof.lag = 60)

tsdiag(m9.fixed, gof.lag = 60)

tsdiag(m1, gof.lag = 36)

* However, accoring to the residuals, m9.fixed does not show significant p-values. On the other hand, m9 has highly significant p-value through a lot of lag with no significant lag on ACF. Therefore, we conclude that m9(\[ARIMA(9,1,0)(3,0,0)_{12}\]) which shows the second lowest AIC is the best model.

Forecasting

Now, we will forecast the data and examine the accuracy

Multistep Ahead Forecasting

end <- 2013 + (11/12)
forecasting.length <- window(yall, start = end + (1/12))
m.f.h <- forecast(m9.fixed, length(forecasting.length))

1 month Ahead Recursive Forecasting

fstM <- 1993
lstM <- 2013 + (11/12)
y2 <- window(yall, start=lstM + 1/12)
m.f.rol <- list() 
for(i in 1:(length(y2)+1))
{
  yy <-window(yall, start= fstM, end=lstM+(i-1)/12)
  m.updt <- Arima(log(yy), order = c(9,1,0), seasonal = list(order = c(3,0,0), period = 12))
  m.updt.f.1 <- forecast(m.updt, 1)
  m.f.rol$mean <- rbind(m.f.rol$mean, as.zoo(m.updt.f.1$mean))
  m.f.rol$lower <- rbind(m.f.rol$lower, m.updt.f.1$lower)
  m.f.rol$upper <- rbind(m.f.rol$upper, m.updt.f.1$upper)
}
m.f.rol$mean <- as.ts(m.f.rol$mean)
m.f.rol$level <- m.updt.f.1$level
m.f.rol$x <- window(m.updt.f.1$x, end=lstM)
class(m.f.rol) <- class(m.f.h)

Comparing two forecastings

par(mfrow=c(2,1))
plot(m.f.h, xlim=c(2012,2017), main="Multistep Ahead Forecasts")
lines(m.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(log(yall), pch=16, lty="dashed")
plot(m.f.rol, xlim=c(2012,2017), main = "1-step Ahead Recursive Forecasts")
lines(m.f.h$mean, type="p", pch=16, lty="dashed", col="red")
lines(log(yall),pch=16, lty="dashed")

par(mfrow=c(2,1))
plot(log(y2) - m.f.h$mean, main="Forecast Error: Multistep Ahead Forecasts")
abline(h=0, lty="dashed")
plot(log(y2)-m.f.rol$mean, main="Forecast Error: 1-Month Ahead Recursive Forecasts")
abline(h=0, lty="dashed")

accuracy(m.f.h$mean, log(y2))

##                  ME      RMSE       MAE       MPE     MAPE      ACF1
## Test set -0.1997252 0.2317089 0.1997252 -1.902745 1.902745 0.9219684
##          Theil's U
## Test set  2.894997

accuracy(m.f.rol$mean, log(y2))

##                    ME       RMSE        MAE         MPE      MAPE
## Test set -0.001849487 0.02179941 0.01644549 -0.01785468 0.1571361
##                ACF1 Theil's U
## Test set -0.1046747 0.2696915

According to forecasting plot(more narrow), erros plot(more stable), and errors table(significantly lower errors), we can see that 1-month Ahead Recursive Forecasting shows better performence than multistep Ahead Forecasting.

Conclusion

Using above analysis, we can conclude that \[ARIMA(9,1,0)(3,0,0)_{12}\] is the best model for the Total Private Residential Construction Spending of the United States. Additionally, we find that 1-month Ahead Recursive Forecasting works better performence than multistep Ahead Forecasting.

ECO 5316, Hw 5

Problem 1

Seonjun Kang