INTRODUCTION

Obtain the monthly data for Retail and Food Services Sales FRED/RSAFSNA. Use intervention analysis approach to incorporate explicitly the effect of 2008 recession on this time series: (1) Identify and estimate a seasonal ARIMA model for pre-intervention period up until June 2008 (2) Identify and estimate a transfer function model for the period up until December 2014, using July 2008 as the date of intervention (3) Use the estimated transfer function model to produce and plot a multistep forecast, and a sequence of one month ahead forecasts until the end of 2016. Comment on the accuracy of the two forecasts.

Import data

library(Quandl)
Quandl.api_key("KmxULt3z1Vz1neVxGioB")
yall <- Quandl("FRED/RSAFSNA", type = "ts")
library(forecast)
library(xts)

Split data

We use the data until the end of 2013

startM = 1992
endM= 2008 + (5/12)
y <- window(yall, end= endM)
maxlag <- 60

Pre-Intervention Model

Box Jenkins Methodology

We first look at the log transformation of the data. We can see that the Logged data looks slightly stationary. Therefore, we use logged data.

ly <- log(y)
par(mfrow=c(2,1))
plot(y)
plot(ly)

Now, we use Box-Jenkins Methodology to find good fitted ARIMA model.

dly1 <-diff(ly)
dly12 <- diff(ly, lag=12)
d2ly1_12 <- diff(diff(ly), lag=12)

par(mfrow=c(3,4))
plot(ly, ylab='', xlab='year', main='log(RFSS: Residential)')
plot(dly1, ylab='', xlab='year', main=expression(paste(Delta, "log(RFSS: Residential)")))
plot(dly12, ylab='', xlab='year', main=expression(paste(Delta[12], "log(RFSS: Residential)")))
plot(d2ly1_12, ylab='', xlab='year', main=expression(paste(Delta,Delta[12], "log(RFSS: Residential)")))

Acf(ly, type="correlation", lag.max=maxlag, main=expression(paste("ACF for log(RFSS: Residential)")))
Acf(dly1, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta, "log(RFSS: Residential)")))
Acf(dly12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta[12], "log(RFSS: Residential)")))
Acf(d2ly1_12, type="correlation", lag.max=maxlag, main=expression(paste("ACF for ",Delta,Delta[12], "log(RFSS: Residential)")))

Acf(ly, type="partial", lag.max=maxlag, main=expression(paste("PACF for log(RFSS: Residential)")))
Acf(dly1, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta, "log(RFSS: Residential)")))
Acf(dly12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta[12], "log(RFSS: Residential)")))
Acf(d2ly1_12, type="partial", lag.max=maxlag, main=expression(paste("PACF for ",Delta,Delta[12], "log(RFSS: Residential)")))

*By using the results(PACF in second column), we can assume that there is 12-period seasonality and some possible models in the following(AR = 1,2,11 which is significant; MA = 1 which is significant). We don’t use first column and third column because ACF show that an unit root exists. Forth column is not used because time series plot is not stationary.

\[ARIMA(1,1,0)(1,0,0)_{12}\] \[ARIMA(1,1,0)(1,1,0)_{12}\] \[ARIMA(1,1,0)(2,0,0)_{12}\] \[ARIMA(1,1,0)(2,1,0)_{12}\] \[ARIMA(1,1,1)(1,0,0)_{12}\] \[ARIMA(1,1,1)(2,0,0)_{12}\] \[ARIMA(1,1,1)(1,1,0)_{12}\] \[ARIMA(1,1,1)(2,1,0)_{12}\] \[ARIMA(4,1,0)(1,0,0)_{12}\] \[ARIMA(4,1,0)(1,1,0)_{12}\] \[ARIMA(4,1,0)(2,0,0)_{12}\] \[ARIMA(4,1,0)(2,1,0)_{12}\] \[ARIMA(4,1,1)(1,0,0)_{12}\] \[ARIMA(4,1,1)(1,1,0)_{12}\] \[ARIMA(4,1,1)(2,0,0)_{12}\] \[ARIMA(4,1,1)(2,1,0)_{12}\] \[ARIMA(9,1,0)(1,0,0)_{12}\] \[ARIMA(9,1,0)(1,1,0)_{12}\] \[ARIMA(9,1,0)(2,0,0)_{12}\] \[ARIMA(9,1,0)(2,1,0)_{12}\] \[ARIMA(9,1,1)(1,0,0)_{12}\] \[ARIMA(9,1,1)(1,1,0)_{12}\] \[ARIMA(9,1,1)(2,0,0)_{12}\] \[ARIMA(9,1,1)(2,1,0)_{12}\]

Now, we examine each ARIMA model.

m1 <- Arima(ly, order = c(1,1,0), seasonal = list(order = c(1,0,0), period = 12))
m2 <- Arima(ly, order = c(1,1,0), seasonal = list(order = c(2,0,0), period = 12))
m3 <- Arima(ly, order = c(1,1,0), seasonal = list(order = c(1,1,0), period = 12))
m4 <- Arima(ly, order = c(1,1,0), seasonal = list(order = c(2,1,0), period = 12))
m5 <- Arima(ly, order = c(1,1,1), seasonal = list(order = c(1,0,0), period = 12))
m6 <- Arima(ly, order = c(1,1,1), seasonal = list(order = c(2,0,0), period = 12))
m7 <- Arima(ly, order = c(1,1,1), seasonal = list(order = c(1,1,0), period = 12))
m8 <- Arima(ly, order = c(1,1,1), seasonal = list(order = c(2,1,0), period = 12))
m9 <- Arima(ly, order = c(4,1,0), seasonal = list(order = c(1,0,0), period = 12))
m10 <- Arima(ly, order = c(4,1,0), seasonal = list(order = c(2,0,0), period = 12))
m11 <- Arima(ly, order = c(4,1,0), seasonal = list(order = c(1,1,0), period = 12))
m12 <- Arima(ly, order = c(4,1,0), seasonal = list(order = c(2,1,0), period = 12))
m13 <- Arima(ly, order = c(4,1,1), seasonal = list(order = c(1,0,0), period = 12))
m14 <- Arima(ly, order = c(4,1,1), seasonal = list(order = c(2,0,0), period = 12))
m15 <- Arima(ly, order = c(4,1,1), seasonal = list(order = c(1,1,0), period = 12))
m16 <- Arima(ly, order = c(4,1,1), seasonal = list(order = c(2,1,0), period = 12))
m17 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(1,0,0), period = 12))
m18 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,0,0), period = 12))
m19 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(1,1,0), period = 12))
m20 <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,1,0), period = 12))
m21 <- Arima(ly, order = c(9,1,1), seasonal = list(order = c(1,0,0), period = 12))
m22 <- Arima(ly, order = c(9,1,1), seasonal = list(order = c(2,0,0), period = 12))
m23 <- Arima(ly, order = c(9,1,1), seasonal = list(order = c(1,1,0), period = 12))
m24 <- Arima(ly, order = c(9,1,1), seasonal = list(order = c(2,1,0), period = 12))
m1

## Series: ly 
## ARIMA(1,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1    sar1
##       -0.4744  0.9688
## s.e.   0.0627  0.0107
## 
## sigma^2 estimated as 0.0005385:  log likelihood=446.13
## AIC=-886.26   AICc=-886.14   BIC=-876.41

m2

## Series: ly 
## ARIMA(1,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1    sar1    sar2
##       -0.4872  0.8072  0.1675
## s.e.   0.0624  0.0733  0.0748
## 
## sigma^2 estimated as 0.0005253:  log likelihood=448.58
## AIC=-889.16   AICc=-888.95   BIC=-876.03

m3

## Series: ly 
## ARIMA(1,1,0)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1     sar1
##       -0.4951  -0.1863
## s.e.   0.0640   0.0742
## 
## sigma^2 estimated as 0.0005381:  log likelihood=436.22
## AIC=-866.44   AICc=-866.31   BIC=-856.78

m4

## Series: ly 
## ARIMA(1,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1     sar1     sar2
##       -0.5123  -0.2541  -0.2981
## s.e.   0.0639   0.0740   0.0730
## 
## sigma^2 estimated as 0.0004922:  log likelihood=443.99
## AIC=-879.98   AICc=-879.76   BIC=-867.1

m5

## Series: ly 
## ARIMA(1,1,1)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1     ma1    sar1
##       -0.0729  -0.711  0.9644
## s.e.   0.0899   0.059  0.0119
## 
## sigma^2 estimated as 0.0004348:  log likelihood=468.25
## AIC=-928.51   AICc=-928.3   BIC=-915.38

m6

## Series: ly 
## ARIMA(1,1,1)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ma1    sar1    sar2
##       -0.1251  -0.6885  0.7686  0.2039
## s.e.   0.0733   0.0539  0.0627  0.0644
## 
## sigma^2 estimated as 0.0004186:  log likelihood=471.79
## AIC=-933.58   AICc=-933.26   BIC=-917.16

m7

## Series: ly 
## ARIMA(1,1,1)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1      ma1     sar1
##       -0.1458  -0.6704  -0.2259
## s.e.   0.0928   0.0627   0.0748
## 
## sigma^2 estimated as 0.0004311:  log likelihood=457.32
## AIC=-906.64   AICc=-906.41   BIC=-893.75

m8

## Series: ly 
## ARIMA(1,1,1)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ma1     sar1     sar2
##       -0.1808  -0.6556  -0.3092  -0.2944
## s.e.   0.0951   0.0657   0.0767   0.0734
## 
## sigma^2 estimated as 0.0003958:  log likelihood=464.82
## AIC=-919.64   AICc=-919.31   BIC=-903.54

m9

## Series: ly 
## ARIMA(4,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4    sar1
##       -0.7275  -0.5889  -0.2332  -0.2219  0.9665
## s.e.   0.0695   0.0864   0.0871   0.0716  0.0114
## 
## sigma^2 estimated as 0.0004246:  log likelihood=471.26
## AIC=-930.52   AICc=-930.07   BIC=-910.82

m10

## Series: ly 
## ARIMA(4,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3     ar4    sar1    sar2
##       -0.7570  -0.6210  -0.1700  -0.216  0.6629  0.3160
## s.e.   0.0701   0.0894   0.0911   0.072  0.0731  0.0746
## 
## sigma^2 estimated as 0.0003859:  log likelihood=479.39
## AIC=-944.78   AICc=-944.19   BIC=-921.8

m11

## Series: ly 
## ARIMA(4,1,0)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     sar1
##       -0.7557  -0.6018  -0.1284  -0.1920  -0.3367
## s.e.   0.0728   0.0930   0.0940   0.0742   0.0736
## 
## sigma^2 estimated as 0.0003966:  log likelihood=465.7
## AIC=-919.41   AICc=-918.94   BIC=-900.09

m12

## Series: ly 
## ARIMA(4,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     sar1     sar2
##       -0.7990  -0.5907  -0.0601  -0.1369  -0.4853  -0.3294
## s.e.   0.0745   0.0957   0.0997   0.0770   0.0803   0.0738
## 
## sigma^2 estimated as 0.0003564:  log likelihood=474.83
## AIC=-935.66   AICc=-935.03   BIC=-913.12

m13

## Series: ly 
## ARIMA(4,1,1)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ma1    sar1
##       -0.2781  -0.2768  -0.0281  -0.2026  -0.4853  0.9662
## s.e.   0.1689   0.1392   0.1147   0.0788   0.1631  0.0115
## 
## sigma^2 estimated as 0.000417:  log likelihood=473.52
## AIC=-933.05   AICc=-932.46   BIC=-910.07

m14

## Series: ly 
## ARIMA(4,1,1)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3      ar4      ma1    sar1    sar2
##       -0.3582  -0.3296  0.0180  -0.2304  -0.4260  0.6699  0.3086
## s.e.   0.1801   0.1541  0.1245   0.0744   0.1763  0.0736  0.0751
## 
## sigma^2 estimated as 0.000381:  log likelihood=481.21
## AIC=-946.42   AICc=-945.66   BIC=-920.16

m15

## Series: ly 
## ARIMA(4,1,1)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3      ar4      ma1     sar1
##       -0.3436  -0.3022  0.0608  -0.2219  -0.4352  -0.3303
## s.e.   0.1895   0.1620  0.1295   0.0757   0.1854   0.0740
## 
## sigma^2 estimated as 0.0003919:  log likelihood=467.37
## AIC=-920.73   AICc=-920.1   BIC=-898.19

m16

## Series: ly 
## ARIMA(4,1,1)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3      ar4      ma1     sar1     sar2
##       -0.3548  -0.2512  0.1427  -0.1874  -0.4566  -0.4721  -0.3227
## s.e.   0.2195   0.1930  0.1485   0.0758   0.2148   0.0807   0.0740
## 
## sigma^2 estimated as 0.000354:  log likelihood=476.1
## AIC=-936.2   AICc=-935.38   BIC=-910.44

m17

## Series: ly 
## ARIMA(9,1,0)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5      ar6      ar7
##       -0.7313  -0.5904  -0.3548  -0.3677  -0.1614  -0.0467  -0.0432
## s.e.   0.0700   0.0881   0.0989   0.1027   0.1054   0.1020   0.0985
##          ar8     ar9    sar1
##       0.0052  0.2314  0.9703
## s.e.  0.0877  0.0707  0.0105
## 
## sigma^2 estimated as 0.0003851:  log likelihood=482.17
## AIC=-942.34   AICc=-940.91   BIC=-906.22

m18

## Series: ly 
## ARIMA(9,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5     ar6     ar7     ar8
##       -0.7352  -0.5826  -0.2610  -0.3252  -0.0685  0.0197  0.0321  0.0480
## s.e.   0.0686   0.0878   0.0996   0.1008   0.1045  0.1004  0.0994  0.0884
##          ar9    sar1    sar2
##       0.3034  0.5962  0.3866
## s.e.  0.0708  0.0724  0.0735
## 
## sigma^2 estimated as 0.000334:  log likelihood=494.26
## AIC=-964.52   AICc=-962.82   BIC=-925.12

m19

## Series: ly 
## ARIMA(9,1,0)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5     ar6     ar7     ar8
##       -0.7229  -0.5610  -0.2226  -0.3012  -0.0553  0.0209  0.0296  0.0312
## s.e.   0.0701   0.0906   0.1018   0.1035   0.1069  0.1037  0.1034  0.0929
##          ar9     sar1
##       0.3035  -0.4028
## s.e.  0.0735   0.0727
## 
## sigma^2 estimated as 0.0003447:  log likelihood=480.96
## AIC=-939.93   AICc=-938.4   BIC=-904.5

m20

## Series: ly 
## ARIMA(9,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5     ar6     ar7     ar8
##       -0.7506  -0.5369  -0.1355  -0.2392  -0.0263  0.0236  0.0421  0.0329
## s.e.   0.0700   0.0920   0.1052   0.1045   0.1068  0.1037  0.1048  0.0949
##          ar9     sar1     sar2
##       0.3087  -0.5686  -0.3380
## s.e.  0.0746   0.0804   0.0743
## 
## sigma^2 estimated as 0.0003089:  log likelihood=490.37
## AIC=-956.74   AICc=-954.93   BIC=-918.1

m21

## Series: ly 
## ARIMA(9,1,1)(1,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     ar5     ar6     ar7     ar8
##       -0.2979  -0.2583  -0.0854  -0.2065  0.0170  0.0600  0.0072  0.0812
## s.e.   0.1739   0.1405   0.1237   0.0936  0.0963  0.0807  0.0748  0.0727
##          ar9      ma1    sar1
##       0.2994  -0.4612  0.9732
## s.e.  0.0703   0.1745  0.0097
## 
## sigma^2 estimated as 0.0003814:  log likelihood=483.08
## AIC=-942.17   AICc=-940.47   BIC=-902.77

m22

## Series: ly 
## ARIMA(9,1,1)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     ar5     ar6     ar7     ar8
##       -0.4944  -0.3921  -0.1133  -0.2605  0.0189  0.0607  0.0399  0.0790
## s.e.   0.4044   0.3237   0.2574   0.1333  0.1608  0.1027  0.0833  0.0851
##          ar9      ma1    sar1    sar2
##       0.3419  -0.2653  0.6038  0.3801
## s.e.  0.0821   0.4438  0.0760  0.0765
## 
## sigma^2 estimated as 0.0003341:  log likelihood=494.36
## AIC=-962.73   AICc=-960.74   BIC=-920.05

m23

## Series: ly 
## ARIMA(9,1,1)(1,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3      ar4     ar5     ar6     ar7     ar8
##       -0.3490  -0.2662  0.0022  -0.2129  0.0716  0.0813  0.0416  0.0762
## s.e.   0.2125   0.1754  0.1442   0.0896  0.1002  0.0829  0.0793  0.0762
##          ar9      ma1     sar1
##       0.3671  -0.4171  -0.3904
## s.e.  0.0721   0.2288   0.0736
## 
## sigma^2 estimated as 0.0003441:  log likelihood=481.61
## AIC=-939.22   AICc=-937.4   BIC=-900.57

m24

## Series: ly 
## ARIMA(9,1,1)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4      ar5     ar6     ar7     ar8
##       -0.8288  -0.6009  -0.1801  -0.2506  -0.0487  0.0137  0.0390  0.0222
## s.e.   0.2944   0.2527   0.1983   0.1192   0.1416  0.1164  0.1121  0.1089
##          ar9     ma1     sar1     sar2
##       0.2907  0.0860  -0.5694  -0.3410
## s.e.  0.1041  0.3133   0.0805   0.0746
## 
## sigma^2 estimated as 0.0003105:  log likelihood=490.4
## AIC=-954.81   AICc=-952.68   BIC=-912.94

According to AIC, model 18(ARIMA(9,1,0)(2,0,0)[12]) is the best model. Model 22,20 also have smaller AIC.

According to BIC, model 18(ARIMA(9,1,0)(2,0,0)[12]) is the best model. Model 10,14,22 also have smaller BIC.

library(lmtest)

coeftest(m18)

## 
## z test of coefficients:
## 
##       Estimate Std. Error  z value  Pr(>|z|)    
## ar1  -0.735236   0.068584 -10.7202 < 2.2e-16 ***
## ar2  -0.582608   0.087829  -6.6335 3.279e-11 ***
## ar3  -0.261000   0.099636  -2.6195  0.008805 ** 
## ar4  -0.325168   0.100775  -3.2267  0.001252 ** 
## ar5  -0.068514   0.104461  -0.6559  0.511899    
## ar6   0.019703   0.100359   0.1963  0.844357    
## ar7   0.032088   0.099407   0.3228  0.746854    
## ar8   0.048025   0.088415   0.5432  0.587009    
## ar9   0.303402   0.070801   4.2853 1.825e-05 ***
## sar1  0.596184   0.072382   8.2366 < 2.2e-16 ***
## sar2  0.386640   0.073451   5.2639 1.410e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(m20)

## 
## z test of coefficients:
## 
##       Estimate Std. Error  z value  Pr(>|z|)    
## ar1  -0.750632   0.070044 -10.7166 < 2.2e-16 ***
## ar2  -0.536945   0.091957  -5.8391 5.249e-09 ***
## ar3  -0.135536   0.105166  -1.2888    0.1975    
## ar4  -0.239247   0.104538  -2.2886    0.0221 *  
## ar5  -0.026339   0.106795  -0.2466    0.8052    
## ar6   0.023575   0.103716   0.2273    0.8202    
## ar7   0.042079   0.104801   0.4015    0.6880    
## ar8   0.032901   0.094939   0.3466    0.7289    
## ar9   0.308701   0.074626   4.1366 3.524e-05 ***
## sar1 -0.568598   0.080353  -7.0763 1.481e-12 ***
## sar2 -0.338043   0.074287  -4.5505 5.351e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

However, m20 has insignificant coefficients(ar3,ar5,ar6, ar7, ar8), and m18 has insignificant coefficients(ar5,ar6, ar7, ar8), and m1. Therefore, we check restricted models.

m18.fixed <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,0,0), period = 12), fixed = c(NA, NA, NA, NA, 0, 0, 0, 0, NA, NA, NA))
m20.fixed <- Arima(ly, order = c(9,1,0), seasonal = list(order = c(2,1,0), period = 12), fixed = c(NA, NA, 0, NA, 0, 0, 0, 0, NA, NA, NA))

m18.fixed

## Series: ly 
## ARIMA(9,1,0)(2,0,0)[12]                    
## 
## Coefficients:
##           ar1      ar2      ar3      ar4  ar5  ar6  ar7  ar8     ar9
##       -0.7197  -0.5669  -0.2162  -0.2784    0    0    0    0  0.2975
## s.e.   0.0653   0.0834   0.0845   0.0673    0    0    0    0  0.0531
##         sar1    sar2
##       0.5876  0.3959
## s.e.  0.0707  0.0718
## 
## sigma^2 estimated as 0.0003359:  log likelihood=493.44
## AIC=-970.87   AICc=-970.11   BIC=-944.61

m20.fixed

## Series: ly 
## ARIMA(9,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2  ar3      ar4  ar5  ar6  ar7  ar8     ar9     sar1
##       -0.7136  -0.4466    0  -0.1537    0    0    0    0  0.2932  -0.5899
## s.e.   0.0638   0.0595    0   0.0537    0    0    0    0  0.0501   0.0744
##          sar2
##       -0.3580
## s.e.   0.0718
## 
## sigma^2 estimated as 0.0003117:  log likelihood=489.32
## AIC=-964.64   AICc=-964.01   BIC=-942.1

The results indicate that the restricted model 18 ARIMA(9,1,0)(2,0,0)_{12}) has the lowest AIC and BIC
Now, we examine the adquacy of models.

#detach(package:TSA, unload=TRUE)
tsdiag(m18, gof.lag = 60)

tsdiag(m20, gof.lag = 60)

tsdiag(m18.fixed, gof.lag = 60)

tsdiag(m20.fixed, gof.lag = 60)

tsdiag(m22, gof.lag = 60)

tsdiag(m14, gof.lag = 60)

tsdiag(m10, gof.lag = 60)

tsdiag(m24, gof.lag = 60)

* However, accoring to the residuals, m20 has highly significant p-value and with no significant lag on ACF. Therefore, we conclude that the m20(ARIMA(9,1,0)(2,1,0)[12]), is the best model.

Pre-Intervention Forecasting

#endM= 2008 + (5/12)
pp <- window(yall, start = endM + (1/12))
multi.f <- forecast(m20, length(pp))
multi.f.err <- as.ts(log(pp)) - multi.f$mean

plot forecast

par(mfrow=c(2,2), mar=c(2,4,2,2), cex=0.9)
plot(multi.f)
lines(index(yall), log(yall))
plot(multi.f.err, main="Multistep Forecast Error")
abline(h=0, lty="dashed")
Acf(multi.f.err, type="correlation", lag.max=48, ylab="ACF", main="")
Acf(multi.f.err, type="partial", lag.max=48, ylab="PACF", main="")

Identify and estimate a transfer function model

Now, we are going to identify and estimate a transfer function model.

endM2= 2014 + (11/12)
yy <- window(yall, end= 2014 + (11/12))

Create Pulse variable

p08 <- 1*(index(yy)==2008+(7-1)/12)

Estimate a trasfer function model

We create two model to check whether the purse of 2008 disappears gradually or not.

library(TSA)
tm <- arimax(log(yy), order = c(9,1,0), seasonal = list(order = c(2,1,0), period = 12), xtransf = data.frame(p08), transfer=list(c(1,0)))

tm2 <- arimax(log(yy), order=c(9,1,0), seasonal=list(order=c(2,1,0), period=12), xtransf=data.frame(p08,p08), transfer=list(c(0,0),c(1,0)))

Examine the adquacy of model

detach("package:TSA", unload=TRUE)
tsdiag(tm, gof.lag = 60)

two variables are significant in Ljung-Box statistic. That is, the performence of residual looks good.

tsdiag(tm2, gof.lag = 60)

library(lmtest)
coeftest(tm)

## 
## z test of coefficients:
## 
##          Estimate Std. Error z value  Pr(>|z|)    
## ar1     -0.538938   0.060487 -8.9100 < 2.2e-16 ***
## ar2     -0.251263   0.073996 -3.3957 0.0006847 ***
## ar3      0.181812   0.072221  2.5174 0.0118211 *  
## ar4      0.019934   0.072970  0.2732 0.7847184    
## ar5      0.121405   0.071683  1.6936 0.0903319 .  
## ar6      0.054558   0.072093  0.7568 0.4491863    
## ar7     -0.014093   0.071858 -0.1961 0.8445124    
## ar8      0.017884   0.070777  0.2527 0.8005150    
## ar9      0.273097   0.062124  4.3960 1.103e-05 ***
## sar1    -0.550754   0.064641 -8.5201 < 2.2e-16 ***
## sar2    -0.340668   0.061172 -5.5691 2.561e-08 ***
## p08-AR1  0.600358   0.152717  3.9312 8.453e-05 ***
## p08-MA0  0.028620   0.014604  1.9598 0.0500213 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

coeftest(tm2)

## 
## z test of coefficients:
## 
##             Estimate Std. Error z value  Pr(>|z|)    
## ar1       -0.5478995  0.0602537 -9.0932 < 2.2e-16 ***
## ar2       -0.2567102  0.0744539 -3.4479 0.0005649 ***
## ar3        0.1774832  0.0725167  2.4475 0.0143859 *  
## ar4        0.0220732  0.0723950  0.3049 0.7604426    
## ar5        0.1242871  0.0717564  1.7321 0.0832613 .  
## ar6        0.0628484  0.0726512  0.8651 0.3870006    
## ar7       -0.0030995  0.0729497 -0.0425 0.9661095    
## ar8        0.0290504  0.0719098  0.4040 0.6862248    
## ar9        0.2787411  0.0624270  4.4651 8.004e-06 ***
## sar1      -0.5552020  0.0644777 -8.6108 < 2.2e-16 ***
## sar2      -0.3392107  0.0611680 -5.5456 2.930e-08 ***
## p08-MA0   -0.0359644  0.0337073 -1.0670 0.2859895    
## p08.1-AR1  0.5040000  0.1641640  3.0701 0.0021399 ** 
## p08.1-MA0  0.0615226  0.0348982  1.7629 0.0779148 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In tm model, ar3, ar4 become insignificant. Other coefficients are significant.
In tm2 model, ar3, ar4 become insignificant. Other coefficients are significant except p08-MA0 and p08.1-MA0.
By using above results, we assume that the purse of 2008 disappears gradually.

Forecasting

Now, we will forecast the data and examine the accuracy

Multistep Ahead Forecasting

# construct forecast for the model with transfer function
library(mFilter)
hmax <- 60
p08x <- c(p08, rep(0,hmax))

tf3 <- tm$coef["p08-MA0"]*filter(p08x, filter=tm$coef["p08-AR1"], method='recursive',side=1)
m3x <- Arima(log(yy), order=c(9,1,0),  seasonal=list(order=c(2,1,0), period=12), xreg=tf3[1:length(yy)])
m3x

## Series: log(yy) 
## ARIMA(9,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3     ar4     ar5     ar6      ar7     ar8
##       -0.5389  -0.2512  0.1818  0.0199  0.1214  0.0546  -0.0141  0.0179
## s.e.   0.0605   0.0739  0.0721  0.0730  0.0717  0.0720   0.0719  0.0707
##          ar9     sar1     sar2  tf3[1:length(yy)]
##       0.2731  -0.5508  -0.3406             1.0001
## s.e.  0.0621   0.0647   0.0612             0.5101
## 
## sigma^2 estimated as 0.0003694:  log likelihood=671.86
## AIC=-1317.71   AICc=-1316.25   BIC=-1271.28

m3x.f.h <- forecast(m3x, h=hmax, xreg=tf3[(length(yy)+1):(length(yy)+hmax)])


#colnames(tf4) <- c("tfA","tfB")
#yy <- window(yall, end= 2013 + (11/12))
tf4 <- cbind(tm2$coef["p08-MA0"]*p08x, tm2$coef["p08.1-MA0"]*filter(p08x, filter=tm2$coef["p08.1-AR1"], method='recursive'),side=1)
m4x <- Arima(log(yy), order=c(9,1,0),  seasonal=list(order=c(2,1,0), period=12), xreg=tf4[1:length(yy)], )
m4x

## Series: log(yy) 
## ARIMA(9,1,0)(2,1,0)[12]                    
## 
## Coefficients:
##           ar1      ar2     ar3      ar4     ar5     ar6      ar7      ar8
##       -0.5186  -0.2201  0.1836  -0.0042  0.1034  0.0500  -0.0314  -0.0046
## s.e.   0.0598   0.0719  0.0712   0.0719  0.0702  0.0714   0.0699   0.0690
##          ar9     sar1    sar2  tf4[1:length(yy)]
##       0.2651  -0.5423  -0.343            -0.3397
## s.e.  0.0617   0.0641   0.061             0.3748
## 
## sigma^2 estimated as 0.0003733:  log likelihood=670.41
## AIC=-1314.83   AICc=-1313.37   BIC=-1268.39

m4x.f.h <- forecast(m4x, h=hmax, xreg=tf4[(length(yy)+1):(length(yy)+hmax)])

# plot forecast for the model with transfer function

plot(m3x.f.h, xlim=c(2013,2016))
lines(log(yall), xlim=c(2013,2020), col="black", lwd=2)

plot(m4x.f.h, xlim=c(2013,2016))
lines(log(yall), xlim=c(2013,2020), col="black", lwd=2)

### 1 month Ahead Recursive Forecasting

library(TSA)
#fstM <- 1992
lstM= 2014 + (11/12)

y1 <- window(yall, end=lstM)
y2 <- window(yall, start=lstM+1/12)
#P911 <- 1*(index(y)==2001+(9-1)/12)
#endM2= 2014 + (11/12)
p08 <- 1*(index(yy)==2008+(7-1)/12)

m4x.f.rec <- list()
for(i in 1:(length(y2)+1))
{
    y <- window(yall, end=endM2+(i-1)/12 )
    m4.updt <- arimax(log(y), order=c(9,1,0), seasonal=list(order=c(2,0,0), period=12),
                      xtransf=data.frame(p08,p08), transfer=list(c(0,0),c(1,0)))
    p08 <- c(p08, 0)
    tf4 <- cbind(tm2$coef["p08-MA0"]*p08,
                 tm2$coef["p08.1-MA0"]*filter(p08, filter=tm2$coef["p08.1-AR1"], method='recursive'))
    m4x.updt <- Arima(log(y),
                      order=c(9,1,0), seasonal=list(order=c(2,1,0), period=12), xreg=tf4[1:length(y),])
    m4x.updt.f.1 <- forecast(m4x.updt, h=1, xreg=t(as.matrix(tf4[(length(y)+1):(length(y)+1),])))
    m4x.f.rec$mean <- rbind(m4x.f.rec$mean, as.zoo(m4x.updt.f.1$mean))
    m4x.f.rec$lower <- rbind(m4x.f.rec$lower, m4x.updt.f.1$lower)
    m4x.f.rec$upper <- rbind(m4x.f.rec$upper, m4x.updt.f.1$upper)
}
m4x.f.rec$mean <- as.ts(m4x.f.rec$mean)
m4x.f.rec$level <- m4x.updt.f.1$level
m4x.f.rec$x <- window(m4x.updt.f.1$x, end=lstM)
class(m4x.f.rec) <- class(m4x.f.h)

Comparing two forecastings

plot(m4x.f.h, xlim=c(2014,2016), main="Multistep Ahead Forecasts")
lines(m4x.f.h$mean, type="p", pch=16, lty="dashed", col="blue")
lines(log(yall), xlim=c(2014,2016), pch=16, lty="dashed")

plot(m4x.f.rec, xlim=c(2014,2016), main = "1-step Ahead Recursive Forecasts")
lines(m4x.f.rec$mean, type="p", pch=16, lty="dashed", col="red")
lines(log(yall),pch=16, lty="dashed")

par(mfrow=c(2,1))
plot(log(y2) - m4x.f.h$mean, main="Forecast Error: Multistep Ahead Forecasts")
abline(h=0, lty="dashed")
plot(log(y2)-m4x.f.rec$mean, main="Forecast Error: 1-Month Ahead Recursive Forecasts")
abline(h=0, lty="dashed")

accuracy(m4x.f.h$mean, log(y2))

##                   ME       RMSE       MAE        MPE      MAPE      ACF1
## Test set -0.02388794 0.02732028 0.0240795 -0.1836821 0.1851514 -0.179771
##          Theil's U
## Test set 0.2943631

accuracy(m4x.f.rec$mean, log(y2))

##                     ME       RMSE        MAE          MPE      MAPE
## Test set -0.0005645576 0.01683895 0.01300661 -0.004582583 0.1000556
##                ACF1 Theil's U
## Test set -0.2272892 0.1792283

According to forecasting plot(more narrow), erros plot(more stable), and errors table(significantly lower errors), we can see that 1-month Ahead Recursive Forecasting shows better performence than multistep Ahead Forecasting.

Conclusion

Using above analysis, we can conclude that \[ARIMA(9,1,0)(2,1,0)_{12}\] is the best model for the Retail and Food Services Sales. Additionally, we find that 1-month Ahead Recursive Forecasting works better performence than multistep Ahead Forecasting.

ECO 5316, Hw 5

Problem 2

Seonjun Kang