Discussion #4
Go to Data Market (https://datamarket.com/data/list/?q=cat:ecc%20provider:tsdl (Links to an external site.)Links to an external site.) Pick a time series of interest to you. Build an auto.arima model as well as an ETS model. Which performed better? Now hold out 6 months of data for a test set and try to forecast using the ETS and the auto.arima. Which performs better on the hold-out set?

I continue with - Weekly closings of the Dow-Jones industrial average, July 1971 – August 1974.

# Loading libraries
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(TTR)
library(tseries)

# LOADING DATASET
# weekly-closings-of-the-dowjones.csv
 dj<-read.csv(file.choose())

names(dj)
## [1] "Week"        "close.price"
names(dj)[2]<-c("close.price")

head(dj)
##       Week close.price
## 1 1971-W27      890.19
## 2 1971-W28      901.80
## 3 1971-W29      888.51
## 4 1971-W30      887.78
## 5 1971-W31      858.43
## 6 1971-W32      850.61

Plotting data

# Plot original series
plot(dj$close.price,type="l",xlab="Week",ylab="closing price",main="historical data")

# boxplots by month//Qtr
dj.ts.q<-ts(dj,frequency = 4)
dj.ts.m<-ts(dj[,2],frequency = 12)

# boxplot(dj.ts.m[[1]],dj.ts.m["close.price"])

Given the weekly data, I am still trying to find out how to make a monthly/quarterly boxplot as discussed in class tonight. Any suggestions pals/ Prof?

Forecasting data

# length(dj.ts.m) # 162 observations

plot(decompose(dj.ts.m))

The trend here is not quite additive, hence considering multiplicative ets models going forward.

ets1<-ets(dj.ts.m, model="ANN")
out_ets1<-forecast(ets1,h=24)
e1<-summary(ets1)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = dj.ts.m, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.9609 
## 
##   Initial states:
##     l = 890.6509 
## 
##   sigma:  19.7332
## 
##      AIC     AICc      BIC 
## 1796.457 1796.609 1805.720 
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.878919 19.73321 15.42662 -0.1324464 1.714936 0.327965
##                      ACF1
## Training set -0.002265973
e1
##                     ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.878919 19.73321 15.42662 -0.1324464 1.714936 0.327965
##                      ACF1
## Training set -0.002265973
ets2<-ets(dj.ts.m, model="MAM")
out_ets2<-forecast(ets2,h=24)
e2<-summary(ets2)
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = dj.ts.m, model = "MAM") 
## 
##   Smoothing parameters:
##     alpha = 0.8698 
##     beta  = 1e-04 
##     gamma = 1e-04 
##     phi   = 0.9778 
## 
##   Initial states:
##     l = 902.3724 
##     b = -0.47 
##     s=1.0132 1.016 1.0032 0.9918 0.9867 0.9847
##            0.991 0.9918 0.9997 1.0005 1.0112 1.0103
## 
##   sigma:  0.0209
## 
##      AIC     AICc      BIC 
## 1813.064 1817.847 1868.640 
## 
## Training set error measures:
##                      ME     RMSE     MAE        MPE     MAPE     MASE
## Training set -0.8376435 18.77796 15.1895 -0.1275791 1.687307 0.322924
##                    ACF1
## Training set 0.04204195
e2
##                      ME     RMSE     MAE        MPE     MAPE     MASE
## Training set -0.8376435 18.77796 15.1895 -0.1275791 1.687307 0.322924
##                    ACF1
## Training set 0.04204195
# autoar<-auto.arima(dj.ts.m, stepwise=FALSE, approximation=FALSE)
# out_autoar<-forecast(autoar,h=24)
# a1<-summary(autoar)
# a1

arho<-auto.arima(dj.ts.m[1:138], stepwise=FALSE, approximation=FALSE)
out_arho<-forecast(arho,h=24)
a2<-summary(arho)
## Series: dj.ts.m[1:138] 
## ARIMA(3,1,2) 
## 
## Coefficients:
##           ar1      ar2     ar3     ma1     ma2
##       -1.6155  -0.8870  0.0179  1.6831  0.8962
## s.e.   0.1189   0.1657  0.0973  0.0856  0.0761
## 
## sigma^2 estimated as 358.5:  log likelihood=-595.23
## AIC=1202.46   AICc=1203.11   BIC=1219.98
## 
## Training set error measures:
##                      ME    RMSE      MAE         MPE     MAPE     MASE
## Training set -0.4767547 18.5175 14.36303 -0.07613014 1.573807 0.927219
##                     ACF1
## Training set -0.00324757
a2
##                      ME    RMSE      MAE         MPE     MAPE     MASE
## Training set -0.4767547 18.5175 14.36303 -0.07613014 1.573807 0.927219
##                     ACF1
## Training set -0.00324757
par(mfrow=c(2,2))
plot(ets1); 

plot(ets2); 

# plot(autoar); 
plot(arho); 

# accuracy(out_ets1,dj.ts.m)
# accuracy(out_ets2,dj.ts.m)
# accuracy(out_autoar,dj.ts.m)
accuracy(out_arho,dj.ts.m[139:162])
##                      ME     RMSE      MAE         MPE     MAPE     MASE
## Training set -0.4767547 18.51750 14.36303 -0.07613014 1.573807 0.927219
## Test set      9.1047033 34.97182 30.53813  0.92917329 3.671156 1.971418
##                     ACF1
## Training set -0.00324757
## Test set              NA

Plotting forecasts

# par(mfrow=c(1,2))
# plot()
# plot()

Results
Which ones better ?

# accuracy(fcst,act)

comparison=matrix(c(AIC(ets1),BIC(ets1),e1[2],e1[3],
                    AIC(ets2),BIC(ets2),e2[2],e2[3],
                    AIC(arho),BIC(arho),a2[2],a2[3])
                  ,ncol=4,byrow=TRUE)
colnames(comparison)=c("AIC","BIC","RMSE","MAE")
rownames(comparison)=c("model1-ANN","model2-MAM","model3-auto.arima")
comparison.table<-as.table(comparison)
as.table(comparison)
##                          AIC        BIC       RMSE        MAE
## model1-ANN        1796.45672 1805.71951   19.73321   15.42662
## model2-MAM        1813.06367 1868.64040   18.77796   15.18950
## model3-auto.arima 1202.46476 1219.98464   18.51750   14.36303

Conclusion
Based on the training errors: AIC, RMSE infact all error measures auto.arima seems to be resulting the better accuracy and least bias as well as variance. However, we need to look into the test errors to confirm this and I will update it the hold out analysis once its working.