Discussion #4
Go to Data Market (https://datamarket.com/data/list/?q=cat:ecc%20provider:tsdl). Pick a time series of interest to you. Build an auto.arima model as well as an ETS model. Which performed better? Now hold out 6 months of data for a test set and try to forecast using the ETS and the auto.arima. Which performs better on the hold-out set?
I continue with the same series as before: Weekly closings of the Dow-Jones industrial average, July 1971 – August 1974.
# Loading libraries
library(forecast)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(TTR)
library(tseries)
# LOADING DATASET
# weekly-closings-of-the-dowjones.csv
dj<-read.csv(file.choose())
names(dj)
## [1] "Week" "close.price"
names(dj)[2]<-"close.price" # already named this here; kept as a guard for re-runs
head(dj)
## Week close.price
## 1 1971-W27 890.19
## 2 1971-W28 901.80
## 3 1971-W29 888.51
## 4 1971-W30 887.78
## 5 1971-W31 858.43
## 6 1971-W32 850.61
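The week labels are "YYYY-Www" strings, so I first derive approximate calendar dates from them (a rough sketch: it anchors week w at Jan 1 of the year plus w-1 weeks, not exact ISO week dates):
# Approximate a calendar date for each "YYYY-Www" label:
# Jan 1 of the year, stepped forward (w - 1) weeks
yr<-as.integer(sub("-W.*","",dj$Week))
wk<-as.integer(sub(".*-W","",dj$Week))
dj$date<-as.Date(paste0(yr,"-01-01"))+(wk-1)*7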
Plotting data
# Plot original series
plot(dj$close.price,type="l",xlab="Week",ylab="closing price",main="historical data")
# boxplots by month/quarter
# NB: the data are weekly, so frequency = 4 / 12 imposes an artificial
# quarterly/"monthly" cycle rather than true calendar periods
dj.ts.q<-ts(dj[,2],frequency = 4)
dj.ts.m<-ts(dj[,2],frequency = 12)
Given the weekly data, I am still trying to work out how to make a monthly/quarterly boxplot as discussed in class tonight; a first attempt is sketched below. Any suggestions, pals/Prof?
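One approach I plan to try (a sketch, not yet run; it uses the approximate date column built above): one box per calendar month, or one box per position in the seasonal cycle of the ts object.
# One box per calendar month, via the approximate dates
boxplot(close.price~format(date,"%m"),data=dj,xlab="Month",ylab="closing price")
# Alternatively, one box per position in the imposed 12-step cycle
boxplot(split(dj.ts.m,cycle(dj.ts.m)),xlab="cycle position",ylab="closing price")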
Forecasting data
# length(dj.ts.m) # 162 observations
plot(decompose(dj.ts.m))
The decomposition suggests the structure is not purely additive, so I also consider a multiplicative ETS model going forward.
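For comparison, the multiplicative decomposition can be drawn the same way (not run here):
# Multiplicative decomposition, to contrast with the additive plot above
plot(decompose(dj.ts.m,type="multiplicative"))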
ets1<-ets(dj.ts.m, model="ANN")
out_ets1<-forecast(ets1,h=24)
e1<-summary(ets1)
## ETS(A,N,N)
##
## Call:
## ets(y = dj.ts.m, model = "ANN")
##
## Smoothing parameters:
## alpha = 0.9609
##
## Initial states:
## l = 890.6509
##
## sigma: 19.7332
##
## AIC AICc BIC
## 1796.457 1796.609 1805.720
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.878919 19.73321 15.42662 -0.1324464 1.714936 0.327965
## ACF1
## Training set -0.002265973
e1
## ME RMSE MAE MPE MAPE MASE
## Training set -0.878919 19.73321 15.42662 -0.1324464 1.714936 0.327965
## ACF1
## Training set -0.002265973
# "MAM" leaves damping unspecified, so ets() also tries a damped trend (it picks ETS(M,Ad,M))
ets2<-ets(dj.ts.m, model="MAM")
out_ets2<-forecast(ets2,h=24)
e2<-summary(ets2)
## ETS(M,Ad,M)
##
## Call:
## ets(y = dj.ts.m, model = "MAM")
##
## Smoothing parameters:
## alpha = 0.8698
## beta = 1e-04
## gamma = 1e-04
## phi = 0.9778
##
## Initial states:
## l = 902.3724
## b = -0.47
## s=1.0132 1.016 1.0032 0.9918 0.9867 0.9847
## 0.991 0.9918 0.9997 1.0005 1.0112 1.0103
##
## sigma: 0.0209
##
## AIC AICc BIC
## 1813.064 1817.847 1868.640
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.8376435 18.77796 15.1895 -0.1275791 1.687307 0.322924
## ACF1
## Training set 0.04204195
e2
## ME RMSE MAE MPE MAPE MASE
## Training set -0.8376435 18.77796 15.1895 -0.1275791 1.687307 0.322924
## ACF1
## Training set 0.04204195
# auto.arima on the full series (not run here; the discussion question
# asks for a fit on the training window, done below)
# autoar<-auto.arima(dj.ts.m, stepwise=FALSE, approximation=FALSE)
# out_autoar<-forecast(autoar,h=24)
# a1<-summary(autoar)
# a1
# hold out the last 24 points; note that [1:138] drops the ts attributes,
# so auto.arima only searches non-seasonal models here
arho<-auto.arima(dj.ts.m[1:138], stepwise=FALSE, approximation=FALSE)
out_arho<-forecast(arho,h=24)
a2<-summary(arho)
## Series: dj.ts.m[1:138]
## ARIMA(3,1,2)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2
## -1.6155 -0.8870 0.0179 1.6831 0.8962
## s.e. 0.1189 0.1657 0.0973 0.0856 0.0761
##
## sigma^2 estimated as 358.5: log likelihood=-595.23
## AIC=1202.46 AICc=1203.11 BIC=1219.98
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4767547 18.5175 14.36303 -0.07613014 1.573807 0.927219
## ACF1
## Training set -0.00324757
a2
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4767547 18.5175 14.36303 -0.07613014 1.573807 0.927219
## ACF1
## Training set -0.00324757
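As a variant worth noting (a sketch, not run; same 138/24 split assumed): window() keeps the ts attributes, so auto.arima would also be free to consider seasonal terms.
# Same split via window(), which preserves the frequency attribute
dj.train<-window(dj.ts.m,end=time(dj.ts.m)[138])
dj.test<-window(dj.ts.m,start=time(dj.ts.m)[139])
arho.s<-auto.arima(dj.train,stepwise=FALSE,approximation=FALSE)
accuracy(forecast(arho.s,h=24),dj.test)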
par(mfrow=c(2,2))
plot(ets1)
plot(ets2)
# plot(autoar)
plot(arho)
# accuracy for the ETS models belongs on training-window refits
# (see the sketch after the output below), so these are left out for now
# accuracy(out_ets1,dj.ts.m)
# accuracy(out_ets2,dj.ts.m)
# accuracy(out_autoar,dj.ts.m)
accuracy(out_arho,dj.ts.m[139:162])
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4767547 18.51750 14.36303 -0.07613014 1.573807 0.927219
## Test set 9.1047033 34.97182 30.53813 0.92917329 3.671156 1.971418
## ACF1
## Training set -0.00324757
## Test set NA
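The matching hold-out numbers for the ETS models are the missing piece. A sketch of what I intend to run (refit both ETS models on the first 138 points, keeping frequency = 12 so the seasonal MAM form is still allowed, then score the 24-step forecasts):
# Refit each ETS model on the training window only
dj.train.ets<-ts(dj.ts.m[1:138],frequency=12)
ets1.ho<-ets(dj.train.ets,model="ANN")
ets2.ho<-ets(dj.train.ets,model="MAM")
accuracy(forecast(ets1.ho,h=24),dj.ts.m[139:162])
accuracy(forecast(ets2.ho,h=24),dj.ts.m[139:162])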
Plotting forecasts
# A possible layout once the refits are done: side-by-side forecast fans
# par(mfrow=c(1,2))
# plot(out_ets2)
# plot(out_arho)
Results
Which one is better?
# accuracy(fcst,act)
comparison=matrix(c(AIC(ets1),BIC(ets1),e1[2],e1[3],
AIC(ets2),BIC(ets2),e2[2],e2[3],
AIC(arho),BIC(arho),a2[2],a2[3])
,ncol=4,byrow=TRUE) # e*[2]/e*[3] and a2[2]/a2[3] pull RMSE/MAE from the accuracy matrices
colnames(comparison)=c("AIC","BIC","RMSE","MAE")
rownames(comparison)=c("model1-ANN","model2-MAM","model3-auto.arima")
comparison.table<-as.table(comparison)
comparison.table
## AIC BIC RMSE MAE
## model1-ANN 1796.45672 1805.71951 19.73321 15.42662
## model2-MAM 1813.06367 1868.64040 18.77796 15.18950
## model3-auto.arima 1202.46476 1219.98464 18.51750 14.36303
Conclusion
Based on the training error measures, auto.arima gives the best in-sample accuracy, with the lowest bias and variance of the three. One caveat on the comparison table: the AIC/BIC of the ARIMA model are not directly comparable to those of the ETS models, because its likelihood is computed on the differenced series, so RMSE/MAE are the fairer in-sample comparison. On the 24-point hold-out, the auto.arima test RMSE (34.97) is roughly double its training RMSE (18.52). I will update this post with the hold-out results for the ETS models once the refitting sketch above is working, since the hold-out comparison is what the discussion question ultimately asks for.