Discussion #6
Download 5-years of historical daily data from any stock of your choosing. Finance.Yahoo.Com is a good place to start. Forecast the daily adjusted closing price of your stock using time series components and at least one external regressor (e.g., transaction volume at t-1).

I picked - Weekly closings of the Dow-Jones industrial average, July 1971 – August 1974.

# Loading libraries
library(forecast)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(TTR)
library(tseries)

# LOADING DATASET
require(quantmod)
## Loading required package: quantmod
## Version 0.4-0 included new data defaults. See ?getSymbols.
G <- new.env()
getSymbols("GOOG", env = G, src = "yahoo",
          from = as.Date("2013-04-09"), to = as.Date("2018-04-10"),return.class='ts')
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## 
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
## 
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "GOOG"
g<-G$GOOG
head(g)
## Time Series:
## Start = 1 
## End = 6 
## Frequency = 1 
##   GOOG.Open GOOG.High GOOG.Low GOOG.Close GOOG.Volume GOOG.Adjusted
## 1  385.2444  389.3427 384.0571   386.3124     4342600      386.3124
## 2  388.9304  393.6149 385.4927   392.5369     3982800      392.5369
## 3  393.8782  393.9875 389.4967   392.6412     4083700      392.6412
## 4  393.4361  393.4907 388.9354   392.4724     3294500      392.4724
## 5  390.4356  395.9249 385.9995   388.4386     4938000      388.4386
## 6  390.7535  395.4281 389.4272   394.1216     3506500      394.1216
colnames(g)
## [1] "GOOG.Open"     "GOOG.High"     "GOOG.Low"      "GOOG.Close"   
## [5] "GOOG.Volume"   "GOOG.Adjusted"

Plotting data

# Plot original series
plot(g)

# g.ts<-ts(g$GOOG.Adjusted,frequency = 252) # no. of days its traded in 1 yr. 

There is clear trend in the data with slight bit of seasonality. But we will see those details below by decomposing it.

Decomposing the time series

# boxplot(g[,"GOOG.Adjusted"]) ; boxplot(g[,"GOOG.Volume"])
plot(decompose(ts(g[,"GOOG.Adjusted"],frequency = 252)))

Forecasting data

library(tseries)
g.adj.train<-g[2:1250,c(5)]
g.adj.test<-g[1251:1260,c(5)]

vol.train<-g[1:1249,6]
vol.test<-g[1251:1260,6]

model1<-auto.arima(g.adj.train)
model2<-auto.arima(g.adj.train,xreg = vol.train)
model3<-garch(g.adj.train)
## 
##  ***** ESTIMATION WITH ANALYTICAL GRADIENT ***** 
## 
## 
##      I     INITIAL X(I)        D(I)
## 
##      1     2.163933e+12     1.000e+00
##      2     5.000000e-02     1.000e+00
##      3     5.000000e-02     1.000e+00
## 
##     IT   NF      F         RELDF    PRELDF    RELDX   STPPAR   D*STEP   NPRELDF
##      0    1  1.935e+04
##      1    2  1.895e+04  2.05e-02  2.64e-01  2.3e-13  5.1e+03  1.0e+00  6.73e+02
##      2    4  1.893e+04  1.09e-03  1.05e-03  1.1e-14  5.9e+00  5.0e-02  3.43e+00
##      3    6  1.890e+04  1.75e-03  1.73e-03  2.1e-14  2.0e+00  1.0e-01  2.52e-02
##      4    8  1.889e+04  2.85e-04  2.84e-04  4.2e-15  1.4e+01  2.0e-02  9.13e-03
##      5   10  1.888e+04  5.07e-04  5.07e-04  8.4e-15  2.6e+00  4.0e-02  3.89e-03
##      6   11  1.888e+04 -5.30e+05  7.67e-04  1.7e-14  3.8e+00  8.0e-02  2.07e-03
## 
##  ***** FALSE CONVERGENCE *****
## 
##  FUNCTION     1.888006e+04   RELDX        1.680e-14
##  FUNC. EVALS      11         GRAD. EVALS       6
##  PRELDF       7.669e-04      NPRELDF      2.073e-03
## 
##      I      FINAL X(I)        D(I)          G(I)
## 
##      1    2.163933e+12     1.000e+00     3.843e-11
##      2    9.534937e-01     1.000e+00     9.188e+01
##      3    3.068877e-02     1.000e+00     2.000e+02
## Warning in garch(g.adj.train): singular information
library(rugarch)
## Warning: package 'rugarch' was built under R version 3.4.4
## Loading required package: parallel
## 
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
## 
##     sigma
spec <- ugarchspec(mean.model=list(armaOrder=c(1,1)), distribution="std")
model4 <- ugarchfit(spec=spec, data=g.adj.train)
## Warning in .makefitmodel(garchmodel = "sGARCH", f = .sgarchLLH, T = T, m = m, : 
## rugarch-->warning: failed to invert hessian
library(fGarch)
## Warning: package 'fGarch' was built under R version 3.4.4
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.4.3
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.4.4
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.4.4
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:TTR':
## 
##     volatility
# model5 <- garchFit(data=g.adj.train,formula=~garch(1,1), cond.dist = "sged")

library(betategarch)
## Warning: package 'betategarch' was built under R version 3.4.4
model5<-tegarch(g.adj.train)


summary(model1)
## Series: g.adj.train 
## ARIMA(1,1,3) 
## 
## Coefficients:
##          ar1      ma1      ma2     ma3
##       0.7087  -1.1395  -0.0050  0.1638
## s.e.  0.0869   0.0944   0.0555  0.0545
## 
## sigma^2 estimated as 1.115e+12:  log likelihood=-19079.52
## AIC=38169.04   AICc=38169.09   BIC=38194.69
## 
## Training set error measures:
##                     ME    RMSE      MAE       MPE     MAPE      MASE
## Training set -32734.89 1053976 591687.2 -78.57762 93.51828 0.9086218
##                       ACF1
## Training set -0.0001106508
summary(model2)
## Series: g.adj.train 
## Regression with ARIMA(1,1,3) errors 
## 
## Coefficients:
##          ar1      ma1      ma2     ma3       xreg
##       0.7084  -1.1379  -0.0053  0.1630  -1526.717
## s.e.  0.0885   0.0959   0.0559  0.0549   1723.061
## 
## sigma^2 estimated as 1.116e+12:  log likelihood=-19079.12
## AIC=38170.24   AICc=38170.31   BIC=38201.01
## 
## Training set error measures:
##                     ME    RMSE      MAE       MPE     MAPE      MASE
## Training set -19491.29 1053645 588271.4 -78.03165 93.46546 0.9033765
##                      ACF1
## Training set 0.0005508406
summary(model3)
## 
## Call:
## garch(x = g.adj.train)
## 
## Model:
## GARCH(1,1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## 0.002446 0.600827 0.733287 0.916356 3.406319 
## 
## Coefficient(s):
##     Estimate  Std. Error  t value Pr(>|t|)
## a0 2.164e+12          NA       NA       NA
## a1 9.535e-01          NA       NA       NA
## b1 3.069e-02          NA       NA       NA
## 
## Diagnostic Tests:
##  Jarque Bera Test
## 
## data:  Residuals
## X-squared = 4849.4, df = 2, p-value < 2.2e-16
## 
## 
##  Box-Ljung test
## 
## data:  Squared.Residuals
## X-squared = 15.467, df = 1, p-value = 8.395e-05
summary(model4)
##    Length     Class      Mode 
##         1 uGARCHfit        S4
summary(model5)
## $date
## [1] "Sun Apr 22 23:15:57 2018"
## 
## $par
##      omega       phi1     kappa1  kappastar         df       skew 
## 15.1454234  0.8768580  0.3016843  0.2235583  2.0000012  0.4536325 
## 
## $objective
## [1] -19370.66
## 
## $convergence
## [1] 1
## 
## $iterations
## [1] 39
## 
## $evaluations
## function gradient 
##       89      238 
## 
## $message
## [1] "false convergence (8)"
# plot in-sample volatility estimates
plot(sqrt(252) * model4@fit$sigma, type='l')

# plot(sqrt(252) * model5[, "sigma"])

Plotting forecasts

f1<-forecast(model1, h=10)
f2<-forecast(model2, h=10, xreg = vol.test)
# f3<-forecast(model3, h=10)
# f3<-forecast(model3, h=10)
f5<-predict.tegarch(model5, n.ahead = 10)

Results
Which ones better ?

accuracy(f1, g.adj.test)
##                     ME    RMSE      MAE       MPE     MAPE      MASE
## Training set -32734.89 1053976 591687.2 -78.57762 93.51828 0.9086218
## Test set     534316.21  782428 667163.5  16.57041 25.72889 1.0245267
##                       ACF1
## Training set -0.0001106508
## Test set                NA
accuracy(f2, g.adj.test)
##                     ME    RMSE      MAE       MPE     MAPE      MASE
## Training set -19491.29 1053645 588271.4 -78.03165 93.46546 0.9033765
## Test set     426721.20  714271 630167.3  11.74609 25.26629 0.9677137
##                      ACF1
## Training set 0.0005508406
## Test set               NA
accuracy(ts(f5), g.adj.test)
##                   ME       RMSE        MAE       MPE     MAPE
## Test set -4406922689 4492892940 4406922689 -204896.1 204896.1

Conclusion
With this market-driven prices of Google we see that GARCH(1,1) performs as good as arima(2,1,3) without a regressor, lower test errors from RMSE, MASE point of view.