Discussion #6
Download 5-years of historical daily data from any stock of your choosing. Finance.Yahoo.Com is a good place to start. Forecast the daily adjusted closing price of your stock using time series components and at least one external regressor (e.g., transaction volume at t-1).
I picked - Weekly closings of the Dow-Jones industrial average, July 1971 – August 1974.
# Loading libraries
library(forecast)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(TTR)
library(tseries)
# LOADING DATASET
require(quantmod)
## Loading required package: quantmod
## Version 0.4-0 included new data defaults. See ?getSymbols.
G <- new.env()
getSymbols("GOOG", env = G, src = "yahoo",
from = as.Date("2013-04-09"), to = as.Date("2018-04-10"),return.class='ts')
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "GOOG"
g<-G$GOOG
head(g)
## Time Series:
## Start = 1
## End = 6
## Frequency = 1
## GOOG.Open GOOG.High GOOG.Low GOOG.Close GOOG.Volume GOOG.Adjusted
## 1 385.2444 389.3427 384.0571 386.3124 4342600 386.3124
## 2 388.9304 393.6149 385.4927 392.5369 3982800 392.5369
## 3 393.8782 393.9875 389.4967 392.6412 4083700 392.6412
## 4 393.4361 393.4907 388.9354 392.4724 3294500 392.4724
## 5 390.4356 395.9249 385.9995 388.4386 4938000 388.4386
## 6 390.7535 395.4281 389.4272 394.1216 3506500 394.1216
colnames(g)
## [1] "GOOG.Open" "GOOG.High" "GOOG.Low" "GOOG.Close"
## [5] "GOOG.Volume" "GOOG.Adjusted"
Plotting data
# Plot original series
plot(g)
# g.ts<-ts(g$GOOG.Adjusted,frequency = 252) # no. of days its traded in 1 yr.
There is clear trend in the data with slight bit of seasonality. But we will see those details below by decomposing it.
Decomposing the time series
# boxplot(g[,"GOOG.Adjusted"]) ; boxplot(g[,"GOOG.Volume"])
plot(decompose(ts(g[,"GOOG.Adjusted"],frequency = 252)))
Forecasting data
library(tseries)
g.adj.train<-g[2:1250,c(5)]
g.adj.test<-g[1251:1260,c(5)]
vol.train<-g[1:1249,6]
vol.test<-g[1251:1260,6]
model1<-auto.arima(g.adj.train)
model2<-auto.arima(g.adj.train,xreg = vol.train)
model3<-garch(g.adj.train)
##
## ***** ESTIMATION WITH ANALYTICAL GRADIENT *****
##
##
## I INITIAL X(I) D(I)
##
## 1 2.163933e+12 1.000e+00
## 2 5.000000e-02 1.000e+00
## 3 5.000000e-02 1.000e+00
##
## IT NF F RELDF PRELDF RELDX STPPAR D*STEP NPRELDF
## 0 1 1.935e+04
## 1 2 1.895e+04 2.05e-02 2.64e-01 2.3e-13 5.1e+03 1.0e+00 6.73e+02
## 2 4 1.893e+04 1.09e-03 1.05e-03 1.1e-14 5.9e+00 5.0e-02 3.43e+00
## 3 6 1.890e+04 1.75e-03 1.73e-03 2.1e-14 2.0e+00 1.0e-01 2.52e-02
## 4 8 1.889e+04 2.85e-04 2.84e-04 4.2e-15 1.4e+01 2.0e-02 9.13e-03
## 5 10 1.888e+04 5.07e-04 5.07e-04 8.4e-15 2.6e+00 4.0e-02 3.89e-03
## 6 11 1.888e+04 -5.30e+05 7.67e-04 1.7e-14 3.8e+00 8.0e-02 2.07e-03
##
## ***** FALSE CONVERGENCE *****
##
## FUNCTION 1.888006e+04 RELDX 1.680e-14
## FUNC. EVALS 11 GRAD. EVALS 6
## PRELDF 7.669e-04 NPRELDF 2.073e-03
##
## I FINAL X(I) D(I) G(I)
##
## 1 2.163933e+12 1.000e+00 3.843e-11
## 2 9.534937e-01 1.000e+00 9.188e+01
## 3 3.068877e-02 1.000e+00 2.000e+02
## Warning in garch(g.adj.train): singular information
library(rugarch)
## Warning: package 'rugarch' was built under R version 3.4.4
## Loading required package: parallel
##
## Attaching package: 'rugarch'
## The following object is masked from 'package:stats':
##
## sigma
spec <- ugarchspec(mean.model=list(armaOrder=c(1,1)), distribution="std")
model4 <- ugarchfit(spec=spec, data=g.adj.train)
## Warning in .makefitmodel(garchmodel = "sGARCH", f = .sgarchLLH, T = T, m = m, :
## rugarch-->warning: failed to invert hessian
library(fGarch)
## Warning: package 'fGarch' was built under R version 3.4.4
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.4.3
## Loading required package: timeSeries
## Warning: package 'timeSeries' was built under R version 3.4.4
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
## Loading required package: fBasics
## Warning: package 'fBasics' was built under R version 3.4.4
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:TTR':
##
## volatility
# model5 <- garchFit(data=g.adj.train,formula=~garch(1,1), cond.dist = "sged")
library(betategarch)
## Warning: package 'betategarch' was built under R version 3.4.4
model5<-tegarch(g.adj.train)
summary(model1)
## Series: g.adj.train
## ARIMA(1,1,3)
##
## Coefficients:
## ar1 ma1 ma2 ma3
## 0.7087 -1.1395 -0.0050 0.1638
## s.e. 0.0869 0.0944 0.0555 0.0545
##
## sigma^2 estimated as 1.115e+12: log likelihood=-19079.52
## AIC=38169.04 AICc=38169.09 BIC=38194.69
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -32734.89 1053976 591687.2 -78.57762 93.51828 0.9086218
## ACF1
## Training set -0.0001106508
summary(model2)
## Series: g.adj.train
## Regression with ARIMA(1,1,3) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 xreg
## 0.7084 -1.1379 -0.0053 0.1630 -1526.717
## s.e. 0.0885 0.0959 0.0559 0.0549 1723.061
##
## sigma^2 estimated as 1.116e+12: log likelihood=-19079.12
## AIC=38170.24 AICc=38170.31 BIC=38201.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -19491.29 1053645 588271.4 -78.03165 93.46546 0.9033765
## ACF1
## Training set 0.0005508406
summary(model3)
##
## Call:
## garch(x = g.adj.train)
##
## Model:
## GARCH(1,1)
##
## Residuals:
## Min 1Q Median 3Q Max
## 0.002446 0.600827 0.733287 0.916356 3.406319
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|t|)
## a0 2.164e+12 NA NA NA
## a1 9.535e-01 NA NA NA
## b1 3.069e-02 NA NA NA
##
## Diagnostic Tests:
## Jarque Bera Test
##
## data: Residuals
## X-squared = 4849.4, df = 2, p-value < 2.2e-16
##
##
## Box-Ljung test
##
## data: Squared.Residuals
## X-squared = 15.467, df = 1, p-value = 8.395e-05
summary(model4)
## Length Class Mode
## 1 uGARCHfit S4
summary(model5)
## $date
## [1] "Sun Apr 22 23:15:57 2018"
##
## $par
## omega phi1 kappa1 kappastar df skew
## 15.1454234 0.8768580 0.3016843 0.2235583 2.0000012 0.4536325
##
## $objective
## [1] -19370.66
##
## $convergence
## [1] 1
##
## $iterations
## [1] 39
##
## $evaluations
## function gradient
## 89 238
##
## $message
## [1] "false convergence (8)"
# plot in-sample volatility estimates
plot(sqrt(252) * model4@fit$sigma, type='l')
# plot(sqrt(252) * model5[, "sigma"])
Plotting forecasts
f1<-forecast(model1, h=10)
f2<-forecast(model2, h=10, xreg = vol.test)
# f3<-forecast(model3, h=10)
# f3<-forecast(model3, h=10)
f5<-predict.tegarch(model5, n.ahead = 10)
Results
Which ones better ?
accuracy(f1, g.adj.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -32734.89 1053976 591687.2 -78.57762 93.51828 0.9086218
## Test set 534316.21 782428 667163.5 16.57041 25.72889 1.0245267
## ACF1
## Training set -0.0001106508
## Test set NA
accuracy(f2, g.adj.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -19491.29 1053645 588271.4 -78.03165 93.46546 0.9033765
## Test set 426721.20 714271 630167.3 11.74609 25.26629 0.9677137
## ACF1
## Training set 0.0005508406
## Test set NA
accuracy(ts(f5), g.adj.test)
## ME RMSE MAE MPE MAPE
## Test set -4406922689 4492892940 4406922689 -204896.1 204896.1
Conclusion
With this market-driven prices of Google we see that GARCH(1,1) performs as good as arima(2,1,3) without a regressor, lower test errors from RMSE, MASE point of view.