The objective of this document is to apply training and testing datasets (a technique from machine learning) to stock trading analysis in the R programming language. This document uses the “quantmod” and “PerformanceAnalytics” R packages for backtesting of automated trading strategies. Holt-Winters and GARCH filtering are used for price prediction.
# Set Working Directory
# The folder below exists only on the original author's machine. The write.csv
# calls in this script are commented out, so nothing visible here depends on
# the working directory. Switch to it only when it is present instead of
# aborting the whole script with "cannot change working directory".
project_dir <- "C:/Users/johnakwei/Dropbox/Programming/StockTrading/YahooFinanceGainersLosers"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message("Working directory not changed: '", project_dir, "' does not exist.")
}
# Required Packages
# if (!require("quantmod")) { install.packages("quantmod"); require("quantmod") }
# if (!require("PerformanceAnalytics")) { install.packages("PerformanceAnalytics"); require("PerformanceAnalytics") }
# if (!require("rgl")) { install.packages("rgl"); require("rgl") }
# if (!require("rugarch")) { install.packages("rugarch"); require("rugarch") }
library(ggplot2)
library(xts)
library(zoo)
library(tseries)
library(forecast)
library(quantmod)
library(PerformanceAnalytics)
library(caret)
library(rugarch)
# Import Original data
# Fetch the GOOG price history for 2014-01-01 to 2017-01-01 as a returned
# object (auto.assign = FALSE) rather than one assigned into the workspace.
ticker_lookup <- getSymbols("GOOG", auto.assign = FALSE,
                            from = "2014-01-01", to = "2017-01-01")
# write.csv(ticker_lookup, "ticker_lookup.csv")
# Keep the first six columns; the series index ends up in the row names of the
# data frame and is copied into an explicit `date` column.
ticker_lookup2 <- as.data.frame(ticker_lookup[, 1:6])
TCKR <- as.data.frame(cbind(date = rownames(ticker_lookup2), ticker_lookup2))
# Column names must not carry trailing blanks: with "close " the later
# `TCKR$close` lookups only worked through `$` partial matching.
colnames(TCKR) <- c("date", "open", "high", "low", "close", "volume",
                    "adjusted")
rownames(TCKR) <- NULL
rm(ticker_lookup, ticker_lookup2)
# Create Training and Test Data
# Rows 1-567 are used for fitting, rows 568-756 are held out for evaluation.
TCKRTrain <- TCKR[1:567,]
TCKRTest <- TCKR[568:756,]
# Time Series Processing
# ts() interprets `start`/`end` as c(cycle, position within cycle), not as
# c(year, month, day). With frequency = 250 the former end = c(2016, 4, 5)
# meant "4th observation of cycle 2016", which cut the 567 training closes
# down to 504 values, and the former test bounds stretched the 189 test closes
# to 248 values by recycling. Omitting `end` lets ts() keep every observation.
# NOTE: the recorded `##` outputs further down this file were produced with
# the truncated/recycled series and will change once the script is re-run.
TCKRTrainTS <- ts(TCKRTrain$close, start = c(2014, 1), frequency = 250)
# The test series starts one time step after the last training observation so
# that forecasts from the training series line up with it.
TCKRTestTS <- ts(TCKRTest$close,
                 start = tsp(TCKRTrainTS)[2] + 1 / frequency(TCKRTrainTS),
                 frequency = 250)
# Fit Exponential Smoothing
# Holt-Winters filtering of the training series, default settings.
fit_ES <- HoltWinters(TCKRTrainTS)

# Forecast 20 Days
# Project the fitted model 20 steps past the end of the training series.
TCKRForecast <- forecast(fit_ES, h = 20)

# Accuracy of Exponential Smoothing Predictive Model
# Error measures of the forecast, evaluated against the held-out test series.
accuracy(TCKRForecast, TCKRTestTS)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.7567337 12.36518 6.315612 0.1584173 1.090240 0.01061984
## Test set -33.0843421 49.40416 35.392489 -4.6871517 4.991725 0.16992610
## ACF1 Theil's U
## Training set -0.005877338 NA
## Test set 0.848029852 4.549523
# Realized Data Plot
# TCKR$date holds the dates as text copied from row names; plot() cannot put
# text on a numeric axis, so convert to Date to get a proper time axis.
plot(as.Date(TCKR$date), TCKR$close, col = "orange", xlab = "Date",
     ylab = "Prices", main = "Plot of Ticker Data")
# Exponential Smoothing Plot
# Draws the Holt-Winters forecast object created above.
plot(TCKRForecast, xlab = "Time", ylab = "Prices",
     main = "Holt-Winters Forecast - 20 Days")
# Output Exponential Smoothing Data to Excel
# Training closes followed by the 20 Holt-Winters point forecasts.
TCKRFrcstSeries <- c(TCKRTrain$close[1:567], TCKRForecast$mean[1:20])

# .csv of original data and all 3 forecasted values
# Training closes padded with 20 NAs so the column spans the forecast rows.
TCKRMerge <- c(TCKRTrain$close[1:567], rep(NA, times = 20))
# 567 NAs followed by the first 20 held-out closes.
TCKRMergeTest <- c(rep(NA, times = 567), as.numeric(TCKRTest$close[1:20]))
# Dates for the 567 training rows plus the first 20 test rows.
dateColumn <- c(
  as.Date(TCKRTrain$date[1:567]),
  as.Date(TCKRTest$date[1:20])
)

allData <- data.frame(TCKRMerge, TCKRFrcstSeries)

# Side-by-side table of dates, training data, test data and forecast.
Comparison <- data.frame(
  Date = dateColumn,
  Train_Data = TCKRMerge,
  Test_Data = TCKRMergeTest,
  HW_Forecast = TCKRFrcstSeries
)
tail(Comparison, n = 30)
## Date Train_Data Test_Data HW_Forecast
## 558 2016-03-21 742.09 NA 742.0900
## 559 2016-03-22 740.75 NA 740.7500
## 560 2016-03-23 738.06 NA 738.0600
## 561 2016-03-24 735.30 NA 735.3000
## 562 2016-03-28 733.53 NA 733.5300
## 563 2016-03-29 744.77 NA 744.7700
## 564 2016-03-30 750.53 NA 750.5300
## 565 2016-03-31 744.95 NA 744.9500
## 566 2016-04-01 749.91 NA 749.9100
## 567 2016-04-04 745.29 NA 745.2900
## 568 2016-04-05 NA 737.80 747.7955
## 569 2016-04-06 NA 745.69 747.7115
## 570 2016-04-07 NA 740.28 750.0822
## 571 2016-04-08 NA 739.15 744.3641
## 572 2016-04-11 NA 736.10 741.4964
## 573 2016-04-12 NA 743.09 745.8130
## 574 2016-04-13 NA 751.72 751.1398
## 575 2016-04-14 NA 753.20 752.6830
## 576 2016-04-15 NA 759.00 759.4063
## 577 2016-04-18 NA 766.61 758.4920
## 578 2016-04-19 NA 753.93 769.9277
## 579 2016-04-20 NA 752.67 786.5731
## 580 2016-04-21 NA 759.14 792.4501
## 581 2016-04-22 NA 718.77 788.0976
## 582 2016-04-25 NA 723.15 771.9331
## 583 2016-04-26 NA 708.14 763.7044
## 584 2016-04-27 NA 705.84 764.7288
## 585 2016-04-28 NA 691.02 788.9019
## 586 2016-04-29 NA 693.01 783.1890
## 587 2016-05-02 NA 698.21 784.2773
# Overlay the Holt-Winters series (blue), the held-out test closes (green)
# and the training closes (red) on one index-based plot.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the former `Train_Datat` evaluated to NULL,
# so the training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")
# write.csv(allData, "allTickerData.csv")
# write.csv(Comparison, "TickerDataComparison.csv")
# Training closes as date-indexed xts and zoo series.
train_dates <- as.Date(TCKRTrain$date)
tickerData.xts <- xts(as.numeric(Cl(TCKRTrain)), order.by = train_dates)
tickerData.z <- zoo(x = Cl(TCKRTrain), order.by = train_dates)

# Specify the prices and store our models
prices <- tickerData.xts[, 1]

# Create indicator
# Simple moving average with a window of one observation.
sma <- SMA(tickerData.xts, n = 1)

# Calculate the indicators we need for our strategy
CCI20 <- CCI(prices, 20)
RSI3 <- RSI(prices, 3)
DEMA10 <- DEMA(prices, n = 10, v = 1, wilder = FALSE)
# Distance of the price from its DEMA, rescaled by 1 / 0.0001.
DEMA10c <- (prices - DEMA10) / .0001

# 1 where all indicator conditions hold, NA everywhere else.
buy.signal <- ifelse(
  RSI3 < 30 &
    CCI20 > -290 & CCI20 < -100 &
    DEMA10c > -40 & DEMA10c < 750,
  1,
  NA
)

# Construct trading rule
# NOTE(review): this compares a price-level average with the 1/NA flag above;
# presumably a different comparison was intended - confirm with the author.
sig <- Lag(ifelse(sma$SMA < buy.signal, 1, -1))

# The trading rules/equity curve
retSMA <- ROC(tickerData.xts) * sig

chartSeries(
  tickerData.xts,
  theme = chartTheme("white"),
  TA = c(addVo(), addBBands(), addMACD())
)
# Training closes as a ts with 250 observations per cycle.
# ts() reads `start`/`end` as c(cycle, position within cycle); the former
# end = c(2016, 1, 1) therefore kept only the first 501 of the 567 training
# closes, while the 20 forecasts below are appended after row 567. Omitting
# `end` keeps all observations so the forecast continues where the data stops.
# NOTE: the recorded `##` output below predates this fix and will change on
# the next run.
TCKRTS <- ts(TCKRTrain$close, start = c(2014, 1), frequency = 250)
# Seasonal decomposition
# NOTE: this result is overwritten by the HoltWinters fit below before it is
# used anywhere in the visible script.
fit <- stl(TCKRTS, s.window = "periodic")
# The data is GOOG (see the getSymbols call); titles previously said TWTR.
monthplot(TCKRTS, col = "green", main = "Month Plot of GOOG, Seasonal Adjustment")
# triple exponential - models level, trend, and seasonal components
fit <- HoltWinters(TCKRTS)
# predict the next 20 values
TWTRForecast <- forecast(fit, h = 20)
plot(TWTRForecast, main = "GOOG Price Forecast (20 Days) from Holt-Winters Filtering")
# New Forecast Data
# Training closes followed by the 20 point forecasts, stored as a new column.
TWTRFrcstSeries <- c(TCKRTrain$close[1:567], TWTRForecast$mean[1:20])
Comparison$NewForecast <- TWTRFrcstSeries
names(Comparison) <- c("Date", "Train_Data", "Test_Data",
                       "HW_Forecast", "New_Forecast")
tail(Comparison, 30)
## Date Train_Data Test_Data HW_Forecast New_Forecast
## 558 2016-03-21 742.09 NA 742.0900 742.0900
## 559 2016-03-22 740.75 NA 740.7500 740.7500
## 560 2016-03-23 738.06 NA 738.0600 738.0600
## 561 2016-03-24 735.30 NA 735.3000 735.3000
## 562 2016-03-28 733.53 NA 733.5300 733.5300
## 563 2016-03-29 744.77 NA 744.7700 744.7700
## 564 2016-03-30 750.53 NA 750.5300 750.5300
## 565 2016-03-31 744.95 NA 744.9500 744.9500
## 566 2016-04-01 749.91 NA 749.9100 749.9100
## 567 2016-04-04 745.29 NA 745.2900 745.2900
## 568 2016-04-05 NA 737.80 747.7955 758.6653
## 569 2016-04-06 NA 745.69 747.7115 757.7018
## 570 2016-04-07 NA 740.28 750.0822 747.3589
## 571 2016-04-08 NA 739.15 744.3641 736.0305
## 572 2016-04-11 NA 736.10 741.4964 735.7642
## 573 2016-04-12 NA 743.09 745.8130 737.9528
## 574 2016-04-13 NA 751.72 751.1398 732.0523
## 575 2016-04-14 NA 753.20 752.6830 729.0013
## 576 2016-04-15 NA 759.00 759.4063 733.1340
## 577 2016-04-18 NA 766.61 758.4920 738.2772
## 578 2016-04-19 NA 753.93 769.9277 739.6377
## 579 2016-04-20 NA 752.67 786.5731 746.1743
## 580 2016-04-21 NA 759.14 792.4501 745.0739
## 581 2016-04-22 NA 718.77 788.0976 756.3278
## 582 2016-04-25 NA 723.15 771.9331 772.7907
## 583 2016-04-26 NA 708.14 763.7044 778.4858
## 584 2016-04-27 NA 705.84 764.7288 773.9525
## 585 2016-04-28 NA 691.02 788.9019 757.6062
## 586 2016-04-29 NA 693.01 783.1890 749.1947
## 587 2016-05-02 NA 698.21 784.2773 750.0360
# Overlay both Holt-Winters series (blue, black), the held-out test closes
# (green) and the training closes (red) on one index-based plot.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$New_Forecast[1:587], col = "black")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the former `Train_Datat` evaluated to NULL,
# so the training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")
# Default specification (replaced by the explicit one below).
spec <- ugarchspec()

# Number of combinations in this option grid; printed only, not stored.
nrow(expand.grid(
  GARCH = 1:14, VEX = 0:1, VT = 0:1, Mean = 0:1, ARCHM = 0:2,
  ARFIMA = 0:1, MEX = 0:1, DISTR = 1:10
))

# eGARCH(1, 1) variance model with the "std" distribution.
spec <- ugarchspec(
  variance.model = list(model = "eGARCH", garchOrder = c(1, 1)),
  distribution.model = "std"
)

# Fit models with Generalized Auto-Regressive Conditional Heteroskedasticity
# NOTE(review): the model is fitted to the training price series itself,
# not to returns - confirm this is intended.
all.fitted.model <- ugarchfit(spec = spec, data = TCKRTrainTS, solver = "hybrid")

# One-column table of the fitted coefficients, printed below.
coefBMtable <- data.frame(
  "GARCH Parameters" = coef(all.fitted.model),
  check.names = FALSE
)
coefBMtable
## GARCH Parameters
## mu 1.113141e+03
## ar1 9.996090e-01
## ma1 -1.872695e-03
## omega 5.864394e-02
## alpha1 3.870138e-02
## beta1 9.871125e-01
## gamma1 3.092318e-02
## shape 3.189047e+00
# Show diagnostic panels 8 and 9 of the fitted model side by side, then
# return to a single-panel layout.
par(mfrow = c(1, 2))
invisible(lapply(8:9, function(panel) plot(all.fitted.model, which = panel)))
par(mfrow = c(1, 1))

# 20-step-ahead forecast from the fitted model, followed by its panel 3 plot.
forc1 <- ugarchforecast(all.fitted.model, n.ahead = 20)
plot(forc1, which = 3)
# GARCH Forecast Data
# Training closes followed by the 20 forecast series values.
GARCHFrcstSeries <- c(
  TCKRTrain$close[1:567],
  forc1@forecast$seriesFor[1:20]
)

# Add the series as a sixth column and (re)apply the full set of column names.
Comparison[["GARCHForecast"]] <- GARCHFrcstSeries
names(Comparison) <- c(
  "Date", "Train_Data", "Test_Data",
  "HW_Forecast", "New_Forecast", "GARCH_Forecast"
)

# Last 30 rows without the Train_Data column (column 2).
tail(Comparison[c(1, 3:6)], n = 30)
## Date Test_Data HW_Forecast New_Forecast GARCH_Forecast
## 558 2016-03-21 NA 742.0900 742.0900 742.0900
## 559 2016-03-22 NA 740.7500 740.7500 740.7500
## 560 2016-03-23 NA 738.0600 738.0600 738.0600
## 561 2016-03-24 NA 735.3000 735.3000 735.3000
## 562 2016-03-28 NA 733.5300 733.5300 733.5300
## 563 2016-03-29 NA 744.7700 744.7700 744.7700
## 564 2016-03-30 NA 750.5300 750.5300 750.5300
## 565 2016-03-31 NA 744.9500 744.9500 744.9500
## 566 2016-04-01 NA 749.9100 749.9100 749.9100
## 567 2016-04-04 NA 745.2900 745.2900 745.2900
## 568 2016-04-05 737.80 747.7955 758.6653 759.0415
## 569 2016-04-06 745.69 747.7115 757.7018 759.1799
## 570 2016-04-07 740.28 750.0822 747.3589 759.3183
## 571 2016-04-08 739.15 744.3641 736.0305 759.4567
## 572 2016-04-11 736.10 741.4964 735.7642 759.5950
## 573 2016-04-12 743.09 745.8130 737.9528 759.7332
## 574 2016-04-13 751.72 751.1398 732.0523 759.8714
## 575 2016-04-14 753.20 752.6830 729.0013 760.0095
## 576 2016-04-15 759.00 759.4063 733.1340 760.1476
## 577 2016-04-18 766.61 758.4920 738.2772 760.2856
## 578 2016-04-19 753.93 769.9277 739.6377 760.4236
## 579 2016-04-20 752.67 786.5731 746.1743 760.5615
## 580 2016-04-21 759.14 792.4501 745.0739 760.6993
## 581 2016-04-22 718.77 788.0976 756.3278 760.8371
## 582 2016-04-25 723.15 771.9331 772.7907 760.9749
## 583 2016-04-26 708.14 763.7044 778.4858 761.1126
## 584 2016-04-27 705.84 764.7288 773.9525 761.2502
## 585 2016-04-28 691.02 788.9019 757.6062 761.3878
## 586 2016-04-29 693.01 783.1890 749.1947 761.5254
## 587 2016-05-02 698.21 784.2773 750.0360 761.6628
# Overlay the Holt-Winters (blue, black) and GARCH (orange) series, the
# held-out test closes (green) and the training closes (red) on one plot.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$GARCH_Forecast[1:587], col = "orange")
lines(Comparison$New_Forecast[1:587], col = "black")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the former `Train_Datat` evaluated to NULL,
# so the training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")