ContextBase Logo

Synopsis

The objective of this document is to apply training and testing datasets (a Machine Learning technique) to stock trading analysis in the R programming language. This document uses the “quantmod” and “PerformanceAnalytics” R packages for backtesting automated trading strategies. Holt-Winters filtering and GARCH models are used for price prediction.


Working Directory, and Required Packages

# Set Working Directory
# The project folder is specific to one machine, so only switch to it when it
# actually exists; an unconditional setwd() aborts the script everywhere else.
project_dir <- "C:/Users/johnakwei/Dropbox/Programming/StockTrading/YahooFinanceGainersLosers"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Required Packages
# if (!require("quantmod")) { install.packages("quantmod"); require("quantmod") }
# if (!require("PerformanceAnalytics")) { install.packages("PerformanceAnalytics"); require("PerformanceAnalytics") }
# if (!require("rgl")) { install.packages("rgl"); require("rgl") }
# if (!require("rugarch")) { install.packages("rugarch"); require("rugarch") }

library(ggplot2)
library(xts)
library(zoo)
library(tseries)
library(forecast)
library(quantmod)
library(PerformanceAnalytics)
library(caret)
library(rugarch)


Downloading Stock Ticker Data from Yahoo Finance

# Import Original data
# Download daily GOOG prices for 2014-2016 and return them as an object
# instead of assigning them into the workspace.
ticker_lookup <- getSymbols("GOOG", auto.assign = FALSE,
                            from = "2014-01-01", to = "2017-01-01")

# write.csv(ticker_lookup, "ticker_lookup.csv")

# Keep the six price/volume columns; the date index becomes the row names.
ticker_lookup2 <- as.data.frame(ticker_lookup[, 1:6])

# Move the dates out of the row names into an explicit "date" column.
TCKR <- as.data.frame(cbind(date = rownames(ticker_lookup2), ticker_lookup2))

# Column names must not carry trailing spaces: with "close " the expression
# TCKR$close only resolves through partial matching and TCKR[["close"]] fails.
colnames(TCKR) <- c("date", "open", "high", "low",
                    "close", "volume", "adjusted")
rownames(TCKR) <- NULL

rm(ticker_lookup, ticker_lookup2)


Create Training and Testing data sets

# Create Training and Test Data
# The first 567 rows are the training set; every remaining row is the test
# set. The upper bound comes from the data instead of a literal row number so
# a shorter download cannot pad the test set with all-NA rows and a longer one
# is not silently cut off.
train_rows <- 567
stopifnot(nrow(TCKR) > train_rows)

TCKRTrain <- TCKR[seq_len(train_rows), ]
TCKRTest <- TCKR[seq(train_rows + 1, nrow(TCKR)), ]


Create Time Series data

# Time Series Processing
# Daily closes as ts objects with 250 observations per year. `start` is
# (year, position within the year). No `end` is supplied: ts() sizes the
# series from start/end, so a hand-written `end` truncates the data when the
# window is too short and recycles it when the window is too long, instead of
# covering exactly the rows that were passed in.
TCKRTrainTS <- ts(TCKRTrain$close, start = c(2014, 1), frequency = 250)

# The test series begins one observation after the training series ends, so
# forecasts made from the training series line up with the held-out values.
TCKRTestTS <- ts(TCKRTest$close, start = end(TCKRTrainTS) + c(0, 1),
                 frequency = 250)


Forecasting Model #1 - Holt Winters Filtering

# Holt-Winters exponential smoothing fitted to the training series
fit_ES <- HoltWinters(x = TCKRTrainTS)

# Project the fitted model 20 observations ahead
TCKRForecast <- forecast(object = fit_ES, h = 20)

# Error measures of the forecast, scored against the test series
accuracy(TCKRForecast, TCKRTestTS)
##                       ME     RMSE       MAE        MPE     MAPE       MASE
## Training set   0.7567337 12.36518  6.315612  0.1584173 1.090240 0.01061984
## Test set     -33.0843421 49.40416 35.392489 -4.6871517 4.991725 0.16992610
##                      ACF1 Theil's U
## Training set -0.005877338        NA
## Test set      0.848029852  4.549523
# Realized Data Plot
# The date column holds text (taken from row names), which plot() cannot place
# on a numeric axis; convert it to Date so the closes are drawn against time.
plot(as.Date(TCKR$date), TCKR$close, col = "orange", xlab = "Date",
     ylab = "Prices", main = "Plot of Ticker Data")

# Exponential Smoothing Plot
plot(TCKRForecast, xlab = "Time", ylab = "Prices",
     main = "Holt-Winters Forecast - 20 Days")

# Sizes are taken from the objects themselves rather than repeated literals.
n_train <- nrow(TCKRTrain)
n_ahead <- length(TCKRForecast$mean)

# Training closes followed by the forecast values
TCKRFrcstSeries <- c(TCKRTrain$close, as.numeric(TCKRForecast$mean))

# Training closes, padded with NA over the forecast horizon
TCKRMerge <- c(TCKRTrain$close, rep(NA, n_ahead))

# Dates of the training rows followed by the dates of the first test rows
dateColumn <- c(as.Date(TCKRTrain$date),
                as.Date(TCKRTest$date[seq_len(n_ahead)]))

# NA over the training span, then the held-out closes for the horizon
TCKRMergeTest <- c(rep(NA, n_train),
                   as.numeric(TCKRTest$close[seq_len(n_ahead)]))

# Two-column table (training data, forecast series) used by the optional
# write.csv() export
allData <- data.frame(TCKRMerge, TCKRFrcstSeries)

# Side-by-side table of training data, test data and the forecast
Comparison <- data.frame(dateColumn, TCKRMerge,
                         TCKRMergeTest, TCKRFrcstSeries)
names(Comparison) <- c("Date", "Train_Data", "Test_Data", "HW_Forecast")

tail(Comparison, 30)
##           Date Train_Data Test_Data HW_Forecast
## 558 2016-03-21     742.09        NA    742.0900
## 559 2016-03-22     740.75        NA    740.7500
## 560 2016-03-23     738.06        NA    738.0600
## 561 2016-03-24     735.30        NA    735.3000
## 562 2016-03-28     733.53        NA    733.5300
## 563 2016-03-29     744.77        NA    744.7700
## 564 2016-03-30     750.53        NA    750.5300
## 565 2016-03-31     744.95        NA    744.9500
## 566 2016-04-01     749.91        NA    749.9100
## 567 2016-04-04     745.29        NA    745.2900
## 568 2016-04-05         NA    737.80    747.7955
## 569 2016-04-06         NA    745.69    747.7115
## 570 2016-04-07         NA    740.28    750.0822
## 571 2016-04-08         NA    739.15    744.3641
## 572 2016-04-11         NA    736.10    741.4964
## 573 2016-04-12         NA    743.09    745.8130
## 574 2016-04-13         NA    751.72    751.1398
## 575 2016-04-14         NA    753.20    752.6830
## 576 2016-04-15         NA    759.00    759.4063
## 577 2016-04-18         NA    766.61    758.4920
## 578 2016-04-19         NA    753.93    769.9277
## 579 2016-04-20         NA    752.67    786.5731
## 580 2016-04-21         NA    759.14    792.4501
## 581 2016-04-22         NA    718.77    788.0976
## 582 2016-04-25         NA    723.15    771.9331
## 583 2016-04-26         NA    708.14    763.7044
## 584 2016-04-27         NA    705.84    764.7288
## 585 2016-04-28         NA    691.02    788.9019
## 586 2016-04-29         NA    693.01    783.1890
## 587 2016-05-02         NA    698.21    784.2773
# Overlay the forecast series (blue), the held-out test prices (green) and the
# training prices (red) on one chart.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the misspelt name returned NULL, so the
# training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")

# write.csv(allData, "allTickerData.csv")

# write.csv(Comparison, "TickerDataComparison.csv")


Forecasting Model #2 - Holt Winters Exponential Smoothing

# Training closes as xts and zoo series indexed by date
tickerData.xts <- xts(
  x = as.numeric(Cl(TCKRTrain)),
  order.by = as.Date(TCKRTrain$date)
)
tickerData.z <- zoo(
  x = Cl(TCKRTrain),
  order.by = as.Date(TCKRTrain$date)
)

# Price series the indicators are computed on
prices <- tickerData.xts[, 1]

# Simple moving average with a window of one observation
sma <- SMA(tickerData.xts, n = 1)

# Indicators used by the trading rule
CCI20 <- CCI(prices, 20)
RSI3 <- RSI(prices, 3)
DEMA10 <- DEMA(prices, n = 10, v = 1, wilder = FALSE)
# Distance of the price from its DEMA, divided by .0001
DEMA10c <- (prices - DEMA10) / .0001

# 1 where every indicator condition holds, NA otherwise
buy.signal <- ifelse(
  RSI3 < 30 &
    CCI20 > -290 & CCI20 < -100 &
    DEMA10c > -40 & DEMA10c < 750,
  1,
  NA
)

# Trading rule: +1 / -1 position, lagged by one observation
sig <- Lag(ifelse(sma$SMA < buy.signal, 1, -1))

# Rate of change of the series multiplied by the position
retSMA <- ROC(tickerData.xts) * sig

chartSeries(
  tickerData.xts,
  theme = chartTheme("white"),
  TA = c(addVo(), addBBands(), addMACD())
)

# Training closes as a ts with 250 observations per year. start/end are
# (year, position within the year); a third element is not a day and is not
# used, so it is dropped here. The window itself is unchanged.
# NOTE(review): this window ends at the first observation of 2016, which is
# shorter than the 567 training rows the forecast is appended to below -
# confirm the intended training window.
TCKRTS <- ts(TCKRTrain$close, start = c(2014, 1),
             end = c(2016, 1), frequency = 250)

# Seasonal decomposition, stored under its own name so the Holt-Winters fit
# below does not overwrite it.
fit_stl <- stl(TCKRTS, s.window = "periodic")
# The downloaded ticker is GOOG, so the titles say GOOG. The TWTR-prefixed
# variable names are kept unchanged for compatibility.
monthplot(TCKRTS, col = "green",
          main = "Month Plot of GOOG, Seasonal Adjustment")

# triple exponential - models level, trend, and seasonal components
fit <- HoltWinters(TCKRTS)

# predict the next 20 values
TWTRForecast <- forecast(fit, h = 20)
plot(TWTRForecast,
     main = "GOOG Price Forecast (20 Days) from Holt-Winters Filtering")

# New Forecast Data: training closes followed by the 20 forecast values
TWTRFrcstSeries <- c(TCKRTrain$close[1:567], TWTRForecast$mean[1:20])

Comparison$NewForecast <- TWTRFrcstSeries
names(Comparison) <- c("Date", "Train_Data", "Test_Data",
                       "HW_Forecast", "New_Forecast")

tail(Comparison, 30)
##           Date Train_Data Test_Data HW_Forecast New_Forecast
## 558 2016-03-21     742.09        NA    742.0900     742.0900
## 559 2016-03-22     740.75        NA    740.7500     740.7500
## 560 2016-03-23     738.06        NA    738.0600     738.0600
## 561 2016-03-24     735.30        NA    735.3000     735.3000
## 562 2016-03-28     733.53        NA    733.5300     733.5300
## 563 2016-03-29     744.77        NA    744.7700     744.7700
## 564 2016-03-30     750.53        NA    750.5300     750.5300
## 565 2016-03-31     744.95        NA    744.9500     744.9500
## 566 2016-04-01     749.91        NA    749.9100     749.9100
## 567 2016-04-04     745.29        NA    745.2900     745.2900
## 568 2016-04-05         NA    737.80    747.7955     758.6653
## 569 2016-04-06         NA    745.69    747.7115     757.7018
## 570 2016-04-07         NA    740.28    750.0822     747.3589
## 571 2016-04-08         NA    739.15    744.3641     736.0305
## 572 2016-04-11         NA    736.10    741.4964     735.7642
## 573 2016-04-12         NA    743.09    745.8130     737.9528
## 574 2016-04-13         NA    751.72    751.1398     732.0523
## 575 2016-04-14         NA    753.20    752.6830     729.0013
## 576 2016-04-15         NA    759.00    759.4063     733.1340
## 577 2016-04-18         NA    766.61    758.4920     738.2772
## 578 2016-04-19         NA    753.93    769.9277     739.6377
## 579 2016-04-20         NA    752.67    786.5731     746.1743
## 580 2016-04-21         NA    759.14    792.4501     745.0739
## 581 2016-04-22         NA    718.77    788.0976     756.3278
## 582 2016-04-25         NA    723.15    771.9331     772.7907
## 583 2016-04-26         NA    708.14    763.7044     778.4858
## 584 2016-04-27         NA    705.84    764.7288     773.9525
## 585 2016-04-28         NA    691.02    788.9019     757.6062
## 586 2016-04-29         NA    693.01    783.1890     749.1947
## 587 2016-05-02         NA    698.21    784.2773     750.0360
# Overlay both Holt-Winters forecasts (blue, black), the held-out test prices
# (green) and the training prices (red) on one chart.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$New_Forecast[1:587], col = "black")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the misspelt name returned NULL, so the
# training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")


Forecasting Model #3 - GARCH (Generalized Autoregressive Conditional Heteroskedasticity) Forecasting

# Specification built with all default settings
spec <- ugarchspec()

# Number of rows in the full grid of the option values listed here
nrow(
  expand.grid(
    GARCH = 1:14, VEX = 0:1, VT = 0:1, Mean = 0:1,
    ARCHM = 0:2, ARFIMA = 0:1, MEX = 0:1, DISTR = 1:10
  )
)
## [1] 13440
# eGARCH(1, 1) variance model with the "std" distribution
spec <- ugarchspec(
  variance.model = list(model = "eGARCH", garchOrder = c(1, 1)),
  distribution.model = "std"
)

# Fit the specification to the training series using the hybrid solver
all.fitted.model <- ugarchfit(spec, TCKRTrainTS, solver = "hybrid")

# One-column table of the fitted coefficients
coefBMtable <- data.frame(coef(all.fitted.model))
names(coefBMtable) <- "GARCH Parameters"
coefBMtable
##        GARCH Parameters
## mu         1.113141e+03
## ar1        9.996090e-01
## ma1       -1.872695e-03
## omega      5.864394e-02
## alpha1     3.870138e-02
## beta1      9.871125e-01
## gamma1     3.092318e-02
## shape      3.189047e+00
# Draw plots 8 and 9 of the fitted model side by side, then return to a
# single-panel layout
par(mfrow = c(1, 2))
plot(all.fitted.model, which = 8)
plot(all.fitted.model, which = 9)
par(mfrow = c(1, 1))

# Forecast 20 steps ahead from the fitted model
forc1 <- ugarchforecast(all.fitted.model, n.ahead = 20)

plot(forc1, which = 3)

# GARCH Forecast Data: training closes followed by the 20 forecast values
GARCHFrcstSeries <- c(
  TCKRTrain$close[1:567],
  forc1@forecast$seriesFor[1:20]
)

Comparison$GARCHForecast <- GARCHFrcstSeries
names(Comparison) <- c(
  "Date", "Train_Data", "Test_Data",
  "HW_Forecast", "New_Forecast", "GARCH_Forecast"
)

# Last 30 rows, leaving out the Train_Data column
tail(Comparison[c(1, 3:6)], 30)
##           Date Test_Data HW_Forecast New_Forecast GARCH_Forecast
## 558 2016-03-21        NA    742.0900     742.0900       742.0900
## 559 2016-03-22        NA    740.7500     740.7500       740.7500
## 560 2016-03-23        NA    738.0600     738.0600       738.0600
## 561 2016-03-24        NA    735.3000     735.3000       735.3000
## 562 2016-03-28        NA    733.5300     733.5300       733.5300
## 563 2016-03-29        NA    744.7700     744.7700       744.7700
## 564 2016-03-30        NA    750.5300     750.5300       750.5300
## 565 2016-03-31        NA    744.9500     744.9500       744.9500
## 566 2016-04-01        NA    749.9100     749.9100       749.9100
## 567 2016-04-04        NA    745.2900     745.2900       745.2900
## 568 2016-04-05    737.80    747.7955     758.6653       759.0415
## 569 2016-04-06    745.69    747.7115     757.7018       759.1799
## 570 2016-04-07    740.28    750.0822     747.3589       759.3183
## 571 2016-04-08    739.15    744.3641     736.0305       759.4567
## 572 2016-04-11    736.10    741.4964     735.7642       759.5950
## 573 2016-04-12    743.09    745.8130     737.9528       759.7332
## 574 2016-04-13    751.72    751.1398     732.0523       759.8714
## 575 2016-04-14    753.20    752.6830     729.0013       760.0095
## 576 2016-04-15    759.00    759.4063     733.1340       760.1476
## 577 2016-04-18    766.61    758.4920     738.2772       760.2856
## 578 2016-04-19    753.93    769.9277     739.6377       760.4236
## 579 2016-04-20    752.67    786.5731     746.1743       760.5615
## 580 2016-04-21    759.14    792.4501     745.0739       760.6993
## 581 2016-04-22    718.77    788.0976     756.3278       760.8371
## 582 2016-04-25    723.15    771.9331     772.7907       760.9749
## 583 2016-04-26    708.14    763.7044     778.4858       761.1126
## 584 2016-04-27    705.84    764.7288     773.9525       761.2502
## 585 2016-04-28    691.02    788.9019     757.6062       761.3878
## 586 2016-04-29    693.01    783.1890     749.1947       761.5254
## 587 2016-05-02    698.21    784.2773     750.0360       761.6628
# Overlay all three forecasts (blue, black, orange), the held-out test prices
# (green) and the training prices (red) on one chart.
plot(Comparison$HW_Forecast[1:587], type = "l", col = "blue",
     main = "Forecast Comparison Plot", xlab = "Time", ylab = "Values")
lines(Comparison$GARCH_Forecast[1:587], col = "orange")
lines(Comparison$New_Forecast[1:587], col = "black")
lines(Comparison$Test_Data[1:587], col = "green")
# The column is named Train_Data; the misspelt name returned NULL, so the
# training line was never drawn.
lines(Comparison$Train_Data[1:587], col = "red")