library(quantmod)
## Warning: package 'quantmod' was built under R version 3.6.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.6.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.6.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(e1071)
## Warning: package 'e1071' was built under R version 3.6.3
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.6.3
library(ROCR)
## Warning: package 'ROCR' was built under R version 3.6.3
library(dygraphs)
## Warning: package 'dygraphs' was built under R version 3.6.3
# http://www.cse.scu.edu/~mwang2/projects/Predict_stockMarket_16w.pdf
#https://rpubs.com/enghanwenalvin/299141
#https://www.aclweb.org/anthology/W19-6403.pdf
# Silence warnings for the rest of the session (kept from the original script).
# NOTE(review): this also hides genuine data problems; consider removing it.
options(warn = -1)

# Download two years of daily prices for SYM, derive technical indicators,
# fit a radial-kernel SVM that labels each day "UP" (close > open) or "DOWN",
# then print the confusion matrix, accuracy, AUC and RMSE and draw a ROC curve.
# The expression is evaluated in the calling environment, so STOCK, DataSet,
# TrainingSet, TestSet, SVM and predds remain available to later code.
# Any error is caught and printed instead of stopping the script.
tryCatch({
  SYM <- 'FB'
  separator <- '--------------------------------------------------------------------\n-----'
  print(separator)
  print(paste('Predicting the output for', SYM, sep = ' '))

  # Share of the rows used for training; the remainder is the test set.
  trainPerc <- 0.75

  # Request the last two years of data, ending tomorrow.
  date <- as.Date(Sys.Date())
  endDate <- date + 1
  d <- as.POSIXlt(endDate)
  d$year <- d$year - 2
  startDate <- as.Date(d)

  STOCK <- getSymbols(
    SYM,
    env = NULL,
    src = "yahoo",
    from = startDate,
    to = endDate
  )
  # Carry the previous observation forward over missing values (for example an
  # incomplete most-recent row). This replaces a hard-coded
  # `STOCK[502, ] <- STOCK[501, ]`, which only worked for exactly 502 rows.
  # na.rm = FALSE keeps the number of rows unchanged.
  STOCK <- na.locf(STOCK, na.rm = FALSE)

  # 3-period relative strength index off the open price.
  RSI <- RSI(Op(STOCK), n = 3)
  # 5-period exponential moving average and the open price's distance from it.
  EMA5 <- EMA(Op(STOCK), n = 5)
  EMAcross <- Op(STOCK) - EMA5
  # MACD with standard parameters; keep only the signal line (column 2).
  # The TTR argument names are nFast/nSlow/nSig.
  MACD <- MACD(Op(STOCK),
               nFast = 12,
               nSlow = 26,
               nSig = 9)
  MACD <- MACD[, 2]
  # Stochastic momentum index; keep only the oscillator (column 1).
  SMI <- SMI(
    Op(STOCK),
    n = 13,
    nFast = 2,
    nSlow = 25,
    nSig = 9
  )
  SMI <- SMI[, 1]
  # Williams %R computed from the close series only.
  WPR <- WPR(Cl(STOCK), n = 14)
  WPR <- WPR[, 1]
  # Average Directional Index. ADX() reads its input positionally as
  # High/Low/Close, so pass HLC(STOCK) rather than the full OHLCV object, and
  # select the "ADX" column (column 1 is the positive directional indicator).
  ADX <- ADX(HLC(STOCK), n = 14)
  ADX <- ADX[, "ADX"]
  # Commodity Channel Index computed from the close series only.
  CCI <- CCI(Cl(STOCK), n = 14)
  CCI <- CCI[, 1]
  # Chande Momentum Oscillator.
  CMO <- CMO(Cl(STOCK), n = 14)
  CMO <- CMO[, 1]
  # 2-period price rate of change.
  ROC <- ROC(Cl(STOCK), n = 2)
  ROC <- ROC[, 1]

  # Target: did the day close above its open?
  # NOTE(review): WPR, CCI, CMO and ROC are computed from the same day's close
  # that defines Class, so the features carry information about the label and
  # the reported accuracy is likely optimistic. Consider lagging the features.
  PriceChange <- Cl(STOCK) - Op(STOCK)
  Class <- ifelse(PriceChange > 0, 'UP', 'DOWN')

  DataSet <-
    data.frame(Class, RSI, EMAcross, MACD, SMI, WPR, ADX, CCI, CMO, ROC)
  colnames(DataSet) <-
    c("Class",
      "RSI",
      "EMAcross",
      "MACD",
      "SMI",
      "WPR",
      "ADX",
      "CCI",
      "CMO",
      "ROC")
  # Make the response an explicit factor with fixed level order so the model
  # and the 2x2 confusion matrix do not depend on data.frame()'s
  # stringsAsFactors default (which changed in R 4.0).
  DataSet$Class <- factor(DataSet$Class, levels = c("DOWN", "UP"))

  # Chronological split: first trainPerc of the rows train, the rest test.
  nTrain <- floor(nrow(DataSet) * trainPerc)
  isTrain <- seq_len(nrow(DataSet)) <= nTrain
  TrainingSet <- DataSet[isTrain, ]
  TestSet <- DataSet[!isTrain, ]

  # Rows with NA indicators (warm-up period) are dropped by na.omit.
  SVM <-
    svm(
      Class ~ RSI + EMAcross + WPR + ADX + CMO + CCI + ROC,
      data = TrainingSet,
      kernel = "radial",
      type = "C-classification",
      na.action = na.omit,
      cost = 1,
      gamma = 1 / 5
    )
  print(SVM)

  # Predict once and reuse the result for every metric below.
  testPred <- predict(SVM, TestSet)
  confmat <-
    table(testPred,
          TestSet$Class,
          dnn = list('predicted', 'actual'))
  print(confmat)

  # Accuracy in percent: correctly classified (diagonal) over all test rows.
  acc <- sum(diag(confmat)) * 100 / sum(confmat)
  xy <- paste('SVM : Considering the output for', SYM, sep = ' ')
  yz <-
    paste('Accuracy =',
          acc,
          sep = ' ')
  print(xy)
  print(yz)

  # Encode predictions and truth as 1 (UP) / 0 (DOWN) for ROCR.
  predds <- data.frame(testPred, TestSet$Class)
  colnames(predds) <- c("pred", "truth")
  predds[, 1] <- ifelse(predds[, 1] == 'UP', 1, 0)
  predds[, 2] <- ifelse(predds[, 2] == 'UP', 1, 0)
  pred <- prediction(predds$pred, predds$truth)
  perf <- performance(pred, measure = "tpr", x.measure = "fpr")
  auc.perf <- performance(pred, measure = 'auc')
  rmse.perf <- performance(pred, measure = 'rmse')
  RMSE <- paste('RMSE =', rmse.perf@y.values, sep = ' ')
  AUC <- paste('AUC =', auc.perf@y.values, sep = ' ')
  print(AUC)
  print(RMSE)

  # ROC curve with the chance diagonal for reference.
  plot(perf, col = 1:10)
  abline(a = 0, b = 1, col = "red")
  print(separator)
}, error = function(e) {
  print(e)
})
## [1] "--------------------------------------------------------------------\n-----"
## [1] "Predicting\tthe\toutput\tfor\tFB"
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## Call:
## svm(formula = Class ~ RSI + EMAcross + WPR + ADX + CMO + CCI + ROC,
## data = TrainingSet, kernel = "radial", type = "C-classification",
## cost = 1, gamma = 1/5, na.action = na.omit)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 217
##
## actual
## predicted DOWN UP
## DOWN 43 15
## UP 12 56
## [1] "SVM\t:\tConsidering\tthe\toutput\tfor\tFB"
## [1] "Accuracy\t=\t78.5714285714286"
## [1] "AUC\t=\t0.785275288092189"
## [1] "RMSE\t=\t0.462910049886276"

## [1] "--------------------------------------------------------------------\n-----"
# Plot the test-period prices and highlight the days on which the model
# predicted DOWN (pred == 0) although the stock actually closed UP (truth == 1).
# Relies on predds, TestSet and STOCK created by the code above.

# Take the price column names from the data rather than hard-coding the
# ticker, so the plot keeps working when the symbol changes.
open_col <- colnames(Op(STOCK))
close_col <- colnames(Cl(STOCK))

# TestSet holds the last nrow(TestSet) rows of the data, so select the matching
# rows of STOCK by position from the end instead of a fixed 377:502 range.
test_rows <- seq(to = nrow(STOCK), length.out = nrow(TestSet))

dataplot <- cbind(predds, TestSet)
dataplot <- cbind(dataplot, as.data.frame(STOCK[test_rows]))
# Closing price on wrongly-predicted-DOWN days, NA elsewhere. NA_real_ keeps
# the column numeric even when no day qualifies.
dataplot$wrongdown <- ifelse(dataplot$truth == 1 & dataplot$pred == 0,
                             dataplot[[close_col]],
                             NA_real_)
# Convert only numeric columns: an xts object is matrix-backed, so including
# the non-numeric Class column would coerce every price to text.
numeric_cols <- vapply(dataplot, is.numeric, logical(1))
dataplot <- as.xts(dataplot[, numeric_cols, drop = FALSE])
# NOTE(review): dateWindow is not referenced by any call visible here.
dateWindow <- c("2010-01-29", "2012-11-10")
dygraph(dataplot[, c(open_col, close_col, "wrongdown")]) %>%
  dySeries("wrongdown", strokeWidth = 4, pointSize = 4, color = 'blue', label = "wrong") %>%
  dyRangeSelector()
wrongdown <- dataplot[dataplot$truth == 1 & dataplot$pred == 0, ]
dygraph(dataplot[, open_col])