Closing period of trade long and short signal classification Case Santander using random forest

testit <- function() {
  message("testing package startup messages")
  packageStartupMessage("initializing ...", appendLF = FALSE)
  Sys.sleep(1)
  packageStartupMessage(" done")
}

suppressPackageStartupMessages(testit())
## testing package startup messages
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(ggplot2)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
setwd("~/git/")

DATAITX <- load("SAN_270412.RData")

SAN <- h05n

head(SAN)
##                      hora precio volumen broker.comprador broker.vendedor
## 1 2012-04-27 09:00:00.000   4.59      60             9838            8830
## 2 2012-04-27 09:00:00.000   4.59      33             9816            8830
## 3 2012-04-27 09:00:00.000   4.59     100             9838            8830
## 4 2012-04-27 09:00:00.000   4.59   13300             9838            8830
## 5 2012-04-27 09:00:00.000   4.59     200             9843            8830
## 6 2012-04-27 09:00:00.000   4.59     680             9838            8830

converting it into OHLCV format

list <- strsplit(SAN$hora, " ")

library("plyr")
SANhft <- ldply(list)

SANHFT<- cbind(SANhft[,1:2],SAN[,2:3])
colnames(SANHFT) <- c( "Date","Time","Price","Volume")



SANxts <- as.xts(SANHFT[, 3:4], order.by=as.POSIXct(SANHFT[, 2], format=' %H:%M:%S'))
Santander<-to.minutes(SANxts)
chartSeries(Santander) 

plot of chunk unnamed-chunk-3

data aggregation for HFF time series

X <- strptime(SAN$hora, format = "%Y-%m-%d %H:%M:%S.000")

time <- cut(X, "1 min")
timestamp <- table(time)
z <- as.matrix(timestamp)


z<-z[-c(312,313,314,315,506,511,512,513,514,515), ]
Morders <- as.matrix(z)

Dividing dataset for in sample and out of sample

FirstInsampleSet<-Santander[1:452,]
FirstInsampleSetVol<-Santander[34:452,5]
FirstInsampleSetCprice<-Santander[34:452,4]

Calculating indicators for the period of 9,30 to 4:35

reChart(subset = "first 6 hours")

plot of chunk unnamed-chunk-6

MordersM<-as.data.frame(Morders)

TA iNDICATORS

ATRindicatorIS <- ATR(FirstInsampleSet, n = 1)

ADXIndicatorIS <- ADX(FirstInsampleSet, n = 2)

AroonIndicatorIS <- aroon(FirstInsampleSet[, 2:3], n = 1)


SMIindicatorIS <- SMI(FirstInsampleSet[, 2:4], n = 2, nFast = 2, nSlow = 2,nSig = 2, maType = SMA,bounded = TRUE)

BBandIndicatorIS <- BBands(FirstInsampleSet[, 2:4])

MACDindicatorIS <- MACD(FirstInsampleSet[,4])

rsiIS <- RSI(FirstInsampleSet[,5], n = 1, maType = "WMA")


InSampleInput<-cbind(ATRindicatorIS,ADXIndicatorIS,AroonIndicatorIS,SMIindicatorIS, BBandIndicatorIS,MACDindicatorIS,rsiIS )


InSampleInputData<-data.frame(date = index(InSampleInput), InSampleInput, row.names=NULL)

InSampleInputDataM<-InSampleInputData[34:452,]

InSampleInputDataM<-InSampleInputDataM[,-c(1,3,5,8,12,13,15,16,17,19)]

adding volume and matching limit orders variable

FirstInsampleSetVolM<-data.frame(date = index(FirstInsampleSetVol),FirstInsampleSetVol, row.names=NULL)

lagdataFS<-cbind(FirstInsampleSetVolM,InSampleInputDataM,MordersM[34:452,])

lagdataFSM<-lagdataFS[,-c(1)]

colnames(lagdataFSM)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")




Vol<-lagdataFSM[,1]

Volume<-Lag(Vol)


TR<-lagdataFSM[,2]

TrueRange<-Lag(TR)



TH<-lagdataFSM[,3]

TrueHigh<-Lag(TH)


Pdir<-lagdataFSM[,4]

PdirIND<-Lag(Pdir)


Ndir<-lagdataFSM[,5]

NdirIND<-Lag(Ndir)


adx<-lagdataFSM[,6]

ADX<-Lag(adx)


aroonUP<-lagdataFSM[,7]

AroonUP<-Lag(aroonUP)


aroonDWN<-lagdataFSM[,8]

AroonDWN<-Lag(aroonDWN)


SMIsignal<-lagdataFSM[,9]

SMIsignalM<-Lag(SMIsignal)


pctB<-lagdataFSM[,10]

PctB<-Lag(pctB)


MACDSignal<-lagdataFSM[,11]
MACDSignalM<-Lag(MACDSignal)


rsi<-lagdataFSM[,12]
RSI<-Lag(rsi)

MlimitOrders<-lagdataFSM[,13]
MlimitOrdersM<-Lag(MlimitOrders)


InsamplelaggedData<-cbind(Volume,TrueRange,TrueHigh,PdirIND,NdirIND,ADX,AroonUP,AroonDWN,SMIsignalM,PctB,MACDSignalM,RSI,MlimitOrdersM)

colnames(InsamplelaggedData)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")


InsamplelaggedData<-as.data.frame(InsamplelaggedData)

FirstInsampleSetCpriceM<-as.matrix(FirstInsampleSetCprice)

lagInputDATAInSa<-cbind(FirstInsampleSetCpriceM,InsamplelaggedData)

colnames(lagInputDATAInSa)<-c("Cprice","volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")


head(lagInputDATAInSa)
##                     Cprice volume TrueRange TrueHigh  PDirInd   NDirInd
## 2015-01-09 09:33:59  4.652     NA        NA       NA       NA        NA
## 2015-01-09 09:34:58  4.653 163499     0.020    4.648 80.85587 7.1345550
## 2015-01-09 09:35:45  4.655  79453     0.005    4.650 69.36511 5.1279537
## 2015-01-09 09:36:57  4.660  41362     0.007    4.654 63.98047 2.8687805
## 2015-01-09 09:37:59  4.650 119249     0.004    4.655 50.92653 1.9080712
## 2015-01-09 09:38:57  4.635 102763     0.016    4.659 32.04703 0.5186282
##                          ADX Aroonup AroonDown SMIsignal      pctB
## 2015-01-09 09:33:59       NA      NA        NA        NA        NA
## 2015-01-09 09:34:58 68.43514     100         0  48.23377 0.9632944
## 2015-01-09 09:35:45 77.33377     100         0  69.23404 0.9159944
## 2015-01-09 09:36:57 84.37547     100         0  64.65340 0.8988301
## 2015-01-09 09:37:59 88.57633     100       100  56.26551 0.8528020
## 2015-01-09 09:38:57 92.69560       0         0  56.62393 0.8000342
##                     MACDSignal Rsi MlimitOrders
## 2015-01-09 09:33:59         NA  NA           NA
## 2015-01-09 09:34:58  0.2499475   0           98
## 2015-01-09 09:35:45  0.2627466   0           52
## 2015-01-09 09:36:57  0.2756341   0           29
## 2015-01-09 09:37:59  0.2889856 100           80
## 2015-01-09 09:38:57  0.2978238   0           71

time series cross valdiation

myTimeControl <- trainControl(method = "timeslice", initialWindow =380,horizon =38, fixedWindow = TRUE)

Insample training and testing with RF method

library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
rfModel <- train(Cprice~volume+TrueRange+TrueHigh+PDirInd+NDirInd+ADX+Aroonup+AroonDown+SMIsignal+pctB+MACDSignal+Rsi+MlimitOrders,data =lagInputDATAInSa,method = "rf",tunelength = 10, ntrees = 1000, importance = TRUE,trControl =myTimeControl)

varImp(rfModel)
## rf variable importance
## 
##               Overall
## TrueHigh     100.0000
## MACDSignal    14.1180
## pctB           9.6065
## NDirInd        4.2294
## SMIsignal      3.1624
## MlimitOrders   2.5948
## ADX            2.2530
## PDirInd        2.0639
## TrueRange      1.3648
## AroonDown      0.9289
## Aroonup        0.5666
## volume         0.4626
## Rsi            0.0000
plot(varImp(rfModel))

plot of chunk unnamed-chunk-10

plot(rfModel)

plot of chunk unnamed-chunk-10

preperation of out of sample data from 4:30 to 5:30

FirstOutsampleSet<-Santander[453:507,]
FirstOutsampleSetVol<-Santander[486:507,5]
FirstOutsampleSetCprice<-Santander[486:507,4]

FirstOutsampleSetCpriceM<-as.matrix(FirstOutsampleSetCprice)

Calculation of technical Indicators

ATRindicatorOS <- ATR(FirstOutsampleSet, n = 1)

ADXIndicatorOS <- ADX(FirstOutsampleSet, n = 2)

AroonIndicatorOS <- aroon(FirstOutsampleSet[, 2:3], n = 1)


SMIindicatorOS <- SMI(FirstOutsampleSet[, 2:4], n = 2, nFast = 2, nSlow = 2,nSig = 2, maType = SMA,bounded = TRUE)

BBandIndicatorOS <- BBands(FirstOutsampleSet[, 2:4])

MACDindicatorOS <- MACD(FirstOutsampleSet[,4])

rsiOS <- RSI(FirstOutsampleSet[,5], n = 1, maType = "WMA")


OutSampleInput<-cbind(ATRindicatorOS,ADXIndicatorOS,AroonIndicatorOS,SMIindicatorOS, BBandIndicatorOS,MACDindicatorOS,rsiOS )


OutSampleInputData<-data.frame(date = index(OutSampleInput), OutSampleInput, row.names=NULL)

OutSampleInputDataM<-OutSampleInputData[34:55,]

OutSampleInputDataM<-OutSampleInputDataM[,-c(1,3,5,8,12,13,15,16,17,19)]

adding volume and matching limit orders variable

FirstOutsampleSetVolM<-data.frame(date = index(FirstOutsampleSetVol),FirstOutsampleSetVol, row.names=NULL)

lagdataOFS<-cbind(FirstOutsampleSetVolM,OutSampleInputDataM,MordersM[486:507,])

lagdataOFS<-lagdataOFS[,-c(1)]

colnames(lagdataOFS)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")

Vol1<-lagdataOFS[,1]

Volume1<-Lag(Vol1)


TR1<-lagdataOFS[,2]

TrueRange1<-Lag(TR1)



TH1<-lagdataOFS[,3]

TrueHigh1<-Lag(TH1)


Pdir1<-lagdataOFS[,4]

PdirIND1<-Lag(Pdir1)


Ndir1<-lagdataOFS[,5]

NdirIND1<-Lag(Ndir1)


adx1<-lagdataOFS[,6]

ADX1<-Lag(adx1)


aroonUP1<-lagdataOFS[,7]

AroonUP1<-Lag(aroonUP1)


aroonDWN1<-lagdataOFS[,8]

AroonDWN1<-Lag(aroonDWN1)


SMIsignal1<-lagdataOFS[,9]

SMIsignalM1<-Lag(SMIsignal1)


pctB1<-lagdataOFS[,10]

PctB1<-Lag(pctB1)


MACDSignal1<-lagdataOFS[,11]
MACDSignalM1<-Lag(MACDSignal1)


rsi1<-lagdataOFS[,12]
RSI1<-Lag(rsi1)

MlimitOrders1<-lagdataOFS[,13]
MlimitOrdersM1<-Lag(MlimitOrders1)


OutsamplelaggedData<-cbind(Volume1,TrueRange1,TrueHigh1,PdirIND1,NdirIND1,ADX1,AroonUP1,AroonDWN1,SMIsignalM1,PctB1,MACDSignalM1,RSI1,MlimitOrdersM1)

colnames(OutsamplelaggedData)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")


OutsamplelaggedData<-as.data.frame(OutsamplelaggedData)


lagInputDATAInOut<-cbind(FirstOutsampleSetCpriceM,OutsamplelaggedData)

colnames(lagInputDATAInOut)<-c("Cprice","volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")


head(lagInputDATAInOut)
##                     Cprice volume TrueRange TrueHigh  PDirInd   NDirInd
## 2015-01-09 17:09:59  4.803     NA        NA       NA       NA        NA
## 2015-01-09 17:10:59  4.805  17483     0.001    4.801 18.13819  2.592819
## 2015-01-09 17:11:53  4.801  91247     0.005    4.805 51.91585  1.177094
## 2015-01-09 17:12:58  4.804  82720     0.002    4.805 36.13261 31.220821
## 2015-01-09 17:13:59  4.806  47689     0.002    4.803 22.47008 19.415551
## 2015-01-09 17:14:58  4.803 101686     0.005    4.806 47.01660  6.716770
##                          ADX Aroonup AroonDown SMIsignal      pctB
## 2015-01-09 17:09:59       NA      NA        NA        NA        NA
## 2015-01-09 17:10:59 72.39325     100       100  78.93278 0.4663065
## 2015-01-09 17:11:53 83.97958     100         0  64.18919 0.5529743
## 2015-01-09 17:12:58 45.63607       0       100  27.00000 0.3952218
## 2015-01-09 17:13:59 26.46431     100       100 -13.21739 0.4624164
## 2015-01-09 17:14:58 50.73197     100         0 -19.38406 0.6912891
##                     MACDSignal Rsi MlimitOrders
## 2015-01-09 17:09:59         NA  NA           NA
## 2015-01-09 17:10:59 0.10572681   0           31
## 2015-01-09 17:11:53 0.09889191 100           50
## 2015-01-09 17:12:58 0.09200165   0           58
## 2015-01-09 17:13:59 0.08622499   0           45
## 2015-01-09 17:14:58 0.08191694 100          108

Out of sample set forvalidation

set.seed(100)
rfPredict<-predict(rfModel ,lagInputDATAInOut)

 outPrice<-lagInputDATAInOut[2:22,1]


cor(outPrice,rfPredict)
## [1] 0.9309703
library(forecast)
## Loading required package: timeDate
## This is forecast 5.7
accuracy(rfPredict,outPrice,d=NULL)
##                   ME        RMSE         MAE         MPE       MAPE
## Test set -0.00136924 0.004902901 0.003708243 -0.02837482 0.07689761

Buy Hold Sell signal generator

#Trade logic - Look for trend 
#If one step ahead logarithmic return is > 0 Buy 
#If one step ahead logarithmic return is < 0 Sell
#If one step ahead logarithmic return is = 0 Hold 


signal <- Delt(rfPredict,k=1,type="log")

MarketReturn <- Delt(outPrice,k=1,type="log")


tradingfunc<-function(x){ 

                 ifelse(x>0,"Long",ifelse(x<0,"Short",NA))

                    }

AlgoTRADE <- apply(signal,1,tradingfunc)

MarketTRADE <- apply(MarketReturn,1,tradingfunc)

Table<-table(AlgoTRADE,MarketTRADE)



confusionMatrix(AlgoTRADE,MarketTRADE)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction Long Short
##      Long    11     3
##      Short    2     2
##                                           
##                Accuracy : 0.7222          
##                  95% CI : (0.4652, 0.9031)
##     No Information Rate : 0.7222          
##     P-Value [Acc > NIR] : 0.6175          
##                                           
##                   Kappa : 0.2623          
##  Mcnemar's Test P-Value : 1.0000          
##                                           
##             Sensitivity : 0.8462          
##             Specificity : 0.4000          
##          Pos Pred Value : 0.7857          
##          Neg Pred Value : 0.5000          
##              Prevalence : 0.7222          
##          Detection Rate : 0.6111          
##    Detection Prevalence : 0.7778          
##       Balanced Accuracy : 0.6231          
##                                           
##        'Positive' Class : Long            
##