testit <- function() {
message("testing package startup messages")
packageStartupMessage("initializing ...", appendLF = FALSE)
Sys.sleep(1)
packageStartupMessage(" done")
}
suppressPackageStartupMessages(testit())
## testing package startup messages
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(ggplot2)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
setwd("~/git/")
DATAITX <- load("SAN_270412.RData")
SAN <- h05n
head(SAN)
## hora precio volumen broker.comprador broker.vendedor
## 1 2012-04-27 09:00:00.000 4.59 60 9838 8830
## 2 2012-04-27 09:00:00.000 4.59 33 9816 8830
## 3 2012-04-27 09:00:00.000 4.59 100 9838 8830
## 4 2012-04-27 09:00:00.000 4.59 13300 9838 8830
## 5 2012-04-27 09:00:00.000 4.59 200 9843 8830
## 6 2012-04-27 09:00:00.000 4.59 680 9838 8830
converting it into OHLCV format
list <- strsplit(SAN$hora, " ")
library("plyr")
SANhft <- ldply(list)
SANHFT<- cbind(SANhft[,1:2],SAN[,2:3])
colnames(SANHFT) <- c( "Date","Time","Price","Volume")
SANxts <- as.xts(SANHFT[, 3:4], order.by=as.POSIXct(SANHFT[, 2], format=' %H:%M:%S'))
Santander<-to.minutes(SANxts)
chartSeries(Santander)
data aggregation for HFF time series
X <- strptime(SAN$hora, format = "%Y-%m-%d %H:%M:%S.000")
time <- cut(X, "1 min")
timestamp <- table(time)
z <- as.matrix(timestamp)
z<-z[-c(312,313,314,315,506,511,512,513,514,515), ]
Morders <- as.matrix(z)
Dividing dataset for in sample and out of sample
FirstInsampleSet<-Santander[1:452,]
FirstInsampleSetVol<-Santander[34:452,5]
FirstInsampleSetCprice<-Santander[34:452,4]
Calculating indicators for the period of 9,30 to 4:35
reChart(subset = "first 6 hours")
MordersM<-as.data.frame(Morders)
TA iNDICATORS
ATRindicatorIS <- ATR(FirstInsampleSet, n = 1)
ADXIndicatorIS <- ADX(FirstInsampleSet, n = 2)
AroonIndicatorIS <- aroon(FirstInsampleSet[, 2:3], n = 1)
SMIindicatorIS <- SMI(FirstInsampleSet[, 2:4], n = 2, nFast = 2, nSlow = 2,nSig = 2, maType = SMA,bounded = TRUE)
BBandIndicatorIS <- BBands(FirstInsampleSet[, 2:4])
MACDindicatorIS <- MACD(FirstInsampleSet[,4])
rsiIS <- RSI(FirstInsampleSet[,5], n = 1, maType = "WMA")
InSampleInput<-cbind(ATRindicatorIS,ADXIndicatorIS,AroonIndicatorIS,SMIindicatorIS, BBandIndicatorIS,MACDindicatorIS,rsiIS )
InSampleInputData<-data.frame(date = index(InSampleInput), InSampleInput, row.names=NULL)
InSampleInputDataM<-InSampleInputData[34:452,]
InSampleInputDataM<-InSampleInputDataM[,-c(1,3,5,8,12,13,15,16,17,19)]
adding volume and matching limit orders variable
FirstInsampleSetVolM<-data.frame(date = index(FirstInsampleSetVol),FirstInsampleSetVol, row.names=NULL)
lagdataFS<-cbind(FirstInsampleSetVolM,InSampleInputDataM,MordersM[34:452,])
lagdataFSM<-lagdataFS[,-c(1)]
colnames(lagdataFSM)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
Vol<-lagdataFSM[,1]
Volume<-Lag(Vol)
TR<-lagdataFSM[,2]
TrueRange<-Lag(TR)
TH<-lagdataFSM[,3]
TrueHigh<-Lag(TH)
Pdir<-lagdataFSM[,4]
PdirIND<-Lag(Pdir)
Ndir<-lagdataFSM[,5]
NdirIND<-Lag(Ndir)
adx<-lagdataFSM[,6]
ADX<-Lag(adx)
aroonUP<-lagdataFSM[,7]
AroonUP<-Lag(aroonUP)
aroonDWN<-lagdataFSM[,8]
AroonDWN<-Lag(aroonDWN)
SMIsignal<-lagdataFSM[,9]
SMIsignalM<-Lag(SMIsignal)
pctB<-lagdataFSM[,10]
PctB<-Lag(pctB)
MACDSignal<-lagdataFSM[,11]
MACDSignalM<-Lag(MACDSignal)
rsi<-lagdataFSM[,12]
RSI<-Lag(rsi)
MlimitOrders<-lagdataFSM[,13]
MlimitOrdersM<-Lag(MlimitOrders)
InsamplelaggedData<-cbind(Volume,TrueRange,TrueHigh,PdirIND,NdirIND,ADX,AroonUP,AroonDWN,SMIsignalM,PctB,MACDSignalM,RSI,MlimitOrdersM)
colnames(InsamplelaggedData)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
InsamplelaggedData<-as.data.frame(InsamplelaggedData)
FirstInsampleSetCpriceM<-as.matrix(FirstInsampleSetCprice)
lagInputDATAInSa<-cbind(FirstInsampleSetCpriceM,InsamplelaggedData)
colnames(lagInputDATAInSa)<-c("Cprice","volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
head(lagInputDATAInSa)
## Cprice volume TrueRange TrueHigh PDirInd NDirInd
## 2015-01-09 09:33:59 4.652 NA NA NA NA NA
## 2015-01-09 09:34:58 4.653 163499 0.020 4.648 80.85587 7.1345550
## 2015-01-09 09:35:45 4.655 79453 0.005 4.650 69.36511 5.1279537
## 2015-01-09 09:36:57 4.660 41362 0.007 4.654 63.98047 2.8687805
## 2015-01-09 09:37:59 4.650 119249 0.004 4.655 50.92653 1.9080712
## 2015-01-09 09:38:57 4.635 102763 0.016 4.659 32.04703 0.5186282
## ADX Aroonup AroonDown SMIsignal pctB
## 2015-01-09 09:33:59 NA NA NA NA NA
## 2015-01-09 09:34:58 68.43514 100 0 48.23377 0.9632944
## 2015-01-09 09:35:45 77.33377 100 0 69.23404 0.9159944
## 2015-01-09 09:36:57 84.37547 100 0 64.65340 0.8988301
## 2015-01-09 09:37:59 88.57633 100 100 56.26551 0.8528020
## 2015-01-09 09:38:57 92.69560 0 0 56.62393 0.8000342
## MACDSignal Rsi MlimitOrders
## 2015-01-09 09:33:59 NA NA NA
## 2015-01-09 09:34:58 0.2499475 0 98
## 2015-01-09 09:35:45 0.2627466 0 52
## 2015-01-09 09:36:57 0.2756341 0 29
## 2015-01-09 09:37:59 0.2889856 100 80
## 2015-01-09 09:38:57 0.2978238 0 71
time series cross valdiation
myTimeControl <- trainControl(method = "timeslice", initialWindow =380,horizon =38, fixedWindow = TRUE)
Insample training and testing with RF method
library(randomForest)
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
rfModel <- train(Cprice~volume+TrueRange+TrueHigh+PDirInd+NDirInd+ADX+Aroonup+AroonDown+SMIsignal+pctB+MACDSignal+Rsi+MlimitOrders,data =lagInputDATAInSa,method = "rf",tunelength = 10, ntrees = 1000, importance = TRUE,trControl =myTimeControl)
varImp(rfModel)
## rf variable importance
##
## Overall
## TrueHigh 100.0000
## MACDSignal 14.1180
## pctB 9.6065
## NDirInd 4.2294
## SMIsignal 3.1624
## MlimitOrders 2.5948
## ADX 2.2530
## PDirInd 2.0639
## TrueRange 1.3648
## AroonDown 0.9289
## Aroonup 0.5666
## volume 0.4626
## Rsi 0.0000
plot(varImp(rfModel))
plot(rfModel)
preperation of out of sample data from 4:30 to 5:30
FirstOutsampleSet<-Santander[453:507,]
FirstOutsampleSetVol<-Santander[486:507,5]
FirstOutsampleSetCprice<-Santander[486:507,4]
FirstOutsampleSetCpriceM<-as.matrix(FirstOutsampleSetCprice)
Calculation of technical Indicators
ATRindicatorOS <- ATR(FirstOutsampleSet, n = 1)
ADXIndicatorOS <- ADX(FirstOutsampleSet, n = 2)
AroonIndicatorOS <- aroon(FirstOutsampleSet[, 2:3], n = 1)
SMIindicatorOS <- SMI(FirstOutsampleSet[, 2:4], n = 2, nFast = 2, nSlow = 2,nSig = 2, maType = SMA,bounded = TRUE)
BBandIndicatorOS <- BBands(FirstOutsampleSet[, 2:4])
MACDindicatorOS <- MACD(FirstOutsampleSet[,4])
rsiOS <- RSI(FirstOutsampleSet[,5], n = 1, maType = "WMA")
OutSampleInput<-cbind(ATRindicatorOS,ADXIndicatorOS,AroonIndicatorOS,SMIindicatorOS, BBandIndicatorOS,MACDindicatorOS,rsiOS )
OutSampleInputData<-data.frame(date = index(OutSampleInput), OutSampleInput, row.names=NULL)
OutSampleInputDataM<-OutSampleInputData[34:55,]
OutSampleInputDataM<-OutSampleInputDataM[,-c(1,3,5,8,12,13,15,16,17,19)]
adding volume and matching limit orders variable
FirstOutsampleSetVolM<-data.frame(date = index(FirstOutsampleSetVol),FirstOutsampleSetVol, row.names=NULL)
lagdataOFS<-cbind(FirstOutsampleSetVolM,OutSampleInputDataM,MordersM[486:507,])
lagdataOFS<-lagdataOFS[,-c(1)]
colnames(lagdataOFS)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
Vol1<-lagdataOFS[,1]
Volume1<-Lag(Vol1)
TR1<-lagdataOFS[,2]
TrueRange1<-Lag(TR1)
TH1<-lagdataOFS[,3]
TrueHigh1<-Lag(TH1)
Pdir1<-lagdataOFS[,4]
PdirIND1<-Lag(Pdir1)
Ndir1<-lagdataOFS[,5]
NdirIND1<-Lag(Ndir1)
adx1<-lagdataOFS[,6]
ADX1<-Lag(adx1)
aroonUP1<-lagdataOFS[,7]
AroonUP1<-Lag(aroonUP1)
aroonDWN1<-lagdataOFS[,8]
AroonDWN1<-Lag(aroonDWN1)
SMIsignal1<-lagdataOFS[,9]
SMIsignalM1<-Lag(SMIsignal1)
pctB1<-lagdataOFS[,10]
PctB1<-Lag(pctB1)
MACDSignal1<-lagdataOFS[,11]
MACDSignalM1<-Lag(MACDSignal1)
rsi1<-lagdataOFS[,12]
RSI1<-Lag(rsi1)
MlimitOrders1<-lagdataOFS[,13]
MlimitOrdersM1<-Lag(MlimitOrders1)
OutsamplelaggedData<-cbind(Volume1,TrueRange1,TrueHigh1,PdirIND1,NdirIND1,ADX1,AroonUP1,AroonDWN1,SMIsignalM1,PctB1,MACDSignalM1,RSI1,MlimitOrdersM1)
colnames(OutsamplelaggedData)<-c("volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
OutsamplelaggedData<-as.data.frame(OutsamplelaggedData)
lagInputDATAInOut<-cbind(FirstOutsampleSetCpriceM,OutsamplelaggedData)
colnames(lagInputDATAInOut)<-c("Cprice","volume","TrueRange","TrueHigh","PDirInd","NDirInd","ADX","Aroonup","AroonDown","SMIsignal","pctB","MACDSignal","Rsi","MlimitOrders")
head(lagInputDATAInOut)
## Cprice volume TrueRange TrueHigh PDirInd NDirInd
## 2015-01-09 17:09:59 4.803 NA NA NA NA NA
## 2015-01-09 17:10:59 4.805 17483 0.001 4.801 18.13819 2.592819
## 2015-01-09 17:11:53 4.801 91247 0.005 4.805 51.91585 1.177094
## 2015-01-09 17:12:58 4.804 82720 0.002 4.805 36.13261 31.220821
## 2015-01-09 17:13:59 4.806 47689 0.002 4.803 22.47008 19.415551
## 2015-01-09 17:14:58 4.803 101686 0.005 4.806 47.01660 6.716770
## ADX Aroonup AroonDown SMIsignal pctB
## 2015-01-09 17:09:59 NA NA NA NA NA
## 2015-01-09 17:10:59 72.39325 100 100 78.93278 0.4663065
## 2015-01-09 17:11:53 83.97958 100 0 64.18919 0.5529743
## 2015-01-09 17:12:58 45.63607 0 100 27.00000 0.3952218
## 2015-01-09 17:13:59 26.46431 100 100 -13.21739 0.4624164
## 2015-01-09 17:14:58 50.73197 100 0 -19.38406 0.6912891
## MACDSignal Rsi MlimitOrders
## 2015-01-09 17:09:59 NA NA NA
## 2015-01-09 17:10:59 0.10572681 0 31
## 2015-01-09 17:11:53 0.09889191 100 50
## 2015-01-09 17:12:58 0.09200165 0 58
## 2015-01-09 17:13:59 0.08622499 0 45
## 2015-01-09 17:14:58 0.08191694 100 108
Out of sample set forvalidation
set.seed(100)
rfPredict<-predict(rfModel ,lagInputDATAInOut)
outPrice<-lagInputDATAInOut[2:22,1]
cor(outPrice,rfPredict)
## [1] 0.9309703
library(forecast)
## Loading required package: timeDate
## This is forecast 5.7
accuracy(rfPredict,outPrice,d=NULL)
## ME RMSE MAE MPE MAPE
## Test set -0.00136924 0.004902901 0.003708243 -0.02837482 0.07689761
Buy Hold Sell signal generator
#Trade logic - Look for trend
#If one step ahead logarithmic return is > 0 Buy
#If one step ahead logarithmic return is < 0 Sell
#If one step ahead logarithmic return is = 0 Hold
signal <- Delt(rfPredict,k=1,type="log")
MarketReturn <- Delt(outPrice,k=1,type="log")
tradingfunc<-function(x){
ifelse(x>0,"Long",ifelse(x<0,"Short",NA))
}
AlgoTRADE <- apply(signal,1,tradingfunc)
MarketTRADE <- apply(MarketReturn,1,tradingfunc)
Table<-table(AlgoTRADE,MarketTRADE)
confusionMatrix(AlgoTRADE,MarketTRADE)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Long Short
## Long 11 3
## Short 2 2
##
## Accuracy : 0.7222
## 95% CI : (0.4652, 0.9031)
## No Information Rate : 0.7222
## P-Value [Acc > NIR] : 0.6175
##
## Kappa : 0.2623
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8462
## Specificity : 0.4000
## Pos Pred Value : 0.7857
## Neg Pred Value : 0.5000
## Prevalence : 0.7222
## Detection Rate : 0.6111
## Detection Prevalence : 0.7778
## Balanced Accuracy : 0.6231
##
## 'Positive' Class : Long
##