# Demonstrate the difference between ordinary messages and package startup
# messages.
#
# Emits one ordinary message, then a two-part startup message ("initializing
# ..." followed, after a one-second pause, by " done" on the same line).
# Takes no arguments; returns the value of the last packageStartupMessage()
# call.
testit <- function() {
  message("testing package startup messages")
  # appendLF = FALSE suppresses the trailing newline so that " done" is
  # appended to the same output line.
  packageStartupMessage("initializing ...", appendLF = FALSE)
  Sys.sleep(1)
  packageStartupMessage(" done")
}
# Run testit() with package startup messages silenced. The ordinary message()
# inside testit() is not a startup message, so it is still printed (see the
# recorded output below).
suppressPackageStartupMessages(testit())
## testing package startup messages
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(ggplot2)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Load the raw SAN tick data.
# The file is addressed by an explicit path instead of calling setwd(), so the
# script no longer changes the session's working directory as a side effect.
data_file <- file.path("~/git", "SAN_270412.RData")

# load() restores the saved objects into the workspace and returns their
# *names*: DATAITX is a character vector of object names, not the data.
DATAITX <- load(data_file)

# The tick table is expected under the name `h05n`; fail early with a clear
# message if the file does not provide it.
if (!"h05n" %in% DATAITX) {
  stop(
    "'", data_file, "' does not contain an object named 'h05n'",
    call. = FALSE
  )
}
SAN <- h05n
head(SAN)
## hora precio volumen broker.comprador broker.vendedor
## 1 2012-04-27 09:00:00.000 4.59 60 9838 8830
## 2 2012-04-27 09:00:00.000 4.59 33 9816 8830
## 3 2012-04-27 09:00:00.000 4.59 100 9838 8830
## 4 2012-04-27 09:00:00.000 4.59 13300 9838 8830
## 5 2012-04-27 09:00:00.000 4.59 200 9843 8830
## 6 2012-04-27 09:00:00.000 4.59 680 9838 8830
converting it into OHLCV format
library("plyr")

# Split each "YYYY-MM-DD HH:MM:SS.mmm" stamp into its date and time parts.
# (Named `hora_parts` rather than `list` so that base::list is not masked.)
hora_parts <- strsplit(SAN$hora, " ")
SANhft <- ldply(hora_parts)

# Tick table with separate Date / Time columns plus price and volume.
SANHFT <- cbind(SANhft[, 1:2], SAN[, 2:3])
colnames(SANHFT) <- c("Date", "Time", "Price", "Volume")

# Index the ticks by their full timestamp (date + time). Parsing the time of
# day alone makes strptime() fill in the *current* date, which stamped the
# series with the day the script was run instead of the trading day.
# Characters after the seconds (".000") are ignored by the format.
tick_time <- as.POSIXct(
  paste(SANHFT[, 1], SANHFT[, 2]),
  format = "%Y-%m-%d %H:%M:%S"
)
SANxts <- as.xts(SANHFT[, 3:4], order.by = tick_time)

# Aggregate the ticks into one-minute bars and chart them.
Santander <- to.minutes(SANxts)
chartSeries(Santander)
data aggregation for HFF time series
# Parse the tick timestamps; the literal ".000" in the format matches the
# millisecond suffix of `hora`.
X <- strptime(SAN$hora, format = "%Y-%m-%d %H:%M:%S.000")

# Bucket the ticks into one-minute intervals and count the ticks per bucket.
time <- cut(X, "1 min")
timestamp <- table(time)

# Per-minute counts as a one-column matrix.
z <- as.matrix(timestamp)

# Drop ten hard-coded minute buckets.
# NOTE(review): presumably these are minutes that have no corresponding bar in
# `Santander` (e.g. buckets without trades) -- confirm against the data.
z <- z[-c(312, 313, 314, 315, 506, 511, 512, 513, 514, 515), ]
Morders <- as.matrix(z)
Adding indicators as independent variables for the train/test set
## Average True Range indicator
ATRindicator <- ATR(Santander, n = 1)

## Welles Wilder's Directional Movement Index
ADXIndicator <- ADX(Santander, n = 2)

## Aroon indicator (computed on columns 2:3 of the bars)
AroonIndicator <- aroon(Santander[, 2:3], n = 1)

## Stochastic Momentum Index (computed on columns 2:4 of the bars)
SMIindicator <- SMI(
  Santander[, 2:4],
  n = 2, nFast = 2, nSlow = 2, nSig = 2,
  maType = SMA,
  bounded = TRUE
)

## Bollinger Bands
BBandIndicator <- BBands(Santander[, 2:4])

## Exponential moving average of column 4
EMAIndicator <- EMA(Santander[, 4], n = 1)

## MACD of column 4
MACDindicator <- MACD(Santander[, 4])

## Parabolic SAR
sarindicator <- SAR(Santander[, 2:3], accel = c(0.02, 0.2))

## Rate of change
## NOTE(review): computed on column 5, which the script elsewhere treats as
## the volume column -- confirm that volume (not price) is intended.
roc <- ROC(
  Santander[, 5],
  n = 1,
  type = c("continuous", "discrete"),
  na.pad = TRUE
)

## Relative strength index (also on column 5)
rsi <- RSI(Santander[, 5], n = 1, maType = "WMA")
# Bind the minute bars and every indicator into one wide object.
CombData <- cbind(
  Santander, ATRindicator, ADXIndicator, AroonIndicator, SMIindicator,
  BBandIndicator, EMAIndicator, MACDindicator, sarindicator, roc, rsi
)
CombdataM <- as.data.frame(CombData)
MordersM <- as.data.frame(Morders)

# Pair rows 36:507 of the indicator table with rows 35:506 of the per-minute
# order counts.
# NOTE(review): the one-row offset between the two ranges differs from the
# aligned ranges used later in the script (e.g. 34:452 for both) -- confirm
# it is intentional.
RealCombdata <- cbind(CombData[36:507, ], MordersM[35:506, ])

# Flatten to a data frame with the time index as an explicit first column.
SANCOMBD <- data.frame(
  date = index(RealCombdata),
  RealCombdata,
  row.names = NULL
)
colnames(SANCOMBD) <- c(
  "Date", "OpenPrice", "HighPrice", "LowPrice", "ClosePrice", "Volume",
  "TR", "ATR", "trueHigh", "trueLow",
  "+dirInd", "-dirInd", "dirind", "ADX",
  "Aroonup", "AroonDown", "AroonOscillator",
  "SMI", "SMISignal",
  "dnBB", "MaAvg", "UpBB", "pctB",
  "EMA", "MACD", "MACDsignal", "SAR", "ROC", "RSI",
  "MLimitorders"
)
## Plot the matching limit orders (per-minute counts) over time
p <- ggplot(SANCOMBD, aes(x = Date, y = MLimitorders))
p + geom_line()
Preprocessing of data using hierarchical clustering and a correlation-coefficient cutoff
library(corrplot)

# Candidate predictors: every column except Date (1) and ClosePrice (5).
Filtereddata <- SANCOMBD[, -c(1, 5)]
correlations <- cor(Filtereddata)

### Before selection of variables
corrplot(correlations, order = "hclust")

# Indices of columns flagged for removal at a correlation cutoff of 0.70.
highCorr <- findCorrelation(correlations, cutoff = 0.70)
length(highCorr)
## [1] 15
nearZeroVar(Filtereddata)
## integer(0)
head(highCorr)
## [1] 25 19 20 22 2 18

# Drop the flagged columns and recompute the correlation matrix.
filteredCombdata <- Filtereddata[, -highCorr]
corMatfterFiler <- cor(filteredCombdata)

### After the highly correlated independent variables are removed
corrplot(corMatfterFiler, order = "hclust")
Dividing dataset for in sample and out of sample
# In-sample bars: rows 1:452 are used to compute the indicators; the volume
# (column 5) and closing price (column 4) are kept from row 34 onwards.
FirstInsampleSet <- Santander[1:452, ]
FirstInsampleSetVol <- Santander[34:452, 5]
FirstInsampleSetCprice <- Santander[34:452, 4]
Calculating indicators for the period of 9:30 to 4:35
# Re-draw the chart restricted to the first six hours of data.
# NOTE(review): presumably acts on the chart opened by the earlier
# chartSeries(Santander) call -- confirm a chart device is still active here.
reChart(subset = "first 6 hours")
TA indicators
# Technical indicators computed on the in-sample bars only.
ATRindicatorIS <- ATR(FirstInsampleSet, n = 1)
ADXIndicatorIS <- ADX(FirstInsampleSet, n = 2)
AroonIndicatorIS <- aroon(FirstInsampleSet[, 2:3], n = 1)
SMIindicatorIS <- SMI(
  FirstInsampleSet[, 2:4],
  n = 2, nFast = 2, nSlow = 2, nSig = 2,
  maType = SMA,
  bounded = TRUE
)
BBandIndicatorIS <- BBands(FirstInsampleSet[, 2:4])
MACDindicatorIS <- MACD(FirstInsampleSet[, 4])
rsiIS <- RSI(FirstInsampleSet[, 5], n = 1, maType = "WMA")

# Combine the indicators and expose the time index as a `date` column.
InSampleInput <- cbind(
  ATRindicatorIS, ADXIndicatorIS, AroonIndicatorIS, SMIindicatorIS,
  BBandIndicatorIS, MACDindicatorIS, rsiIS
)
InSampleInputData <- data.frame(
  date = index(InSampleInput),
  InSampleInput,
  row.names = NULL
)

# Keep rows 34:452 (the same range as the in-sample volume and close series).
InSampleInputDataM <- InSampleInputData[34:452, ]

# Drop the date column and nine indicator columns by position. The columns
# that remain are the ones later named TrueRange, TrueHigh, PDirInd, NDirInd,
# ADX, Aroonup, AroonDown, SMIsignal, pctB, MACDSignal and Rsi.
InSampleInputDataM <- InSampleInputDataM[
  , -c(1, 3, 5, 8, 12, 13, 15, 16, 17, 19)
]
adding volume and matching limit orders variable
# In-sample volume as a plain data frame with an explicit date column.
FirstInsampleSetVolM <- data.frame(
  date = index(FirstInsampleSetVol),
  FirstInsampleSetVol,
  row.names = NULL
)

# Volume + selected indicators + per-minute order counts for rows 34:452;
# the date column is dropped again right away.
lagdataFS <- cbind(FirstInsampleSetVolM, InSampleInputDataM, MordersM[34:452, ])
lagdataFSM <- lagdataFS[, -c(1)]

predictor_names <- c(
  "volume", "TrueRange", "TrueHigh", "PDirInd", "NDirInd", "ADX",
  "Aroonup", "AroonDown", "SMIsignal", "pctB", "MACDSignal", "Rsi",
  "MlimitOrders"
)
colnames(lagdataFSM) <- predictor_names

# Lag every predictor by one bar, so each row holds the values of the
# previous bar and the first row becomes NA. All columns are lagged in one
# step; the former per-column temporaries are gone, two of which (`ADX`,
# `RSI`) masked the TTR functions of the same name.
InsamplelaggedData <- as.data.frame(
  lapply(lagdataFSM, function(column) as.numeric(Lag(column)))
)
colnames(InsamplelaggedData) <- predictor_names

# Prepend the unlagged closing price as the response; its matrix row names
# (the bar timestamps) become the row names of the result.
FirstInsampleSetCpriceM <- as.matrix(FirstInsampleSetCprice)
lagInputDATAInSa <- cbind(FirstInsampleSetCpriceM, InsamplelaggedData)
colnames(lagInputDATAInSa) <- c("Cprice", predictor_names)
head(lagInputDATAInSa)
## Cprice volume TrueRange TrueHigh PDirInd NDirInd
## 2015-01-09 09:33:59 4.652 NA NA NA NA NA
## 2015-01-09 09:34:58 4.653 163499 0.020 4.648 80.85587 7.1345550
## 2015-01-09 09:35:45 4.655 79453 0.005 4.650 69.36511 5.1279537
## 2015-01-09 09:36:57 4.660 41362 0.007 4.654 63.98047 2.8687805
## 2015-01-09 09:37:59 4.650 119249 0.004 4.655 50.92653 1.9080712
## 2015-01-09 09:38:57 4.635 102763 0.016 4.659 32.04703 0.5186282
## ADX Aroonup AroonDown SMIsignal pctB
## 2015-01-09 09:33:59 NA NA NA NA NA
## 2015-01-09 09:34:58 68.43514 100 0 48.23377 0.9632944
## 2015-01-09 09:35:45 77.33377 100 0 69.23404 0.9159944
## 2015-01-09 09:36:57 84.37547 100 0 64.65340 0.8988301
## 2015-01-09 09:37:59 88.57633 100 100 56.26551 0.8528020
## 2015-01-09 09:38:57 92.69560 0 0 56.62393 0.8000342
## MACDSignal Rsi MlimitOrders
## 2015-01-09 09:33:59 NA NA NA
## 2015-01-09 09:34:58 0.2499475 0 98
## 2015-01-09 09:35:45 0.2627466 0 52
## 2015-01-09 09:36:57 0.2756341 0 29
## 2015-01-09 09:37:59 0.2889856 100 80
## 2015-01-09 09:38:57 0.2978238 0 71
Time series cross-validation
# Time-slice resampling: a fixed training window of 380 rows with the
# following 38 rows held out.
myTimeControl <- trainControl(
  method = "timeslice",
  initialWindow = 380,
  horizon = 38,
  fixedWindow = TRUE
)
Insample training and testing with cubist method
library(Cubist)

# Tuning grid: every combination of committee count and neighbour count.
CubistGrid <- expand.grid(
  .committees = c(1, 10, 50, 100),
  .neighbors = c(0, 1, 5, 9)
)

# Fit Cubist on the lagged in-sample data; the seed makes the run repeatable.
set.seed(100)
CbModel <- train(
  Cprice ~ volume + TrueRange + TrueHigh + PDirInd + NDirInd + ADX +
    Aroonup + AroonDown + SMIsignal + pctB + MACDSignal + Rsi + MlimitOrders,
  data = lagInputDATAInSa,
  method = "cubist",
  tuneGrid = CubistGrid,
  trControl = myTimeControl
)
CbModel
## Cubist
##
## 419 samples
## 13 predictor
##
## No pre-processing
## Resampling: Rolling Forecasting Origin Resampling (38 held-out with a fixed window)
##
## Summary of sample sizes: 380
##
## Resampling results across tuning parameters:
##
## committees neighbors RMSE Rsquared
## 1 0 0.01104401 0.5957053
## 1 1 0.01283212 0.4801673
## 1 5 0.01153085 0.5913831
## 1 9 0.01098597 0.6149879
## 10 0 0.01086826 0.5968469
## 10 1 0.01277495 0.4818908
## 10 5 0.01159198 0.5869536
## 10 9 0.01092600 0.6159503
## 50 0 0.01111725 0.5954274
## 50 1 0.01285443 0.4843449
## 50 5 0.01175726 0.5872429
## 50 9 0.01111485 0.6150267
## 100 0 0.01114953 0.5952398
## 100 1 0.01286488 0.4846352
## 100 5 0.01177832 0.5872675
## 100 9 0.01113896 0.6148999
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were committees = 10 and neighbors = 0.
# Print the fitted Cubist model: its per-committee rules, the evaluation on
# the training data and the attribute usage (output recorded below).
summary(CbModel)
##
## Call:
## cubist.default(x = x, y = y, committees = param$committees)
##
##
## Cubist [Release 2.07 GPL Edition] Fri Jan 9 16:32:26 2015
## ---------------------------------
##
## Target attribute `outcome'
##
## Read 418 cases (14 attributes) from undefined.data
##
## Model 1:
##
## Rule 1/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0072]
##
## outcome = 0.1581 + 0.966 TrueHigh + 0.01 pctB - 0.018 MACDSignal
##
## Model 2:
##
## Rule 2/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0074]
##
## outcome = 0.1341 + 0.972 TrueHigh + 0.011 pctB - 0.00011 PDirInd
##
## Model 3:
##
## Rule 3/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0079]
##
## outcome = 0.2074 + 0.955 TrueHigh - 0.037 MACDSignal + 0.011 pctB
## + 6e-05 Aroonup
##
## Model 4:
##
## Rule 4/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0075]
##
## outcome = 0.1351 + 0.972 TrueHigh + 0.012 pctB - 0.00014 PDirInd
##
## Model 5:
##
## Rule 5/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0078]
##
## outcome = 0.2284 + 0.951 TrueHigh - 0.043 MACDSignal + 0.014 pctB
##
## Model 6:
##
## Rule 6/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0075]
##
## outcome = 0.1351 + 0.972 TrueHigh + 0.012 pctB - 0.00014 PDirInd
##
## Model 7:
##
## Rule 7/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0078]
##
## outcome = 0.2284 + 0.951 TrueHigh - 0.043 MACDSignal + 0.014 pctB
##
## Model 8:
##
## Rule 8/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0075]
##
## outcome = 0.1351 + 0.972 TrueHigh + 0.012 pctB - 0.00014 PDirInd
##
## Model 9:
##
## Rule 9/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0078]
##
## outcome = 0.2284 + 0.951 TrueHigh - 0.043 MACDSignal + 0.014 pctB
##
## Model 10:
##
## Rule 10/1: [418 cases, mean 4.7701, range 4.605 to 4.874, est err 0.0075]
##
## outcome = 0.1351 + 0.972 TrueHigh + 0.012 pctB - 0.00014 PDirInd
##
##
## Evaluation on training data (418 cases):
##
## Average |error| 0.0090
## Relative |error| 0.18
## Correlation coefficient 0.98
##
##
## Attribute usage:
## Conds Model
##
## 100% TrueHigh
## 100% pctB
## 50% PDirInd
## 50% MACDSignal
## 10% Aroonup
##
##
## Time: 0.3 secs
# Plot the tuned model object.
# NOTE(review): for a caret `train` object this should show the resampled
# performance across the tuning grid -- confirm.
plot(CbModel)
Preparation of out-of-sample data from 4:30 to 5:30
# Out-of-sample bars: rows 453:507 are used to compute the indicators; the
# volume (column 5) and closing price (column 4) are kept for rows 486:507,
# i.e. rows 34:55 of the out-of-sample set.
FirstOutsampleSet <- Santander[453:507, ]
FirstOutsampleSetVol <- Santander[486:507, 5]
FirstOutsampleSetCprice <- Santander[486:507, 4]
FirstOutsampleSetCpriceM <- as.matrix(FirstOutsampleSetCprice)
Calculation of technical Indicators
# Technical indicators computed on the out-of-sample bars only.
ATRindicatorOS <- ATR(FirstOutsampleSet, n = 1)
ADXIndicatorOS <- ADX(FirstOutsampleSet, n = 2)
AroonIndicatorOS <- aroon(FirstOutsampleSet[, 2:3], n = 1)
SMIindicatorOS <- SMI(
  FirstOutsampleSet[, 2:4],
  n = 2, nFast = 2, nSlow = 2, nSig = 2,
  maType = SMA,
  bounded = TRUE
)
BBandIndicatorOS <- BBands(FirstOutsampleSet[, 2:4])
MACDindicatorOS <- MACD(FirstOutsampleSet[, 4])
rsiOS <- RSI(FirstOutsampleSet[, 5], n = 1, maType = "WMA")

# Combine the indicators and expose the time index as a `date` column.
OutSampleInput <- cbind(
  ATRindicatorOS, ADXIndicatorOS, AroonIndicatorOS, SMIindicatorOS,
  BBandIndicatorOS, MACDindicatorOS, rsiOS
)
OutSampleInputData <- data.frame(
  date = index(OutSampleInput),
  OutSampleInput,
  row.names = NULL
)

# Keep rows 34:55 (the same bars as the out-of-sample volume and close).
OutSampleInputDataM <- OutSampleInputData[34:55, ]

# Drop the date column and nine indicator columns by position. The columns
# that remain are the ones later named TrueRange, TrueHigh, PDirInd, NDirInd,
# ADX, Aroonup, AroonDown, SMIsignal, pctB, MACDSignal and Rsi.
OutSampleInputDataM <- OutSampleInputDataM[
  , -c(1, 3, 5, 8, 12, 13, 15, 16, 17, 19)
]
adding volume and matching limit orders variable
# Out-of-sample volume as a plain data frame with an explicit date column.
FirstOutsampleSetVolM <- data.frame(
  date = index(FirstOutsampleSetVol),
  FirstOutsampleSetVol,
  row.names = NULL
)

# Volume + selected indicators + per-minute order counts for rows 486:507;
# the date column is dropped again right away.
lagdataOFS <- cbind(
  FirstOutsampleSetVolM, OutSampleInputDataM, MordersM[486:507, ]
)
lagdataOFS <- lagdataOFS[, -c(1)]

predictor_names <- c(
  "volume", "TrueRange", "TrueHigh", "PDirInd", "NDirInd", "ADX",
  "Aroonup", "AroonDown", "SMIsignal", "pctB", "MACDSignal", "Rsi",
  "MlimitOrders"
)
colnames(lagdataOFS) <- predictor_names

# Lag every predictor by one bar, so each row holds the values of the
# previous bar and the first row becomes NA. All columns are lagged in one
# step instead of through 26 copy-pasted per-column assignments.
OutsamplelaggedData <- as.data.frame(
  lapply(lagdataOFS, function(column) as.numeric(Lag(column)))
)
colnames(OutsamplelaggedData) <- predictor_names

# Prepend the unlagged closing price as the response; its matrix row names
# (the bar timestamps) become the row names of the result.
lagInputDATAInOut <- cbind(FirstOutsampleSetCpriceM, OutsamplelaggedData)
colnames(lagInputDATAInOut) <- c("Cprice", predictor_names)
head(lagInputDATAInOut)
## Cprice volume TrueRange TrueHigh PDirInd NDirInd
## 2015-01-09 17:09:59 4.803 NA NA NA NA NA
## 2015-01-09 17:10:59 4.805 17483 0.001 4.801 18.13819 2.592819
## 2015-01-09 17:11:53 4.801 91247 0.005 4.805 51.91585 1.177094
## 2015-01-09 17:12:58 4.804 82720 0.002 4.805 36.13261 31.220821
## 2015-01-09 17:13:59 4.806 47689 0.002 4.803 22.47008 19.415551
## 2015-01-09 17:14:58 4.803 101686 0.005 4.806 47.01660 6.716770
## ADX Aroonup AroonDown SMIsignal pctB
## 2015-01-09 17:09:59 NA NA NA NA NA
## 2015-01-09 17:10:59 72.39325 100 100 78.93278 0.4663065
## 2015-01-09 17:11:53 83.97958 100 0 64.18919 0.5529743
## 2015-01-09 17:12:58 45.63607 0 100 27.00000 0.3952218
## 2015-01-09 17:13:59 26.46431 100 100 -13.21739 0.4624164
## 2015-01-09 17:14:58 50.73197 100 0 -19.38406 0.6912891
## MACDSignal Rsi MlimitOrders
## 2015-01-09 17:09:59 NA NA NA
## 2015-01-09 17:10:59 0.10572681 0 31
## 2015-01-09 17:11:53 0.09889191 100 50
## 2015-01-09 17:12:58 0.09200165 0 58
## 2015-01-09 17:13:59 0.08622499 0 45
## 2015-01-09 17:14:58 0.08191694 100 108
Out-of-sample set for validation
# Predict the out-of-sample closing prices with the tuned Cubist model.
set.seed(100)
CubistPredict <- predict(CbModel, lagInputDATAInOut)

# Realised closing prices for rows 2:22; row 1 is skipped because its lagged
# predictors are all NA.
# NOTE(review): presumably predict() drops that NA row as well, so both
# vectors have 21 elements -- confirm.
outPrice <- lagInputDATAInOut[2:22, 1]

# Correlation between realised and predicted prices.
cor(outPrice, CubistPredict)
## [1] 0.9580667
library(forecast)
## Loading required package: timeDate
## This is forecast 5.7
# Forecast error measures of the predictions against the realised prices
# (ME, RMSE, MAE, MPE, MAPE -- see the recorded output below).
accuracy(CubistPredict, outPrice, d = NULL)
## ME RMSE MAE MPE MAPE
## Test set 0.00100074 0.003368813 0.002930114 0.02074392 0.06080772
Buy Hold Sell signal generator
# Trade logic - look for the trend in the one-step-ahead log return:
#   log return > 0  -> Buy
#   log return < 0  -> Sell
#   log return == 0 -> Hold

# Log returns (k = 1) of the predicted prices ...
signal <- Delt(CubistPredict, k = 1, type = "log")
# ... and of the realised prices.
MarketReturn <- Delt(outPrice, k = 1, type = "log")
# Map returns to a trade direction.
#
# x: numeric vector of returns.
# Returns a vector of the same length: "Long" where x > 0, "Short" where
# x < 0, and NA where x is zero or missing.
tradingfunc <- function(x) {
  non_negative_call <- ifelse(x > 0, "Long", NA)
  ifelse(x < 0, "Short", non_negative_call)
}
# Turn the predicted and the realised returns into "Long"/"Short" calls, row
# by row (NA where the return is zero or missing).
AlgoTRADE <- apply(signal, 1, tradingfunc)
MarketTRADE <- apply(MarketReturn, 1, tradingfunc)
Table <- table(AlgoTRADE, MarketTRADE)

# Current caret releases require `data` and `reference` to be factors with
# the same levels, so the character vectors are converted explicitly. Fixing
# the levels also keeps the table 2 x 2 if one side never takes a class.
# "Long" comes first and is therefore reported as the positive class.
trade_levels <- c("Long", "Short")
confusionMatrix(
  factor(AlgoTRADE, levels = trade_levels),
  factor(MarketTRADE, levels = trade_levels)
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Long Short
## Long 11 3
## Short 2 2
##
## Accuracy : 0.7222
## 95% CI : (0.4652, 0.9031)
## No Information Rate : 0.7222
## P-Value [Acc > NIR] : 0.6175
##
## Kappa : 0.2623
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8462
## Specificity : 0.4000
## Pos Pred Value : 0.7857
## Neg Pred Value : 0.5000
## Prevalence : 0.7222
## Detection Rate : 0.6111
## Detection Prevalence : 0.7778
## Balanced Accuracy : 0.6231
##
## 'Positive' Class : Long
##