loading data

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(xts)
library(DMwR2)
data("GSPC")
first(GSPC)
##            GSPC.Open GSPC.High GSPC.Low GSPC.Close GSPC.Volume GSPC.Adjusted
## 1970-01-02     92.06     93.54    91.79         93     8050000            93
last(GSPC)
##            GSPC.Open GSPC.High GSPC.Low GSPC.Close GSPC.Volume GSPC.Adjusted
## 2016-01-25   1906.28   1906.28  1875.97    1877.08  4401380000       1877.08

Defining the prediction task

We want to see if margin of profit %P in the next k days is attainable.So we want to make an indicator of the tendency in the next k days

T.ind <- function(quotes, tgt.margin = 0.025, n.days = 10) {
   v <- apply(HLC(quotes), 1, mean)
   v[1] <- Cl(quotes)[1]
   r <- matrix(NA, ncol = n.days, nrow = nrow(quotes))
   for (x in 1:n.days) {
   r[,x] <- Next(Delt(v,k=x),x) 
       }
   x <- apply(r, 1, function(x) sum(x[x > tgt.margin | x < -tgt.margin]))
   if (is.xts(quotes)) xts(x, time(quotes)) else x
}

Performance of new index

candleChart(last(GSPC,'3 months'),theme='white', TA=NULL)

avgPrice <- function(p) apply(HLC(p),1,mean)
addAvgPrice <- newTA(FUN=avgPrice,col=1,legend='AvgPrice')
addT.ind <- newTA(FUN=T.ind,col='red', legend='tgtRet')
addAvgPrice(on=1)

addT.ind()

# feature selection ### We use multiple specific features developed for stock data analysis then analyze these features to find out their importance.

library(TTR)
myATR <- function(x) ATR(HLC(x))[,'atr']
 mySMI <- function(x) SMI(HLC(x))[, "SMI"]
 myADX <- function(x) ADX(HLC(x))[,'ADX']
 myAroon <- function(x) aroon(cbind(Hi(x),Lo(x)))$oscillator
 myBB <- function(x) BBands(HLC(x))[, "pctB"]
 myChaikinVol <- function(x) Delt(chaikinVolatility(cbind(Hi(x),Lo(x))))[, 1]
 myCLV <- function(x) EMA(CLV(HLC(x)))[, 1]
 myEMV <- function(x) EMV(cbind(Hi(x),Lo(x)),Vo(x))[,2]
 myMACD <- function(x) MACD(Cl(x))[,2]
 myMFI <- function(x) MFI(HLC(x), Vo(x))
 mySAR <- function(x) SAR(cbind(Hi(x),Cl(x))) [,1]
 myVolat <- function(x) volatility(OHLC(x),calc="garman")[,1]

Now we use random forest to find out importance of these features.

We split data in train and test

library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1:10) +
   myATR(GSPC) + mySMI(GSPC) + myADX(GSPC) + myAroon(GSPC) +
   myBB(GSPC) + myChaikinVol(GSPC) + myCLV(GSPC) +
   CMO(Cl(GSPC)) + EMA(Delt(Cl(GSPC))) + myEMV(GSPC) +
   myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) + RSI(Cl(GSPC)) +
   mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))

set.seed(1234)
 rf <- buildModel(data.model,method='randomForest',
   training.per=c("1995-01-01","2005-12-30"),
   ntree=1000, importance=TRUE)

Variable importance

varImpPlot(rf@fitted.model, type = 1)

Variable importance

imp <- importance(rf@fitted.model,type = 1)
rownames(imp)[which(imp>30)]
##  [1] "myATR.GSPC"      "mySMI.GSPC"      "myADX.GSPC"      "myAroon.GSPC"   
##  [5] "myEMV.GSPC"      "myVolat.GSPC"    "myMACD.GSPC"     "myMFI.GSPC"     
##  [9] "mySAR.GSPC"      "runMean.Cl.GSPC" "runSD.Cl.GSPC"

Final dataset

data.model <- specifyModel(T.ind(GSPC) ~ myATR(GSPC) + mySMI(GSPC) +
                             myADX(GSPC)+myAroon(GSPC) + myEMV(GSPC) +                                       myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) +                                    mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))

Data wrangling to generate signal thresholds

Tdata.train <- as.data.frame(modelData(data.model,
  data.window=c('1970-01-02','2005-12-30')))
Tdata.eval <- na.omit(as.data.frame(modelData(data.model, data.window=c('2006-01-01','2016-01-25'))))
Tform <- as.formula('T.ind.GSPC ~ .')
## The classification task
buy.thr <- 0.1
sell.thr <- -0.1
Tdata.trainC <- cbind(Signal=trading.signals(Tdata.train[["T.ind.GSPC"]],
                 buy.thr,sell.thr),Tdata.train[,-1])
Tdata.evalC <- cbind(Signal=trading.signals(Tdata.eval[["T.ind.GSPC"]],
                buy.thr,sell.thr),Tdata.eval[,-1])
TformC <- as.formula("Signal ~ .")

Modeling using Neural net

set.seed(1234)
 library(nnet)
 ## The first column is the target variable
 norm.data <- data.frame(T.ind.GSPC=Tdata.train[[1]],scale(Tdata.train[,-1]))
 nn <- nnet(Tform, norm.data[1:1000, ], size = 5, decay = 0.01,
             maxit = 1000, linout = TRUE, trace = FALSE)
 preds <- predict(nn, norm.data[1001:2000, ])

evaluation

sigs.nn <- trading.signals(preds,0.1,-0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.nn,true.sigs)
##     precision    recall
## s   0.2809917 0.1931818
## b   0.3108108 0.2857143
## s+b 0.2973978 0.2373887

SVM regression

set.seed(1234)
library(e1071)
sv <- svm(Tform, Tdata.train[1:1000, ], gamma = 0.001, cost = 100)
s.preds <- predict(sv, Tdata.train[1001:2000, ])
sigs.svm <- trading.signals(s.preds, 0.1, -0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.svm, true.sigs)
##     precision      recall
## s       0.375 0.017045455
## b         NaN 0.000000000
## s+b     0.375 0.008902077

SVM classification

set.seed(1234)
library(kernlab)
ksv <- ksvm(Signal ~ ., Tdata.trainC[1:1000, ], C = 10)
ks.preds <- predict(ksv, Tdata.trainC[1001:2000, ])
sigs.PR(ks.preds, Tdata.trainC[1001:2000, 1])
##     precision    recall
## s   0.2365591 0.2500000
## b   0.3246753 0.1552795
## s+b 0.2623574 0.2047478

Multivariate adaptive regression splines

library(earth)
## Warning: package 'earth' was built under R version 3.6.2
## Loading required package: Formula
## Loading required package: plotmo
## Warning: package 'plotmo' was built under R version 3.6.2
## Loading required package: plotrix
## Loading required package: TeachingDemos
## Warning: package 'TeachingDemos' was built under R version 3.6.2
e <- earth(Tform, Tdata.train[1:1000, ])
e.preds <- predict(e, Tdata.train[1001:2000, ])
sigs.e <- trading.signals(e.preds, 0.1, -0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.e, true.sigs)
##     precision    recall
## s   0.2894737 0.2500000
## b   0.3504274 0.2546584
## s+b 0.3159851 0.2522255
(summary(e))
## Call: earth(formula=Tform, data=Tdata.train[1:1000,])
## 
##                           coefficients
## (Intercept)                  0.5241811
## h(myATR.GSPC-2.56817)        1.2724353
## h(-61.825-mySMI.GSPC)        0.0594203
## h(myADX.GSPC-40.6215)       -0.0104803
## h(50.657-myADX.GSPC)        -0.0025279
## h(myADX.GSPC-50.657)         0.0823181
## h(0.204717-myVolat.GSPC)    -0.5105416
## h(myVolat.GSPC-0.204717)    -5.6100523
## h(myVolat.GSPC-0.271459)     5.6725474
## h(mySAR.GSPC-74.7031)       -0.0693496
## h(87.944-mySAR.GSPC)        -0.0575104
## h(mySAR.GSPC-87.944)         0.0800171
## h(runMean.Cl.GSPC-79.265)    0.2074780
## h(81.942-runMean.Cl.GSPC)    0.1058493
## h(runMean.Cl.GSPC-81.942)   -0.2185753
## 
## Selected 15 of 18 terms, and 6 of 11 predictors
## Termination condition: Reached nk 23
## Importance: myVolat.GSPC, runMean.Cl.GSPC, myATR.GSPC, mySMI.GSPC, ...
## Number of terms at each degree of interaction: 1 14 (additive model)
## GCV 0.01470628    RSS 13.86568    GRSq 0.3536668    RSq 0.38939
evimp(e, trim=FALSE)
##                      nsubsets   gcv    rss
## myVolat.GSPC               13 100.0  100.0
## runMean.Cl.GSPC            13 100.0  100.0
## myATR.GSPC                 12  96.7   96.4
## mySMI.GSPC                 11  81.5   82.4
## mySAR.GSPC                  7  44.1   47.7
## myADX.GSPC                  5  57.8>  58.5>
## myAroon.GSPC-unused         0   0.0    0.0
## myEMV.GSPC-unused           0   0.0    0.0
## myMACD.GSPC-unused          0   0.0    0.0
## myMFI.GSPC-unused           0   0.0    0.0
## runSD.Cl.GSPC-unused        0   0.0    0.0
plot(e)

# simulated trader

policy1

policy.1 <- function(signals,market,opened.pos,money,
                     bet=0.2,hold.time=10,
                     exp.prof=0.025, max.loss= 0.05
                    )
   {
      d <- NROW(market) # this is the ID of today
      orders <- NULL
      nOs <- NROW(opened.pos)
      # nothing to do!
      if (!nOs && signals[d] == 'h') return(orders)
      # First lets check if we can open new positions
      # i) long positions
     if (signals[d] == 'b' && !nOs) {
        quant <- round(bet*money/Cl(market)[d],0)
        if (quant > 0)
           orders <- rbind(orders,
                  data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
                     val = c(quant,
                        Cl(market)[d]*(1+exp.prof),
                        Cl(market)[d]*(1-max.loss)
                           ),
                     action = c('open','close','close'),
                     posID = c(NA,NA,NA)
                  )
           )             
     # ii) short positions
    } else if (signals[d] == 's' && !nOs) {
     # this is the nr of stocks we already need to buy
     # because of currently opened short positions
           need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
                 "N.stocks"])*Cl(market)[d]
           quant <- round(bet*(money-need2buy)/Cl(market)[d],0)
              if (quant > 0)
                  orders <- rbind(orders,
                     data.frame(order=c(-1,1,1),order.type=c(1,2,3),
                           val = c(quant,
                               Cl(market)[d]*(1-exp.prof),
                               Cl(market)[d]*(1+max.loss)
                           ),
                          action = c('open','close','close'),
                          posID = c(NA,NA,NA)
                     )
                    )
          }
# Now lets check if we need to close positions
# because their holding time is over
      if (nOs)
         for(i in 1:nOs) {
            if (d - opened.pos[i,'Odate'] >= hold.time)
                orders <- rbind(orders,
         data.frame(order=-opened.pos[i,'pos.type'],
               order.type=1,
               val = NA,
               action = 'close',
               posID = rownames(opened.pos)[i]
        )
               )
         }
   orders
}

policy 2

policy.2 <- function(signals,market,opened.pos,money,
                     bet=0.2,exp.prof=0.025, max.loss= 0.05
                    )
  {
    d <- NROW(market) # this is the ID of today
    orders <- NULL
    nOs <- NROW(opened.pos)
    # nothing to do!
    if (!nOs && signals[d] == 'h') return(orders)
    # First lets check if we can open new positions
    # i) long positions
    if (signals[d] == 'b') {
       quant <- round(bet*money/Cl(market)[d],0)
       if (quant > 0)
         orders <- rbind(orders,
                  data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
                             val = c(quant,
                                     Cl(market)[d]*(1+exp.prof),
                                     Cl(market)[d]*(1-max.loss)
                                    ),
                             action = c('open','close','close'),
                             posID = c(NA,NA,NA)
                            )
                        )
   # ii) short positions
       } else if (signals[d] == 's') {
  # this is the money already committed to buy stocks
  # because of currently opened short positions
             need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
               "N.stocks"])*Cl(market)[d]
            quant <- round(bet*(money-need2buy)/Cl(market)[d],0)
            if (quant > 0)
               orders <- rbind(orders,
            data.frame(order=c(-1,1,1),order.type=c(1,2,3),
                       val = c(quant,
                               Cl(market)[d]*(1-exp.prof),
                               Cl(market)[d]*(1+max.loss)
                              ),
                      action = c('open','close','close'),
                      posID = c(NA,NA,NA)
                     )
                             )
 }
 orders
 }

Trading simulator

## Train and test periods
start <- 1
len.tr <- 1000
len.ts <- 500
tr <- start:(start+len.tr-1)
ts <- (start+len.tr):(start+len.tr+len.ts-1)
## getting the quotes for the testing period
data(GSPC)
date <- rownames(Tdata.train[start+len.tr,])
marketTP <- GSPC[paste(date,'/',sep='')][1:len.ts]
## learning the model and obtaining its signal predictions for the test period
library(e1071)
s <- svm(Tform, Tdata.train[tr,], cost=10,gamma=0.01)
p <- predict(s, Tdata.train[ts,])
sig <- trading.signals(p, 0.1, -0.1)
## now using the simulated trader during the testing period
t1 <- trading.simulator(marketTP, signals=sig, policy.func='policy.1',
                        policy.pars=list(exp.prof=0.05,bet=0.2,hold.time=30))

Evaluation

summary(t1)
## 
## == Summary of a Trading Simulation with  500  days ==
## 
## Trading policy function :  policy.1 
## Policy function parameters:
##   exp.prof  =  0.05 
##   bet  =  0.2 
##   hold.time  =  30 
## 
## Transaction costs :  5 
## Initial Equity    :  1e+06 
## Final Equity      :  1019713   Return :  1.97 %
## Number of trading positions:  8 
## 
## Use function "tradingEvaluation()" for further stats on this simulation.
tradingEvaluation(t1)
##     NTrades       NProf    PercProf          PL         Ret   RetOverBH 
##        8.00        5.00       62.50    19712.54        1.97       -4.88 
##       MaxDD SharpeRatio     AvgProf     AvgLoss       AvgPL     MaxProf 
##    25630.72        0.04        5.11       -5.00        1.32        5.26 
##     MaxLoss 
##       -5.00

Plot

plot(t1,marketTP, theme = "white", name = "SP500")

## Rentability =  1.971254 %

policy2

t2 <- trading.simulator(marketTP, sig, "policy.2", list(exp.prof = 0.05, bet = 0.3))
summary(t2)
## 
## == Summary of a Trading Simulation with  500  days ==
## 
## Trading policy function :  policy.2 
## Policy function parameters:
##   exp.prof  =  0.05 
##   bet  =  0.3 
## 
## Transaction costs :  5 
## Initial Equity    :  1e+06 
## Final Equity      :  1152332   Return :  15.23 %
## Number of trading positions:  37 
## 
## Use function "tradingEvaluation()" for further stats on this simulation.

evaluation policy 2

tradingEvaluation(t2)
##     NTrades       NProf    PercProf          PL         Ret   RetOverBH 
##       37.00       26.00       70.27   152332.30       15.23        8.38 
##       MaxDD SharpeRatio     AvgProf     AvgLoss       AvgPL     MaxProf 
##    67492.23        0.06        4.99       -4.89        2.05        5.26 
##     MaxLoss 
##       -5.00

plot t2

plot(t2,marketTP, theme = "white", name = "SP500")

## Rentability =  15.23323 %