loading data

library(quantmod)

## Loading required package: xts

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo

## Loading required package: TTR

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Version 0.4-0 included new data defaults. See ?getSymbols.

library(xts)
library(DMwR2)
data("GSPC")
first(GSPC)

##            GSPC.Open GSPC.High GSPC.Low GSPC.Close GSPC.Volume GSPC.Adjusted
## 1970-01-02     92.06     93.54    91.79         93     8050000            93

last(GSPC)

##            GSPC.Open GSPC.High GSPC.Low GSPC.Close GSPC.Volume GSPC.Adjusted
## 2016-01-25   1906.28   1906.28  1875.97    1877.08  4401380000       1877.08

Defining the prediction task

We want to see if margin of profit %P in the next k days is attainable.So we want to make an indicator of the tendency in the next k days

T.ind <- function(quotes, tgt.margin = 0.025, n.days = 10) {
   v <- apply(HLC(quotes), 1, mean)
   v[1] <- Cl(quotes)[1]
   r <- matrix(NA, ncol = n.days, nrow = nrow(quotes))
   for (x in 1:n.days) {
   r[,x] <- Next(Delt(v,k=x),x) 
       }
   x <- apply(r, 1, function(x) sum(x[x > tgt.margin | x < -tgt.margin]))
   if (is.xts(quotes)) xts(x, time(quotes)) else x
}

Performance of new index

candleChart(last(GSPC,'3 months'),theme='white', TA=NULL)

avgPrice <- function(p) apply(HLC(p),1,mean)
addAvgPrice <- newTA(FUN=avgPrice,col=1,legend='AvgPrice')
addT.ind <- newTA(FUN=T.ind,col='red', legend='tgtRet')
addAvgPrice(on=1)

addT.ind()

# feature selection ### We use multiple specific features developed for stock data analysis then analyze these features to find out their importance.

library(TTR)
myATR <- function(x) ATR(HLC(x))[,'atr']
 mySMI <- function(x) SMI(HLC(x))[, "SMI"]
 myADX <- function(x) ADX(HLC(x))[,'ADX']
 myAroon <- function(x) aroon(cbind(Hi(x),Lo(x)))$oscillator
 myBB <- function(x) BBands(HLC(x))[, "pctB"]
 myChaikinVol <- function(x) Delt(chaikinVolatility(cbind(Hi(x),Lo(x))))[, 1]
 myCLV <- function(x) EMA(CLV(HLC(x)))[, 1]
 myEMV <- function(x) EMV(cbind(Hi(x),Lo(x)),Vo(x))[,2]
 myMACD <- function(x) MACD(Cl(x))[,2]
 myMFI <- function(x) MFI(HLC(x), Vo(x))
 mySAR <- function(x) SAR(cbind(Hi(x),Cl(x))) [,1]
 myVolat <- function(x) volatility(OHLC(x),calc="garman")[,1]

Now we use random forest to find out importance of these features.

We split data in train and test

library(randomForest)

## randomForest 4.6-14

## Type rfNews() to see new features/changes/bug fixes.

data.model <- specifyModel(T.ind(GSPC) ~ Delt(Cl(GSPC),k=1:10) +
   myATR(GSPC) + mySMI(GSPC) + myADX(GSPC) + myAroon(GSPC) +
   myBB(GSPC) + myChaikinVol(GSPC) + myCLV(GSPC) +
   CMO(Cl(GSPC)) + EMA(Delt(Cl(GSPC))) + myEMV(GSPC) +
   myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) + RSI(Cl(GSPC)) +
   mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))

set.seed(1234)
 rf <- buildModel(data.model,method='randomForest',
   training.per=c("1995-01-01","2005-12-30"),
   ntree=1000, importance=TRUE)

Variable importance

varImpPlot(rf@fitted.model, type = 1)

Variable importance

imp <- importance(rf@fitted.model,type = 1)
rownames(imp)[which(imp>30)]

##  [1] "myATR.GSPC"      "mySMI.GSPC"      "myADX.GSPC"      "myAroon.GSPC"   
##  [5] "myEMV.GSPC"      "myVolat.GSPC"    "myMACD.GSPC"     "myMFI.GSPC"     
##  [9] "mySAR.GSPC"      "runMean.Cl.GSPC" "runSD.Cl.GSPC"

Final dataset

data.model <- specifyModel(T.ind(GSPC) ~ myATR(GSPC) + mySMI(GSPC) +
                             myADX(GSPC)+myAroon(GSPC) + myEMV(GSPC) +                                       myVolat(GSPC) + myMACD(GSPC) + myMFI(GSPC) +                                    mySAR(GSPC) + runMean(Cl(GSPC)) + runSD(Cl(GSPC)))

Data wrangling to generate signal thresholds

Tdata.train <- as.data.frame(modelData(data.model,
  data.window=c('1970-01-02','2005-12-30')))
Tdata.eval <- na.omit(as.data.frame(modelData(data.model, data.window=c('2006-01-01','2016-01-25'))))
Tform <- as.formula('T.ind.GSPC ~ .')
## The classification task
buy.thr <- 0.1
sell.thr <- -0.1
Tdata.trainC <- cbind(Signal=trading.signals(Tdata.train[["T.ind.GSPC"]],
                 buy.thr,sell.thr),Tdata.train[,-1])
Tdata.evalC <- cbind(Signal=trading.signals(Tdata.eval[["T.ind.GSPC"]],
                buy.thr,sell.thr),Tdata.eval[,-1])
TformC <- as.formula("Signal ~ .")

Modeling using Neural net

set.seed(1234)
 library(nnet)
 ## The first column is the target variable
 norm.data <- data.frame(T.ind.GSPC=Tdata.train[[1]],scale(Tdata.train[,-1]))
 nn <- nnet(Tform, norm.data[1:1000, ], size = 5, decay = 0.01,
             maxit = 1000, linout = TRUE, trace = FALSE)
 preds <- predict(nn, norm.data[1001:2000, ])

evaluation

sigs.nn <- trading.signals(preds,0.1,-0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.nn,true.sigs)

##     precision    recall
## s   0.2809917 0.1931818
## b   0.3108108 0.2857143
## s+b 0.2973978 0.2373887

SVM regression

set.seed(1234)
library(e1071)
sv <- svm(Tform, Tdata.train[1:1000, ], gamma = 0.001, cost = 100)
s.preds <- predict(sv, Tdata.train[1001:2000, ])
sigs.svm <- trading.signals(s.preds, 0.1, -0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.svm, true.sigs)

##     precision      recall
## s       0.375 0.017045455
## b         NaN 0.000000000
## s+b     0.375 0.008902077

SVM classification

set.seed(1234)
library(kernlab)
ksv <- ksvm(Signal ~ ., Tdata.trainC[1:1000, ], C = 10)
ks.preds <- predict(ksv, Tdata.trainC[1001:2000, ])
sigs.PR(ks.preds, Tdata.trainC[1001:2000, 1])

##     precision    recall
## s   0.2365591 0.2500000
## b   0.3246753 0.1552795
## s+b 0.2623574 0.2047478

Multivariate adaptive regression splines

library(earth)

## Warning: package 'earth' was built under R version 3.6.2

## Loading required package: Formula

## Loading required package: plotmo

## Warning: package 'plotmo' was built under R version 3.6.2

## Loading required package: plotrix

## Loading required package: TeachingDemos

## Warning: package 'TeachingDemos' was built under R version 3.6.2

e <- earth(Tform, Tdata.train[1:1000, ])
e.preds <- predict(e, Tdata.train[1001:2000, ])
sigs.e <- trading.signals(e.preds, 0.1, -0.1)
true.sigs <- trading.signals(Tdata.train[1001:2000, "T.ind.GSPC"], 0.1, -0.1)
sigs.PR(sigs.e, true.sigs)

##     precision    recall
## s   0.2894737 0.2500000
## b   0.3504274 0.2546584
## s+b 0.3159851 0.2522255

(summary(e))

## Call: earth(formula=Tform, data=Tdata.train[1:1000,])
## 
##                           coefficients
## (Intercept)                  0.5241811
## h(myATR.GSPC-2.56817)        1.2724353
## h(-61.825-mySMI.GSPC)        0.0594203
## h(myADX.GSPC-40.6215)       -0.0104803
## h(50.657-myADX.GSPC)        -0.0025279
## h(myADX.GSPC-50.657)         0.0823181
## h(0.204717-myVolat.GSPC)    -0.5105416
## h(myVolat.GSPC-0.204717)    -5.6100523
## h(myVolat.GSPC-0.271459)     5.6725474
## h(mySAR.GSPC-74.7031)       -0.0693496
## h(87.944-mySAR.GSPC)        -0.0575104
## h(mySAR.GSPC-87.944)         0.0800171
## h(runMean.Cl.GSPC-79.265)    0.2074780
## h(81.942-runMean.Cl.GSPC)    0.1058493
## h(runMean.Cl.GSPC-81.942)   -0.2185753
## 
## Selected 15 of 18 terms, and 6 of 11 predictors
## Termination condition: Reached nk 23
## Importance: myVolat.GSPC, runMean.Cl.GSPC, myATR.GSPC, mySMI.GSPC, ...
## Number of terms at each degree of interaction: 1 14 (additive model)
## GCV 0.01470628    RSS 13.86568    GRSq 0.3536668    RSq 0.38939

evimp(e, trim=FALSE)

##                      nsubsets   gcv    rss
## myVolat.GSPC               13 100.0  100.0
## runMean.Cl.GSPC            13 100.0  100.0
## myATR.GSPC                 12  96.7   96.4
## mySMI.GSPC                 11  81.5   82.4
## mySAR.GSPC                  7  44.1   47.7
## myADX.GSPC                  5  57.8>  58.5>
## myAroon.GSPC-unused         0   0.0    0.0
## myEMV.GSPC-unused           0   0.0    0.0
## myMACD.GSPC-unused          0   0.0    0.0
## myMFI.GSPC-unused           0   0.0    0.0
## runSD.Cl.GSPC-unused        0   0.0    0.0

plot(e)

# simulated trader

policy1

policy.1 <- function(signals,market,opened.pos,money,
                     bet=0.2,hold.time=10,
                     exp.prof=0.025, max.loss= 0.05
                    )
   {
      d <- NROW(market) # this is the ID of today
      orders <- NULL
      nOs <- NROW(opened.pos)
      # nothing to do!
      if (!nOs && signals[d] == 'h') return(orders)
      # First lets check if we can open new positions
      # i) long positions
     if (signals[d] == 'b' && !nOs) {
        quant <- round(bet*money/Cl(market)[d],0)
        if (quant > 0)
           orders <- rbind(orders,
                  data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
                     val = c(quant,
                        Cl(market)[d]*(1+exp.prof),
                        Cl(market)[d]*(1-max.loss)
                           ),
                     action = c('open','close','close'),
                     posID = c(NA,NA,NA)
                  )
           )             
     # ii) short positions
    } else if (signals[d] == 's' && !nOs) {
     # this is the nr of stocks we already need to buy
     # because of currently opened short positions
           need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
                 "N.stocks"])*Cl(market)[d]
           quant <- round(bet*(money-need2buy)/Cl(market)[d],0)
              if (quant > 0)
                  orders <- rbind(orders,
                     data.frame(order=c(-1,1,1),order.type=c(1,2,3),
                           val = c(quant,
                               Cl(market)[d]*(1-exp.prof),
                               Cl(market)[d]*(1+max.loss)
                           ),
                          action = c('open','close','close'),
                          posID = c(NA,NA,NA)
                     )
                    )
          }
# Now lets check if we need to close positions
# because their holding time is over
      if (nOs)
         for(i in 1:nOs) {
            if (d - opened.pos[i,'Odate'] >= hold.time)
                orders <- rbind(orders,
         data.frame(order=-opened.pos[i,'pos.type'],
               order.type=1,
               val = NA,
               action = 'close',
               posID = rownames(opened.pos)[i]
        )
               )
         }
   orders
}

policy 2

policy.2 <- function(signals,market,opened.pos,money,
                     bet=0.2,exp.prof=0.025, max.loss= 0.05
                    )
  {
    d <- NROW(market) # this is the ID of today
    orders <- NULL
    nOs <- NROW(opened.pos)
    # nothing to do!
    if (!nOs && signals[d] == 'h') return(orders)
    # First lets check if we can open new positions
    # i) long positions
    if (signals[d] == 'b') {
       quant <- round(bet*money/Cl(market)[d],0)
       if (quant > 0)
         orders <- rbind(orders,
                  data.frame(order=c(1,-1,-1),order.type=c(1,2,3),
                             val = c(quant,
                                     Cl(market)[d]*(1+exp.prof),
                                     Cl(market)[d]*(1-max.loss)
                                    ),
                             action = c('open','close','close'),
                             posID = c(NA,NA,NA)
                            )
                        )
   # ii) short positions
       } else if (signals[d] == 's') {
  # this is the money already committed to buy stocks
  # because of currently opened short positions
             need2buy <- sum(opened.pos[opened.pos[,'pos.type']==-1,
               "N.stocks"])*Cl(market)[d]
            quant <- round(bet*(money-need2buy)/Cl(market)[d],0)
            if (quant > 0)
               orders <- rbind(orders,
            data.frame(order=c(-1,1,1),order.type=c(1,2,3),
                       val = c(quant,
                               Cl(market)[d]*(1-exp.prof),
                               Cl(market)[d]*(1+max.loss)
                              ),
                      action = c('open','close','close'),
                      posID = c(NA,NA,NA)
                     )
                             )
 }
 orders
 }

Trading simulator

## Train and test periods
start <- 1
len.tr <- 1000
len.ts <- 500
tr <- start:(start+len.tr-1)
ts <- (start+len.tr):(start+len.tr+len.ts-1)
## getting the quotes for the testing period
data(GSPC)
date <- rownames(Tdata.train[start+len.tr,])
marketTP <- GSPC[paste(date,'/',sep='')][1:len.ts]
## learning the model and obtaining its signal predictions for the test period
library(e1071)
s <- svm(Tform, Tdata.train[tr,], cost=10,gamma=0.01)
p <- predict(s, Tdata.train[ts,])
sig <- trading.signals(p, 0.1, -0.1)
## now using the simulated trader during the testing period
t1 <- trading.simulator(marketTP, signals=sig, policy.func='policy.1',
                        policy.pars=list(exp.prof=0.05,bet=0.2,hold.time=30))

Evaluation

summary(t1)

## 
## == Summary of a Trading Simulation with  500  days ==
## 
## Trading policy function :  policy.1 
## Policy function parameters:
##   exp.prof  =  0.05 
##   bet  =  0.2 
##   hold.time  =  30 
## 
## Transaction costs :  5 
## Initial Equity    :  1e+06 
## Final Equity      :  1019713   Return :  1.97 %
## Number of trading positions:  8 
## 
## Use function "tradingEvaluation()" for further stats on this simulation.

tradingEvaluation(t1)

##     NTrades       NProf    PercProf          PL         Ret   RetOverBH 
##        8.00        5.00       62.50    19712.54        1.97       -4.88 
##       MaxDD SharpeRatio     AvgProf     AvgLoss       AvgPL     MaxProf 
##    25630.72        0.04        5.11       -5.00        1.32        5.26 
##     MaxLoss 
##       -5.00

Plot

plot(t1,marketTP, theme = "white", name = "SP500")

## Rentability =  1.971254 %

policy2

t2 <- trading.simulator(marketTP, sig, "policy.2", list(exp.prof = 0.05, bet = 0.3))
summary(t2)

## 
## == Summary of a Trading Simulation with  500  days ==
## 
## Trading policy function :  policy.2 
## Policy function parameters:
##   exp.prof  =  0.05 
##   bet  =  0.3 
## 
## Transaction costs :  5 
## Initial Equity    :  1e+06 
## Final Equity      :  1152332   Return :  15.23 %
## Number of trading positions:  37 
## 
## Use function "tradingEvaluation()" for further stats on this simulation.

evaluation policy 2

tradingEvaluation(t2)

##     NTrades       NProf    PercProf          PL         Ret   RetOverBH 
##       37.00       26.00       70.27   152332.30       15.23        8.38 
##       MaxDD SharpeRatio     AvgProf     AvgLoss       AvgPL     MaxProf 
##    67492.23        0.06        4.99       -4.89        2.05        5.26 
##     MaxLoss 
##       -5.00

plot t2

plot(t2,marketTP, theme = "white", name = "SP500")

## Rentability =  15.23323 %

stock market return

soroush

2/12/2020

loading data

Defining the prediction task

We want to see if margin of profit %P in the next k days is attainable.So we want to make an indicator of the tendency in the next k days

Performance of new index

Now we use random forest to find out importance of these features.

We split data in train and test

Variable importance

Variable importance

Final dataset

Data wrangling to generate signal thresholds

Modeling using Neural net

evaluation

SVM regression

SVM classification

Multivariate adaptive regression splines

policy1

policy 2

Trading simulator

Evaluation

Plot

policy2

evaluation policy 2

plot t2