Trading Strategies (v2)

Load R Packages and the Data Set

library(lubridate)
library(plotly)
library(dplyr)
library(fpp)
library(quantmod)
library(xts)
library(TTR)
library(PerformanceAnalytics)

setwd("C:/Users/jy/Desktop/cxt")
df <- read.csv("Data Set.csv", stringsAsFactors = F)

Pre-processing of Data

#Convert OHLC,V to numeric variables
for(i in c(2,3,4,5,6)){
      df[,i] <- as.numeric(df[,i])
}

#Convert string to date
df[,1] <- ymd(df[,1])

summary(df)

##       Date                  O                  H           
##  Min.   :2007-01-03   Min.   :-1000.00   Min.   :-1000.00  
##  1st Qu.:2008-01-02   1st Qu.:   38.16   1st Qu.:   38.69  
##  Median :2009-01-01   Median :   43.99   Median :   44.57  
##  Mean   :2009-01-01   Mean   :   43.25   Mean   :   43.87  
##  3rd Qu.:2010-01-01   3rd Qu.:   48.94   3rd Qu.:   49.50  
##  Max.   :2010-12-31   Max.   : 1000.00   Max.   : 1000.00  
##                       NA's   :1          NA's   :1         
##        L                  C                  V           
##  Min.   :-1000.00   Min.   :-1000.00   Min.   :       0  
##  1st Qu.:   37.68   1st Qu.:   38.23   1st Qu.: 2008250  
##  Median :   43.31   Median :   43.94   Median : 5756600  
##  Mean   :   42.50   Mean   :   43.19   Mean   : 6708432  
##  3rd Qu.:   48.12   3rd Qu.:   48.82   3rd Qu.: 9636300  
##  Max.   : 1000.00   Max.   : 1000.00   Max.   :33038600  
##  NA's   :1          NA's   :1

There are extreme values and NA value in the data set. Those values should be excluded.

u1 <- abs(df[,2]) < 1000
df <- df[u1,]

df <- df[complete.cases(df),]

summary(df)

##       Date                  O               H               L        
##  Min.   :2007-01-03   Min.   :15.84   Min.   :18.34   Min.   :15.83  
##  1st Qu.:2008-01-02   1st Qu.:38.16   1st Qu.:38.70   1st Qu.:37.68  
##  Median :2009-01-02   Median :43.99   Median :44.57   Median :43.31  
##  Mean   :2009-01-02   Mean   :43.33   Mean   :43.95   Mean   :42.58  
##  3rd Qu.:2010-01-04   3rd Qu.:48.93   3rd Qu.:49.50   3rd Qu.:48.10  
##  Max.   :2010-12-31   Max.   :64.58   Max.   :64.62   Max.   :62.89  
##        C               V           
##  Min.   :16.37   Min.   :  206400  
##  1st Qu.:38.24   1st Qu.: 2067800  
##  Median :43.94   Median : 5770900  
##  Mean   :43.27   Mean   : 6728457  
##  3rd Qu.:48.82   3rd Qu.: 9645300  
##  Max.   :63.80   Max.   :33038600

Candlestick Plot of the Asset Price

bbands <- BBands(df[,c("H","L","C")])
df <- cbind(df, data.frame(bbands[,1:3]))
# plot candlestick chart
p <- df %>%
  plot_ly(x = ~Date, type="candlestick",
          open = ~O, close = ~C,
          high = ~H, low = ~L, name = "Price") %>%
add_lines(y = ~up , name = "B Bands",
            line = list(color = '#ccc', width = 0.5),
            legendgroup = "Bollinger Bands",
            hoverinfo = "none") %>%
  add_lines(y = ~dn, name = "B Bands",
            line = list(color = '#ccc', width = 0.5),
            legendgroup = "Bollinger Bands",
            showlegend = FALSE, hoverinfo = "none") %>%
  add_lines(y = ~mavg, name = "Mv Avg",
            line = list(color = '#E377C2', width = 0.5),
            hoverinfo = "none") %>%
  layout(yaxis = list(title = "Price"))


# plot volume bar chart
pp <- df %>%
  plot_ly(x=~Date, y=~V, type='bar', name = "Volume") %>%
  layout(yaxis = list(title = "Volume"))

# create rangeselector buttons
rs <- list(visible = TRUE, x = 0.5, y = -0.055,
           xanchor = 'center', yref = 'paper',
           font = list(size = 9),
           buttons = list(
             list(count=1,
                  label='RESET',
                  step='all'),
             list(count=1,
                  label='1 YR',
                  step='year',
                  stepmode='backward'),
             list(count=3,
                  label='3 MO',
                  step='month',
                  stepmode='backward'),
             list(count=1,
                  label='1 MO',
                  step='month',
                  stepmode='backward')
           ))

# subplot with shared x axis
p <- subplot(p, pp, heights = c(0.7,0.2), nrows=2,
             shareX = TRUE, titleY = TRUE) %>%
  layout(title = paste("price"),
         xaxis = list(rangeselector = rs),
         legend = list(orientation = 'h', x = 0.5, y = 1,
                       xanchor = 'center', yref = 'paper',
                       font = list(size = 10),
                       bgcolor = 'transparent'))

p

We can’t possible know the High and Low of an intraday price movement without looking into the future. Therefore, we can exclude these 2 columns.

df2 <- df[,c("Date", "O", "C", "V")]
n <- nrow(df2)
df2$return <- ( df2[1:n,"C"]/c(NA, df2[1:(n-1), "C"]) ) - 1  #Today's close divided by yesterday's close
df2$return2 <- ( df2[1:n,"C"]/df2[1:n,"O"] ) - 1 #Today's close divided by today's open

summary(df2)

##       Date                  O               C               V           
##  Min.   :2007-01-03   Min.   :15.84   Min.   :16.37   Min.   :  206400  
##  1st Qu.:2008-01-02   1st Qu.:38.16   1st Qu.:38.24   1st Qu.: 2067800  
##  Median :2009-01-02   Median :43.99   Median :43.94   Median : 5770900  
##  Mean   :2009-01-02   Mean   :43.33   Mean   :43.27   Mean   : 6728457  
##  3rd Qu.:2010-01-04   3rd Qu.:48.93   3rd Qu.:48.82   3rd Qu.: 9645300  
##  Max.   :2010-12-31   Max.   :64.58   Max.   :63.80   Max.   :33038600  
##                                                                         
##      return              return2         
##  Min.   :-0.1553221   Min.   :-0.124584  
##  1st Qu.:-0.0151483   1st Qu.:-0.013086  
##  Median : 0.0002350   Median :-0.001354  
##  Mean   : 0.0009754   Mean   :-0.001331  
##  3rd Qu.: 0.0173879   3rd Qu.: 0.010367  
##  Max.   : 0.2653846   Max.   : 0.137284  
##  NA's   :1

Plot of Daily Returns

plot(x = df2$Date, y = df2$return)

There are volatility clustering near the end of 2008, which gives the clue that this time series resembles a stock price series during the subprime crisis. A strong uptrend follows after the crisis.

Histogram of Daily Returns

We will be looking at the histogram of intraday return based on the formula: close/open - 1

hist(df2$return2)

summary(df2$return2)

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -0.124584 -0.013086 -0.001354 -0.001331  0.010367  0.137284

skew <- skewness(df2$return2)

We can observe from the histogram plotted, there are more negative returns than positive returns. In other words, the distribution of daily returns are slightly skewed to the right. An obvious and simple strategy would be to short sell each day and exploit the positively skewed (skewness = 0.0978844) daily returns.

Refer to Strategy 1 and Strategy 2 at http://rpubs.com/jycheah/tradingstrat.

Strategy 3

Short sell at today’s open, and buy back at today’s close.
Repeat on each subsequent trading day.

df3 <- df2 %>% mutate(sell_return = -1*return2)

strat3 <- xts(df3$sell_return, df3$Date); 
compare <- na.omit(cbind(strat3))
colnames(compare) <- c("Strategy 3")

Cumulative Return

charts.PerformanceSummary(compare)

Equity Curve

Assume the starting capital is 1.00.

N <- nrow(df3)
equity_curve <- sapply(1:N, FUN = function(j){
    prod(df3$sell_return[1:j]+1)      
})

data.frame(eq = equity_curve, date = df3$Date) %>% ggplot(aes(x=date, y = eq)) + geom_line()

Breakeven Round Trip Cost

If each transactions reduce the capital by a factor of 1-c, then
(final capital/initial capital)*(1-c)^N = 1, where c is the breakeven round trip cost in percentage, N is the total number of sell trades initiated.

N <- nrow(df3)
c <- 1-(prod(1+df3$sell_return))^(-1/N) 
c <- c*100

Total number of trades = 1005
The breakeven round trip cost (%) = 0.1013826%.