library(lubridate)
library(plotly)
library(dplyr)
library(fpp)
library(quantmod)
library(xts)
library(TTR)
library(PerformanceAnalytics)
setwd("C:/Users/jy/Desktop/cxt")
df <- read.csv("Data Set.csv", stringsAsFactors = F)
#Convert OHLC,V to numeric variables
for(i in c(2,3,4,5,6)){
df[,i] <- as.numeric(df[,i])
}
#Convert string to date
df[,1] <- ymd(df[,1])
summary(df)
## Date O H
## Min. :2007-01-03 Min. :-1000.00 Min. :-1000.00
## 1st Qu.:2008-01-02 1st Qu.: 38.16 1st Qu.: 38.69
## Median :2009-01-01 Median : 43.99 Median : 44.57
## Mean :2009-01-01 Mean : 43.25 Mean : 43.87
## 3rd Qu.:2010-01-01 3rd Qu.: 48.94 3rd Qu.: 49.50
## Max. :2010-12-31 Max. : 1000.00 Max. : 1000.00
## NA's :1 NA's :1
## L C V
## Min. :-1000.00 Min. :-1000.00 Min. : 0
## 1st Qu.: 37.68 1st Qu.: 38.23 1st Qu.: 2008250
## Median : 43.31 Median : 43.94 Median : 5756600
## Mean : 42.50 Mean : 43.19 Mean : 6708432
## 3rd Qu.: 48.12 3rd Qu.: 48.82 3rd Qu.: 9636300
## Max. : 1000.00 Max. : 1000.00 Max. :33038600
## NA's :1 NA's :1
There are extreme values and NA value in the data set. Those values should be excluded.
u1 <- abs(df[,2]) < 1000
df <- df[u1,]
df <- df[complete.cases(df),]
summary(df)
## Date O H L
## Min. :2007-01-03 Min. :15.84 Min. :18.34 Min. :15.83
## 1st Qu.:2008-01-02 1st Qu.:38.16 1st Qu.:38.70 1st Qu.:37.68
## Median :2009-01-02 Median :43.99 Median :44.57 Median :43.31
## Mean :2009-01-02 Mean :43.33 Mean :43.95 Mean :42.58
## 3rd Qu.:2010-01-04 3rd Qu.:48.93 3rd Qu.:49.50 3rd Qu.:48.10
## Max. :2010-12-31 Max. :64.58 Max. :64.62 Max. :62.89
## C V
## Min. :16.37 Min. : 206400
## 1st Qu.:38.24 1st Qu.: 2067800
## Median :43.94 Median : 5770900
## Mean :43.27 Mean : 6728457
## 3rd Qu.:48.82 3rd Qu.: 9645300
## Max. :63.80 Max. :33038600
bbands <- BBands(df[,c("H","L","C")])
df <- cbind(df, data.frame(bbands[,1:3]))
# plot candlestick chart
p <- df %>%
plot_ly(x = ~Date, type="candlestick",
open = ~O, close = ~C,
high = ~H, low = ~L, name = "Price") %>%
add_lines(y = ~up , name = "B Bands",
line = list(color = '#ccc', width = 0.5),
legendgroup = "Bollinger Bands",
hoverinfo = "none") %>%
add_lines(y = ~dn, name = "B Bands",
line = list(color = '#ccc', width = 0.5),
legendgroup = "Bollinger Bands",
showlegend = FALSE, hoverinfo = "none") %>%
add_lines(y = ~mavg, name = "Mv Avg",
line = list(color = '#E377C2', width = 0.5),
hoverinfo = "none") %>%
layout(yaxis = list(title = "Price"))
# plot volume bar chart
pp <- df %>%
plot_ly(x=~Date, y=~V, type='bar', name = "Volume") %>%
layout(yaxis = list(title = "Volume"))
# create rangeselector buttons
rs <- list(visible = TRUE, x = 0.5, y = -0.055,
xanchor = 'center', yref = 'paper',
font = list(size = 9),
buttons = list(
list(count=1,
label='RESET',
step='all'),
list(count=1,
label='1 YR',
step='year',
stepmode='backward'),
list(count=3,
label='3 MO',
step='month',
stepmode='backward'),
list(count=1,
label='1 MO',
step='month',
stepmode='backward')
))
# subplot with shared x axis
p <- subplot(p, pp, heights = c(0.7,0.2), nrows=2,
shareX = TRUE, titleY = TRUE) %>%
layout(title = paste("price"),
xaxis = list(rangeselector = rs),
legend = list(orientation = 'h', x = 0.5, y = 1,
xanchor = 'center', yref = 'paper',
font = list(size = 10),
bgcolor = 'transparent'))
p
We can’t possible know the High and Low of an intraday price movement without looking into the future. Therefore, we can exclude these 2 columns.
df2 <- df[,c("Date", "O", "C", "V")]
n <- nrow(df2)
df2$return <- ( df2[1:n,"C"]/c(NA, df2[1:(n-1), "C"]) ) - 1 #Today's close divided by yesterday's close
df2$return2 <- ( df2[1:n,"C"]/df2[1:n,"O"] ) - 1 #Today's close divided by today's open
summary(df2)
## Date O C V
## Min. :2007-01-03 Min. :15.84 Min. :16.37 Min. : 206400
## 1st Qu.:2008-01-02 1st Qu.:38.16 1st Qu.:38.24 1st Qu.: 2067800
## Median :2009-01-02 Median :43.99 Median :43.94 Median : 5770900
## Mean :2009-01-02 Mean :43.33 Mean :43.27 Mean : 6728457
## 3rd Qu.:2010-01-04 3rd Qu.:48.93 3rd Qu.:48.82 3rd Qu.: 9645300
## Max. :2010-12-31 Max. :64.58 Max. :63.80 Max. :33038600
##
## return return2
## Min. :-0.1553221 Min. :-0.124584
## 1st Qu.:-0.0151483 1st Qu.:-0.013086
## Median : 0.0002350 Median :-0.001354
## Mean : 0.0009754 Mean :-0.001331
## 3rd Qu.: 0.0173879 3rd Qu.: 0.010367
## Max. : 0.2653846 Max. : 0.137284
## NA's :1
plot(x = df2$Date, y = df2$return)
There are volatility clustering near the end of 2008, which gives the clue that this time series resembles a stock price series during the subprime crisis. A strong uptrend follows after the crisis.
We will be looking at the histogram of intraday return based on the formula: close/open - 1
hist(df2$return2)
summary(df2$return2)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.124584 -0.013086 -0.001354 -0.001331 0.010367 0.137284
skew <- skewness(df2$return2)
We can observe from the histogram plotted, there are more negative returns than positive returns. In other words, the distribution of daily returns are slightly skewed to the right. An obvious and simple strategy would be to short sell each day and exploit the positively skewed (skewness = 0.0978844) daily returns.
Refer to Strategy 1 and Strategy 2 at http://rpubs.com/jycheah/tradingstrat.
df3 <- df2 %>% mutate(sell_return = -1*return2)
strat3 <- xts(df3$sell_return, df3$Date);
compare <- na.omit(cbind(strat3))
colnames(compare) <- c("Strategy 3")
charts.PerformanceSummary(compare)
Assume the starting capital is 1.00.
N <- nrow(df3)
equity_curve <- sapply(1:N, FUN = function(j){
prod(df3$sell_return[1:j]+1)
})
data.frame(eq = equity_curve, date = df3$Date) %>% ggplot(aes(x=date, y = eq)) + geom_line()
If each transactions reduce the capital by a factor of 1-c, then
(final capital/initial capital)*(1-c)^N = 1, where c is the breakeven round trip cost in percentage, N is the total number of sell trades initiated.
N <- nrow(df3)
c <- 1-(prod(1+df3$sell_return))^(-1/N)
c <- c*100
Total number of trades = 1005
The breakeven round trip cost (%) = 0.1013826%.