library(lubridate)
library(plotly)
library(dplyr)
library(fpp)
library(quantmod)
library(xts)
library(TTR)
library(PerformanceAnalytics)
setwd("C:/Users/jy/Desktop/cxt")
df <- read.csv("Data Set.csv", stringsAsFactors = F)
#Convert OHLC,V to numeric variables
for(i in c(2,3,4,5,6)){
df[,i] <- as.numeric(df[,i])
}
#Convert string to date
df[,1] <- ymd(df[,1])
summary(df)
## Date O H
## Min. :2007-01-03 Min. :-1000.00 Min. :-1000.00
## 1st Qu.:2008-01-02 1st Qu.: 38.16 1st Qu.: 38.69
## Median :2009-01-01 Median : 43.99 Median : 44.57
## Mean :2009-01-01 Mean : 43.25 Mean : 43.87
## 3rd Qu.:2010-01-01 3rd Qu.: 48.94 3rd Qu.: 49.50
## Max. :2010-12-31 Max. : 1000.00 Max. : 1000.00
## NA's :1 NA's :1
## L C V
## Min. :-1000.00 Min. :-1000.00 Min. : 0
## 1st Qu.: 37.68 1st Qu.: 38.23 1st Qu.: 2008250
## Median : 43.31 Median : 43.94 Median : 5756600
## Mean : 42.50 Mean : 43.19 Mean : 6708432
## 3rd Qu.: 48.12 3rd Qu.: 48.82 3rd Qu.: 9636300
## Max. : 1000.00 Max. : 1000.00 Max. :33038600
## NA's :1 NA's :1
There are extreme values and NA value in the data set. Those values should be excluded.
u1 <- abs(df[,2]) < 1000
df <- df[u1,]
df <- df[complete.cases(df),]
summary(df)
## Date O H L
## Min. :2007-01-03 Min. :15.84 Min. :18.34 Min. :15.83
## 1st Qu.:2008-01-02 1st Qu.:38.16 1st Qu.:38.70 1st Qu.:37.68
## Median :2009-01-02 Median :43.99 Median :44.57 Median :43.31
## Mean :2009-01-02 Mean :43.33 Mean :43.95 Mean :42.58
## 3rd Qu.:2010-01-04 3rd Qu.:48.93 3rd Qu.:49.50 3rd Qu.:48.10
## Max. :2010-12-31 Max. :64.58 Max. :64.62 Max. :62.89
## C V
## Min. :16.37 Min. : 206400
## 1st Qu.:38.24 1st Qu.: 2067800
## Median :43.94 Median : 5770900
## Mean :43.27 Mean : 6728457
## 3rd Qu.:48.82 3rd Qu.: 9645300
## Max. :63.80 Max. :33038600
bbands <- BBands(df[,c("H","L","C")])
df <- cbind(df, data.frame(bbands[,1:3]))
# plot candlestick chart
p <- df %>%
plot_ly(x = ~Date, type="candlestick",
open = ~O, close = ~C,
high = ~H, low = ~L, name = "Price") %>%
add_lines(y = ~up , name = "B Bands",
line = list(color = '#ccc', width = 0.5),
legendgroup = "Bollinger Bands",
hoverinfo = "none") %>%
add_lines(y = ~dn, name = "B Bands",
line = list(color = '#ccc', width = 0.5),
legendgroup = "Bollinger Bands",
showlegend = FALSE, hoverinfo = "none") %>%
add_lines(y = ~mavg, name = "Mv Avg",
line = list(color = '#E377C2', width = 0.5),
hoverinfo = "none") %>%
layout(yaxis = list(title = "Price"))
# plot volume bar chart
pp <- df %>%
plot_ly(x=~Date, y=~V, type='bar', name = "Volume") %>%
layout(yaxis = list(title = "Volume"))
# create rangeselector buttons
rs <- list(visible = TRUE, x = 0.5, y = -0.055,
xanchor = 'center', yref = 'paper',
font = list(size = 9),
buttons = list(
list(count=1,
label='RESET',
step='all'),
list(count=1,
label='1 YR',
step='year',
stepmode='backward'),
list(count=3,
label='3 MO',
step='month',
stepmode='backward'),
list(count=1,
label='1 MO',
step='month',
stepmode='backward')
))
# subplot with shared x axis
p <- subplot(p, pp, heights = c(0.7,0.2), nrows=2,
shareX = TRUE, titleY = TRUE) %>%
layout(title = paste("price"),
xaxis = list(rangeselector = rs),
legend = list(orientation = 'h', x = 0.5, y = 1,
xanchor = 'center', yref = 'paper',
font = list(size = 10),
bgcolor = 'transparent'))
p
We can’t possible know the High and Low of an intraday price movement without looking into the future. Therefore, we can exclude these 2 columns.
df2 <- df[,c("Date", "O", "C", "V")]
n <- nrow(df2)
df2$return <- ( df2[1:n,"C"]/c(NA, df2[1:(n-1), "C"]) ) - 1 #Today's close divided by yesterday's close
df2$closedivopen <- ( df2[1:n,"C"]/df2[1:n,"O"] ) - 1 #Today's close divided by today's open
summary(df2)
## Date O C V
## Min. :2007-01-03 Min. :15.84 Min. :16.37 Min. : 206400
## 1st Qu.:2008-01-02 1st Qu.:38.16 1st Qu.:38.24 1st Qu.: 2067800
## Median :2009-01-02 Median :43.99 Median :43.94 Median : 5770900
## Mean :2009-01-02 Mean :43.33 Mean :43.27 Mean : 6728457
## 3rd Qu.:2010-01-04 3rd Qu.:48.93 3rd Qu.:48.82 3rd Qu.: 9645300
## Max. :2010-12-31 Max. :64.58 Max. :63.80 Max. :33038600
##
## return closedivopen
## Min. :-0.1553221 Min. :-0.124584
## 1st Qu.:-0.0151483 1st Qu.:-0.013086
## Median : 0.0002350 Median :-0.001354
## Mean : 0.0009754 Mean :-0.001331
## 3rd Qu.: 0.0173879 3rd Qu.: 0.010367
## Max. : 0.2653846 Max. : 0.137284
## NA's :1
plot(x = df2$Date, y = df2$return)
There are volatility clustering near the end of 2008, which gives the clue that this time series resembles a stock price series during the subprime crisis. A strong uptrend follows after the crisis.
*Since this series isn’t “forged”, I doubt there are any “obvious” opportunity or anomaly to capture.
Most stock prices trend upwards, and when there is an uptrend, there is strong positive autocorrelation between today’s and yesterday’s price.
Personally, I don’t prefer an intraday strategy (trading on noises are difficult especially with transaction cost). A weekly trading strategy is used, where: 1. Convert daily returns to weekly returns 2. If previous week return > a certain percentage, then buy at the opening of the start of this week. Sell at the end of this week. 3. If previous week return < a certain percentage, do nothing this week. *Based on the belief that stock prices tend to trend upwards, and downward movements are difficult to predict. Therefore, only buy and do not short sell. 4. Repeat.
For simplicity, we assume 0% round trip cost, and we do not hold the stock even though next week gives a buy signal. We will sell and rebuy again on such circumstances. We will backtest Strategy 1 with 2 different parameters, one is to set “a certain percentage” as 0% and 2nd as 2%.
wkret <- function(cl, op){
L <- length(cl)
cl[L]/op[1]-1
}
wkclose <- function(v){
L <- length(v)
v[L]
}
signal <- function(wkreturn, percent = 0 ){
wkreturn > percent
}
roundtripcost <- 0.000 #Assume 0% round trip cost
df4 <- as.data.frame( df2 %>%
mutate(yr = year(Date), wk = week(Date)) %>%
group_by(yr, wk) %>%
summarise(Date = Date[length(Date)], wkreturn = wkret(C, O), wkclose = wkclose(C))
)
df5 <- df4 %>% mutate(signal = lag(signal(wkreturn, 0), 1) ) %>%
mutate(signal2 = lag(signal(wkreturn, 0.02), 1) ) %>%
mutate(adj_wkreturn = wkreturn - roundtripcost) #adjust for round trip cost
actual <- xts(df5$adj_wkreturn, df5$Date);
signal_1 <- xts(df5$adj_wkreturn*df5$signal, df5$Date)
signal_2 <- xts(df5$adj_wkreturn*df5$signal2, df5$Date)
compare <- na.omit(cbind(actual, signal_1, signal_2))
colnames(compare) <- c("Actual", "Signal 1", "Signal 2")
charts.PerformanceSummary(compare)
nostrat <- prod(1+df5$adj_wkreturn, na.rm = T) - 1
strat1a <- prod(1+df5$adj_wkreturn*df5$signal, na.rm = T) - 1
strat1b <- prod(1+df5$adj_wkreturn*df5$signal2, na.rm = T) -1
Without implementing any strategy (just buy at the start of week and sell at the end of week), the total return is 0.4148288; Strategy 1 (0%) return: -0.1970824; Strategy 1 (2%) return: -0.5438677. In other words, Strategy 1 is a losing strategy.
The first strategy is bad in this scenario, what if we revert the “criteria”. The justification now is such that, stock price that drops, tends to rebound and revert back to the “mean” trend. *It’s bad to do curve fitting, but for now, we just try to have a look at what results this strategy would give.
wkret <- function(cl, op){
L <- length(cl)
cl[L]/op[1]-1
}
wkclose <- function(v){
L <- length(v)
v[L]
}
signal <- function(wkreturn, percent = 0 ){
wkreturn < percent
}
roundtripcost <- 0.000 #Assume 0% round trip cost
df4 <- as.data.frame( df2 %>%
mutate(yr = year(Date), wk = week(Date)) %>%
group_by(yr, wk) %>%
summarise(Date = Date[length(Date)], wkreturn = wkret(C, O), wkclose = wkclose(C))
)
df5 <- df4 %>% mutate(signal = lag(signal(wkreturn, 0), 1) ) %>%
mutate(signal2 = lag(signal(wkreturn, 0.02), 1) ) %>%
mutate(adj_wkreturn = wkreturn - roundtripcost) #adjust for round trip cost
actual <- xts(df5$adj_wkreturn, df5$Date);
signal_1 <- xts(df5$adj_wkreturn*df5$signal, df5$Date)
signal_2 <- xts(df5$adj_wkreturn*df5$signal2, df5$Date)
compare <- na.omit(cbind(actual, signal_1, signal_2))
colnames(compare) <- c("Actual", "Signal 1", "Signal 2")
charts.PerformanceSummary(compare)
nostrat <- prod(1+df5$adj_wkreturn, na.rm = T) - 1
strat2a <- prod(1+df5$adj_wkreturn*df5$signal, na.rm = T) - 1
strat2b <- prod(1+df5$adj_wkreturn*df5$signal2, na.rm = T) - 1
Without implementing any strategy (just buy at the start of week and sell at the end of week), the total return is 0.4148288; Strategy 1 (0%) return: 0.90323; Strategy 1 (2%) return: 2.3502055. In other words, Strategy 2 is quite a winning strategy.
**After I did Strategy 1, I realised I could just look at the ACF and PACF plot of the weekly returns. Based on these 2 plots, Strategy 2 should be suggested first, because the weekly returns are actually more negatively autocorrelated than positively autocorrelated.
tsdisplay(actual)
num_of_trades <- sum(df5$signal2, na.rm = T)
breakeven_roundtripcost <- strat2b/num_of_trades
The number of trades done using Strategy 2 (2%) is 125. For simplicity, assume the breakeven round trip cost is the total return generated divided by the number of trades.
Hence, the breakeven round trip cost is 0.0188016.
Strategy 1 resembles more of a qualitative approach, but it fails in this specific case, I believe it would work on a general stock market.
Strategy 2 resembles more of a statistical approach, where we try to exploit negative autocorrelation in the weekly returns, but this may fail if such autocorrelation disappears due to other external factors (fundamental reasons/changes in the economic environment).