ContextBase Logo



SPX Data Candlestick Chart

# quantmod supplies getSymbols() and chartSeries() used below.
library(quantmod)

# Download the SPX series for the fixed study window.  auto.assign = FALSE
# makes getSymbols() hand the data back as a value so it can be stored under
# our own name.  FALSE is spelled out because `F` is an ordinary variable that
# can be rebound, whereas FALSE is a reserved constant.
SYMBL <- getSymbols("SPX", auto.assign = FALSE,
                    from = "2000-01-01", to = "2018-07-06")

# Draw the downloaded series as a candlestick chart using the white theme.
chartSeries(SYMBL, type = "candlesticks", name = "SPX Candlestick Chart",
            theme = chartTheme("white"))



The First Five Rows Of SPX Data

# Show the first five rows of the downloaded series (recorded output follows).
# NOTE(review): the recorded output starts on 2000-03-02, shows single-digit
# prices and an all-NA row -- confirm that "SPX" resolves to the intended
# instrument at the data source.
SYMBL[seq_len(5), ]
##            SPX.Open SPX.High SPX.Low SPX.Close SPX.Volume SPX.Adjusted
## 2000-03-02  1.74884  2.49834 1.74884   2.49834       2562      2.49834
## 2000-03-03  1.87375  1.87375 1.87375   1.87375         80      1.87375
## 2000-03-06  2.49834  2.49834 2.49834   2.49834        240      2.49834
## 2000-03-07  4.37209  4.37209 4.37209   4.37209          0      4.37209
## 2000-03-08       NA       NA      NA        NA         NA           NA



Load Systematic Investor Toolbox (SIT)

# Fetch the gzip-compressed SIT source and evaluate it in this session.
# SECURITY NOTE(review): this runs code downloaded over plain HTTP with no
# integrity check -- confirm the host is trusted, or source a verified copy.
con <- gzcon(url('http://www.systematicportfolio.com/sit.gz', 'rb'))
# `finally` closes the connection even when source() fails part-way, so the
# connection is never leaked; invisible() keeps the call from printing.
invisible(tryCatch(source(con), finally = close(con)))



Load Historical Data

# Symbol to analyse; the same value is used later as the price-chart title.
tickers <- 'SPX'

# Download the history from 2000-01-01 onwards from Yahoo and keep it as a
# value (auto.assign = FALSE) rather than letting getSymbols() create a
# variable.  FALSE is spelled out because `F` is an ordinary variable that can
# be rebound, whereas FALSE is a reserved constant.
data <- getSymbols(tickers, src = 'yahoo', from = '2000-01-01',
                   auto.assign = FALSE)



Compute Distances

# Search setup.  `query`, `reference`, `n.query` and `n.reference` are read
# below and in the later sections but are not assigned anywhere in this file,
# so they are derived here from the downloaded prices: the pattern to match is
# the most recent 90 closes (the same 90-row window the plotting and table
# sections take with last(data, 90)) and the search space is everything
# before it.
# NOTE(review): confirm this matches the setup the analysis was originally
# run with.
prices <- as.vector(coredata(Cl(data)))
n.query <- 90
# At least one full reference window is required besides the query itself.
stopifnot(length(prices) >= 2 * n.query)
query <- tail(prices, n.query)
reference <- head(prices, -n.query)
n.reference <- length(reference)

# dist[i] is the Euclidean distance between the z-scored query and the
# z-scored reference window that ENDS at position i.  Positions before
# n.query have no complete window and stay NA.
dist <- rep(NA, n.reference)
query.normalized <- (query - mean(query)) / sd(query)

for (i in n.query:n.reference) {
  window <- reference[(i - n.query + 1):i]
  window.normalized <- (window - mean(window)) / sd(window)
  # dist() is exported from stats, so `::` is the right accessor (`:::` is for
  # unexported internals).  The namespace prefix keeps the call unambiguous
  # next to the result vector, which is also named `dist`.
  dist[i] <- stats::dist(rbind(query.normalized, window.normalized))
}



Find Matches

# Positions in `dist` of the matches found, best (smallest distance) first.
min.index <- integer(0)
# Maximum number of matches to look for.
n.match <- 10

# only look at the minimums: candidates must be closer than the average
temp <- dist
temp[temp > mean(dist, na.rm = TRUE)] <- NA

# Greedy selection: take the smallest remaining distance, then blank out its
# neighbourhood (2 * n.query positions before it and n.query after it) so the
# next pick cannot come from the same stretch of history.
for (i in seq_len(n.match)) {
  if (all(is.na(temp))) {
    break  # no eligible candidates left
  }
  best <- which.min(temp)
  min.index[i] <- best
  # Clamp to the valid index range 1..n.reference.
  lo <- max(1, best - 2 * n.query)
  hi <- min(n.reference, best + n.query)
  temp[lo:hi] <- NA
}
# Number of matches actually found; may be fewer than requested.
n.match <- len(min.index)



Plot Matches

# Dates aligned with the distance series, one per reference position.
dates <- index(data)[seq_len(len(dist))]

# Chart 1: distance over time, the average-distance cut-off line, and the
# selected matches marked and numbered in rank order.
par(mar = c(2, 4, 2, 2))
plot(dates, dist, type = 'l', col = 'gray', main = 'Top Matches',
     ylab = 'Euclidean Distance', xlab = '')
abline(h = mean(dist, na.rm = TRUE), col = 'darkgray', lwd = 2)
points(dates[min.index], dist[min.index], pch = 22, col = 'red', bg = 'red')
# seq_len() is used instead of 1:n.match throughout, because 1:0 would yield
# c(1, 0) when no match was found; labels are skipped entirely in that case.
if (n.match > 0) {
  text(dates[min.index], dist[min.index], seq_len(n.match), adj = c(1, 1),
       col = 'black', xpd = TRUE)
}

# Chart 2: the full price history with the current pattern in blue and each
# matched window in red.
plota(data, type = 'l', col = 'gray', main = tickers)
# NOTE(review): the hard-coded 90 presumably equals n.query -- confirm.
plota.lines(last(data, 90), col = 'blue')
for (i in seq_len(n.match)) {
  plota.lines(data[(min.index[i] - n.query + 1):min.index[i]], col = 'red')
}
# Number each match at the midpoint of its window.
if (n.match > 0) {
  text(index(data)[min.index - n.query/2], reference[min.index - n.query/2],
       seq_len(n.match), adj = c(1, -1), col = 'black', xpd = TRUE)
}
plota.legend('Pattern,Match #', 'blue,red')



Overlay All Matches

# One row per match plus a final row for the query; every row spans
# 3 * n.query points.  Argument names are spelled out in full (nrow / ncol)
# rather than relying on partial matching.
matches <- matrix(NA, nrow = n.match + 1, ncol = 3 * n.query)

# Prepend n.query NAs so that slices reaching before the start of `reference`
# stay in range, and append the query after the reference.  Because of this
# shift each match row holds: the n.query points before the matched window,
# the matched window itself, and the n.query points after it.
temp <- c(rep(NA, n.query), reference, query)
# seq_len() skips the loop when no match was found (1:0 would run twice).
for (i in seq_len(n.match)) {
  matches[i, ] <- temp[(min.index[i] - n.query + 1):(min.index[i] + 2 * n.query)]
}

# add the 'query' pattern; indices past the end of `temp` read as NA, so the
# last n.query entries of this row are NA
matches[n.match + 1, ] <- temp[(len(temp) - 2 * n.query + 1):(len(temp) + n.query)]

# normalize: divide every row by its own value in column n.query (the divisor
# vector recycles down the columns, i.e. one divisor per row)
matches <- matches / matches[, n.query]



Plot All Matches

# Percent change relative to the normalization point, with the first n.query
# columns removed and the result transposed so that each column is one series.
# drop = FALSE keeps a matrix even when the query is the only row; without it
# the subset collapses to a vector and t() returns the wrong orientation.
temp <- 100 * (t(matches[, -seq_len(n.query), drop = FALSE]) - 1)

par(mar = c(2, 4, 2, 2))
matplot(temp, type = 'l', col = 'gray', lwd = 2, lty = 'dotted',
        xlim = c(1, 2.5 * n.query),
        main = paste('Pattern Prediction with', n.match, 'neighbours'),
        ylab = 'Normalized', xlab = '')
# The query is the last column; draw it on top as a heavy black line.
lines(temp[, n.match + 1], col = 'black', lwd = 4)

# Mark each match's value at the final step.  seq_len() keeps x and y the
# same length when n.match is 0 (1:0 would select column 1).
points(rep(2 * n.query, n.match), temp[2 * n.query, seq_len(n.match)],
       pch = 21, lwd = 2, col = 'gray', bg = 'gray')

# Mark summary statistics of `data` at horizontal position `x` on the
# currently open plot.
#
# Args:
#   x:    x coordinate at which every marker and label is drawn.
#   data: values passed to each summary function.
#   xfun: named list of functions (anything match.fun() accepts); each one is
#         applied to `data` and its list name is used in the label.
#   col:  colour used for both the marker and its label.
#
# Returns NULL invisibly; the function is called for its drawing side effects.
bt.plot.dot.label <- function(x, data, xfun, col='red') {
  # seq_along() gives an empty sequence for an empty `xfun`; the previous
  # 1:len(xfun) iterated over c(1, 0) and failed on xfun[[1]].
  for (j in seq_along(xfun)) {
    y <- match.fun(xfun[[j]])(data)
    points(x, y, pch = 21, lwd = 4, col = col, bg = col)
    # Label reads "<name> : <value rounded to 1 decimal> %".
    text(x, y, paste(names(xfun)[j], ':', round(y, 1), '%'),
         adj = c(-0.1, 0), cex = 0.8, col = col, xpd = TRUE)
  }
  invisible(NULL)
}

# bt.plot.dot.label(2*n.query, temp[2*n.query,1:n.match],
#                   list(Min=min,Max=max,Median=median,'Bot 25%'=function(x) quantile(x,0.25),'Top 75%'=function(x) quantile(x,0.75)))
# bt.plot.dot.label(n.query, temp[n.query,(n.match+1)], list(Current=min))



Table with Predictions

# Summary table: one row per match, then Current / Min / Average / Max.
# Columns hold the window's start and end dates and four return figures.
temp <- matrix(NA_real_, nrow = n.match + 4, ncol = 6)
rownames(temp) <- c(seq_len(n.match), spl('Current,Min,Average,Max'))
colnames(temp) <- spl('Start,End,Return,Week,Month,Quarter')

# compute returns: the ratio across the matched window itself, then across
# the 5 / 20 / 60 steps following it.  `matches` has 3 * n.query columns, so
# the 60-step column only exists when n.query >= 60.
rows <- seq_len(n.match + 1)  # every match plus the Current row
temp[rows, 'Return'] <- matches[, 2 * n.query] / matches[, n.query]
temp[rows, 'Week'] <- matches[, 2 * n.query + 5] / matches[, 2 * n.query]
temp[rows, 'Month'] <- matches[, 2 * n.query + 20] / matches[, 2 * n.query]
temp[rows, 'Quarter'] <- matches[, 2 * n.query + 60] / matches[, 2 * n.query]

# compute average returns (the Current row is part of the input; NAs are
# ignored).  The column list is named `cols` rather than `index` so that it
# does not shadow index(), which is called on `data` further down.
# drop = FALSE keeps a matrix for apply() even when there is a single row.
cols <- spl('Return,Week,Month,Quarter')
stats.in <- temp[rows, cols, drop = FALSE]
temp['Min', cols] <- apply(stats.in, 2, min, na.rm = TRUE)
temp['Average', cols] <- apply(stats.in, 2, mean, na.rm = TRUE)
temp['Max', cols] <- apply(stats.in, 2, max, na.rm = TRUE)

# format: ratios become percent changes before formatting
# NOTE(review): plota.format() presumably returns display strings -- confirm.
temp[] <- plota.format(100 * (temp - 1), 1, '', '%')

# enter dates
# NOTE(review): the hard-coded 90 presumably equals n.query -- confirm.
temp['Current', 'Start'] <- format(index(last(data, 90)[1]), '%d %b %Y')
temp['Current', 'End'] <- format(index(last(data, 1)[1]), '%d %b %Y')
# seq_len() skips the loop when no match was found (1:0 would run twice).
for (i in seq_len(n.match)) {
  temp[i, 'Start'] <- format(index(data[min.index[i] - n.query + 1]), '%d %b %Y')
  temp[i, 'End'] <- format(index(data[min.index[i]]), '%d %b %Y')
}

# plot table
plot.table(temp, smain = 'Match #')