SPX Data Candlestick Chart

library('quantmod')

# Fetch the SPX series for the fixed study window. `src` is not given, so
# getSymbols() uses its default source; auto.assign = FALSE makes it return
# the series instead of creating a variable in the workspace.
SYMBL <- getSymbols(
  "SPX",
  from = "2000-01-01",
  to = "2018-07-06",
  auto.assign = FALSE
)

# Candlestick chart of the downloaded series on the white chart theme.
chartSeries(
  SYMBL,
  type = "candlesticks",
  name = "SPX Candlestick Chart",
  theme = chartTheme("white")
)

The First Five Rows Of SPX Data

# Print rows 1 to 5 (all columns) of the downloaded series.
SYMBL[seq_len(5), ]

##            SPX.Open SPX.High SPX.Low SPX.Close SPX.Volume SPX.Adjusted
## 2000-03-02  1.74884  2.49834 1.74884   2.49834       2562      2.49834
## 2000-03-03  1.87375  1.87375 1.87375   1.87375         80      1.87375
## 2000-03-06  2.49834  2.49834 2.49834   2.49834        240      2.49834
## 2000-03-07  4.37209  4.37209 4.37209   4.37209          0      4.37209
## 2000-03-08       NA       NA      NA        NA         NA           NA

Load Systematic Investor Toolbox (SIT)

# Load the Systematic Investor Toolbox by sourcing its gzip-compressed script
# straight from the project web site.
# NOTE(review): this executes remotely hosted code fetched over plain HTTP;
# consider pinning a vetted local copy of sit.gz instead.
con <- gzcon(url('http://www.systematicportfolio.com/sit.gz', 'rb'))

# `finally` guarantees the connection is closed even when source() fails;
# previously an error in source() skipped close(con) and leaked the connection.
# invisible() keeps the chunk silent, as the bare source() call was.
invisible(tryCatch(source(con), finally = close(con)))

Load historical data

# Symbol to analyse; also reused later as the chart title.
tickers <- "SPX"

# Pull the price history from Yahoo starting 2000-01-01 and return it
# directly (auto.assign = FALSE) rather than assigning it by symbol name.
data <- getSymbols(
  tickers,
  src = "yahoo",
  from = "2000-01-01",
  auto.assign = FALSE
)

Setup search

# Keep only the trailing 10 * 252 observations of the series.
data <- last(data, 10 * 252)

# Closing prices with the time index stripped.
reference <- coredata(Cl(data))
n <- len(reference)

# The final 90 closes are the pattern to look for (`query`); everything
# before them is the history that gets searched (`reference`).
query <- reference[seq(n - 90 + 1, n)]
reference <- reference[seq(1, n - 90)]

n.reference <- len(reference)
n.query <- len(query)

Compute Distances

# Euclidean distance between the z-scored query and every window of
# `reference` that has the same length as the query.
# dist[i] belongs to the window that ENDS at position i, so the first
# n.query - 1 entries remain NA.
dist <- rep(NA_real_, n.reference)
query.normalized <- (query - mean(query)) / sd(query)

# Guard: when the reference is shorter than the query, n.query:n.reference
# would count downwards and write out-of-range entries into `dist`.
if (n.reference >= n.query) {
  for (i in n.query:n.reference) {
    window <- reference[(i - n.query + 1):i]
    # A perfectly flat window has sd == 0 and therefore yields NaN here;
    # which.min() later ignores such entries.
    window.normalized <- (window - mean(window)) / sd(window)
    # dist() is exported from stats, so `::` is sufficient (was `:::`).
    # The two stacked rows give exactly one pairwise distance.
    dist[i] <- stats::dist(rbind(query.normalized, window.normalized))
  }
}

Find Matches

# Greedy search for up to `n.match` best-matching windows.
n.match <- 10
min.index <- NULL

# Only windows that are closer than the average distance are candidates.
temp <- dist
temp[temp > mean(dist, na.rm = TRUE)] <- NA

# Repeatedly take the closest remaining candidate, then blank out its
# neighbourhood (2 * n.query positions to the left, n.query to the right)
# so that subsequent picks cannot sit next to it.
for (i in seq_len(n.match)) {
  if (all(is.na(temp))) {
    # No candidates left: further iterations would do nothing.
    break
  }
  best <- which.min(temp)
  min.index[i] <- best
  blocked.from <- max(0, best - 2 * n.query)
  blocked.to <- min(n.reference, best + n.query)
  temp[blocked.from:blocked.to] <- NA
}

# Number of matches actually found (may be fewer than requested).
n.match <- len(min.index)

Plot Matches

# Chart 1: distance of every reference window to the query, with the
# selected matches highlighted and numbered.
dates <- index(data)[seq_len(len(dist))]

par(mar = c(2, 4, 2, 2))
plot(dates, dist, type = 'l', col = 'gray', main = 'Top Matches',
     ylab = 'Euclidean Distance', xlab = '')
# Horizontal line at the average distance (the candidate cut-off).
abline(h = mean(dist, na.rm = TRUE), col = 'darkgray', lwd = 2)
# text() rejects zero-length labels, so annotate only when matches exist.
if (n.match > 0) {
  points(dates[min.index], dist[min.index], pch = 22, col = 'red', bg = 'red')
  text(dates[min.index], dist[min.index], seq_len(n.match),
       adj = c(1, 1), col = 'black', xpd = TRUE)
}

# Chart 2: the price series with the query (blue) and each matched
# window (red) drawn on top.
plota(data, type = 'l', col = 'gray', main = tickers)
plota.lines(last(data, 90), col = 'blue')
# seq_len() skips the loop when no match was found; 1:n.match would have
# iterated over c(1, 0) and failed.
for (i in seq_len(n.match)) {
  plota.lines(data[(min.index[i] - n.query + 1):min.index[i]], col = 'red')
}
if (n.match > 0) {
  # Place each match number at the midpoint of its window.
  text(index(data)[min.index - n.query/2], reference[min.index - n.query/2],
       seq_len(n.match), adj = c(1, -1), col = 'black', xpd = TRUE)
}
plota.legend('Pattern,Match #', 'blue,red')

Overlay all Matches

# One row per match plus a final row for the query itself. Each row holds
# 3 * n.query prices: n.query before the matched window, the window itself,
# and n.query after it.
matches <- matrix(NA_real_, nrow = n.match + 1, ncol = 3 * n.query)

# Left-pad with n.query NAs so that a slice can start before the first
# reference observation; the query is appended after the reference.
temp <- c(rep(NA, n.query), reference, query)

# seq_len() makes this a no-op when no match was found (1:n.match would
# iterate over c(1, 0) and fail on the empty min.index).
for (i in seq_len(n.match)) {
  matches[i, ] <- temp[(min.index[i] - n.query + 1):(min.index[i] + 2 * n.query)]
}

# add the 'query' pattern; indexing past the end of `temp` yields NA for the
# not-yet-known n.query values that follow the query
matches[n.match + 1, ] <- temp[(len(temp) - 2 * n.query + 1):(len(temp) + n.query)]

# normalize: divide every row by its own value in column n.query (the last
# price before the window starts). The divisor has one entry per row, so it
# recycles down the columns and each row is scaled by its own anchor.
matches <- matches / matches[, n.query]

Plot all Matches

# Percentage change versus each row's anchor, one column per pattern, with
# the first n.query columns of `matches` removed.
# drop = FALSE keeps the matrix shape when `matches` has a single row (no
# match found); otherwise t() would return the series in the wrong orientation.
temp <- 100 * (t(matches[, -c(1:n.query), drop = FALSE]) - 1)

par(mar = c(2, 4, 2, 2))
matplot(temp, type = 'l', col = 'gray', lwd = 2, lty = 'dotted',
        xlim = c(1, 2.5 * n.query),
        main = paste('Pattern Prediction with', n.match, 'neighbours'),
        ylab = 'Normalized', xlab = '')
# The query pattern is the last column; draw it in bold black on top.
lines(temp[, n.match + 1], col = 'black', lwd = 4)

# Mark where each match ended up n.query steps after its window.
# seq_len() selects nothing when n.match is 0; 1:n.match would select c(1, 0).
points(rep(2 * n.query, n.match), temp[2 * n.query, seq_len(n.match)],
       pch = 21, lwd = 2, col = 'gray', bg = 'gray')

# Annotate the current plot with summary statistics of `data`.
#
# For every entry of `xfun` the function is applied to `data`; a filled dot
# is drawn at (x, result) and labelled "<name> : <result rounded to 1 digit> %".
#
# Arguments:
#   x    - x position handed to points() and text()
#   data - values passed to each summary function
#   xfun - list of functions (anything match.fun() accepts); the list
#          names are used as label prefixes
#   col  - colour of dots and labels
#
# Returns NULL invisibly. A plot must already be open on the active device.
bt.plot.dot.label <- function(x, data, xfun, col='red') {
  # seq_along() turns an empty `xfun` into a no-op (1:len(xfun) iterated over
  # c(1, 0)) and removes the dependency on the toolbox helper len().
  for (j in seq_along(xfun)) {
    y <- match.fun(xfun[[j]])(data)
    points(x, y, pch = 21, lwd = 4, col = col, bg = col)
    text(x, y, paste(names(xfun)[j], ':', round(y, 1), '%'),
         adj = c(-0.1, 0), cex = 0.8, col = col, xpd = TRUE)
  }
  invisible(NULL)
}

# bt.plot.dot.label(2*n.query, temp[2*n.query,1:n.match],
#                   list(Min=min,Max=max,Median=median,'Bot 25%'=function(x) quantile(x,0.25),'Top 75%'=function(x) quantile(x,0.75)))
# bt.plot.dot.label(n.query, temp[n.query,(n.match+1)], list(Current=min))

Table with predictions

# Summary table: one row per match, one for the current pattern, and three
# aggregate rows; columns hold the window dates and the returns.
temp <- matrix(NA_real_, nrow = n.match + 4, ncol = 6)
# seq_len() keeps the row names consistent with the row count when no match
# was found (1:n.match would contribute c(1, 0)).
rownames(temp) <- c(seq_len(n.match), spl('Current,Min,Average,Max'))
colnames(temp) <- spl('Start,End,Return,Week,Month,Quarter')

# Rows holding individual patterns: all matches followed by 'Current'.
pattern.rows <- seq_len(n.match + 1)

# compute returns (as price ratios): over the matched window itself, then
# 5, 20 and 60 observations past the end of the window
temp[pattern.rows, 'Return'] <- matches[, 2 * n.query] / matches[, n.query]
temp[pattern.rows, 'Week'] <- matches[, 2 * n.query + 5] / matches[, 2 * n.query]
temp[pattern.rows, 'Month'] <- matches[, 2 * n.query + 20] / matches[, 2 * n.query]
temp[pattern.rows, 'Quarter'] <- matches[, 2 * n.query + 60] / matches[, 2 * n.query]

# compute min / average / max per return column, ignoring missing values.
# Named `return.cols` rather than `index` so that the index() accessor used
# below is not shadowed. drop = FALSE keeps a matrix for apply() even when
# there is only a single pattern row.
return.cols <- spl('Return,Week,Month,Quarter')
pattern.returns <- temp[pattern.rows, return.cols, drop = FALSE]
temp['Min', return.cols] <- apply(pattern.returns, 2, min, na.rm = TRUE)
temp['Average', return.cols] <- apply(pattern.returns, 2, mean, na.rm = TRUE)
temp['Max', return.cols] <- apply(pattern.returns, 2, max, na.rm = TRUE)

# format ratios as percentage strings; assigning into temp[] keeps the
# dimensions and dimnames while the contents become text
temp[] <- plota.format(100 * (temp - 1), 1, '', '%')

# enter dates: the current pattern spans the last 90 observations of `data`
temp['Current', 'Start'] <- format(index(last(data, 90)[1]), '%d %b %Y')
temp['Current', 'End'] <- format(index(last(data, 1)[1]), '%d %b %Y')
for (i in seq_len(n.match)) {
  temp[i, 'Start'] <- format(index(data[min.index[i] - n.query + 1]), '%d %b %Y')
  temp[i, 'End'] <- format(index(data[min.index[i]]), '%d %b %Y')
}

# plot table
plot.table(temp, smain = 'Match #')

Pattern Matching of Time Series Data

http://contextbase.github.io

All programming by John Akwei, ECMp ERMp Data Scientist

February 23, 2019

SPX Data Candlestick Chart

The First Five Rows Of SPX Data

Load Systematic Investor Toolbox (SIT)

Load historical data

Setup search

Compute Distances

Find Matches

Plot Matches

Overlay all Matches

Plot all Matches

Table with predictions