https://github.com/JanpuHou/datasharing/blob/master/IntroductionToPairTrading.pdf.
Advanced Micro Devices (NASDAQ: AMD) is naturally compared with Nvidia (NASDAQ: NVDA) because their gains in the top-end gaming GPU market are nearly zero-sum. Nearly a month has passed since their quarterly announcements, so investors should by now have digested all the public information released. This is also a good time to examine whether market prices reflect each company’s long-term fundamental outlook.
Step 1: Obtain data via quantmod
#Utilize quantmod to load the security symbols
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.4.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.4.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
# Tickers that make up the pair under study.
symbols <- c("AMD", "NVDA")
# Load the price history for each ticker into a variable named after it
# (AMD, NVDA). auto.assign is spelled out because quantmod has announced
# that its default will switch to FALSE in version 0.5-0; relying on the
# default would then leave AMD and NVDA undefined for the steps below.
getSymbols(symbols, auto.assign = TRUE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "AMD" "NVDA"
# Draw the default quantmod chart for each loaded series, one plot per
# ticker, with the ticker symbol given explicitly as the chart name.
chartSeries(AMD, name = "AMD")
chartSeries(NVDA, name = "NVDA")
Step 2: Extract prices and time ranges
# In-sample (training) window, expressed as an xts "start::end" range string.
startT <- "2015-01-01"
endT <- "2016-12-31"
rangeT <- paste0(startT, "::", endT)
# Out-of-sample (evaluation) window, built the same way.
startO <- "2017-01-01"
endO <- "2017-10-27"
rangeO <- paste0(startO, "::", endO)
# Slice column 6 of each series over both windows in a single subsetting
# step (presumably the adjusted close -- confirm against the column names).
tAMD <- AMD[rangeT, 6]
tNVDA <- NVDA[rangeT, 6]
oAMD <- AMD[rangeO, 6]
oNVDA <- NVDA[rangeO, 6]
# Fetch AMD on its own for the 2017 window. auto.assign = FALSE makes
# getSymbols return the series rather than create a variable; FALSE is
# written out in full because the shorthand F can be reassigned.
amd.tkr <- getSymbols(
  "AMD",
  from = as.Date("2017-01-04"),
  to = as.Date("2017-10-27"),
  auto.assign = FALSE
)
chartSeries(amd.tkr)
# Flatten the series into a plain data frame (a date column plus the price
# columns) so that ggplot2 can consume it.
df <- data.frame(date = index(amd.tkr), amd.tkr, row.names = NULL)
# Columns are referenced by bare name inside aes(); writing df$... there
# bypasses the plot's data argument and is discouraged by ggplot2.
ggplot(df, aes(date, AMD.Adjusted)) +
  geom_line() +
  scale_x_date("Month/2017") +
  ylab("Stock Price") +
  xlab("") +
  labs(title = "AMD Stock Price")
Notice how we defined an in-sample and an out-of-sample range. We will use the in-sample data to compute a simple hedge ratio, and then we will apply this hedge ratio to the out-of-sample data.
# Day-over-day price changes across the training window; the leading element
# of each differenced series is discarded.
pdtAMD <- diff(tAMD)[-1]
pdtNVDA <- diff(tNVDA)[-1]
# Regress AMD changes on NVDA changes with the intercept removed.
model <- lm(pdtAMD ~ pdtNVDA - 1)
# The single fitted slope, stripped of its name, is used as the hedge ratio.
hr <- unname(coef(model))[1]
# In-sample spread: AMD price less the hedge-ratio-weighted NVDA price.
spreadT <- tAMD - hr * tNVDA
# Centre and dispersion of the spread, skipping any missing values.
meanT <- as.numeric(mean(spreadT, na.rm = TRUE))
sdT <- as.numeric(sd(spreadT, na.rm = TRUE))
# Bands one standard deviation above and below the mean.
upperThr <- meanT + sdT
lowerThr <- meanT - sdT
# Spread over time, with the mean in red and both bands in blue.
plot(spreadT, main = "AMD vs. NVDA spread (in-sample period)")
abline(h = meanT, col = "red", lwd = 2)
abline(h = c(upperThr, lowerThr), col = "blue", lwd = 2)
# Distribution of the spread with the mean marked in red.
hist(
  spreadT,
  col = "blue",
  breaks = 100,
  main = "Spread Histogram (AMD vs. NVDA)"
)
abline(v = meanT, col = "red", lwd = 2)
# Out-of-sample spread, built with the hedge ratio fitted on the training
# window.
spreadO <- oAMD - hr * oNVDA
# Mean and standard deviation of the out-of-sample spread, ignoring NAs.
meanO <- as.numeric(mean(spreadO, na.rm = TRUE))
sdO <- as.numeric(sd(spreadO, na.rm = TRUE))
# These bands get their own names. The in-sample upperThr/lowerThr are read
# again later by the trading loop, so overwriting them here would make that
# loop test the in-sample spread against out-of-sample levels.
upperThrO <- meanO + sdO
lowerThrO <- meanO - sdO
# Visualize the out-of-sample spread with its mean (red) and bands (blue).
plot(spreadO, main = "AMD vs. NVDA spread (out-sample period)")
abline(h = meanO, col = "red", lwd = 2)
abline(h = c(upperThrO, lowerThrO), col = "blue", lwd = 2)
# Histogram of the out-of-sample spread with the mean marked in red.
hist(
  spreadO,
  col = "blue",
  breaks = 100,
  main = "Spread Histogram (AMD vs. NVDA)"
)
abline(v = meanO, col = "red", lwd = 2)
Once the spread exceeds our upper threshold, we sell AMD and buy NVDA. Once the spread drops below our lower threshold, we buy AMD and sell NVDA.
# Positions within the in-sample spread that sit at or beyond each band.
indSell <- which(spreadT >= meanT + sdT)
indBuy <- which(spreadT <= meanT - sdT)
spreadL <- length(spreadT)
# Spread level recorded at each executed buy / sell; NA where no trade was
# made. Preallocated as numeric so the vectors never change type.
pricesB <- rep(NA_real_, spreadL)
pricesS <- rep(NA_real_, spreadL)
sp <- as.numeric(spreadT)
tradeQty <- 100
totalP <- 0
# Entry bands are derived here from the in-sample statistics. The shared
# upperThr/lowerThr variables are reassigned from the out-of-sample spread
# earlier in the script, so reading them here would test the in-sample
# spread against the wrong levels.
entryUpper <- meanT + sdT
entryLower <- meanT - sdT
# seq_len() yields an empty sequence when there are no observations,
# whereas 1:spreadL would iterate over c(1, 0).
for (i in seq_len(spreadL)) {
  spTemp <- sp[i]
  # A missing spread value cannot be compared against the bands; skip it.
  if (is.na(spTemp)) {
    next
  }
  if (spTemp < entryLower) {
    # Spread below the lower band: buy, unless the position is already long.
    if (totalP <= 0) {
      totalP <- totalP + tradeQty
      pricesB[i] <- spTemp
    }
  } else if (spTemp > entryUpper) {
    # Spread above the upper band: sell, unless the position is already short.
    if (totalP >= 0) {
      totalP <- totalP - tradeQty
      pricesS[i] <- spTemp
    }
  }
}
# Redraw the in-sample spread with its mean (red) and the two
# one-standard-deviation bands (blue).
plot(spreadT, main = "AMD vs. NVDA spread (in-sample period)")
abline(h = meanT, col = "red", lwd = 2)
abline(h = meanT + c(1, -1) * sdT, col = "blue", lwd = 2)
# Overlay the recorded trades on the spread's own dates: buys in green,
# sells in red. Entries that are NA (no trade) draw nothing.
points(
  xts(pricesB, order.by = index(spreadT)),
  col = "green", cex = 1.9, pch = 19
)
points(
  xts(pricesS, order.by = index(spreadT)),
  col = "red", cex = 1.9, pch = 19
)
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf