Pair Trading Strategy

https://github.com/JanpuHou/datasharing/blob/master/IntroductionToPairTrading.pdf.

Advanced Micro Devices (NASDAQ: AMD) is naturally compared with Nvidia (NASDAQ: NVDA) because of their near zero-sum competition in the top-end gaming GPU market. Nearly a month has passed since their quarterly announcements, so investors should by now have digested all the public information released. This is also a good time to examine whether market prices reflect each company’s long-term fundamental outlook.

Find two likely cointegrated stocks

Step 1: Obtain data via quantmod

#Utilize quantmod to load the security symbols
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.3
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.4.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.4.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
 # Tickers for the pair under study.
 symbols <- c("AMD", "NVDA")
 # Download both series. auto.assign is spelled out because quantmod has
 # announced that its default will change to FALSE in 0.5-0; with TRUE the
 # data are created as objects named after the tickers (AMD, NVDA), which the
 # rest of this script refers to directly.
 getSymbols(symbols, auto.assign = TRUE)
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## 
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
## 
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "AMD"  "NVDA"
 # Chart the AMD series loaded by getSymbols.
 chartSeries(AMD)

 # Chart the NVDA series loaded by getSymbols.
 chartSeries(NVDA)

Stock Price Only

Step 2: Extract prices and time ranges

 # In-sample (training) window: 2015-01-01 through 2016-12-31, expressed as
 # an xts "from::to" range string.
 startT <- "2015-01-01"
 endT   <- "2016-12-31"
 rangeT <- paste0(startT, "::", endT)
 # Column 6 of each downloaded series is the price used for the analysis
 # (presumably the Adjusted column - confirm against the getSymbols layout).
 tAMD  <- AMD[rangeT, 6]
 tNVDA <- NVDA[rangeT, 6]

 # Out-of-sample window: 2017-01-01 through 2017-10-27.
 startO <- "2017-01-01"
 endO   <- "2017-10-27"
 rangeO <- paste0(startO, "::", endO)
 oAMD  <- AMD[rangeO, 6]
 oNVDA <- NVDA[rangeO, 6]


# Download AMD again for 2017-01-04 to 2017-10-27 as a standalone object.
# auto.assign = FALSE makes getSymbols return the series rather than create a
# variable. FALSE is written out because the shorthand F is an ordinary
# variable that can be reassigned.
amd.tkr <- getSymbols("AMD", from = as.Date("2017-01-04"),
                      to = as.Date("2017-10-27"), auto.assign = FALSE)
chartSeries(amd.tkr)

# Flatten the xts object into a data frame with an explicit date column so
# that ggplot2 can map it.
df <- data.frame(date = index(amd.tkr), amd.tkr, row.names = NULL)

# Inside aes() columns are named bare; they are looked up in the data
# argument, so the df$ prefix is unnecessary (and discouraged by ggplot2).
ggplot(df, aes(date, AMD.Adjusted)) +
  geom_line() +
  scale_x_date("Month/2017") +
  ylab("Stock Price") +
  xlab("") +
  labs(title = "AMD Stock Price")

Notice how we defined an in-sample and an out-of-sample range. We will use the in-sample data to compute a simple hedge ratio, and then we will apply this hedge ratio to the out-of-sample data.

Step 3: Compute price differences and find hedge ratio

 # First differences of the in-sample prices. The leading element of diff()
 # is the NA created for the first observation, so it is dropped.
 pdtAMD  <- diff(tAMD)[-1]
 pdtNVDA <- diff(tNVDA)[-1]

 # Regress AMD price changes on NVDA price changes with no intercept
 # (the "- 1" term removes it).
 model <- lm(pdtAMD ~ pdtNVDA - 1)

 # The single slope coefficient is the hedge ratio.
 hr <- coef(model)[[1]]

Step 4: Construct the spread

 # In-sample spread: AMD price less hr units of NVDA.
 spreadT <- tAMD - hr * tNVDA

 # Centre and dispersion of the spread; the bands sit one standard deviation
 # either side of the mean.
 meanT    <- as.numeric(mean(spreadT, na.rm = TRUE))
 sdT      <- as.numeric(sd(spreadT, na.rm = TRUE))
 lowerThr <- meanT - 1 * sdT
 upperThr <- meanT + 1 * sdT

 # Spread over time with the mean (red) and both bands (blue).
 plot(spreadT, main = "AMD vs. NVDA spread (in-sample period)")
 abline(h = meanT, col = "red", lwd = 2)
 abline(h = c(upperThr, lowerThr), col = "blue", lwd = 2)

 # Distribution of the spread, with the mean marked in red.
 hist(spreadT, col = "blue", breaks = 100, main = "Spread Histogram (AMD vs. NVDA)")
 abline(v = meanT, col = "red", lwd = 2)

Out-of-sample spread

 # Out-of-sample spread, built with the hedge ratio estimated in-sample.
 spreadO <- oAMD - hr * oNVDA

 # Centre and dispersion of the out-of-sample spread.
 meanO    <- as.numeric(mean(spreadO, na.rm = TRUE))
 sdO      <- as.numeric(sd(spreadO, na.rm = TRUE))
 # NOTE(review): these two assignments overwrite the in-sample values that
 # upperThr/lowerThr held before this point; any later code reading them
 # sees the out-of-sample bands.
 lowerThr <- meanO - 1 * sdO
 upperThr <- meanO + 1 * sdO

 # Out-of-sample spread over time with the mean (red) and both bands (blue).
 plot(spreadO, main = "AMD vs. NVDA spread (out-sample period)")
 abline(h = meanO, col = "red", lwd = 2)
 abline(h = c(upperThr, lowerThr), col = "blue", lwd = 2)

 # Distribution of the out-of-sample spread, with the mean marked in red.
 hist(spreadO, col = "blue", breaks = 100, main = "Spread Histogram (AMD vs. NVDA)")
 abline(v = meanO, col = "red", lwd = 2)

Step 5: Define the trading rule

Once the spread exceeds our upper threshold, we sell AMD and buy NVDA. Once the spread drops below our lower threshold, we buy AMD and sell NVDA.

 # Positions in the in-sample spread at or above the upper band
 # (mean + 1 sd): candidate points to sell AMD and buy NVDA.
 indSell <- which(spreadT >= meanT + sdT)
 # Positions at or below the lower band (mean - 1 sd): candidate points to
 # buy AMD and sell NVDA.
 indBuy  <- which(spreadT <= meanT - sdT)

Step 6: Figure out the trades

# Entry bands for the in-sample back-test, derived here from the in-sample
# statistics. The shared upperThr/lowerThr variables are deliberately not
# used: the out-of-sample section reassigns them, so reading them here would
# test the in-sample spread against out-of-sample levels.
upperThrT <- meanT + sdT
lowerThrT <- meanT - sdT

spreadL  <- length(spreadT)
# One slot per observation; a slot stays NA unless a trade fires there.
pricesB  <- rep(NA_real_, spreadL)
pricesS  <- rep(NA_real_, spreadL)
sp       <- as.numeric(spreadT)
tradeQty <- 100
# Signed position: +tradeQty when long the spread, -tradeQty when short,
# 0 when flat.
totalP   <- 0

# seq_len() gives an empty sequence for a zero-length spread, whereas
# 1:spreadL would iterate over c(1, 0).
for (i in seq_len(spreadL)) {
  spTemp <- sp[i]
  # A missing spread value cannot be compared against the bands; skip it
  # instead of letting if() fail on NA.
  if (is.na(spTemp)) {
    next
  }
  if (spTemp < lowerThrT) {
    # Spread below the lower band: buy, but only when not already long.
    if (totalP <= 0) {
      totalP     <- totalP + tradeQty
      pricesB[i] <- spTemp
    }
  } else if (spTemp > upperThrT) {
    # Spread above the upper band: sell, but only when not already short.
    if (totalP >= 0) {
      totalP     <- totalP - tradeQty
      pricesS[i] <- spTemp
    }
  }
}

Step 7: Visualize trades

 # Redraw the in-sample spread with its mean (red) and one-sd bands (blue).
 plot(spreadT, main = "AMD vs. NVDA spread (in-sample period)")
 abline(h = meanT, col = "red", lwd = 2)
 abline(h = c(meanT + sdT, meanT - sdT), col = "blue", lwd = 2)

 # Overlay the recorded trades at their dates: buys in green, sells in red.
 points(xts(pricesB, order.by = index(spreadT)), col = "green", cex = 1.9, pch = 19)
 points(xts(pricesS, order.by = index(spreadT)), col = "red", cex = 1.9, pch = 19)
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf