library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first()  masks xts::first()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ dplyr::last()   masks xts::last()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tseries)
library(quantmod)

First of all, I chose a low-risk, low-reward plan because of the unstable job market and economy. A new hire should build up savings first, so I picked CDs as my main holding. Investing in the stock market is only a very small part of the portfolio, which limits the risk that a fresh graduate takes on.

For the actual investment, I was deciding between an S&P 500 trust ETF (SPY) and a mutual fund, and I picked the mutual fund. Both an S&P 500 trust ETF and a mutual fund offer diversification at a low cost. However, I wanted to see whether randomly picking one of the top-performing mutual funds could do better than choosing the S&P 500 trust ETF.

For this assignment, instead of looking at the mutual fund I picked, I want to switch direction and look at the S&P 500 trust ETF (SPY). I want to see whether Apple Inc. (7.04% weight) and Microsoft Corp. (7.00% weight) would make a good pair for pairs trading.

Then I saw that Anthony Arroyo had posted a website which is very helpful. However, I still want to try doing it myself once.

If we look at the stock prices, AAPL and MSFT show a similar upward trend, although they do not move identically.

# Download the adjusted closing prices of both tickers over the same window
# so that the two series can be drawn on a single chart.
AAPL_prices <- Ad(
  getSymbols("AAPL", from = "2023-01-01", to = "2023-10-30", auto.assign = FALSE)
)
MSFT_prices <- Ad(
  getSymbols("MSFT", from = "2023-01-01", to = "2023-10-30", auto.assign = FALSE)
)

# AAPL and MSFT are individual stocks rather than ETFs, so the chart is
# titled accordingly.
plot(
  cbind(AAPL_prices, MSFT_prices),
  legend.loc = "topleft",
  main = "Stock prices"
)

# Tickers that make up the candidate pair.
mySymbols <- c("AAPL", "MSFT")

# Fetch one daily price series per ticker and key the resulting list by
# ticker symbol.
myStocks <- setNames(
  lapply(
    mySymbols,
    function(ticker) {
      getSymbols(
        ticker,
        from = "2023-01-01",
        to = "2024-01-03",
        periodicity = "daily",
        auto.assign = FALSE
      )
    }
  ),
  mySymbols
)

# Extract the close column of every series and merge them into one object.
closePrices <- do.call(merge, lapply(myStocks, Cl))

# Drop the ".Close" suffix so that each column is named after its ticker.
names(closePrices) <- sub("\\.Close", "", names(closePrices))
head(closePrices)
##              AAPL   MSFT
## 2023-01-03 125.07 239.58
## 2023-01-04 126.36 229.10
## 2023-01-05 125.02 222.31
## 2023-01-06 129.62 224.93
## 2023-01-09 130.15 227.12
## 2023-01-10 130.73 228.85

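The correlation coefficient between the two log-price series is about 0.97, and the ADF test on the regression residuals gives a p-value of 0.01 (1%), so we can reject the null hypothesis of a unit root at the 5% level: the spread looks stationary, which is what a pairs trade requires.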

# Chronological train/test split on log prices.
# The test window starts on the row right after the last training row, so
# no observation is shared between the two sets.
n_train <- 190
test_end <- 221
# NOTE(review): rows after `test_end` are not used; confirm that is intended.

# train
train <- log(closePrices[1:n_train])

# test
test <- log(closePrices[(n_train + 1):test_end])




# get the correlation of each pair
#
# For every unordered pair of tickers (later ticker on the left, earlier one
# on the right) the loop records, on the training window:
#   * correlation - correlation of the two log-price series
#   * beta        - slope of a no-intercept regression of left on right
#   * pvalue      - ADF test p-value of the regression residuals (the spread)

# Preallocate one slot per pair instead of growing the vectors with c().
n_pairs <- choose(length(mySymbols), 2)
left_side <- character(n_pairs)
right_side <- character(n_pairs)
correlation <- numeric(n_pairs)
beta <- numeric(n_pairs)
pvalue <- numeric(n_pairs)

pair_idx <- 0L
for (i in seq_along(mySymbols)) {
  # seq_len(i - 1L) visits exactly the indices j < i and is empty for i == 1.
  for (j in seq_len(i - 1L)) {
    pair_idx <- pair_idx + 1L

    left_side[pair_idx] <- mySymbols[i]
    right_side[pair_idx] <- mySymbols[j]
    correlation[pair_idx] <- as.numeric(
      cor(train[, mySymbols[i]], train[, mySymbols[j]])
    )

    # linear regression without intercept
    m <- lm(train[, mySymbols[i]] ~ train[, mySymbols[j]] - 1)
    beta[pair_idx] <- as.numeric(coef(m)[1])

    # get the mispricings of the spread
    sprd <- residuals(m)

    # adf test
    pvalue[pair_idx] <- adf.test(sprd, alternative = "stationary", k = 0)$p.value
  }
}
## Warning in adf.test(sprd, alternative = "stationary", k = 0): p-value smaller
## than printed p-value
# Gather the per-pair statistics into one summary table and display it.
df <- data.frame(
  left_side = left_side,
  right_side = right_side,
  correlation = correlation,
  beta = beta,
  pvalue = pvalue
)

df
##   left_side right_side correlation     beta pvalue
## 1      MSFT       AAPL   0.9723061 1.112269   0.01
# Hedge ratio of the MSFT ~ AAPL pair, read from the summary table rather
# than retyped from the rounded printed output, so it stays in sync with the
# regression whenever the data or the split changes.
hedge_ratio <- df$beta[df$left_side == "MSFT" & df$right_side == "AAPL"]
stopifnot(length(hedge_ratio) == 1L)

# Spread on the training window.
myspread <- train[, "MSFT"] - hedge_ratio * train[, "AAPL"]
plot(myspread, main = "MSFT vs AAPL")

# Spread on the test window, using the hedge ratio estimated on the training
# window.
myspread <- test[, "MSFT"] - hedge_ratio * test[, "AAPL"]
plot(myspread, main = "MSFT vs AAPL")

When the spread is above 0.04, we sell the spread, which means that we sell MSFT and buy AAPL. When the spread is below -0.04, we buy the spread, which means that we buy MSFT and sell AAPL.

The plot shows that we should buy Apple and sell Microsoft.