Source of data: https://finance.yahoo.com/quote/TSLA/history/
library(tidyverse)
library(modeltime)
library(tidymodels)
library(timetk)
# TSLA daily price history downloaded from Yahoo Finance
stockTes <- read_csv("./TSLA.csv")
## Parsed with column specification:
## cols(
## Date = col_date(format = ""),
## Open = col_double(),
## High = col_double(),
## Low = col_double(),
## Close = col_double(),
## `Adj Close` = col_double(),
## Volume = col_double()
## )
stockTes %>%
plot_time_series(Date, Close, .facet_ncol = 2)
# Nesting
data_nested <- stockTes %>%
select(Date, Close) %>%
nest(nested_column = c(Date, Close))
data_nested$nested_column
## [[1]]
## # A tibble: 252 x 2
## Date Close
## <date> <dbl>
## 1 2020-01-31 130.
## 2 2020-02-03 156
## 3 2020-02-04 177.
## 4 2020-02-05 147.
## 5 2020-02-06 150.
## 6 2020-02-07 150.
## 7 2020-02-10 154.
## 8 2020-02-11 155.
## 9 2020-02-12 153.
## 10 2020-02-13 161.
## # ... with 242 more rows
# Unnesting
data_nested %>%
unnest(nested_column)
## # A tibble: 252 x 2
## Date Close
## <date> <dbl>
## 1 2020-01-31 130.
## 2 2020-02-03 156
## 3 2020-02-04 177.
## 4 2020-02-05 147.
## 5 2020-02-06 150.
## 6 2020-02-07 150.
## 7 2020-02-10 154.
## 8 2020-02-11 155.
## 9 2020-02-12 153.
## 10 2020-02-13 161.
## # ... with 242 more rows
modelARIMA <- data_nested %>%
# Map Fitted Models
mutate(fitted_model = map(nested_column, .f = function(df) {
arima_reg(seasonal_period = 52) %>%
set_engine("auto_arima") %>%
fit(Close ~ Date, data = df)
})) %>%
# Map Forecasts
mutate(nested_forecast = map2(fitted_model, nested_column,
.f = function(arima_model, df) {
modeltime_table(
arima_model
) %>%
modeltime_forecast(
h = 52,
actual_data = df
)
}))
modelARIMA
## # A tibble: 1 x 3
## nested_column fitted_model nested_forecast
## <list> <list> <list>
## 1 <tibble [252 x 2]> <fit[+]> <tibble [304 x 5]>
# Unnesting
modelARIMA %>%
select(nested_forecast) %>%
unnest(nested_forecast) %>%
plot_modeltime_forecast(.facet_ncol = 2)
## Warning: Expecting the following names to be in the data frame: .conf_hi, .conf_lo.
## Proceeding with '.conf_interval_show = FALSE' to visualize the forecast without confidence intervals.
## Alternatively, try using `modeltime_calibrate()` before forecasting to add confidence intervals.
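The warning itself points to the fix: modeltime_forecast() can only draw confidence intervals after modeltime_calibrate() has been run on hold-out data. A minimal sketch, assuming a 52-observation hold-out split (the split is our assumption, not part of the original code):
# Hold out the last 52 observations for calibration.
splits <- time_series_split(stockTes, assess = 52, cumulative = TRUE)
# Refit on the training window, then calibrate on the hold-out.
calibrated_tbl <- arima_reg(seasonal_period = 52) %>%
  set_engine("auto_arima") %>%
  fit(Close ~ Date, data = training(splits)) %>%
  modeltime_table() %>%
  modeltime_calibrate(new_data = testing(splits))
# The forecast now carries .conf_lo / .conf_hi columns.
calibrated_tbl %>%
  modeltime_forecast(new_data = testing(splits), actual_data = stockTes) %>%
  plot_modeltime_forecast(.facet_ncol = 2)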
# Plot the closing price against a simple time index and fit a linear trend
close_price = stockTes$Close
t_index = seq_along(close_price)  # renamed from `T`, which shadows TRUE
plot(t_index, close_price, type = "l", xlim = c(0, 400),
     xlab = "Trading day", ylab = "Close")
reg = lm(close_price ~ t_index)
abline(reg, col = "red")
Time series of the closing price with the fitted linear trend.
# Calculate Residuals
res_stock = residuals(reg)
acf(res_stock, lag.max = 52, lwd = 3)
ACF of the residual series before differencing.
dif_stock = diff(res_stock, differences = 1) # first difference of the residual series
acf(dif_stock, lag.max = 52, lwd = 3)
ACF of the first-differenced series.
As the plots above show, the original series is clearly non-stationary: its ACF does not cut off but decays slowly. The first-differenced series, by contrast, looks much closer to stationary, and its ACF cuts off after a few lags. A formal test could confirm this, but the series plot and the ACF plot already give a good indication that the differenced data are stationary. Since it took one differencing operation to reach stationarity, d = 1.
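As a quick formal check (illustrative; the tseries package is also loaded later in this document):
# Augmented Dickey-Fuller test: a small p-value supports stationarity
# of the first-differenced series.
library(tseries)
adf.test(dif_stock)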
# Next, the partial autocorrelation function
pacf(dif_stock, lag.max = 52, lwd = 3)
PACF of the first-differenced series.
This plot shows the PACF (partial autocorrelation function), which together with the ACF can be interpreted to find p and q.
To spell this out, three numbers p, d and q specify an ARIMA model, which is then said to be of order (p,d,q); p, d and q are the orders of the AR part, the differencing, and the MA part respectively.
To choose p and q, we examine the sample ACF and PACF of the first-differenced data plotted above. They are suggestive of AR(13) or AR(14) and MA(14), so the initial candidate models are ARIMA(13,1,0), ARIMA(14,1,0) and ARIMA(0,1,14), plus ARIMA(0,1,0) as a baseline with no AR or MA terms for comparison.
Since the plots above decay in a geometric pattern, ARIMA is a suitable model family for predicting close_price.
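For reference, an ARIMA(p,d,q) model in backshift notation (where B y_t = y_{t-1} and \varepsilon_t is white noise) is

$$(1 - \phi_1 B - \cdots - \phi_p B^p)\,(1 - B)^d\, y_t = (1 + \theta_1 B + \cdots + \theta_q B^q)\,\varepsilon_t,$$

with \phi_i the AR coefficients and \theta_j the MA coefficients.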
library(lubridate)
library(tseries)
summary_stock_price <- stockTes %>%
group_by(Date)%>%
summarize(median_price = median(Close))
## `summarise()` ungrouping output (override with `.groups` argument)
max_Date = max(summary_stock_price$Date)
min_Date = min(summary_stock_price$Date)
stock_ts <- ts(summary_stock_price$median_price, frequency=12)
plot(stl(stock_ts, s.window = "periodic"))
adf.test(stock_ts)
##
## Augmented Dickey-Fuller Test
##
## data: stock_ts
## Dickey-Fuller = -1.8577, Lag order = 6, p-value = 0.6357
## alternative hypothesis: stationary
Output: The bar on the right side of the “seasonal” component confirms the seasonality of the time series. The ADF test p-value (0.6357 > 0.05) means we fail to reject the null hypothesis of non-stationarity, so the series cannot be treated as stationary.
We retain the last 52 observations for testing and fit the models on the first 200 (roughly an 80/20 split).
## partition into train and test
train_series = stock_ts[1:200] # 80% * length(stock_ts)
test_series = stock_ts[201:252]
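One aside (not from the original text): subsetting a ts object with [ ] drops its time-series attributes, so train_series above is a plain numeric vector. An equivalent split that preserves them would use window():
# Split that keeps the ts attributes intact.
train_series_ts <- window(stock_ts, end = time(stock_ts)[200])
test_series_ts  <- window(stock_ts, start = time(stock_ts)[201])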
For model inspection, we first run auto.arima() to see which specification it selects automatically.
library(forecast)
AutoArimaModel <- auto.arima(train_series)
AutoArimaModel
## Series: train_series
## ARIMA(2,1,3)
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -0.0111 -0.4406 -0.0531 0.5311 0.1227
## s.e. 0.5441 0.2791 0.5363 0.2212 0.0851
##
## sigma^2 estimated as 226.8: log likelihood=-819.62
## AIC=1651.23 AICc=1651.67 BIC=1670.99
This function searches over a range of p and q values after fixing d with the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test, and selects the model with the lowest AICc.
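By default the search is stepwise and uses an approximation; when runtime allows, a fuller search can be requested (these are standard forecast::auto.arima arguments):
# Slower but more exhaustive search over (p, q), still selecting by AICc.
auto.arima(train_series, stepwise = FALSE, approximation = FALSE, ic = "aicc")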
## make ARIMA models
arimaModel_1 = arima(train_series, order=c(13,1,0))
arimaModel_2 = arima(train_series, order=c(14,1,0))
arimaModel_3 = arima(train_series, order=c(0,1,14))
arimaModel_4 = arima(train_series, order=c(2,1,3)) # added model after running auto-arima function
arimaModel_5 = arima(train_series, order=c(0,1,0))
## look at the parameters
print(arimaModel_1);print(arimaModel_2);print(arimaModel_3);print(arimaModel_4);
##
## Call:
## arima(x = train_series, order = c(13, 1, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.0880 0.0805 0.1281 -0.0201 -0.1913 -0.0873 0.0126 -0.0307
## s.e. 0.0714 0.0718 0.0727 0.0726 0.0721 0.0740 0.0740 0.0741
## ar9 ar10 ar11 ar12 ar13
## 0.0772 0.1273 0.0721 -0.0657 0.0207
## s.e. 0.0724 0.0729 0.0727 0.0726 0.0725
##
## sigma^2 estimated as 206.3: log likelihood = -813, aic = 1654
##
## Call:
## arima(x = train_series, order = c(14, 1, 0))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## -0.0856 0.0720 0.1361 -0.0049 -0.1806 -0.0900 0.0132 -0.0424
## s.e. 0.0709 0.0714 0.0722 0.0726 0.0718 0.0734 0.0737 0.0738
## ar9 ar10 ar11 ar12 ar13 ar14
## 0.0508 0.1273 0.0900 -0.0534 0.0107 -0.1217
## s.e. 0.0735 0.0724 0.0728 0.0724 0.0722 0.0718
##
## sigma^2 estimated as 203.2: log likelihood = -811.58, aic = 1653.16
##
## Call:
## arima(x = train_series, order = c(0, 1, 14))
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8
## -0.0900 0.0603 0.1382 0.0193 -0.1825 0.0245 -0.0859 -0.0942
## s.e. 0.0721 0.0709 0.0717 0.0709 0.0718 0.0734 0.0767 0.0804
## ma9 ma10 ma11 ma12 ma13 ma14
## 0.0248 0.1554 0.0554 0.0004 0.1496 -0.2187
## s.e. 0.0717 0.0730 0.0775 0.0673 0.0750 0.0842
##
## sigma^2 estimated as 200.8: log likelihood = -810.83, aic = 1651.67
##
## Call:
## arima(x = train_series, order = c(2, 1, 3))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## -0.0111 -0.4406 -0.0531 0.5311 0.1227
## s.e. 0.5441 0.2791 0.5363 0.2212 0.0851
##
## sigma^2 estimated as 221.1: log likelihood = -819.62, aic = 1651.23
print(arimaModel_5)
##
## Call:
## arima(x = train_series, order = c(0, 1, 0))
##
##
## sigma^2 estimated as 230.4: log likelihood = -823.63, aic = 1649.26
Observation: each fit reports its coefficients with standard errors, the estimated variance of the error term, the log likelihood, and the AIC (a lower AIC is desirable). Note that not every parameter passes an informal t-test: several estimates are within two standard errors of zero (for example, ar1 = -0.0111 with s.e. 0.5441 in the ARIMA(2,1,3) fit). By AIC the models are close together; the random walk ARIMA(0,1,0) has the lowest value (1649.26), with the auto.arima() choice ARIMA(2,1,3) next at 1651.23.
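To compare the candidates at a glance, AIC() can be applied directly to each arima fit:
# Collect each candidate's AIC side by side (lower is better).
sapply(list(ARIMA_13_1_0 = arimaModel_1, ARIMA_14_1_0 = arimaModel_2,
            ARIMA_0_1_14 = arimaModel_3, ARIMA_2_1_3  = arimaModel_4,
            ARIMA_0_1_0  = arimaModel_5), AIC)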
checkresiduals(arimaModel_1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(13,1,0)
## Q* = 6.0211, df = 3, p-value = 0.1106
##
## Model df: 13. Total lags used: 16
checkresiduals(arimaModel_2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(14,1,0)
## Q* = 2.8459, df = 3, p-value = 0.416
##
## Model df: 14. Total lags used: 17
checkresiduals(arimaModel_3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,14)
## Q* = 5.0934, df = 3, p-value = 0.1651
##
## Model df: 14. Total lags used: 17
checkresiduals(arimaModel_4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,3)
## Q* = 12.359, df = 5, p-value = 0.03019
##
## Model df: 5. Total lags used: 10
checkresiduals(arimaModel_5)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 19.71, df = 10, p-value = 0.03212
##
## Model df: 0. Total lags used: 10
The Ljung-Box p-values for models 1–3 exceed 0.05, so their residuals are consistent with white noise, while models 4 and 5 (p = 0.030 and 0.032) show significant residual autocorrelation. We plot forecasts from the three lowest-AIC models:
autoplot(forecast(arimaModel_3))
autoplot(forecast(arimaModel_4))
autoplot(forecast(arimaModel_5))
accuracy(forecast(arimaModel_3))
## ME RMSE MAE MPE MAPE MASE
## Training set 1.514747 14.13653 9.865165 0.4378071 4.288781 0.9689287
## ACF1
## Training set 0.002099209
accuracy(forecast(arimaModel_4))
## ME RMSE MAE MPE MAPE MASE
## Training set 1.281774 14.83296 10.14742 0.3676977 4.241656 0.9966508
## ACF1
## Training set -0.003057476
accuracy(forecast(arimaModel_5))
## ME RMSE MAE MPE MAPE MASE
## Training set 1.408881 15.14082 10.13126 0.3940584 4.220647 0.9950639
## ACF1
## Training set -0.06281438
Observation: Model 3 achieves the smallest root mean square error (RMSE) and mean absolute error (MAE) on the training set, as expected, making it the best of the five models by these metrics.
# Predict average stock price on test set
head(data.frame(cbind(forecast(arimaModel_1, 52)$mean, forecast(arimaModel_2, 52)$mean,
forecast(arimaModel_3, 52)$mean, forecast(arimaModel_4, 52)$mean),
forecast(arimaModel_5, 52)$mean)) # length(test_series)=52
## forecast.arimaModel_1..52..mean forecast.arimaModel_2..52..mean
## 1 410.5902 409.4168
## 2 415.0725 412.5899
## 3 421.2235 420.7947
## 4 422.1414 421.1241
## 5 423.7804 425.8767
## 6 425.7212 427.0004
## forecast.arimaModel_3..52..mean forecast.arimaModel_4..52..mean
## 1 411.4380 412.0358
## 2 411.3391 413.7072
## 3 421.7552 413.1782
## 4 419.8514 412.4477
## 5 428.3156 412.6889
## 6 428.9283 413.0080
## forecast.arimaModel_5..52..mean
## 1 411.76
## 2 411.76
## 3 411.76
## 4 411.76
## 5 411.76
## 6 411.76
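These are point forecasts only; forecast::accuracy() also accepts the hold-out data as a second argument, adding a test-set row to the metrics (a sketch, using the test_series defined earlier):
# Out-of-sample accuracy on the 52 retained observations.
accuracy(forecast(arimaModel_3, h = 52), test_series)
accuracy(forecast(arimaModel_4, h = 52), test_series)
accuracy(forecast(arimaModel_5, h = 52), test_series)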
Source of stock list: https://www.nasdaq.com/market-activity/stocks/screener
We take 40 stocks from 2020-01-01 up to 2021-01-31, which generates 780 pairs of stocks (40 choose 2).
library(tidyverse)
library(tseries)
library(quantmod)
mySymbols <- c('GOOGL', 'TSLA', 'FB', 'AMZN', 'AAPL', 'MSFT', 'VOD', 'ADBE', 'NVDA', 'CRM',
'EBAY', 'YNDX', 'TRIP', 'NFLX', 'DBX', 'ETSY', 'PINS','BX', 'MA', 'TMUS',
'SPLK', 'CTXS', 'TWTR', 'PYPL', 'ZM', 'INTC', 'GT', 'SBUX', 'WIX', 'ZNGA',
'SHOP', 'UBER', 'GS', 'BIDU', 'FDX', 'VRTX', 'D', 'APD', 'HD', 'BIIB'
)
myStocks <-lapply(mySymbols, function(x) {getSymbols(x,
from = "2020-01-01",
to = "2021-01-31",
periodicity = "daily",
auto.assign=FALSE)} )
names(myStocks) <- mySymbols
closePrices <- lapply(myStocks, Cl)
closePrices <- do.call(merge, closePrices)
names(closePrices)<-sub("\\.Close", "", names(closePrices))
head(closePrices)
## GOOGL TSLA FB AMZN AAPL MSFT VOD ADBE NVDA
## 2020-01-02 1368.68 86.052 209.78 1898.01 75.0875 160.62 19.43 334.43 239.91
## 2020-01-03 1361.52 88.602 208.67 1874.97 74.3575 158.62 19.29 331.81 236.07
## 2020-01-06 1397.81 90.308 212.60 1902.88 74.9500 159.03 19.34 333.71 237.06
## 2020-01-07 1395.11 93.812 213.06 1906.86 74.5975 157.58 19.21 333.39 239.93
## 2020-01-08 1405.04 98.428 215.22 1891.97 75.7975 160.09 19.30 337.87 240.38
## 2020-01-09 1419.79 96.268 218.30 1901.05 77.4075 162.09 19.88 340.45 243.02
## CRM EBAY YNDX TRIP NFLX DBX ETSY PINS BX MA
## 2020-01-02 166.99 36.30 44.08 30.26 329.81 18.09 45.190 18.80 55.80 303.39
## 2020-01-03 166.17 35.96 43.21 30.17 325.90 18.00 44.900 18.36 56.00 300.43
## 2020-01-06 173.45 35.78 43.10 30.06 335.83 18.53 44.835 18.91 55.90 301.23
## 2020-01-07 176.00 35.62 43.17 30.47 330.75 18.53 45.780 19.26 55.90 300.21
## 2020-01-08 177.33 35.60 43.77 30.59 339.26 18.46 45.005 19.72 56.74 305.10
## 2020-01-09 179.60 35.18 44.54 30.49 335.66 18.37 46.430 19.63 57.85 309.10
## TMUS SPLK CTXS TWTR PYPL ZM INTC GT SBUX WIX ZNGA
## 2020-01-02 78.59 151.98 111.67 32.30 110.75 68.72 60.84 15.38 89.35 127.55 6.15
## 2020-01-03 78.17 152.06 111.82 31.52 108.76 67.28 60.10 14.74 88.83 128.92 6.23
## 2020-01-06 78.62 154.46 113.01 31.64 110.17 70.32 59.93 14.95 88.13 131.63 6.27
## 2020-01-07 78.92 153.41 112.41 32.54 109.67 71.90 58.93 15.06 87.86 130.01 6.49
## 2020-01-08 79.42 155.76 113.76 33.05 111.82 72.55 58.97 14.99 88.88 133.78 6.59
## 2020-01-09 79.81 154.52 114.05 33.22 112.57 72.62 59.30 14.69 90.53 140.08 6.70
## SHOP UBER GS BIDU FDX VRTX D APD HD BIIB
## 2020-01-02 407.81 30.99 234.32 138.22 155.10 219.45 81.96 231.14 219.66 294.24
## 2020-01-03 404.29 31.37 231.58 133.80 153.18 217.98 81.76 226.00 218.93 290.85
## 2020-01-06 413.33 31.58 233.95 135.94 153.30 224.03 82.39 225.90 219.96 290.82
## 2020-01-07 414.50 32.81 235.49 136.70 154.80 223.79 82.21 226.86 218.52 290.09
## 2020-01-08 418.10 33.93 237.76 137.83 157.13 231.09 81.69 228.09 221.79 292.66
## 2020-01-09 430.20 33.97 242.60 140.86 158.05 230.26 81.92 233.50 225.19 294.30
# train
train <- log(closePrices[1:216]) # 80% of sample size
# test
test <- log(closePrices[217:270]) # 20% of sample size
# get the correlation of each pair
left_side <- NULL
right_side <- NULL
correlation <- NULL
beta <- NULL
pvalue <- NULL
for (i in 1:length(mySymbols)) {
  for (j in 1:length(mySymbols)) {
    if (i > j) {
      left_side <- c(left_side, mySymbols[i])
      right_side <- c(right_side, mySymbols[j])
      correlation <- c(correlation, cor(train[, mySymbols[i]], train[, mySymbols[j]]))
      # linear regression without intercept
      m <- lm(train[, mySymbols[i]] ~ train[, mySymbols[j]] - 1)
      beta <- c(beta, as.numeric(coef(m)[1]))
      # get the mispricings of the spread
      sprd <- residuals(m)
      # ADF test for stationarity of the spread
      pvalue <- c(pvalue, adf.test(sprd, alternative = "stationary", k = 0)$p.value)
    }
  }
}
## Warning in adf.test(sprd, alternative = "stationary", k = 0): p-value smaller
## than printed p-value
## (the same warning is emitted 15 times, once per pair whose p-value falls
## below the smallest value adf.test can print)
df <- data.frame(left_side, right_side, correlation, beta, pvalue)
mypairs <- df %>% filter(pvalue<=0.05, correlation>0.95) %>% arrange(-correlation)
mypairs
## left_side right_side correlation beta pvalue
## 1 SHOP PYPL 0.9732920 1.3145207 0.04992257
## 2 NFLX AMZN 0.9725145 0.7740786 0.01000000
## 3 WIX EBAY 0.9688423 1.3885803 0.04148109
## 4 NVDA AMZN 0.9661956 0.7499275 0.04121935
## 5 NVDA ADBE 0.9612544 0.9813591 0.01000000
## 6 TMUS NVDA 0.9607565 0.7803125 0.01000000
## 7 SHOP EBAY 0.9600129 1.7322467 0.04685409
## 8 HD FB 0.9535160 1.0145674 0.02406166
## 9 MSFT AMZN 0.9516087 0.6676443 0.01000000
From the 780 pairs, we kept the 9 pairs above, which show over 95% correlation and an ADF p-value of at most 0.05 for the spread.
Let’s analyze the pair ‘SHOP’ and ‘PYPL’, the Shopify and PayPal stocks respectively.
# plot the spread on the train dataset
myspread<-train[,"SHOP"]-1.3145207*train[,"PYPL"]
plot(myspread, main = "SHOP vs PYPL")
Analyzing the spread, we can define trading signals for when to open a position and when to close it. We could use quantiles or 3 standard deviations; for simplicity, let’s take +0.1 and -0.1 as our trading signals. The strategy is the following: when the spread rises above +0.1 we short it (sell SHOP, buy PYPL), when it falls below -0.1 we go long (buy SHOP, sell PYPL), and we close the position when the spread reverts to its mean. A sketch of this rule in code follows.
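A minimal signal sketch using the assumed +/-0.1 thresholds (this rule and the variable sig are illustrative, not from the original code):
# +1 = long the spread (buy SHOP, sell beta*PYPL), -1 = short it, 0 = flat.
sig <- ifelse(as.numeric(myspread) > 0.1, -1,
              ifelse(as.numeric(myspread) < -0.1, 1, 0))
plot(index(myspread), sig, type = "s",
     xlab = "", ylab = "position", main = "SHOP vs PYPL signal")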
spread_tes <- test[,"SHOP"]-1.3145207*test[,"PYPL"]
plot(spread_tes, main = "SHOP vs PYPL")
As observed, around the 23rd of November 2020 the spread fell below -0.1, fluctuated near that level, and then dropped further to about -0.15. At this point it is hard to expect convergence back to the mean.
# plot the spread on the train dataset
myspread<-train[,"MSFT"]-0.6676443*train[,"AMZN"]
plot(myspread, main = "MSFT vs AMZN")
# apply on test set
spread_tes <- test[,"MSFT"]-0.6676443*test[,"AMZN"]
plot(spread_tes, main = "MSFT vs AMZN")
# plot the spread on the train dataset
myspread<-train[,"NFLX"]-0.7740786*train[,"AMZN"]
plot(myspread, main = "NFLX vs AMZN")
# apply on test set
spread_tes <- test[,"NFLX"]-0.7740786*test[,"AMZN"]
plot(spread_tes, main = "NFLX vs AMZN")
# plot the spread on the train dataset
myspread<-train[,"WIX"]-1.3885803*train[,"EBAY"]
plot(myspread, main = "WIX vs EBAY")
# apply on test set
spread_tes <- test[,"WIX"]-1.3885803*test[,"EBAY"]
plot(spread_tes, main = "WIX vs EBAY")
# plot the spread on the train dataset
myspread<-train[,"NVDA"]-0.7499275*train[,"AMZN"]
plot(myspread, main = "NVDA vs AMZN")
# apply on test set
spread_tes <- test[,"NVDA"]-0.7499275*test[,"AMZN"]
plot(spread_tes, main = "NVDA vs AMZN")
# plot the spread on the train dataset
myspread<-train[,"NVDA"]-0.9813591*train[,"ADBE"]
plot(myspread, main = "NVDA vs ADBE")
# apply on test set
spread_tes <- test[,"NVDA"]-0.9813591*test[,"AMZN"]
plot(spread_tes, main = "NVDA vs ADBE")
# plot the spread on the train dataset
myspread<-train[,"TMUS"]-0.7803125*train[,"NVDA"]
plot(myspread, main = "TMUS vs NVDA")
# apply on test set
spread_tes <- test[,"TMUS"]-0.7803125*test[,"NVDA"]
plot(spread_tes, main = "TMUS vs NVDA")
# plot the spread on the train dataset
myspread<-train[,"SHOP"]-1.7322467*train[,"EBAY"]
plot(myspread, main = "SHOP vs EBAY")
# apply on test set
spread_tes <- test[,"SHOP"]-1.7322467*test[,"EBAY"]
plot(spread_tes, main = "SHOP vs EBAY")
# plot the spread on the train dataset
myspread<-train[,"HD"]-1.0145674*train[,"FB"]
plot(myspread, main = "HD vs FB")
# apply on test set
spread_tes <- test[,"HD"]-1.0145674*test[,"FB"]
plot(spread_tes, main = "HD vs FB")
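All the pair plots above repeat the same recipe; a small helper function (hypothetical, not part of the original code) would remove the duplication:
# Compute and plot the spread left - beta * right on a given price panel.
plot_spread <- function(left, right, beta, data) {
  sprd <- data[, left] - beta * data[, right]
  plot(sprd, main = paste(left, "vs", right))
  invisible(sprd)
}
# Example: plot_spread("HD", "FB", 1.0145674, test)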