以 DJ30 成份股為配對交易的股價資料來源,透過網頁爬蟲抓取成份股的調整後收盤價,尋找最適合進行配對交易的股票,並探討使用 adfTest() 函數與 unitrootTest() 函數做檢定時,結果是否存在差異。
由 CNNMoney 抓取 DJ30 的成份股股票代碼,並利用 quantmod 套件抓取股價資訊。
利用 ADF 檢定檢查每支股票是否為 nonstationary,並對其做一期差分,再檢查是否為 stationary 的資料,若一期差分為 stationary,則為共整合關係檢定的資料,適合進行配對交易。假設檢定為
\[H_0: 股價走勢為\ nonstationary \quad vs. \quad H_1: 股價走勢為\ stationary\]
將所選資料做共整合檢定,再找出一組最顯著具有共整合關係( p-value 最小)的一對股票,並利用 OLS 最小平方法計算此組股票的線性組合。
載入相關套件:
rm(list=ls(all=TRUE))
library(quantmod)
library(fUnitRoots)
library(XML)
首先,建立一個以 ADF test 找出 nonstationary 股票的函數。函數裡的 ADF test 以 fUnitRoots 套件裡的 unitrootTest 或 adfTest 來進行檢定,回傳沒有通過檢定(\(p-value>\alpha\),表示不拒絕 \(H_0\) )的股票。
UnitrootTests: Unit Root Time Series TestsadfTest() computes test statistics and p values along the implementation from Trapletti’s augmented Dickey–Fuller test for unit roots. In contrast to Trapletti’s function three kind of test types can be selected.unitrootTest() computes test statistics and p values using McKinnon’s response surface approach.unitrootTest 函數Find_Integration_unitrootTest = function(dataframe, maxlag, regression, alpha)
{
dataframe = prices_df
n = ncol(dataframe)
keys = colnames(dataframe)
nonstationary = c()
for (i in 1:n) {
lag = maxlag # 從最大的lag開始檢查
stock = as.matrix(dataframe[keys[i]])
repeat {
adf = unitrootTest(stock, lags=lag, type=regression, title=NULL, description=NULL)
if (adf@test$p.value[[1]] <= alpha) # 使用 p value 檢查是否為 stationary
break
else
lag = lag-1
if(lag == -1) # lag=-1 時停止repeat
break
}
pvalue = adf@test$p.value[[1]]
Num_lags = adf@test$parameter[[1]]
if (pvalue > alpha)
nonstationary = append(nonstationary, list(c(keys[i], pvalue, Num_lags)))
}
return(nonstationary)
}
adfTest 函數Find_Integration_adfTest = function(dataframe, maxlag, regression, alpha)
{
dataframe = prices_df
n = ncol(dataframe)
keys = colnames(dataframe)
nonstationary = c()
for (i in 1:n) {
lag = maxlag # 從最大的lag開始檢查
stock = as.matrix(dataframe[keys[i]])
repeat {
adf = adfTest(stock, lags=lag, type=regression, title=NULL, description=NULL)
if (abs(adf@test$statistic[[1]]) > 2.58) # 使用檢定統計量檢查是否為 stationary
break
else
lag = lag-1
if(lag == -1) # lag=-1 時停止repeat
break
}
pvalue = adf@test$p.value[[1]]
Num_lags = adf@test$parameter[[1]]
if (pvalue > alpha)
nonstationary = append(nonstationary, list(c(keys[i], pvalue, Num_lags)))
}
return(nonstationary)
}
找出原為 nonstationary、一期差分後為 stationary 的股價資料後,進行共整合關係檢定。利用 OLS 線性回歸法找出兩支股票之間的線性組合,以此做 ADF test 檢定,看殘差是否為 stationary,最後回傳有通過檢定的股票組合、p-value 值與 \(\beta\) 值。
unitrootTest 函數Find_Cointegrated_Pairs_unitrootTest = function(dataframe, maxlag, regression, alpha)
{
n = ncol(dataframe)
keys = colnames(dataframe)
pvalues = matrix(1, nrow=n, ncol=n) # 檢查 y_t-beta*x_t 是否為stationary
betas = matrix(1, nrow=n, ncol=n)
pairs = c()
for (i in 1:n )
{
for(j in 1:n){
if (i != j)
{
y = as.matrix(dataframe[keys[i]])
x = as.matrix(dataframe[keys[j]])
result = lm(y ~ x) # 迴歸
beta = result$coefficients[[2]] # [[2]]為斜率的參數(含截距項的迴歸)
resid = y-beta*x
lag = maxlag
repeat {
adf = unitrootTest(resid, lags=lag, type=regression, title=NULL, description=NULL)
if (adf@test$p.value[[1]] <= alpha) # 使用 p value 檢查是否為 stationary
break
else
lag = lag-1
if(lag == -1)
break
}
pvalue = adf@test$p.value[[1]]
pvalues[i,j] = pvalue
betas[i,j] = beta
if (pvalue <= alpha) # stationary才能配對
pairs = append(pairs, list(c(keys[i], keys[j], pvalue, beta)))
}
}
}
outcome = list(pairs=pairs, pvalues=pvalues, betas=betas)
return(outcome)
}
adfTest 函數Find_Cointegrated_Pairs_adfTest = function(dataframe, maxlag, regression, alpha)
{
n = ncol(dataframe)
keys = colnames(dataframe)
pvalues = matrix(1, nrow=n, ncol=n) # 檢查 y_t-beta*x_t 是否為stationary
betas = matrix(1, nrow=n, ncol=n)
pairs = c()
for (i in 1:n )
{
for(j in 1:n){
if (i != j)
{
y = as.matrix(dataframe[keys[i]])
x = as.matrix(dataframe[keys[j]])
result = lm(y ~ x) # 迴歸
beta = result$coefficients[[2]] # [[2]]為斜率的參數(含截距項的迴歸)
resid = y-beta*x
lag = maxlag
repeat {
adf = adfTest(resid, lags=lag, type=regression, title=NULL, description=NULL)
if (abs(adf@test$statistic[[1]]) > 2.58) # 使用檢定統計量檢查是否為 stationary
break
else
lag = lag-1
if(lag == -1)
break
}
pvalue = adf@test$p.value[[1]]
pvalues[i,j] = pvalue
betas[i,j] = beta
if (pvalue <= alpha) # stationary才能配對
pairs = append(pairs, list(c(keys[i], keys[j], pvalue, beta)))
}
}
}
outcome = list(pairs=pairs, pvalues=pvalues, betas=betas)
return(outcome)
}
利用網路爬蟲取得 DJ30 成份股的股票代碼:
url = "http://money.cnn.com/data/dow30/"
html = htmlTreeParse(url, useInternal=TRUE)
tables = readHTMLTable(html)
length(tables)
## [1] 2
tables[[2]]
## Company Price Change % Change Volume YTDchange
## 1 MMM 3M 258.63 +6.27 +2.48% 3,730,628 +9.88%
## 2 AXP American Express 99.63 -0.06 -0.06% 3,668,677 +0.32%
## 3 AAPL Apple 171.51 +0.40 +0.23% 39,143,011 +1.35%
## 4 BA Boeing 343.22 +0.11 +0.03% 5,236,260 +16.38%
## 5 CAT Caterpillar 167.06 -2.31 -1.36% 9,045,259 +6.02%
## 6 CVX Chevron 131.19 +0.54 +0.41% 6,310,543 +4.79%
## 7 CSCO Cisco 42.56 +0.66 +1.58% 23,380,407 +11.12%
## 8 KO Coca-Cola 48.53 +0.69 +1.44% 16,671,227 +5.78%
## 9 DIS Disney 112.19 +1.64 +1.48% 7,743,825 +4.35%
## 10 DWDP DowDuPont Inc 77.02 +0.37 +0.48% 6,201,046 +8.14%
## 11 XOM Exxon Mobil 89.00 +0.63 +0.71% 10,514,408 +6.41%
## 12 GE General Electric 16.13 -0.05 -0.31% 90,888,608 -7.56%
## 13 GS Goldman Sachs 268.14 -0.89 -0.33% 3,535,395 +5.25%
## 14 HD Home Depot 207.23 +1.86 +0.91% 3,778,050 +9.34%
## 15 IBM IBM 167.34 +1.87 +1.13% 3,787,913 +9.07%
## 16 INTC Intel 50.08 +4.78 +10.55% 86,916,079 +8.49%
## 17 JNJ Johnson & Johnson 145.33 +0.93 +0.64% 8,065,646 +4.02%
## 18 JPM JPMorgan Chase 116.32 +0.62 +0.54% 13,883,875 +8.77%
## 19 MCD McDonald's 178.36 +2.70 +1.54% 3,957,724 +3.63%
## 20 MRK Merck 62.04 +0.74 +1.21% 10,553,733 +10.25%
## 21 MSFT Microsoft 94.06 +1.73 +1.87% 29,172,167 +9.96%
## 22 NKE Nike 68.04 +0.33 +0.49% 6,290,517 +8.78%
## 23 PFE Pfizer 39.01 +1.78 +4.78% 48,805,858 +7.70%
## 24 PG Procter & Gamble 87.73 -0.61 -0.69% 11,503,083 -4.52%
## 25 TRV Travelers Companies Inc 149.42 +0.95 +0.64% 2,538,390 +10.16%
## 26 UTX United Technologies 137.98 +0.23 +0.17% 3,203,557 +8.16%
## 27 UNH UnitedHealth 248.47 +3.29 +1.34% 2,615,342 +12.71%
## 28 VZ Verizon 54.72 +0.43 +0.79% 13,077,557 +3.38%
## 29 V Visa 126.32 +1.10 +0.88% 5,604,949 +10.79%
## 30 WMT Wal-Mart 108.39 +1.79 +1.68% 6,786,305 +9.76%
tables = readHTMLTable(html, stringsAsFactors=FALSE, which=2)
將 tables 裡 Company column 中的文字做字串分割,將股票代碼取出來:
for (i in 1:length(tables$Company)){
ticker = strsplit(as.character(tables$Company[i]), '\u00A0')
tables$tickers[i] = ticker[[1]][1]
}
tickers = tables$tickers
tickers = sapply(1:length(tables$Company),
function(i) {
strsplit(as.character(tables$Company[i]), '\u00A0')[[1]][1]
}
)
tickers
## [1] "MMM" "AXP" "AAPL" "BA" "CAT" "CVX" "CSCO" "KO" "DIS" "DWDP"
## [11] "XOM" "GE" "GS" "HD" "IBM" "INTC" "JNJ" "JPM" "MCD" "MRK"
## [21] "MSFT" "NKE" "PFE" "PG" "TRV" "UTX" "UNH" "VZ" "V" "WMT"
DJ30 總共有30支成份股:
n = length(tickers)
n
## [1] 30
建立配對交易策略的資料區間取為 2015 至 2017 年:
start = "2015-01-01"
end = "2017-12-31"
抓取 DJ30 的成份股之調整後收盤價:
prices_df = data.frame()
for(i in c(tickers)){
HisData = getSymbols(i, from=start, to=end, auto.assign=FALSE)
colnames(HisData)[6] = i
prices_df = cbind(prices_df,HisData[,6])
}
prices_df 中總共有30個變數、755個觀察值:
dim(prices_df)
## [1] 755 30
除了 DJ30 的成份股之外,加入道瓊工業平均指數:
DJI = getSymbols("^DJI", from=start, to=end, auto.assign=FALSE)
colnames(DJI)[6] = "DJI"
prices_df = cbind(prices_df,DJI[,6])
prices_df = as.data.frame(na.omit(prices_df))
加入道瓊工業平均指數後 prices_df 中總共有31個變數、755個觀察值:
dim(prices_df)
## [1] 755 31
將差分的最大 lag 數設定為 \(p=p_{max}\),\(\alpha\) 值設定為 \(0.01\)(對應之檢定統計量為 \(-2.58\)),將資料放入判斷是否為 stationary 的函數,找出 nonstationary 的股票後,對其做一次差分,再一次將資料放入判斷是否為 stationary 的函數。
利用下列公式依照資料集的觀察值個數\(T\),計算此資料集最適合的最大 lag 數(Schwert, 1989):
\[p_{max} = [12 \times (\cfrac{100}{T})^{\frac{1}{4}}] \]
maxlag = floor(12*(100/nrow(prices_df))^(1/4))
maxlag
## [1] 7
設定迴歸模型中 \(\eta_t=\eta_0\) ,且 \(\alpha =0.01\):
regression = 'c'
alpha = 0.01
unitrootTest 檢定資料是否為 nonstationary31支股票之股價走勢皆為 nonstationary:
nonstationary = Find_Integration_unitrootTest(prices_df, maxlag, regression, alpha)
length(nonstationary)
## [1] 31
nonstationary
## [[1]]
## [1] "MMM" "0.993575471563785" "0"
##
## [[2]]
## [1] "AXP" "0.917324181382732" "0"
##
## [[3]]
## [1] "AAPL" "0.946478365982145" "0"
##
## [[4]]
## [1] "BA" "0.999995489798612" "0"
##
## [[5]]
## [1] "CAT" "0.999992264205967" "0"
##
## [[6]]
## [1] "CVX" "0.903963381874599" "0"
##
## [[7]]
## [1] "CSCO" "0.945488032112726" "0"
##
## [[8]]
## [1] "KO" "0.656290590638458" "0"
##
## [[9]]
## [1] "DIS" "0.0954048093066255" "0"
##
## [[10]]
## [1] "DWDP" "0.882093768722041" "0"
##
## [[11]]
## [1] "XOM" "0.105873629628759" "0"
##
## [[12]]
## [1] "GE" "0.940720134952766" "0"
##
## [[13]]
## [1] "GS" "0.902545481307535" "0"
##
## [[14]]
## [1] "HD" "0.99252357641333" "0"
##
## [[15]]
## [1] "IBM" "0.362045514521435" "0"
##
## [[16]]
## [1] "INTC" "0.974543826238061" "0"
##
## [[17]]
## [1] "JNJ" "0.96154133246206" "0"
##
## [[18]]
## [1] "JPM" "0.990228150044222" "0"
##
## [[19]]
## [1] "MCD" "0.986884131949352" "0"
##
## [[20]]
## [1] "MRK" "0.344050768727083" "0"
##
## [[21]]
## [1] "MSFT" "0.989192603852535" "0"
##
## [[22]]
## [1] "NKE" "0.155481641641344" "0"
##
## [[23]]
## [1] "PFE" "0.270137307689095" "0"
##
## [[24]]
## [1] "PG" "0.800732367932228" "0"
##
## [[25]]
## [1] "TRV" "0.854926848020773" "0"
##
## [[26]]
## [1] "UTX" "0.879703594993256" "0"
##
## [[27]]
## [1] "UNH" "0.988449267865341" "0"
##
## [[28]]
## [1] "VZ" "0.479956510732118" "0"
##
## [[29]]
## [1] "V" "0.97219149948655" "0"
##
## [[30]]
## [1] "WMT" "0.97966256381402" "0"
##
## [[31]]
## [1] "DJI" "0.997020532709127" "0"
將資料做一次差分,檢定資料是否為 stationary:
prices_df_diff = diff(as.matrix(prices_df))
prices_df_diff = as.data.frame(prices_df_diff)
nonstationary_diff = Find_Integration_unitrootTest(prices_df_diff, maxlag, regression, alpha)
length(nonstationary_diff)
## [1] 31
資料做一次差分後皆為 nonstationary。
使用 setdiff 函數將 nonstationary 與 nonstationary_diff 取差集,得到原始資料為 nonstationary,一次差分後為 stationary 的資料:
stationary_diff = as.data.frame(setdiff(nonstationary, nonstationary_diff))
length(stationary_diff)
## [1] 0
確認後並沒有原始資料為 nonstationary,一次差分後為 stationary 的資料,因此將原本股價走勢皆為 nonstationary 的資料全數放入 Find_Cointegrated_Pairs_unitrootTest 測試共整合效應:
cointegrated_pairs = Find_Cointegrated_Pairs_unitrootTest(prices_df, maxlag, regression, alpha)
adfTest 檢定資料是否為 nonstationary31支股票之股價走勢皆為 nonstationary:
nonstationary_adfTest = Find_Integration_adfTest(prices_df, maxlag, regression, alpha)
length(nonstationary_adfTest)
## [1] 31
nonstationary_adfTest
## [[1]]
## [1] "MMM" "0.99" "0"
##
## [[2]]
## [1] "AXP" "0.913447902112316" "0"
##
## [[3]]
## [1] "AAPL" "0.944549696975875" "0"
##
## [[4]]
## [1] "BA" "0.99" "7"
##
## [[5]]
## [1] "CAT" "0.99" "7"
##
## [[6]]
## [1] "CVX" "0.902044689970324" "0"
##
## [[7]]
## [1] "CSCO" "0.943293569070274" "0"
##
## [[8]]
## [1] "KO" "0.595196506548888" "0"
##
## [[9]]
## [1] "DIS" "0.0770328513837364" "3"
##
## [[10]]
## [1] "DWDP" "0.861677732971666" "0"
##
## [[11]]
## [1] "XOM" "0.110328320068382" "0"
##
## [[12]]
## [1] "GE" "0.937490351358592" "0"
##
## [[13]]
## [1] "GS" "0.900908185358537" "0"
##
## [[14]]
## [1] "HD" "0.99" "0"
##
## [[15]]
## [1] "IBM" "0.373632395536677" "0"
##
## [[16]]
## [1] "INTC" "0.974452203006162" "0"
##
## [[17]]
## [1] "JNJ" "0.959565220283909" "0"
##
## [[18]]
## [1] "JPM" "0.99" "0"
##
## [[19]]
## [1] "MCD" "0.985779687279465" "0"
##
## [[20]]
## [1] "MRK" "0.359643473453953" "0"
##
## [[21]]
## [1] "MSFT" "0.988817838691997" "0"
##
## [[22]]
## [1] "NKE" "0.18077872896197" "0"
##
## [[23]]
## [1] "PFE" "0.298514883674632" "0"
##
## [[24]]
## [1] "PG" "0.739310915370985" "0"
##
## [[25]]
## [1] "TRV" "0.815151363854167" "0"
##
## [[26]]
## [1] "UTX" "0.857277614859277" "0"
##
## [[27]]
## [1] "UNH" "0.987783082380457" "0"
##
## [[28]]
## [1] "VZ" "0.461076076617081" "0"
##
## [[29]]
## [1] "V" "0.971345559406083" "0"
##
## [[30]]
## [1] "WMT" "0.978589181362012" "0"
##
## [[31]]
## [1] "DJI" "0.99" "0"
將資料做一次差分,檢定資料是否為 stationary:
prices_df_diff_adfTest = diff(as.matrix(prices_df))
prices_df_diff_adfTest = as.data.frame(prices_df_diff_adfTest)
nonstationary_diff_adfTest = Find_Integration_adfTest(prices_df_diff_adfTest, maxlag, regression, alpha)
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
## Warning in adfTest(stock, lags = lag, type = regression, title = NULL,
## description = NULL): p-value greater than printed p-value
length(nonstationary_diff_adfTest)
## [1] 31
資料做一次差分後皆為 nonstationary。
使用 setdiff 函數將 nonstationary_adfTest 與 nonstationary_diff_adfTest 取差集,得到原始資料為 nonstationary,一次差分後為 stationary 的資料:
stationary_diff_adfTest = as.data.frame(setdiff(nonstationary_adfTest, nonstationary_diff_adfTest))
length(stationary_diff_adfTest)
## [1] 0
確認後並沒有原始資料為 nonstationary,一次差分後為 stationary 的資料,因此將原本股價走勢皆為 nonstationary 的資料全數放入 Find_Cointegrated_Pairs_adfTest 測試共整合效應:
cointegrated_pairs_adfTest = Find_Cointegrated_Pairs_adfTest(prices_df, maxlag, regression, alpha)
因 adfTest 的方法較不準確,因此採用 unitrootTest 方法所計算出的數值:
pairs = cointegrated_pairs$pairs
pvalues = cointegrated_pairs$pvalues
betas = cointegrated_pairs$betas
colnames(pvalues) = colnames(prices_df)
rownames(pvalues) = colnames(prices_df)
head(pvalues)
## MMM AXP AAPL BA CAT CVX
## MMM 1.00000000 0.2945108491 0.482993751 0.381284485 0.14693945 0.1035489
## AXP 0.16075244 1.0000000000 0.001003975 0.009542605 0.06206242 0.4203897
## AAPL 0.32934478 0.0008253696 1.000000000 0.139627331 0.18512381 0.3428064
## BA 0.62122486 0.0182402522 0.356670204 1.000000000 0.02581254 0.6107183
## CAT 0.36206960 0.4531143998 0.571043450 0.045376194 1.00000000 0.6603313
## CVX 0.06174031 0.4461746146 0.335561994 0.217805981 0.19699226 1.0000000
## CSCO KO DIS DWDP XOM GE
## MMM 0.02834913 0.2647829 0.9931594 0.009246147 0.8484349 0.8676287
## AXP 0.09291080 0.7156080 0.5044756 0.074242615 0.9203481 0.1546991
## AAPL 0.29561099 0.7156510 0.8341247 0.168664066 0.9399252 0.3285736
## BA 0.41058967 0.8628697 0.9996877 0.444593067 0.9999000 0.6924943
## CAT 0.09955103 0.8659557 0.9999236 0.354126139 0.9994432 0.9024956
## CVX 0.02071028 0.4293532 0.8950287 0.046126526 0.7915814 0.8348339
## GS HD IBM INTC JNJ JPM
## MMM 0.80007932 0.04610984 0.9724809 0.09025657 0.07116981 0.31265926
## AXP 0.06607527 0.10618213 0.8617627 0.28829272 0.47265919 0.05051741
## AAPL 0.23765074 0.44010694 0.8906726 0.34332372 0.62244164 0.25284442
## BA 0.89369377 0.39098735 0.9999152 0.26614151 0.93999495 0.70429708
## CAT 0.96473660 0.17640248 0.9995253 0.18812643 0.87429365 0.62891207
## CVX 0.50399055 0.11132409 0.6912536 0.07354941 0.13511398 0.15695945
## MCD MRK MSFT NKE PFE PG
## MMM 0.1516099 0.9421546 0.1000998 0.9923257 0.10573666 0.16286614
## AXP 0.2977962 0.8325696 0.2226519 0.9384485 0.04201070 0.71826665
## AAPL 0.5798411 0.8589990 0.5546419 0.9465484 0.25946134 0.51421007
## BA 0.8501097 0.9989610 0.6918106 0.9999715 0.31704593 0.86417106
## CAT 0.7870622 0.9979994 0.5571064 0.9999919 0.20990119 0.88836483
## CVX 0.2367503 0.6199162 0.1410696 0.9082375 0.07838749 0.09150135
## TRV UTX UNH VZ V WMT
## MMM 0.01448095 0.4393226 0.009644491 0.8288426 0.06381458 0.1143156
## AXP 0.33290840 0.1920355 0.049803376 0.9339829 0.11355597 0.1967526
## AAPL 0.43949424 0.1477549 0.331527035 0.9252658 0.40903093 0.1081950
## BA 0.60735576 0.7763005 0.576416187 0.9985222 0.36770773 0.1369172
## CAT 0.49414877 0.8010185 0.075339419 0.9879426 0.27927896 0.1617570
## CVX 0.10272382 0.1290468 0.037616780 0.3224913 0.19805895 0.1786245
## DJI
## MMM 0.09158085
## AXP 0.03319656
## AAPL 0.21598937
## BA 0.26358573
## CAT 0.07308536
## CVX 0.11289861
載入畫圖相關套件:
library(ggplot2)
library(reshape2)
library(plotly)
library(dygraphs)
library(networkD3)
library(d3heatmap)
pvalue_dataframe = melt(pvalues)
colnames(pvalue_dataframe)[1] = 'stock1'
colnames(pvalue_dataframe)[2] = 'stock2'
ggplot(pvalue_dataframe, aes(stock1, stock2)) +
geom_tile(aes(fill=value), colour="white") +
scale_fill_gradient(low="white", high="steelblue") +
theme(text = element_text(size = 8),
axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none",
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5))
熱力圖中顏色愈淺、愈接近白色,代表 p-value 愈小,意即兩支股票之間的共整合效應愈強烈。
利用 d3heatmap 套件繪製能夠顯示 p-value 數值的熱力圖,以利確認哪對股票之 p-value 數值小於設定值(0.01):
d3heatmap(pvalues, scale = "column", dendrogram = "none", color = "Blues")
圖形結果為:http://rpubs.com/appleyou1995/353078
找出 p-value 最小、最適合進行配對交易策略的兩支股票:
pvalues_min = min(pvalues)
for (i in 1:length(pairs))
{
if (pairs[[i]][3] == pvalues_min)
{
cat(pairs[[i]])
stock1 = pairs[[i]][1]
stock2 = pairs[[i]][2]
beta = pairs[[i]][4]
break
}
}
## AAPL AXP 0.000825369622931292 1.85776632098538
| 股票代碼 | 公司名稱 | |
|---|---|---|
| AAPL | Apple Inc. | 蘋果 |
| AXP | American Express Company | 美國運通 |
stock_y = as.xts(prices_df[stock1])
stock_x = as.xts(prices_df[stock2])
plot2 = cbind(AAPL = stock_y, AXP = stock_x)
dygraph(plot2, main = "stock price") %>%
dyRangeSelector(dateWindow = c(start, end))%>%
dyLegend(show = "always")%>%
dyHighlight(highlightSeriesBackgroundAlpha = 0.3,
hideOnMouseOut = FALSE)
x = stock_x
y = stock_y
result = lm(y ~ x)
intercept = result$coefficients[[1]]
beta = result$coefficients[[2]]
cat('intercept =', intercept)
## intercept = -14.56949
cat('beta =', beta)
## beta = 1.857766
意即此配對交易策略為「買 \(1\) 個單位的 AAPL,賣 \(1.857766\) 單位的 AXP」。
plot_ly(prices_df, x = array(x), y = array(y), type = 'scatter', name = 'Stock Price') %>%
add_trace(data = prices_df, x = array(x), y = fitted(result), name = 'Regression Line', mode = "lines", line = list(color = '#CD0C18'))%>%
layout(xaxis = list(title = "AXP"),
yaxis = list(title = "AAPL"))
stock_y_x = stock_y-beta*stock_x
dygraph(stock_y_x, main = "", ylab="Stationary Series") %>%
dyRangeSelector(dateWindow = c(start, end))%>%
dySeries("AAPL", label = "stock_y_x") %>%
dyLimit(as.numeric(mean(stock_y_x)), color = "#0D2A62")%>%
dyLegend(show = "always")
zscore=function(s)
{
z=(s-mean(s))/sd(s)
return(z)
}
plot5 = zscore(stock_y_x)
dygraph(plot5, main = "", ylab="Stationary Series") %>%
dyRangeSelector(dateWindow = c(start, end))%>%
dyLimit(as.numeric(mean(plot5)), strokePattern = "solid", color = "#02547D")%>%
dyLimit(1, color = "#1B62A5")%>%
dyLimit(-1, color = "#1B62A5")%>%
dySeries("AAPL", label = "zscore") %>%
dyLegend(show = "always")
Social Network Analysis
將所有配對交易組合中,應「買進」的股票(stock1)抓出來:
將所有配對交易組合中,應「賣出」的股票(stock2)抓出來:
利用
simpleNetwork函數畫圖: