QQC方法-變數選擇

1. 數據描述與預覽

使用的數據包含六個金融時間序列，資料區間為 2006-01-03 至 2025-07-15。下表詳細列出各變數的描述與其資料來源。

變數名稱 (Variable)	描述 (Description)	資料來源 (Source)
oil_price	WTI 原油現貨價格	U.S. Energy Information Administration (EIA)
gold_price	黃金現貨價格	Kaggle Dataset
SPX_Close	S&P 500 指數	Yahoo Finance
USD_index	美元指數	Macrotrends
US10Y	10 年期美國公債殖利率	FRED API
US2Y	2 年期美國公債殖利率	FRED API

會選擇這些變數的原因是想觀察這些常見的金融資產在不同分位數下且重大波動發生時，除了連結性的變化以外，市場是如何去做反應以及避險的。

2. 數據視覺化 (原始資料)

在進行任何轉換之前，我們先觀察各資產的原始價格（或指數水準）走勢。為了更好地比較不同尺度的序列，我對價格與指數型數據取對數 (Log) 處理，而油價則保持其原始值(因為有負數資料)。

# --- 導入並繪製對數水準圖 ---

# 導入原始數據
raw_data_level <- read.csv("data.csv", header = TRUE)
my_data_level <- xts(raw_data_level[, -1], order.by = as.Date(raw_data_level[, 1]))

# 對部分變數取對數
cols_to_log <- c("gold_price", "SPX_Close", "USD_index", "US10Y", "US2Y")
data_log_prices <- log(my_data_level[, cols_to_log])
data_final_for_plot <- merge(data_log_prices, my_data_level$Oil_price)

# 轉換為 data.frame 供 ggplot 使用
data_plot_df <- data.frame(Date = index(data_final_for_plot), coredata(data_final_for_plot))

#合成大圖
p_gold <- ggplot(data_plot_df, aes(x = Date, y = gold_price)) +
  geom_line(color = "salmon", linewidth = 0.8) +
  labs(title = "Fig. 1. Gold Price in Log form.", x = "Year", y = "Log Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

p_oil <- ggplot(data_plot_df, aes(x = Date, y = Oil_price)) +
  geom_line(color = "olivedrab", linewidth = 0.8) +
  labs(title = "Fig. 2. Oil Price", x = "Year", y = "Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

p_spx <- ggplot(data_plot_df, aes(x = Date, y = SPX_Close)) +
  geom_line(color = "darkturquoise", linewidth = 0.8) +
  labs(title = "Fig. 3. S&P 500 Index in Log form.", x = "Year", y = "Log Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

p_us10y <- ggplot(data_plot_df, aes(x = Date, y = US10Y)) +
  geom_line(color = "lightgreen", linewidth = 0.8) +
  labs(title = "Fig. 4. 10Y Treasury Yield in Log form.", x = "Year", y = "Log Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

p_us2y <- ggplot(data_plot_df, aes(x = Date, y = US2Y)) +
  geom_line(color = "deepskyblue", linewidth = 0.8) +
  labs(title = "Fig. 5. 2Y Treasury Yield in Log form.", x = "Year", y = "Log Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

p_usd <- ggplot(data_plot_df, aes(x = Date, y = USD_index)) +
  geom_line(color = "magenta", linewidth = 0.8) +
  labs(title = "Fig. 6. US Dollar Index in Log form.", x = "Year", y = "Log Value") +
  theme_bw() +
  scale_x_date(date_breaks = "3 year", date_labels = "%Y")

(p_gold + p_oil + p_spx) / (p_us10y + p_us2y + p_usd) + 
  plot_layout(guides = 'collect') + # 將圖例集合在一起 (如果有的話)
  plot_annotation(
    title = '原始資料序列圖',
    subtitle = '價格與指數型數據取對數 (Log) 處理，而油價則保持其原始值(因為有負數資料)',
    caption = paste('資料期間:', min(data_plot_df$Date), '至', max(data_plot_df$Date))
  )

3. 對原始資料進行差分處理

為了進行後續的 QVAR 模型分析，所有數據都已經過預處理，轉換為定態序列（價格型數據取對數報酬率，殖利率數據取一階差分），並移除了缺失值。

首先，載入處理好的定態數據，並預覽其前六筆紀錄。

# 載入數據 
data_raw <- read.csv("data_stationary.csv", header = TRUE)
data <- xts(data_raw[, -1], order.by = as.Date(data_raw[, 1]))

# head()
kbl(head(data), caption = "數據集前六筆紀錄預覽") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

數據集前六筆紀錄預覽
Oil_price	gold_price	SPX_Close	USD_index	US10Y	US2Y
0.004742342	0.00000000	3.665964e-03	-0.0052441949	-0.01	-0.03
-0.009507281	-0.01681982	1.572046e-05	0.0007021464	0.00	0.01
0.022044665	0.02725989	9.355518e-03	-0.0027465619	0.02	0.04
-0.010174620	0.01564113	3.649696e-03	0.0015514217	0.00	0.00
-0.002362764	-0.01046169	-3.566756e-04	-0.0003574228	0.05	0.05
0.007854266	0.01137418	3.475497e-03	-0.0027258027	0.03	0.03

4. 定態數據之敘述統計量、各類檢定

接下來，計算數據的描述性統計量，包括平均值、變異數、偏度、峰度，並進行常態性檢定 (JB test)、單根檢定 (ERS test) 等，以確認數據的定態特性。同時，也一併呈現各序列間的相關性矩陣。

# 使用套件內建的函式計算統計量
stats_table <- SummaryStatistics(data)

# 使用 kableExtra 
kbl(stats_table, caption = "變數的描述性統計與相關性分析") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive"),
    font_size = 12,
    full_width = FALSE
  ) %>%
  add_header_above(c(" " = 1, "敘述統計" = 6)) %>%
  add_header_above(c(" " = 1, "相關性矩陣" = 6), line = F) # Kendall

變數的描述性統計與相關性分析
	相關性矩陣
	敘述統計
	Oil_price	gold_price	SPX_Close	USD_index	US10Y	US2Y
Mean	0.000	0.000**	0.000*	0.000	0.000	0.000
	(0.684)	(0.018)	(0.059)	(0.470)	(0.960)	(0.914)
Variance	0.001	0	0	0	0.003	0.003
Skewness	0.828***	-0.315***	-0.477***	-0.031	-0.150***	-0.491***
	(0.000)	(0.000)	(0.000)	(0.366)	(0.000)	(0.000)
Ex.Kurtosis	32.606***	5.958***	13.341***	4.599***	3.038***	11.210***
	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)
JB	226197.324***	7617.761***	37960.076***	4489.150***	1977.122***	26871.959***
	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)
ERS	-30.701***	-31.712***	-26.899***	-8.272***	-29.964***	-17.856***
	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)
Q(20)	79.787***	14.657	109.681***	21.525***	33.516***	44.106***
	(0.000)	(0.137)	(0.000)	(0.009)	(0.000)	(0.000)
Q2(20)	2722.891***	536.426***	4506.858***	1132.023***	749.461***	1711.616***
	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)	(0.000)
kendall	Oil_price	gold_price	SPX_Close	USD_index	US10Y	US2Y
Oil_price	1.000***	0.139***	0.168***	-0.190***	0.134***	0.084***
gold_price	0.139***	1.000***	0.035***	-0.253***	-0.145***	-0.158***
SPX_Close	0.168***	0.035***	1.000***	-0.175***	0.187***	0.152***
USD_index	-0.190***	-0.253***	-0.175***	1.000***	0.002	0.046***
US10Y	0.134***	-0.145***	0.187***	0.002	1.000***	0.572***
US2Y	0.084***	-0.158***	0.152***	0.046***	0.572***	1.000***

correlation_matrix <- cor(data, use = "pairwise.complete.obs") # Pearson 
col_palette <- colorRampPalette(brewer.pal(n = 11, name = "RdBu"))
corrplot(
  correlation_matrix,
  method = "number",          # 方法: 僅顯示數字
  col = col_palette(200),      # 顏色: 使用我們剛建立的紅藍色盤(200階)
  
  type = "full",             # 類型: 顯示完整的矩陣 (預設)
  addCoef.col = "black",     # 係數: 在方塊上加上黑色的係數數字
  
  tl.col = "red",            # 文字標籤(Text Label): 將座標軸的文字標籤設為紅色
  tl.srt = 45,                # 文字標籤旋轉: 頂部文字旋轉45度
  
  diag = TRUE,               # 對角線: 顯示對角線 (預設)
  
  addgrid.col = "grey",      # 格線: 加上淡灰色的格線
  
  cl.pos = "r",              # 圖例位置(Color Legend): 放在右邊 (預設)
  number.cex = 1.5           # 係數字體大小: 調整為 1.5 倍，可根據您的圖大小微調
)

5. 報酬率時間序列視覺化

為了更直觀地觀察每個變數的動態變化，將五個序列分別繪製成線圖。從圖中可以觀察到波動群聚 (volatility clustering) 的現象，即大的波動傾向於聚集在一起，特別是在金融危機等時期。

# --- 繪製個別線圖 ---

# ggplot2 需要 data.frame 格式，我們先將 xts 物件轉換回來
data_df <- data.frame(Date = index(data), coredata(data))

# 使用 tidyr::pivot_longer() 將寬數據轉換為長數據，方便 ggplot2 繪圖
data_long <- data_df %>%
  pivot_longer(cols = -Date, names_to = "Series", values_to = "Value")

# 使用 ggplot2 繪圖，並用 facet_wrap() 將每個序列分開
ggplot(data_long, aes(x = Date, y = Value, color = Series)) +
  geom_line(linewidth = 0.6) +
  facet_wrap(~ Series, scales = "free_y", ncol = 1) + # scales = "free_y" 讓每個圖有獨立的Y軸
  labs(
    title = "時間序列圖",
    subtitle = "數據已轉換為對數報酬率或一階差分",
    x = "日期",
    y = "數值 (報酬率/變動量)"
  ) +
  theme_minimal() +
  theme(legend.position = "none") # 因為每個圖都有標題了，所以隱藏圖例