全コードは、必要に応じて右上のタブで表示されたい。

準備

# setup library
Packages <- c("tidyverse", "DT", "corrplot", "modelr", "dygraphs", "tibbletime")
lapply(Packages, library, character.only = TRUE)

# import dataset
df_all <- read_rds("~/R/Musashi_University/2021_first_semester/exchage_forcast/Data/df_all.RDS") 

# log percent
df_all_log <- df_all %>% 
  mutate(
    exchange_rate = close,
    exchange_log = log(exchange_rate) - log(lag(exchange_rate, 1L)),
    # employ_num_non_agr_log = log(employ_num_non_agr) - log(lag(employ_num_non_agr, 1L)),
    # bop_log = log(bop) - log(lag(bop, 1L)),
    # security_invest_to_us_log = log(security_invest_to_us) - log(lag(security_invest_to_us, 1L)),
    resid_num_log = log(resid_num) - log(lag(resid_num, 1L))
    )

データセット

今回は、外為ドットコム各国経済指標データを参照した。変数の解説は、経済指標 用語解説を参照されたい。分析の都合上、一部データを変更している。円ドル為替のデータは、日本銀行のデータベースを参照している。コロナのアメリカの罹患者数推移は、Johns Hopkinsのデータベースを利用した。

  • open: 始値
  • high: 高値
  • low: 安値
  • close: 終値
  • volume: 取引量
  • GDP: 四半期データを各月に振り分けている。
DT::datatable(df_all, 
              rownames = FALSE, 
              # colnames = c("年", "月", "失業率", "非農業部門雇用者数", "貿易収支", "対米証券投資",
                           # "小売売上高", "住宅着工件数", "住宅建設許可件数", "ミシガン大消費者信頼感指数",
                           # "消費者信頼感指数", "生産者物価指数", "消費者物価指数", "GDP", "耐久財受注",
                           # "ISM製造", "鉱工業生産指数", "製造業受注"),
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'excel', "csv"),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "source: ")

データの確認

# creat `xts` object for dygraph()
df_tem <- df_all_log %>% 
  select(date, 
         exchange_log) %>% 
  timetk::tk_xts() 
  
dygraph(df_tem,
        main = "JPY/$") %>% dyRangeSelector()

各変数の相関関係

全期間(2004~2021)

mrates_long <- df_all[, c(9, 11:length(df_all))] %>% 
  na.omit() %>% 
  cor() %>% 
  round(digits = 2)

corrplot(mrates_long, 
         type = "lower", # 下半分を表示
         method = "shade", # 四角いパネルで表示
         shade.col = NA, # パネルに政府を示す斜線を加えない
         tl.col = "black", # textlabelの色を変える
         tl.srt = 45, # 上部テキストラベルの角度を変更
         addCoef.col = "black", # 相関係数を黒で表示
         cl.pos = "n", # 凡例を表示させない
         order = "original") # "hclust", "AOE", "FPC"

過去3年(2019~2021)

df_tem <- df_all %>% filter(year >= 2019)

mrates_long <- df_tem[, c(9, 11:length(df_tem))] %>% 
  na.omit() %>% 
  cor() %>% 
  round(digits = 2)

corrplot(mrates_long, 
         type = "lower", # 下半分を表示
         method = "shade", # 四角いパネルで表示
         shade.col = NA, # パネルに政府を示す斜線を加えない
         tl.col = "black", # textlabelの色を変える
         tl.srt = 45, # 上部テキストラベルの角度を変更
         addCoef.col = "black", # 相関係数を黒で表示
         cl.pos = "n", # 凡例を表示させない
         order = "original") # "hclust", "AOE", "FPC"

過去1年(2020~2021)

df_tem <- df_all %>% filter(year >= 2021)

mrates_long <- df_tem[, c(9, 11:length(df_tem))] %>% 
  na.omit() %>% 
  cor() %>% 
  round(digits = 2)

corrplot(mrates_long, 
         type = "lower", # 下半分を表示
         method = "shade", # 四角いパネルで表示
         shade.col = NA, # パネルに政府を示す斜線を加えない
         tl.col = "black", # textlabelの色を変える
         tl.srt = 45, # 上部テキストラベルの角度を変更
         addCoef.col = "black", # 相関係数を黒で表示
         cl.pos = "n", # 凡例を表示させない
         order = "original") # "hclust", "AOE", "FPC"

重回帰分析

変数選択

全期間(2004~2021)

# set a model
mod1 <- lm(exchange_log ~ -exchange_rate +., data = df_all_log[,11:length(df_all_log)] %>% na.omit())

# select variables via AIC
stmod1 <- step(mod1)
# show in a table
library(gtsummary) #http://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html
tbl_regression(stmod1) %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption("stmod1")
stmod1
Characteristic Beta 95% CI1 p-value
bop 0.00 0.00, 0.00 0.011
consumer_confidence 0.00 0.00, 0.00 0.002
ism 0.00 0.00, 0.00 0.008
exchange_rate 0.00 0.00, 0.00 <0.001

1 CI = Confidence Interval

過去3年(2019~2021)

df_tem2 <- df_all_log %>% filter(year >= 2019)

# set a model
mod2 <- lm(exchange_log ~ -exchange_rate +., data = df_tem2[,11:length(df_tem2)] %>% na.omit())

# select variables via AIC
stmod2 <- step(mod2)
# show in a table
library(gtsummary) #http://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html
tbl_regression(stmod2) %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption("stmod1")
stmod1
Characteristic Beta 95% CI1 p-value
employ_num_non_agr 0.00 0.00, 0.00 <0.001
bop 0.00 0.00, 0.00 0.002
retail_sales 0.00 0.00, 0.00 <0.001
resid_num 0.00 0.00, 0.00 0.046
resid_num_permit 0.00 0.00, 0.00 <0.001
consumer_confidence 0.00 0.00, 0.00 0.039
price_level_producer 0.00 0.00, 0.00 0.11
gdp 0.00 0.00, 0.00 0.015
ism 0.00 0.00, 0.00 0.002
exchange_rate 0.00 0.00, 0.00 <0.001

1 CI = Confidence Interval

過去1年(2019~2021)

df_tem3 <- df_all_log %>% filter(year >= 2021)

# set a model
mod3 <- lm(exchange_log ~ -exchange_rate +., data = df_tem3[,11:length(df_tem3)] %>% na.omit())

# select variables via AIC
stmod3 <- step(mod3)
# show in a table
library(gtsummary) #http://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html
tbl_regression(stmod3) %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption("stmod1")
stmod1
Characteristic Beta 95% CI1 p-value
uemploy_rate 0.05 0.02, 0.07 <0.001
exchange_rate 0.00 0.00, 0.00 <0.001
resid_num_log 0.04 0.00, 0.09 0.074

1 CI = Confidence Interval

誤差

全期間(2004~2021)

g1 <- df_all_log %>% 
  gather_residuals(model1 = stmod1) %>%
  ggplot(aes(date, resid, colour = model)) + 
  geom_ref_line(h = 0) +
  geom_line(alpha = 0.75) +
  scale_x_date(breaks = seq(as.Date("2004-01-04"), as.Date("2021-03-30"), by = "3 years"),
               labels = scales::date_format("%Y %b")) +
  labs(x = "Year",
       y = "Residuals") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1),
        legend.position = "none") 
  
plotly::ggplotly(g1)  

過去3年(2019~2021)

g2 <- df_tem2 %>% 
  gather_residuals(model2 = stmod2) %>%
  ggplot(aes(date, resid, colour = model)) + 
  geom_ref_line(h = 0) +
  geom_line(alpha = 0.75) +
  scale_x_date(breaks = seq(as.Date("2004-01-04"), as.Date("2021-03-30"), by = "3 years"),
               labels = scales::date_format("%Y %b")) +
  labs(x = "Year",
       y = "Residuals") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1),
        legend.position = "none") 
  
plotly::ggplotly(g2)  

過去1年(2019~2021)

g3 <- df_tem3 %>% 
  gather_residuals(model3 = stmod3) %>%
  ggplot(aes(date, resid, colour = model)) + 
  geom_ref_line(h = 0) +
  geom_line(alpha = 0.75) +
  scale_x_date(breaks = seq(as.Date("2004-01-04"), as.Date("2021-03-30"), by = "3 years"),
               labels = scales::date_format("%Y %b")) +
  labs(x = "Year",
       y = "Residuals") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1),
        legend.position = "none") 
  
plotly::ggplotly(g3)