全コードは、必要に応じて右上のタブで表示されたい。

Preparation

Set up the library

# setup library
Packages <- c("tidyverse","quantmod", "countrycode", "WDI", "plotly", "patchwork",
              "dygraphs", "corrplot", "tidyquant", "timetk", "modelr")
lapply(Packages, library, character.only = TRUE)

Exchage rate from Bank of Japan (OHLC)

rates_boj <- read_csv("C:/Users/rikit/Documents/R/Musashi_University/2021_first_semester/exchage_forcast/Data/nme_R031.1110.20210513135227.02.csv") %>%
  `colnames<-`(c("Date", "Open", "High", "Low", "Close", "Volume")) %>% 
  na.omit() %>% 
  read.zoo() %>% 
  as.xts()

DT::datatable(rates_boj %>% 
                tk_tbl(preserve_index = TRUE, rename_index = "date") %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              # colnames = c("gdp" = "gdp($mil)"),
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted from Bank of Japan. You can explore the data by using 'search bar' on the top right.")

Economic data from World Bank

# set timeframe 
start_year <- 1970
end_year <- 2021

# countrycode 
names <- c("Japan", "USA", "China", "Great Britain")
iso3c_names <- countrycode::countrycode(sourcevar = names,
                                        origin = "country.name",
                                        destination = "iso3c")

# unemployment, gdp data, and gdp growth rate, real interest rate, inflation (GDP deflator), central government debt, balance of payment, official exchange rate
input <- c("SL.UEM.TOTL.ZS", "NY.GDP.MKTP.CD", "NY.GDP.MKTP.KD.ZG", "FR.INR.RINR", "NY.GDP.DEFL.KD.ZG", "GC.DOD.TOTL.GD.ZS", "BN.CAB.XOKA.CD", "PA.NUS.FCRF")

# import data from world bank database by using `WDI` package
df_worldbank <- input %>%
  purrr::map(~WDI(country = iso3c_names,
                  indicator = ., 
                  start = start_year,
                  end = end_year)) %>%
  reduce(merge) %>% 
  setNames(c("iso2c", "country", "year", "unemployment", "gdp", "gdp_growth", "interest_rate", "inflation", "government_debt", "bop", "exchange_rate")) %>% 
  as_tibble()

DT::datatable(df_worldbank %>% 
                mutate(gdp = gdp / 1e6) %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              # colnames = c("gdp" = "gdp($mil)"),
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted from World Bank. You can explore the data by using 'search bar' on the top right.")

Exchange rate from European Central Bank

# importing
url <- "http://www.ecb.europa.eu/stats/eurofxref/eurofxref-hist.zip"
download.file(url, "eurofxref-hist.zip")
rates <- read_csv(unz("eurofxref-hist.zip", "eurofxref-hist.csv"), na = "N/A")

# tidy
rates_na <- rates %>% 
  mutate(USDJPY = JPY / USD, # JPY against USD
         USDEUR = 1 / USD, # EUR against USD
         USDCNY = CNY / USD, # CNY (yuan) against USD
         USDGBP = GBP / USD, # GBP (Pond sterling) against USD
  ) %>% 
  tk_xts()

# fill na
rates_locf <- na.locf(rates_na)

DT::datatable(rates %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted from European Central Bank (ECB). You can explore the data by using 'search bar' on the top right.")

Exchange rate within 180 days with quantmod

currency <- c("USD/JPY", "USD/EUR", "USD/CNY", "USD/GBP")

df_ex_rate <- purrr::map(currency, ~getFX(.x, 
                                  auto.assign = FALSE)) %>%
  reduce(merge) 
DT::datatable(df_ex_rate %>% 
                tk_tbl(preserve_index = TRUE) %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted through quantmod. You can explore the data by using 'search bar' on the top right.")

Covid-19 cases

url <- "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
covid_19 <- read_csv(url) %>% 
  mutate(date = ymd(date))

df_covid_19 <- covid_19 %>% 
  filter(location == c("Japan",  "United States")) %>% 
  select(date, location, total_cases, new_cases, total_deaths, new_deaths, total_cases_per_million, new_cases_per_million, total_deaths_per_million, new_deaths_per_million, reproduction_rate, icu_patients, icu_patients_per_million)

DT::datatable(df_covid_19 %>% 
                mutate_if(is.numeric, ~round(., digits = 2)), 
              rownames = FALSE, 
              extensions = 'Buttons',
              options = list(autoWidth = TRUE,
                             pageLength = 5,
                             dom = 'Bfrtip',
                             buttons = list('copy', 'print', 
                                            list(extend = 'collection',
                                                 buttons = c('csv', 'excel', 'pdf'),
                                                 text = 'Download')),
                             scrollX = TRUE,
                             scrollCollapse = TRUE),
              class = 'cell-border stripe',
              caption = "The data is frame extracted from World Bank. You can explore the data by using 'search bar' on the top right.")

Visualization

candleChart(rates_boj, subset = "2020-12::", type = "candles", major.ticks = "months",  
            theme = "white", TA = "addBBands();addVo();addCCI()")

Modeling

自己相関係数

自己相関係数は、その変数自身が過去データに依存しているかどうかを確認する指標である。

自己相関検定

今回は、自己相関のボックス＝ピアース（Box-Pierce)検定を行なう。\(p\leq0.05\)以下であれば、データに有意な自己相関があることを示し、\(p>0.05\)の場合は、自己相関がある証拠にはならない。

Box.test(df_ex_rate$USD.JPY)

## 
##  Box-Pierce test
## 
## data:  df_ex_rate$USD.JPY
## X-squared = 173.73, df = 1, p-value < 2.2e-16

プロット

acf(df_ex_rate$USD.JPY, type = "correlation", lag.max = 60, main = "自己相関系図：USDJPY")

2つの時系列データ間の遅れ相関を見つける

library(forecast)

usdjpy <- window(rates_boj, start = df_covid_19$date[1])[,4] %>% 
  as.ts()

covid <- df_covid_19 %>% 
  filter(location == "United States") %>% 
  .$new_cases %>% 
  as.ts

Ccf(usdjpy, covid, main = "USDJPY vs. Covid cases in the U.S.")

時系列データのトレンドを除去する

m <- lm(rates_boj$Close ~ index(rates_boj))
detr <- zoo(resid(m), index(rates_boj))
g1 <- ggplot2::autoplot(detr) +
  geom_hline(yintercept = 0, alpha =  0.25, linetype = "dashed") +
  labs(title = "plot without trend") +
  theme_bw()

g2 <- tk_tbl(rates_boj) %>% 
  ggplot(aes(index, Close)) +
  geom_line() +
  labs(title = "plot with trend") +
  theme_bw()

g1 / g2

ARIMAモデルをフィットさせる

ARIMAモデルの作成手順

1.モデル字数を特定する
2.データにモデルをフィットさせ、次数を取得する。
3.診断法を適用し、モデルを検証する

m <-  auto.arima(df_ex_rate$USD.JPY)
autoplot(df_ex_rate$USD.JPY)

checkresiduals(m)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 6.7422, df = 8, p-value = 0.5647
## 
## Model df: 2.   Total lags used: 10

以下の検証から、円ドルに季節性はないと考えられる。

ARIMAモデルから予測する

fc_m <- forecast(m)
autoplot(fc_m) +
  scale_y_continuous(breaks = seq(102.5, 112.5, by = 0.25)) +
  coord_cartesian(xlim = c(150, 190), ylim = c(108.25, 111.75))

因果推論

重回帰分析：円ドル vs. 経済データ

# transform data set
df_lm <- 
  df_worldbank %>% 
  filter(country %in% c("Japan", "United States")) %>% 
  na.omit() 


# economic data frame for Japan
df_lm_ja <- split(df_lm, df_lm$country)[[1]][,3:11] %>% 
  mutate(exchange_return = log(exchange_rate) - log(lag(exchange_rate))) %>% 
  select(-exchange_rate, ) %>% 
  na.omit()

# economic data from for the US
df_lm_us <- split(df_lm, df_lm$country)[[2]][,3:10] %>% 
  `colnames<-`(c("year", "us.unemployment", "us.gdp", "us.gdp_growth", 
                 "us.interest_rate", "us.inflation", "us.government_debt", "us.bop"))

# merge two dataset
df_lm_merge <- left_join(df_lm_ja, df_lm_us, by = "year")

mod1 <- lm(exchange_return ~ ., data = df_lm_merge)
mod2 <- glm(exchange_return ~ ., data = df_lm_merge, family = gaussian)
# mod3 <- glm(exchange_rate ~ ., data = df_lm_merge, family = poisson)
# mod4 <- glm(exchange_return ~ ., data = df_lm_merge, family = Gamma)

stmod1 <- step(mod1)
stmod2 <- step(mod2)
# stmod3 <- step(mod3)
# stmod4 <- step(mod4)

# summary of the models


library(gtsummary) #http://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html
tbl_regression(stmod1) %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption("stmod1")

stmod1
Characteristic	Beta	95% CI¹	p-value
year	-0.06	-0.14, 0.02	0.12
unemployment	-0.20	-0.35, -0.05	0.017
gdp	0.00	0.00, 0.00	0.005
gdp_growth	0.02	0.01, 0.04	0.007
interest_rate	-0.24	-0.64, 0.15	0.2
inflation	-0.27	-0.65, 0.12	0.15
government_debt	0.01	-0.01, 0.02	0.2
bop	0.00	0.00, 0.00	0.009
us.unemployment	0.12	0.07, 0.18	0.001
us.interest_rate	0.04	0.00, 0.08	0.035
us.government_debt	-0.01	-0.01, 0.00	0.027
us.bop	0.00	0.00, 0.00	0.035
¹ CI = Confidence Interval

tbl_regression(stmod2) %>% 
  add_global_p() %>%
  bold_p(t = 0.05) %>% 
  modify_caption("stmod2")

stmod2
Characteristic	Beta	95% CI¹	p-value
year	-0.06	-0.12, 0.01	0.080
unemployment	-0.20	-0.32, -0.07	0.002
gdp	0.00	0.00, 0.00	<0.001
gdp_growth	0.02	0.01, 0.04	<0.001
interest_rate	-0.24	-0.57, 0.08	0.15
inflation	-0.27	-0.58, 0.05	0.10
government_debt	0.01	0.00, 0.02	0.15
bop	0.00	0.00, 0.00	<0.001
us.unemployment	0.12	0.08, 0.17	<0.001
us.interest_rate	0.04	0.01, 0.07	0.009
us.government_debt	-0.01	-0.01, 0.00	0.005
us.bop	0.00	0.00, 0.00	0.009
¹ CI = Confidence Interval

# summary(stmod3)
# summary(stmod4)

Visuallization

# plot residuals
df_lm_merge %>% 
  gather_residuals(stmod1 = stmod1, 
                   stmod2 = stmod2,
                   # resid4 = stmod4
                   ) %>% 
  ggplot(aes(year, resid, colour = model)) +
  geom_ref_line(h = 0) +
  geom_point() +
  geom_line() +
  facet_wrap(~model, ncol = 1) +
  labs(title = "Residuals b/w Two Models",
       x = "Year",
       y = "Residuals")

# plot prediction
df_lm_merge %>% 
  gather_predictions(stmod1 = stmod1,
                     stmod2 = stmod2,
                     # pred4 = stmod4
                     ) %>% 
  select(model, year, exchange_return, pred) %>%
  ggplot(aes(year, pred,  colour = model, group = model)) +
  geom_point() +
  geom_line(size = 0.1) +
  geom_point(aes(y = exchange_return), colour = "gray70") +
  geom_line(aes(y = exchange_return), colour = "gray70") +
  facet_wrap(~model, ncol = 1) +
  labs(title = "Predictions with two models",
       x = "Year",
       y = "Predicted Exchange Rate")

以上の2つのグラフから、残差にトレンドが発生していることが観察される。その為、次のステップとしては、日本経済の歴史を振り返りつつ、どのような出来事があったのかを史実に基づいて観察していこうと思う。

1997: アジア通貨危機
2000 ~ 2006: ＯECD加盟国の中で一人当たりGDPが下落（3位→18位）
2004:
2007: リーマンショック
2011: 東日本大震災による円の相対的現金需要高により、円高が進む。
2012: アベノミクスが始まる。
2015:
2016:

Models

Rikiya Honda

2021/06/19