全コードは、必要に応じて右上のタブで表示されたい。

Setup library

# setup library
Packages <- c("tidyverse", "tidyquant", "lubridate", "ggtext", "DT")
lapply(Packages, library, character.only = TRUE)

# set the theme of the graphs
themes <- theme(plot.title = element_text(family = "TT Times New Roman"),
        plot.subtitle = element_text(family = "TT Times New Roman")) +
  theme_minimal()

Importing data

# 以下のURLからコロナに関わる世界のデータを取得
url <- "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
df <- read.csv(url)

# 為替データを取得
getFX("USD/JPY")
## [1] "USD/JPY"

Checking data

# 生データの確認
glimpse(df)
## Rows: 91,482
## Columns: 59
## $ iso_code                              <chr> "AFG", "AFG", "AFG", "AFG", "AFG~
## $ continent                             <chr> "Asia", "Asia", "Asia", "Asia", ~
## $ location                              <chr> "Afghanistan", "Afghanistan", "A~
## $ date                                  <chr> "2020-02-24", "2020-02-25", "202~
## $ total_cases                           <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 2, 4, 4,~
## $ new_cases                             <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,~
## $ new_cases_smoothed                    <dbl> NA, NA, NA, NA, NA, 0.143, 0.143~
## $ total_deaths                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths                            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed                   <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ total_cases_per_million               <dbl> 0.026, 0.026, 0.026, 0.026, 0.02~
## $ new_cases_per_million                 <dbl> 0.026, 0.000, 0.000, 0.000, 0.00~
## $ new_cases_smoothed_per_million        <dbl> NA, NA, NA, NA, NA, 0.004, 0.004~
## $ total_deaths_per_million              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_per_million                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_deaths_smoothed_per_million       <dbl> NA, NA, NA, NA, NA, 0, 0, 0, 0, ~
## $ reproduction_rate                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ icu_patients_per_million              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients                         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ hosp_patients_per_million             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions                 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_icu_admissions_per_million     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ weekly_hosp_admissions_per_million    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests                             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests                           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_tests_per_thousand              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_per_thousand                <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_tests_smoothed_per_thousand       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ positive_rate                         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_per_case                        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_units                           <chr> "", "", "", "", "", "", "", "", ~
## $ total_vaccinations                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated                     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations                      <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ total_vaccinations_per_hundred        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_vaccinated_per_hundred         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ people_fully_vaccinated_per_hundred   <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ new_vaccinations_smoothed_per_million <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ stringency_index                      <dbl> 8.33, 8.33, 8.33, 8.33, 8.33, 8.~
## $ population                            <dbl> 38928341, 38928341, 38928341, 38~
## $ population_density                    <dbl> 54.422, 54.422, 54.422, 54.422, ~
## $ median_age                            <dbl> 18.6, 18.6, 18.6, 18.6, 18.6, 18~
## $ aged_65_older                         <dbl> 2.581, 2.581, 2.581, 2.581, 2.58~
## $ aged_70_older                         <dbl> 1.337, 1.337, 1.337, 1.337, 1.33~
## $ gdp_per_capita                        <dbl> 1803.987, 1803.987, 1803.987, 18~
## $ extreme_poverty                       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ cardiovasc_death_rate                 <dbl> 597.029, 597.029, 597.029, 597.0~
## $ diabetes_prevalence                   <dbl> 9.59, 9.59, 9.59, 9.59, 9.59, 9.~
## $ female_smokers                        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ male_smokers                          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ handwashing_facilities                <dbl> 37.746, 37.746, 37.746, 37.746, ~
## $ hospital_beds_per_thousand            <dbl> 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.~
## $ life_expectancy                       <dbl> 64.83, 64.83, 64.83, 64.83, 64.8~
## $ human_development_index               <dbl> 0.511, 0.511, 0.511, 0.511, 0.51~
skimr::skim(df)
Data summary
Name df
Number of rows 91482
Number of columns 59
_______________________
Column type frequency:
character 5
numeric 54
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
iso_code 0 1 3 8 0 227 0
continent 0 1 0 13 4345 7 0
location 0 1 4 32 0 227 0
date 0 1 10 10 0 512 0
tests_units 0 1 0 15 42216 5 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
total_cases 2746 0.97 923120.57 6375811.50 1.00 1065.00 11722.00 124355.00 1.684087e+08 ▇▁▁▁▁
new_cases 2747 0.97 6044.20 38138.28 -74347.00 2.00 71.00 785.00 9.059920e+05 ▇▁▁▁▁
new_cases_smoothed 3754 0.96 6058.36 37795.22 -6223.00 7.14 87.71 821.89 8.263743e+05 ▇▁▁▁▁
total_deaths 12612 0.86 24956.50 148686.74 1.00 48.00 334.00 3250.00 3.498443e+06 ▇▁▁▁▁
new_deaths 12454 0.86 141.95 779.65 -1918.00 0.00 2.00 18.00 1.790600e+04 ▇▁▁▁▁
new_deaths_smoothed 3754 0.96 126.67 718.57 -232.14 0.00 1.29 14.14 1.443629e+04 ▇▁▁▁▁
total_cases_per_million 3221 0.96 11321.09 21387.23 0.00 222.25 1524.81 11221.25 1.769365e+05 ▇▁▁▁▁
new_cases_per_million 3222 0.96 75.86 196.36 -2153.44 0.20 7.88 70.34 1.829368e+04 ▇▁▁▁▁
new_cases_smoothed_per_million 4224 0.95 76.02 156.50 -276.82 1.22 10.45 78.87 4.083500e+03 ▇▁▁▁▁
total_deaths_per_million 13074 0.86 247.54 432.80 0.00 7.33 44.32 270.45 3.066350e+03 ▇▁▁▁▁
new_deaths_per_million 12916 0.86 1.52 3.94 -76.44 0.00 0.12 1.34 2.183300e+02 ▁▇▁▁▁
new_deaths_smoothed_per_million 4224 0.95 1.35 2.93 -10.92 0.00 0.15 1.27 6.314000e+01 ▇▁▁▁▁
reproduction_rate 17931 0.80 1.01 0.35 -0.01 0.85 1.01 1.18 5.820000e+00 ▇▃▁▁▁
icu_patients 82301 0.10 1099.07 2989.73 0.00 32.00 186.00 730.00 2.888900e+04 ▇▁▁▁▁
icu_patients_per_million 82301 0.10 27.07 27.82 0.00 4.69 17.85 42.22 1.925500e+02 ▇▂▁▁▁
hosp_patients 80062 0.12 4839.96 12270.47 0.00 118.00 704.00 2860.25 1.332140e+05 ▇▁▁▁▁
hosp_patients_per_million 80062 0.12 175.96 215.95 0.00 23.44 85.84 261.50 1.532570e+03 ▇▂▁▁▁
weekly_icu_admissions 90652 0.01 281.46 581.27 0.00 8.99 50.82 226.75 4.002460e+03 ▇▁▁▁▁
weekly_icu_admissions_per_million 90652 0.01 21.40 36.50 0.00 1.95 9.70 23.93 2.787600e+02 ▇▁▁▁▁
weekly_hosp_admissions 90036 0.02 3753.31 11215.81 0.00 51.62 342.28 1872.33 1.169380e+05 ▇▁▁▁▁
weekly_hosp_admissions_per_million 90036 0.02 115.23 222.09 0.00 9.42 45.18 134.02 2.656910e+03 ▇▁▁▁▁
new_tests 50265 0.45 45194.20 166267.61 -239172.00 1510.00 5729.00 22577.00 2.945871e+06 ▇▁▁▁▁
total_tests 50575 0.45 6664407.78 29694859.27 0.00 137806.00 691604.00 2907745.00 4.410178e+08 ▇▁▁▁▁
total_tests_per_thousand 50575 0.45 260.22 586.13 0.00 12.35 61.63 252.18 7.617470e+03 ▇▁▁▁▁
new_tests_per_thousand 50265 0.45 1.99 5.72 -23.01 0.13 0.58 1.86 3.270900e+02 ▇▁▁▁▁
new_tests_smoothed 43746 0.52 43052.29 151692.05 0.00 1566.75 5935.50 24943.00 2.016058e+06 ▇▁▁▁▁
new_tests_smoothed_per_thousand 43746 0.52 1.90 4.78 0.00 0.13 0.59 1.92 9.210000e+01 ▇▁▁▁▁
positive_rate 46518 0.49 0.09 0.10 0.00 0.02 0.05 0.13 7.400000e-01 ▇▂▁▁▁
tests_per_case 47115 0.48 160.69 875.71 1.30 7.70 17.90 55.90 4.425870e+04 ▇▁▁▁▁
total_vaccinations 79120 0.14 20221676.17 98196738.43 0.00 80341.00 637332.00 3829414.75 1.774959e+09 ▇▁▁▁▁
people_vaccinated 79884 0.13 11580265.62 50452895.02 0.00 64645.25 468206.00 2641097.00 7.963728e+08 ▇▁▁▁▁
people_fully_vaccinated 82501 0.10 6352067.31 26712026.02 1.00 32862.00 270828.00 1544760.00 4.074801e+08 ▇▁▁▁▁
new_vaccinations 81101 0.11 511114.65 2205693.12 0.00 3693.00 22132.00 114676.00 3.934053e+07 ▇▁▁▁▁
new_vaccinations_smoothed 70458 0.23 250477.11 1446598.90 0.00 852.00 6460.50 38876.50 3.038243e+07 ▇▁▁▁▁
total_vaccinations_per_hundred 79120 0.14 17.37 25.12 0.00 1.47 7.17 22.66 2.261400e+02 ▇▁▁▁▁
people_vaccinated_per_hundred 79884 0.13 11.91 15.77 0.00 1.26 5.30 16.27 1.157300e+02 ▇▁▁▁▁
people_fully_vaccinated_per_hundred 82501 0.10 6.75 10.97 0.00 0.60 2.76 7.98 1.104100e+02 ▇▁▁▁▁
new_vaccinations_smoothed_per_million 70458 0.23 2983.87 4516.83 0.00 328.75 1480.00 4020.00 1.187590e+05 ▇▁▁▁▁
stringency_index 14019 0.85 58.69 21.28 0.00 44.44 60.19 75.00 1.000000e+02 ▁▃▆▇▃
population 607 0.99 126417004.80 684839386.23 809.00 2351625.00 9904608.00 34813867.00 7.794799e+09 ▇▁▁▁▁
population_density 6398 0.93 380.47 1771.88 0.14 36.25 83.48 209.59 2.054677e+04 ▇▁▁▁▁
median_age 9337 0.90 30.58 9.13 15.10 22.20 29.90 39.10 4.820000e+01 ▇▆▇▆▆
aged_65_older 10265 0.89 8.80 6.22 1.14 3.47 6.61 14.31 2.705000e+01 ▇▃▂▂▁
aged_70_older 9793 0.89 5.57 4.24 0.53 2.04 3.87 9.17 1.849000e+01 ▇▃▂▂▁
gdp_per_capita 9120 0.90 19298.69 19958.74 661.24 4466.51 12951.84 27216.44 1.169356e+05 ▇▂▁▁▁
extreme_poverty 35754 0.61 13.38 19.95 0.10 0.60 2.20 21.20 7.760000e+01 ▇▁▁▁▁
cardiovasc_death_rate 8931 0.90 258.06 118.86 79.37 167.29 242.65 329.64 7.244200e+02 ▇▇▃▁▁
diabetes_prevalence 7045 0.92 7.88 4.07 0.99 5.31 7.11 10.08 3.053000e+01 ▇▇▂▁▁
female_smokers 26852 0.71 10.54 10.42 0.10 1.90 6.30 19.30 4.400000e+01 ▇▂▂▁▁
male_smokers 27801 0.70 32.67 13.47 7.70 21.60 31.40 41.10 7.810000e+01 ▆▇▆▂▁
handwashing_facilities 50004 0.45 50.88 31.76 1.19 20.86 49.84 83.24 1.000000e+02 ▇▅▅▅▇
hospital_beds_per_thousand 16208 0.82 3.03 2.46 0.10 1.30 2.40 3.86 1.380000e+01 ▇▃▂▁▁
life_expectancy 4615 0.95 73.23 7.56 53.28 67.92 74.62 78.74 8.675000e+01 ▂▃▅▇▅
human_development_index 8753 0.90 0.73 0.15 0.39 0.60 0.75 0.85 9.600000e-01 ▂▅▅▇▇

Cleaning data

# コロナに関するデータを日本とアメリカに絞る
jpn_us <- df %>% 
  mutate(date = ymd(date)) %>% 
  filter(location %in% c("Japan", "United States"))


# コロナに関するデータを為替データを結合
df_jpn_us <- USDJPY %>% 
  as_tibble() %>% 
  mutate(date = index(USDJPY) %>% ymd()) %>% 
  left_join(jpn_us, by = "date") %>% 
  select(date, USD.JPY, location, starts_with(c("new", "total"))) %>% 
  arrange(date)



# データテーブル一覧
DT::datatable(df_jpn_us, rownames = FALSE,
              filter = "top",
              options = list(pageLength = 5, autoWidth = TRUE))
# 変数一覧
names(df_jpn_us)
##  [1] "date"                                 
##  [2] "USD.JPY"                              
##  [3] "location"                             
##  [4] "new_cases"                            
##  [5] "new_cases_smoothed"                   
##  [6] "new_deaths"                           
##  [7] "new_deaths_smoothed"                  
##  [8] "new_cases_per_million"                
##  [9] "new_cases_smoothed_per_million"       
## [10] "new_deaths_per_million"               
## [11] "new_deaths_smoothed_per_million"      
## [12] "new_tests"                            
## [13] "new_tests_per_thousand"               
## [14] "new_tests_smoothed"                   
## [15] "new_tests_smoothed_per_thousand"      
## [16] "new_vaccinations"                     
## [17] "new_vaccinations_smoothed"            
## [18] "new_vaccinations_smoothed_per_million"
## [19] "total_cases"                          
## [20] "total_deaths"                         
## [21] "total_cases_per_million"              
## [22] "total_deaths_per_million"             
## [23] "total_tests"                          
## [24] "total_tests_per_thousand"             
## [25] "total_vaccinations"                   
## [26] "total_vaccinations_per_hundred"

Visualizing data

ggplot(df_jpn_us, aes(x = new_cases, y = USD.JPY, colour = location)) +
  geom_path(aes(alpha = 0.01, size = date)) +
  geom_point(alpha = 0.25) +
  geom_smooth(method = "lm", se = FALSE, alpha = 0.1, size = 0.1) +
  scale_x_continuous(labels = scales::comma) +
  scale_color_discrete("国") +
  scale_size(range = c(0.01, 1)) +
  labs(title = "コロナと為替データの相関関係",
       subtitle = stringr::str_wrap("日本のコロナの新感染者数と円ドル為替の関係はなさそうだが、
                                    アメリカでのコロナ罹患者数とは、負の相関があることが確認される。", 40),
       x = "新たな感染者数",
       y = "円ドルレート(円/$)") +
  guides(size = FALSE, alpha = FALSE) +
  theme_minimal()

Modeling

以下では、アメリカと日本の新規罹患者数と円ドル相場の単回帰分析を行った。

mod_us <- with(df_jpn_us %>% filter(location == "United States"), lm(USD.JPY ~ new_cases))
mod_ja <- with(df_jpn_us %>% filter(location == "Japan"), lm(USD.JPY ~ new_cases))

(result_ja <- summary(mod_ja))
## 
## Call:
## lm(formula = USD.JPY ~ new_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.6721 -2.5305 -0.2674  2.3963  4.1222 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.066e+02  3.789e-01 281.412   <2e-16 ***
## new_cases   -1.903e-05  1.008e-04  -0.189    0.851    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.496 on 177 degrees of freedom
## Multiple R-squared:  0.0002012,  Adjusted R-squared:  -0.005447 
## F-statistic: 0.03562 on 1 and 177 DF,  p-value: 0.8505
(result_us <- summary(mod_us))
## 
## Call:
## lm(formula = USD.JPY ~ new_cases)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2539 -0.9387  0.1516  0.8537  3.2091 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.097e+02  1.795e-01  611.22   <2e-16 ***
## new_cases   -2.820e-05  1.346e-06  -20.95   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.338 on 177 degrees of freedom
## Multiple R-squared:  0.7127, Adjusted R-squared:  0.711 
## F-statistic:   439 on 1 and 177 DF,  p-value: < 2.2e-16

結果として、アメリカの新規感染者数は、円ドル相場に影響するが、日本の新規感染者数は、円ドル相場との関係が薄いことが分かった。

99%有意であったアメリカのケースでは、以下の式が得られる。

\[ USDJPY = 109.6921469 + -2.8196006\times 10^{-5}X + \varepsilon \\ \]

但し、アメリカ経済や利子率が為替相場と新規感染者数両方に関係している可能性が高い為、多重共線性についての分析も必要である。