1 Our World in Dataのデータ、棒グラフ(barplot)

# Read the workbook covid2022G7.xlsx into a tibble and show its first six rows.
library(readxl)
covid2022G7 <- read_excel(path = "covid2022G7.xlsx")
head(covid2022G7, n = 6L)
## # A tibble: 6 × 11
##    year iso_code location       total_cases total_deaths total_cases_per_million
##   <dbl> <chr>    <chr>                <dbl>        <dbl>                   <dbl>
## 1  2022 CAN      Canada             3901597        41311                 101461.
## 2  2022 DEU      Germany           25033970       135942                 300276.
## 3  2022 FRA      France            27217194       143694                 401357.
## 4  2022 GBR      United Kingdom    24024746       212296                 355875.
## 5  2022 ITA      Italy             16954784       164976                 287187.
## 6  2022 JPN      Japan              2682767        18767                  21644.
## # ℹ 5 more variables: total_deaths_per_million <dbl>, aged_65_older <dbl>,
## #   gdp_per_capita <dbl>, hospital_beds_per_thousand <dbl>,
## #   life_expectancy <dbl>
# Bar chart of the total_deaths column, with each bar labelled by its iso_code.
barplot(
  height = covid2022G7$total_deaths,
  names.arg = covid2022G7$iso_code
)

2 ヒストグラム作成 (hist)

# Read covid2022.xlsx and draw one histogram per variable of interest.
library(readxl)
covid2022 <- read_excel(path = "covid2022.xlsx")

# Distribution of deaths per million
hist(x = covid2022$total_deaths_per_million)

# Distribution of GDP per capita
hist(x = covid2022$gdp_per_capita)

# Distribution of total cases
hist(x = covid2022$total_cases)

3 回帰分析(lm)、散布図(plot)、回帰直線(abline)

# Regression analysis: regress total_deaths_per_million on aged_65_older.
# The columns are referenced with `$` inside the formula, which is why the
# printed coefficient is labelled "covid2022$aged_65_older".
ols <- lm(
  formula = covid2022$total_deaths_per_million ~ covid2022$aged_65_older
)
summary(ols)
## 
## Call:
## lm(formula = covid2022$total_deaths_per_million ~ covid2022$aged_65_older)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3118.5  -494.6  -293.0   483.0  5222.0 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                82.03     126.53   0.648    0.518    
## covid2022$aged_65_older   124.98      11.94  10.466   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 997.9 on 184 degrees of freedom
##   (42 observations deleted due to missingness)
## Multiple R-squared:  0.3731, Adjusted R-squared:  0.3697 
## F-statistic: 109.5 on 1 and 184 DF,  p-value: < 2.2e-16
# Scatter plot: aged_65_older on the x-axis, total_deaths_per_million on the y-axis
plot(
  x = covid2022$aged_65_older,
  y = covid2022$total_deaths_per_million
)
# Add the fitted regression line to the scatter plot
abline(reg = ols)

4 厚労省オープンデータ

# Read the CSV deaths_cumulative_daily.csv; column types are guessed by readr,
# which prints the column specification message that follows.
library(readr)
deaths_cumulative_daily <- read_csv(file = "deaths_cumulative_daily.csv")
## Rows: 943 Columns: 49
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Date
## dbl (48): ALL, Hokkaido, Aomori, Iwate, Miyagi, Akita, Yamagata, Fukushima, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the daily table.
head(deaths_cumulative_daily, n = 6L)
## # A tibble: 6 × 49
##   Date        ALL Hokkaido Aomori Iwate Miyagi Akita Yamagata Fukushima Ibaraki
##   <chr>     <dbl>    <dbl>  <dbl> <dbl>  <dbl> <dbl>    <dbl>     <dbl>   <dbl>
## 1 2020/5/9    613       48      0     0      1     0        0         0       9
## 2 2020/5/10   621       51      0     0      1     0        0         0       9
## 3 2020/5/11   643       56      0     0      1     0        0         0       9
## 4 2020/5/12   668       62      0     0      1     0        0         0       9
## 5 2020/5/13   687       68      0     0      1     0        0         0       9
## 6 2020/5/14   710       70      0     0      1     0        0         0       9
## # ℹ 39 more variables: Tochigi <dbl>, Gunma <dbl>, Saitama <dbl>, Chiba <dbl>,
## #   Tokyo <dbl>, Kanagawa <dbl>, Niigata <dbl>, Toyama <dbl>, Ishikawa <dbl>,
## #   Fukui <dbl>, Yamanashi <dbl>, Nagano <dbl>, Gifu <dbl>, Shizuoka <dbl>,
## #   Aichi <dbl>, Mie <dbl>, Shiga <dbl>, Kyoto <dbl>, Osaka <dbl>, Hyogo <dbl>,
## #   Nara <dbl>, Wakayama <dbl>, Tottori <dbl>, Shimane <dbl>, Okayama <dbl>,
## #   Hiroshima <dbl>, Yamaguchi <dbl>, Tokushima <dbl>, Kagawa <dbl>,
## #   Ehime <dbl>, Kochi <dbl>, Fukuoka <dbl>, Saga <dbl>, Nagasaki <dbl>, …
# Number of observations in the data
dim(deaths_cumulative_daily)
## [1] 943  49
nrow(deaths_cumulative_daily)
## [1] 943
# Keep only the rows for three dates: 2020/12/31, 2021/12/31 and 2022/12/7.
# Date is a character column, so the comparison is against the exact strings.
death <- subset(
  deaths_cumulative_daily,
  Date %in% c("2020/12/31", "2021/12/31", "2022/12/7")
)

head(death, n = 6L)
## # A tibble: 3 × 49
##   Date         ALL Hokkaido Aomori Iwate Miyagi Akita Yamagata Fukushima Ibaraki
##   <chr>      <dbl>    <dbl>  <dbl> <dbl>  <dbl> <dbl>    <dbl>     <dbl>   <dbl>
## 1 2020/12/31  3459      453      8    24     16     1        6        20      36
## 2 2021/12/31 18385     1475     38    53    118    27       56       176     220
## 3 2022/12/7  50819     3476    423   291    554   337      268       365     817
## # ℹ 39 more variables: Tochigi <dbl>, Gunma <dbl>, Saitama <dbl>, Chiba <dbl>,
## #   Tokyo <dbl>, Kanagawa <dbl>, Niigata <dbl>, Toyama <dbl>, Ishikawa <dbl>,
## #   Fukui <dbl>, Yamanashi <dbl>, Nagano <dbl>, Gifu <dbl>, Shizuoka <dbl>,
## #   Aichi <dbl>, Mie <dbl>, Shiga <dbl>, Kyoto <dbl>, Osaka <dbl>, Hyogo <dbl>,
## #   Nara <dbl>, Wakayama <dbl>, Tottori <dbl>, Shimane <dbl>, Okayama <dbl>,
## #   Hiroshima <dbl>, Yamaguchi <dbl>, Tokushima <dbl>, Kagawa <dbl>,
## #   Ehime <dbl>, Kochi <dbl>, Fukuoka <dbl>, Saga <dbl>, Nagasaki <dbl>, …

5 データのlong形式への変形 (pivot_longer)

library(tidyr)
# Prefix every column name with "pref_" so that the columns to reshape can be
# selected with starts_with("pref_").
names(death) <- paste0("pref_", names(death))
# The date column must not be reshaped: copy it to the un-prefixed name `date`
# and remove the prefixed copy.
death[["date"]] <- death[["pref_Date"]]
death[["pref_Date"]] <- NULL
# Wide -> long: one row per (date, column), with the former column name stored
# in `pref` and its value in `death`.
deathlong <- pivot_longer(
  data = death,
  cols = starts_with("pref_"),
  names_to = "pref",
  values_to = "death"
)
# Drop the rows that came from the ALL column (presumably the nationwide total).
deathlong <- subset(deathlong, pref != "pref_ALL")

6 記述統計、文字列の切り出し(str_sub)

library(stringr)
# Strip the "pref_" prefix from `pref` to obtain the bare name in `pref2`.
# The start position is derived from the prefix itself, and end = -1L means
# "through the last character", so names of any length are kept whole
# (a fixed end position of 20 would cut off anything longer than 15 characters).
deathlong$pref2 <- str_sub(
  deathlong$pref,
  start = nchar("pref_") + 1L,
  end = -1L
)

# Descriptive statistics for every column of the long table
summary(deathlong)
##      date               pref               death           pref2          
##  Length:141         Length:141         Min.   :   0.0   Length:141        
##  Class :character   Class :character   1st Qu.:  30.0   Class :character  
##  Mode  :character   Mode  :character   Median : 136.0   Mode  :character  
##                                        Mean   : 515.3                     
##                                        3rd Qu.: 453.0                     
##                                        Max.   :6793.0
# Preview the first six rows of the long table.
head(deathlong, n = 6L)
## # A tibble: 6 × 4
##   date       pref          death pref2   
##   <chr>      <chr>         <dbl> <chr>   
## 1 2020/12/31 pref_Hokkaido   453 Hokkaido
## 2 2020/12/31 pref_Aomori       8 Aomori  
## 3 2020/12/31 pref_Iwate       24 Iwate   
## 4 2020/12/31 pref_Miyagi      16 Miyagi  
## 5 2020/12/31 pref_Akita        1 Akita   
## 6 2020/12/31 pref_Yamagata     6 Yamagata

7 COVID19の感染者数と死者数の関係を表す散布図

# Read covid.xlsx and plot total_deaths (y) against total_cases (x).
library(readxl)
covid <- read_excel(path = "covid.xlsx")
plot(
  x = covid$total_cases,
  y = covid$total_deaths
)

8 COVID19の感染者数と死者数の関係を表す散布図&回帰直線

# Read covid.xlsx again so this section can be run on its own, then regress
# total_deaths on total_cases and print the fit summary.
library(readxl)
covid <- read_excel(path = "covid.xlsx")
result <- lm(
  formula = total_deaths ~ total_cases,
  data = covid
)
summary(result)
## 
## Call:
## lm(formula = total_deaths ~ total_cases, data = covid)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -179516  -45731     150   41216  328532 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -4.670e+04  5.107e+04  -0.914    0.379    
## total_cases  1.202e-02  1.623e-03   7.406  8.2e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133600 on 12 degrees of freedom
## Multiple R-squared:  0.8205, Adjusted R-squared:  0.8056 
## F-statistic: 54.86 on 1 and 12 DF,  p-value: 8.203e-06
# Scatter plot of total_deaths against total_cases, with the fitted line drawn on top.
plot(
  x = covid$total_cases,
  y = covid$total_deaths
)
abline(reg = result)

9 ggplot GUI ローカル

  • library(ggplotgui)
  • ggplot_shiny()

10 ggplot GUI オンライン

ggplot
ggplot
ggplot
ggplot
ggplot
ggplot

10.1 データ名称をdfに変更しておく。

# Read covid.xlsx and expose it under the name `df`, the data frame name that
# the ggplot GUI generated code refers to.
library(readxl)
covid <- read_excel(path = "covid.xlsx")
df <- covid

10.2 ggplot GUIで出力されるコードをコピー&ペーストして、グラフを再現。

  • library("ggplot2")が必要。
  • library("plotly")も必要。
## Code exported by the ggplot GUI to reproduce the graph.
## `df` must be replaced with the name of your own data frame
## unless the data is already stored under that name.

# Package required for the static graph:
library(ggplot2)

# Scatter plot: total_cases on x, total_deaths on y, points coloured by iso_code.
graph <- ggplot(
  data = df,
  mapping = aes(x = total_cases, y = total_deaths, colour = iso_code)
) +
  geom_point() +
  theme_bw()
graph

# Package required for the interactive version of the graph:
library(plotly)
ggplotly(graph)
# Save the graph as a 14 cm x 14 cm PDF file:
ggsave(
  filename = "my_graph.pdf",
  plot = graph,
  width = 14,
  height = 14,
  units = "cm"
)

11 致死率(=死者数/感染者数)と一人あたりGDPの関係を調べる

library(readxl)
covid <- read_excel(path = "covid.xlsx")
# Case fatality rate = total deaths / total cases, stored as a new column.
covid[["cfr"]] <- covid[["total_deaths"]] / covid[["total_cases"]]
# las = 2 rotates the x-axis labels by 90 degrees.
# cex.names = 0.80 makes the x-axis labels smaller.
barplot(
  height = covid$cfr,
  names.arg = covid$iso_code,
  cex.names = 0.80,
  las = 2
)

# Regress the case fatality rate on GDP per capita and print the fit summary.
result <- lm(
  formula = cfr ~ gdp_per_capita,
  data = covid
)
summary(result)
## 
## Call:
## lm(formula = cfr ~ gdp_per_capita, data = covid)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.007992 -0.004165 -0.001411  0.003943  0.010690 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1.823e-02  4.014e-03   4.542 0.000675 ***
## gdp_per_capita -2.848e-07  1.095e-07  -2.602 0.023148 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006014 on 12 degrees of freedom
## Multiple R-squared:  0.3607, Adjusted R-squared:  0.3074 
## F-statistic: 6.769 on 1 and 12 DF,  p-value: 0.02315
# Scatter plot of cfr against gdp_per_capita, with the fitted line drawn on top.
plot(
  x = covid$gdp_per_capita,
  y = covid$cfr
)
abline(reg = result)