library(tidyverse)
library(stargazer)

Q8.1

「2. 行のソート: arrange()」を参考にして、次の問題にこたえなさい
分析には衆議院選挙データセット ( hr96-21.csv ) を使うこと
表示する変数は次の 7 つに限ること
(1) year
(2) pref
(3) kun
(4) seito
(5) j_name
(6) vote
(7) voteshare

Q1:

  • 2021年総選挙の立候補者の中で、獲得した票数の多い順に並べ、トップ10人の候補者名を挙げなさい
# Attach DT so that datatable() can be used for the interactive tables below.
library(DT)

# Import the House of Representatives election data set.
hr <- read_csv(file = "data/hr96-21.csv")

# Print the structure of the imported data (column names, types, first values).
str(hr)
spc_tbl_ [9,660 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ year         : num [1:9660] 1996 1996 1996 1996 1996 ...
 $ pref         : chr [1:9660] "愛知" "愛知" "愛知" "愛知" ...
 $ ku           : chr [1:9660] "aichi" "aichi" "aichi" "aichi" ...
 $ kun          : num [1:9660] 1 1 1 1 1 1 1 2 2 2 ...
 $ wl           : num [1:9660] 1 0 0 0 0 0 0 1 0 2 ...
 $ rank         : num [1:9660] 1 2 3 4 5 6 7 1 2 3 ...
 $ nocand       : num [1:9660] 7 7 7 7 7 7 7 8 8 8 ...
 $ seito        : chr [1:9660] "新進" "自民" "民主" "共産" ...
 $ j_name       : chr [1:9660] "河村たかし" "今枝敬雄" "佐藤泰介" "岩中美保子" ...
 $ gender       : chr [1:9660] "male" "male" "male" "female" ...
 $ name         : chr [1:9660] "KAWAMURA, TAKASHI" "IMAEDA, NORIO" "SATO, TAISUKE" "IWANAKA, MIHOKO" ...
 $ previous     : num [1:9660] 2 2 2 0 0 0 0 2 0 0 ...
 $ age          : num [1:9660] 47 72 53 43 51 51 45 51 71 30 ...
 $ exp          : chr [1:9660] "9828097" "9311555" "9231284" "2177203" ...
 $ status       : num [1:9660] 1 2 1 0 0 0 0 1 2 0 ...
 $ vote         : num [1:9660] 66876 42969 33503 22209 616 ...
 $ voteshare    : num [1:9660] 40 25.7 20.1 13.3 0.4 0.3 0.2 32.9 26.4 25.7 ...
 $ eligible     : num [1:9660] 346774 346774 346774 346774 346774 ...
 $ turnout      : num [1:9660] 49.2 49.2 49.2 49.2 49.2 49.2 49.2 51.8 51.8 51.8 ...
 $ seshu_dummy  : chr [1:9660] "0" "0" "0" "0" ...
 $ jiban_seshu  : chr [1:9660] "." "." "." "." ...
 $ nojiban_seshu: chr [1:9660] "." "." "." "." ...
 - attr(*, "spec")=
  .. cols(
  ..   year = col_double(),
  ..   pref = col_character(),
  ..   ku = col_character(),
  ..   kun = col_double(),
  ..   wl = col_double(),
  ..   rank = col_double(),
  ..   nocand = col_double(),
  ..   seito = col_character(),
  ..   j_name = col_character(),
  ..   gender = col_character(),
  ..   name = col_character(),
  ..   previous = col_double(),
  ..   age = col_double(),
  ..   exp = col_character(),
  ..   status = col_double(),
  ..   vote = col_double(),
  ..   voteshare = col_double(),
  ..   eligible = col_double(),
  ..   turnout = col_double(),
  ..   seshu_dummy = col_character(),
  ..   jiban_seshu = col_character(),
  ..   nojiban_seshu = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
# Q1: top 10 candidates of the 2021 general election by number of votes.

# Keep only the columns needed to answer the question.
hr_a <- hr |>
  select(year, pref, kun, seito, j_name, vote)

# All 2021 candidates, sorted from the most to the fewest votes.
hr_2021a <- hr_a |>
  filter(year == 2021) |>
  arrange(desc(vote))

# The question asks for the top 10 only, so display just the first 10 rows
# of the sorted table (hr_2021a itself still holds every 2021 candidate).
hr_2021a |>
  slice_head(n = 10) |>
  datatable()

Q2:

  • 2021年総選挙の立候補者の中で、獲得した得票率の大きい順に並べ、トップ10人の候補者名を挙げなさい
# Q2: top 10 candidates of the 2021 general election by vote share.

# Keep only the columns needed to answer the question.
hr_b <- hr |>
  select(year, pref, kun, seito, j_name, voteshare)

# All 2021 candidates, sorted from the largest to the smallest vote share.
hr_2021b <- hr_b |>
  filter(year == 2021) |>
  arrange(desc(voteshare))

# The question asks for the top 10 only, so display just the first 10 rows
# of the sorted table (hr_2021b itself still holds every 2021 candidate).
hr_2021b |>
  slice_head(n = 10) |>
  datatable()

Q8.2

「7.3 separate()」を参考にして、次の問題にこたえなさい
データ COVID19_Worldwide.csv を使う

変数名 詳細
ID ID
Country 国名
Date 年月日
Confirmed_Day COVID-19 新規感染者数(人)/ 一日あたり
Confirmed_Total COVID-19 累積感染者数(人)総合
Death_Day COVID-19 新規死亡者数(人) 一日あたり
Death_Total COVID-19 累積死亡者数(人)総合
Test_Day COVID-19 新規検査数(人) 一日あたり
Test_Total COVID-19 累積検査数(人)総合

  • 2020年 (1月22日〜7月10日)の国別「累積検査数」を x 軸、「累積感染者数」を y 軸に設定した散布図を描きなさい

Q1:

  • 2020年 (1月22日〜7月10日)の国別「累積検査数」を x 軸、「累積感染者数」を y 軸に設定した散布図を描きなさい
    ・外れ値があれば、外れ値を除外した散布図を示しなさい
# stargazer is used below to print a descriptive-statistics table.
library(stargazer)

# Import the worldwide COVID-19 data. guess_max raises the number of rows
# readr inspects when guessing each column's type.
covid_df <- read_csv(
  "data/COVID19_Worldwide.csv",
  guess_max = 10000
)

# Restrict the data to the country, the date and the two cumulative counts.
df_a <- select(covid_df, Country, Date, Confirmed_Total, Test_Total)

# Interactive view of the selected columns.
DT::datatable(df_a)

# Descriptive statistics as an HTML table; stargazer is given a plain
# data.frame rather than a tibble.
df_a |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)

<table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Statistic</td><td>N</td><td>Mean</td><td>St. Dev.</td><td>Min</td><td>Max</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Confirmed_Total</td><td>31,806</td><td>18,250.14</td><td>115,471.60</td><td>0</td><td>3,184,582</td></tr>
<tr><td style="text-align:left">Test_Total</td><td>8,686</td><td>559,831.30</td><td>2,146,252.00</td><td>1</td><td>39,011,749</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr></table>
# Split the Date string on "/" into separate Year, Month and Day columns.
df_a <- df_a |>
  separate(col = "Date",
           into = c("Year", "Month", "Day"),
           sep = "/")

# Largest non-missing value of x, or NA when every value is missing.
# Guards against max() returning -Inf (with a warning) for all-NA input.
cumulative_max <- function(x) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  max(x, na.rm = TRUE)
}

# One row per country and year.
# Test_Total and Confirmed_Total are already cumulative counts, so the value
# for the period is the largest value reached, not the sum of the daily
# snapshots (summing running totals counts the same cases many times over).
# Countries that never report a test count get NA rather than 0.
test_country <- df_a |>
  group_by(Country, Year) |>
  summarise(Test = cumulative_max(Test_Total),
            Infected = cumulative_max(Confirmed_Total),
            .groups = "drop")

# Interactive view of the per-country totals.
DT::datatable(test_country)

# Descriptive statistics of the per-country totals as an HTML table.
stargazer(as.data.frame(test_country),
          type = "html",
          digits = 2)

<table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Statistic</td><td>N</td><td>Mean</td><td>St. Dev.</td><td>Min</td><td>Max</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Test</td><td>186</td><td>26,143,522.00</td><td>109,131,592.00</td><td>0</td><td>1,079,925,840</td></tr>
<tr><td style="text-align:left">Infected</td><td>186</td><td>3,120,774.00</td><td>13,105,502.00</td><td>731</td><td>160,231,690</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr></table>
# Scatter plot of all countries: cumulative tests on the x axis and
# cumulative confirmed cases on the y axis, as the question specifies.
# A linear fit is overlaid and each point is labelled with its country.
plot_1 <- test_country |>
  ggplot(aes(x = Test, y = Infected)) +
  geom_point() +
  stat_smooth(method = "lm") +
  ggrepel::geom_text_repel(aes(label = Country),
                           size = 3) +
  labs(x = "累積検査数", y = "Covid19累積感染者数")
plot_1

# Same scatter plot with the outlier (United States) removed:
# cumulative tests on the x axis and cumulative confirmed cases on the y axis,
# as the question specifies.
plot_2 <- test_country |>
  filter(Country != "United States") |>
  ggplot(aes(x = Test, y = Infected)) +
  geom_point() +
  stat_smooth(method = "lm") +
  ggrepel::geom_text_repel(aes(label = Country),
                           size = 3) +
  labs(x = "累積検査数", y = "Covid19累積感染者数")
plot_2