library(tidyverse)
library(stargazer)
「2. 行のソート: arrange()」を参考にして、次の問題にこたえなさい
分析には衆議院選挙データセット (hr96-21.csv) を使うこと
表示する変数は次の 7 つに限ること
(1) year
(2) pref
(3) kun
(4) seito
(5) j_name
(6) vote
(7) voteshare
# Read the House of Representatives election data set.
hr <- readr::read_csv("data/hr96-21.csv")
# DT supplies datatable() for the interactive tables further down.
library(DT)
# Print the column names and types of the loaded data.
hr |> str()
spc_tbl_ [9,660 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ year : num [1:9660] 1996 1996 1996 1996 1996 ...
$ pref : chr [1:9660] "愛知" "愛知" "愛知" "愛知" ...
$ ku : chr [1:9660] "aichi" "aichi" "aichi" "aichi" ...
$ kun : num [1:9660] 1 1 1 1 1 1 1 2 2 2 ...
$ wl : num [1:9660] 1 0 0 0 0 0 0 1 0 2 ...
$ rank : num [1:9660] 1 2 3 4 5 6 7 1 2 3 ...
$ nocand : num [1:9660] 7 7 7 7 7 7 7 8 8 8 ...
$ seito : chr [1:9660] "新進" "自民" "民主" "共産" ...
$ j_name : chr [1:9660] "河村たかし" "今枝敬雄" "佐藤泰介" "岩中美保子" ...
$ gender : chr [1:9660] "male" "male" "male" "female" ...
$ name : chr [1:9660] "KAWAMURA, TAKASHI" "IMAEDA, NORIO" "SATO, TAISUKE" "IWANAKA, MIHOKO" ...
$ previous : num [1:9660] 2 2 2 0 0 0 0 2 0 0 ...
$ age : num [1:9660] 47 72 53 43 51 51 45 51 71 30 ...
$ exp : chr [1:9660] "9828097" "9311555" "9231284" "2177203" ...
$ status : num [1:9660] 1 2 1 0 0 0 0 1 2 0 ...
$ vote : num [1:9660] 66876 42969 33503 22209 616 ...
$ voteshare : num [1:9660] 40 25.7 20.1 13.3 0.4 0.3 0.2 32.9 26.4 25.7 ...
$ eligible : num [1:9660] 346774 346774 346774 346774 346774 ...
$ turnout : num [1:9660] 49.2 49.2 49.2 49.2 49.2 49.2 49.2 51.8 51.8 51.8 ...
$ seshu_dummy : chr [1:9660] "0" "0" "0" "0" ...
$ jiban_seshu : chr [1:9660] "." "." "." "." ...
$ nojiban_seshu: chr [1:9660] "." "." "." "." ...
- attr(*, "spec")=
.. cols(
.. year = col_double(),
.. pref = col_character(),
.. ku = col_character(),
.. kun = col_double(),
.. wl = col_double(),
.. rank = col_double(),
.. nocand = col_double(),
.. seito = col_character(),
.. j_name = col_character(),
.. gender = col_character(),
.. name = col_character(),
.. previous = col_double(),
.. age = col_double(),
.. exp = col_character(),
.. status = col_double(),
.. vote = col_double(),
.. voteshare = col_double(),
.. eligible = col_double(),
.. turnout = col_double(),
.. seshu_dummy = col_character(),
.. jiban_seshu = col_character(),
.. nojiban_seshu = col_character()
.. )
- attr(*, "problems")=<externalptr>
# Keep the identifying columns plus the raw vote count.
hr_a <- select(
  hr,
  all_of(c("year", "pref", "kun", "seito", "j_name", "vote"))
)

# Restrict to the 2021 election, then order from most to fewest votes.
hr_2021a <- filter(hr_a, year == 2021)
hr_2021a <- arrange(hr_2021a, desc(vote))

# Show the sorted result as an interactive table.
DT::datatable(hr_2021a)
# Keep the identifying columns plus the vote share.
hr_b <- select(
  hr,
  all_of(c("year", "pref", "kun", "seito", "j_name", "voteshare"))
)

# Restrict to the 2021 election, then order from highest to lowest share.
hr_2021b <- filter(hr_b, year == 2021)
hr_2021b <- arrange(hr_2021b, desc(voteshare))

# Show the sorted result as an interactive table.
DT::datatable(hr_2021b)
「7.3 separate()」を参考にして、次の問題にこたえなさい
データ COVID19_Worldwide.csv を使う
| 変数名 | 詳細 |
|---|---|
| ID | ID |
| Country | 国名 |
| Date | 年月日 |
| Confirmed_Day | COVID-19 新規感染者数(人)/ 一日あたり |
| Confirmed_Total | COVID-19 累積感染者数(人)総合 |
| Death_Day | COVID-19 新規死亡者数(人) 一日あたり |
| Death_Total | COVID-19 累積死亡者数(人)総合 |
| Test_Day | COVID-19 新規検査数(人) 一日あたり |
| Test_Total | COVID-19 累積検査数(人)総合 |
「累積検査数」を x 軸、「累積感染者数」を y
軸に設定した散布図を描きなさい
# Load the worldwide COVID-19 data set.
# The assignment must start on its own line: when the instruction text above
# is fused onto it, R parses the whole run as one identifier and `covid_df`
# is never created.
# guess_max raises the number of rows readr inspects when guessing column
# types (presumably needed because some columns are mostly missing -- confirm
# against the CSV).
covid_df <- read_csv(
  "data/COVID19_Worldwide.csv",
  guess_max = 10000
)
# Narrow the COVID-19 data to country, date, and the two cumulative counts.
df_a <- select(
  covid_df,
  all_of(c("Country", "Date", "Confirmed_Total", "Test_Total"))
)

# Show the selected columns as an interactive table.
df_a |> DT::datatable()
library(stargazer)
# Descriptive statistics of df_a rendered as an HTML table.
# stargazer is handed a plain data.frame rather than the tibble.
df_a |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)
<table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Statistic</td><td>N</td><td>Mean</td><td>St. Dev.</td><td>Min</td><td>Max</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Confirmed_Total</td><td>31,806</td><td>18,250.14</td><td>115,471.60</td><td>0</td><td>3,184,582</td></tr>
<tr><td style="text-align:left">Test_Total</td><td>8,686</td><td>559,831.30</td><td>2,146,252.00</td><td>1</td><td>39,011,749</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr></table>
# Split the slash-delimited Date column into Year, Month and Day columns.
df_a <- tidyr::separate(
  df_a,
  col = "Date",
  into = c("Year", "Month", "Day"),
  sep = "/"
)
# Collapse a cumulative series to its latest total.
# Test_Total and Confirmed_Total are running totals, so the largest observed
# value is the total for the period; summing the daily rows would count every
# earlier day again. Returns NA when nothing was reported, because max() on an
# all-missing vector would warn and return -Inf.
latest_total <- function(x) {
  if (all(is.na(x))) {
    return(NA_real_)
  }
  max(x, na.rm = TRUE)
}

# One row per country and year holding the cumulative number of tests and of
# confirmed cases. Missing values are handled the same way for both columns,
# and `.groups = "drop"` returns an ungrouped tibble so later steps do not
# inherit the Country grouping.
# Countries with no reported tests get NA (previously 0) and are dropped by
# ggplot2 with a warning.
test_country <- df_a |>
  group_by(Country, Year) |>
  summarise(
    Test = latest_total(Test_Total),
    Infected = latest_total(Confirmed_Total),
    .groups = "drop"
  )

# Show the per-country summary as an interactive table.
DT::datatable(test_country)
# Descriptive statistics of the per-country summary as an HTML table.
# stargazer is handed a plain data.frame rather than the tibble.
test_country |>
  as.data.frame() |>
  stargazer(type = "html", digits = 2)
<table style="text-align:center"><tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Statistic</td><td>N</td><td>Mean</td><td>St. Dev.</td><td>Min</td><td>Max</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Test</td><td>186</td><td>26,143,522.00</td><td>109,131,592.00</td><td>0</td><td>1,079,925,840</td></tr>
<tr><td style="text-align:left">Infected</td><td>186</td><td>3,120,774.00</td><td>13,105,502.00</td><td>731</td><td>160,231,690</td></tr>
<tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr></table>
# Scatter plot of cumulative confirmed cases against cumulative tests for all
# countries, with a linear fit and non-overlapping country labels.
# The task statement puts cumulative infections on the y axis, so Test is
# mapped to x and Infected to y, and the axis labels follow that mapping.
plot_1 <- test_country |>
  ggplot(aes(x = Test, y = Infected)) +
  geom_point() +
  stat_smooth(method = "lm") +
  ggrepel::geom_text_repel(aes(label = Country), size = 3) +
  labs(x = "累積検査数", y = "Covid19累積感染者数")
plot_1
# Same scatter plot with the United States excluded.
# The task statement puts cumulative infections on the y axis, so Test is
# mapped to x and Infected to y, and the axis labels follow that mapping.
plot_2 <- test_country |>
  filter(Country != "United States") |>
  ggplot(aes(x = Test, y = Infected)) +
  geom_point() +
  stat_smooth(method = "lm") +
  ggrepel::geom_text_repel(aes(label = Country), size = 3) +
  labs(x = "累積検査数", y = "Covid19累積感染者数")
plot_2