table2 %>% spread(type, count) %>%
mutate(rate = cases/population * 10000) %>%
gather(key = type, value = count, cases, population) %>%
arrange(country, year) %>% select(country, year, type, count, rate) %>% kable
| country | year | type | count | rate |
|---|---|---|---|---|
| Afghanistan | 1999 | cases | 745 | 0.372741 |
| Afghanistan | 1999 | population | 19987071 | 0.372741 |
| Afghanistan | 2000 | cases | 2666 | 1.294466 |
| Afghanistan | 2000 | population | 20595360 | 1.294466 |
| Brazil | 1999 | cases | 37737 | 2.193931 |
| Brazil | 1999 | population | 172006362 | 2.193931 |
| Brazil | 2000 | cases | 80488 | 4.612363 |
| Brazil | 2000 | population | 174504898 | 4.612363 |
| China | 1999 | cases | 212258 | 1.667495 |
| China | 1999 | population | 1272915272 | 1.667495 |
| China | 2000 | cases | 213766 | 1.669488 |
| China | 2000 | population | 1280428583 | 1.669488 |
table4a_1 <- table4a %>% gather(year, cases, `1999`, `2000`)
table4b_1 <- table4b %>% gather(year, population, `1999`, `2000`)
table4_1 <- left_join(table4a_1, table4b_1) %>% mutate(rate = cases/population * 10000)
## Joining, by = c("country", "year")
table4_1 %>% select(-population) %>% spread(year, cases) %>% kable
| country | rate | 1999 | 2000 |
|---|---|---|---|
| Afghanistan | 0.372741 | 745 | NA |
| Afghanistan | 1.294466 | NA | 2666 |
| Brazil | 2.193931 | 37737 | NA |
| Brazil | 4.612363 | NA | 80488 |
| China | 1.667495 | 212258 | NA |
| China | 1.669488 | NA | 213766 |
table4_1 %>% select(-cases) %>% spread(year, population) %>% kable
| country | rate | 1999 | 2000 |
|---|---|---|---|
| Afghanistan | 0.372741 | 19987071 | NA |
| Afghanistan | 1.294466 | NA | 20595360 |
| Brazil | 2.193931 | 172006362 | NA |
| Brazil | 4.612363 | NA | 174504898 |
| China | 1.667495 | 1272915272 | NA |
| China | 1.669488 | NA | 1280428583 |
table2 %>% filter(type == "cases") %>%
ggplot(aes(year, count)) +
geom_line(aes(group = country), color = "grey50") +
geom_point(aes(color = country))
stocks <- tibble(
year = c(2015, 2015, 2016, 2016),
half = c( 1, 2, 1, 2),
return = c(1.88, 0.59, 0.92, 0.17)
)
stocks %>%
spread(year, return) %>%
gather("year", "return", `2015`:`2016`)
## # A tibble: 4 x 3
## half year return
## <dbl> <chr> <dbl>
## 1 1 2015 1.88
## 2 2 2015 0.59
## 3 1 2016 0.92
## 4 2 2016 0.17
yearがchrになってる。
spread
This is useful if the value column was a mix of variables that was coerced to a string.
gather
This is useful if the column types are actually numeric, integer, or logical.
table4a %>% gather(1999, 2000, key = "year", value = "cases")
1999番目のcolumnとして解釈されているから。
people <- tribble(
~name, ~key, ~value,
#-----------------|--------|------
"Phillip Woods", "age", 45,
"Phillip Woods", "height", 186,
"Phillip Woods", "age", 50,
"Jessica Cordero", "age", 37,
"Jessica Cordero", "height", 156
)
“Phillip Woods”のageが2つあるので失敗する
被っている2つめを削除
people %>% distinct(name, key, .keep_all = TRUE) %>%
spread(key, value) %>% kable
| name | age | height |
|---|---|---|
| Jessica Cordero | 37 | 156 |
| Phillip Woods | 45 | 186 |
idをつける
people <- tribble(
~name, ~key, ~value, ~id,
#-----------------|--------|------|----|
"Phillip Woods", "age", 45, 1,
"Phillip Woods", "height", 186, 1,
"Phillip Woods", "age", 50, 2,
"Jessica Cordero", "age", 37, 3,
"Jessica Cordero", "height", 156, 3,
)
spread(people, key, value) %>% kable
| name | id | age | height |
|---|---|---|---|
| Jessica Cordero | 3 | 37 | 156 |
| Phillip Woods | 1 | 45 | 186 |
| Phillip Woods | 2 | 50 | NA |
preg <- tribble(
~pregnant, ~male, ~female,
"yes", NA, 10,
"no", 20, 12
)
preg %>% gather(key = sex, value = count, male, female)
## # A tibble: 4 x 3
## pregnant sex count
## <chr> <chr> <dbl>
## 1 yes male NA
## 2 no male 20
## 3 yes female 10
## 4 no female 12
tb1 <- tibble(x = c("a,b,c", "d,e,f,g", "h,i,j"))
tb1 %>% separate(x, c("one", "two", "three")) %>% kable
## Warning: Expected 3 pieces. Additional pieces discarded in 1 rows [2].
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | f |
| h | i | j |
tb1 %>% separate(x, c("one", "two", "three"), extra = "warn") %>% kable #drop
## Warning: Expected 3 pieces. Additional pieces discarded in 1 rows [2].
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | f |
| h | i | j |
tb1 %>% separate(x, c("one", "two", "three"), extra = "drop") %>% kable
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | f |
| h | i | j |
tb1 %>% separate(x, c("one", "two", "three"), extra = "merge") %>% kable
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | f,g |
| h | i | j |
tb2 <- tibble(x = c("a,b,c", "d,e", "f,g,i"))
tb2 %>% separate(x, c("one", "two", "three")) %>% kable
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 1 rows [2].
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | NA |
| f | g | i |
tb2 %>% separate(x, c("one", "two", "three"), fill = "warn") %>% kable #right
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 1 rows [2].
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | NA |
| f | g | i |
tb2 %>% separate(x, c("one", "two", "three"), fill = "right") %>% kable
| one | two | three |
|---|---|---|
| a | b | c |
| d | e | NA |
| f | g | i |
tb2 %>% separate(x, c("one", "two", "three"), fill = "left") %>% kable
| one | two | three |
|---|---|---|
| a | b | c |
| NA | d | e |
| f | g | i |
元データをのこしておきたいときにFALSEにする
spreadのfillは単一の値だが、completeのfillはcolumn別に指定できる。
stocks %>% spread(year, return) %>%
complete(qtr, fill = list(`2015` = 12, `2016` = 30)) %>% kable
| qtr | 2015 | 2016 |
|---|---|---|
| 1 | 1.88 | 30.00 |
| 2 | 0.59 | 0.92 |
| 3 | 0.35 | 0.17 |
| 4 | 12.00 | 2.66 |
上向きか下向きか。
treatment %>%
fill(person, .direction = "up")
## # A tibble: 4 x 3
## person treatment response
## <chr> <dbl> <dbl>
## 1 Derrick Whitmore 1 7
## 2 Katherine Burke 2 10
## 3 Katherine Burke 3 9
## 4 Katherine Burke 1 4
complete(who5, country, year, type, sex, age) %>%
count(year, isna = is.na(cases)) %>% filter(isna) %>%
ggplot(aes(year, n)) +
geom_line()
調査範囲が変わっていそう。
separateがうまくいかない
who3 %>% count(country)
## # A tibble: 219 x 2
## country n
## <chr> <int>
## 1 Afghanistan 244
## 2 Albania 448
## 3 Algeria 224
## 4 American Samoa 172
## 5 Andorra 387
## 6 Angola 270
## 7 Anguilla 155
## 8 Antigua and Barbuda 346
## 9 Argentina 448
## 10 Armenia 461
## # … with 209 more rows
who3 %>% count(country, iso2, iso3)
## # A tibble: 219 x 4
## country iso2 iso3 n
## <chr> <chr> <chr> <int>
## 1 Afghanistan AF AFG 244
## 2 Albania AL ALB 448
## 3 Algeria DZ DZA 224
## 4 American Samoa AS ASM 172
## 5 Andorra AD AND 387
## 6 Angola AO AGO 270
## 7 Anguilla AI AIA 155
## 8 Antigua and Barbuda AG ATG 346
## 9 Argentina AR ARG 448
## 10 Armenia AM ARM 461
## # … with 209 more rows
who_count1 <- who5 %>% count(iso3, year, wt = cases) %>%
rename(cases = n) %>% complete(iso3, year)
library(rnaturalearth)
library(rnaturalearthdata)
world <- ne_countries(scale = "medium", returnclass = "sf")
プロットできなかった国(isoコードが合わなかった)
| iso3 |
|---|
| ANT |
| BES |
| SCG |
| TKL |
| TUV |
library(gganimate)
library(wbstats)
pop_data <- wb(indicator = "SP.POP.TOTL", startdate = 1980, enddate = 2013) %>% as_tibble() %>%
mutate(year = as.integer(date)) %>%
select(iso3 = iso3c, year, population = value) %>%
complete(iso3, year)
who_world <- world %>% right_join(who_count1, by = c("iso_a3" = "iso3"))
who_world_pop <- who_world %>% left_join(pop_data, by = c("iso_a3" = "iso3", "year" = "year"))
who_world_pop %>% filter(year == 1980) %>%
ggplot() + geom_sf(aes(fill = cases / population))
who_world_pop %>% filter(year == 1995) %>%
ggplot() + geom_sf(aes(fill = cases / population))
who_world_pop %>% filter(year == 2013) %>%
ggplot() + geom_sf(aes(fill = cases / population))
p <- ggplot(data = who_world_pop) +
geom_sf(aes(fill = cases / population))
## ani <- p + transition_time(who_world_pop$year,
## range = range(pull(who_world_pop, year)) ) +
## labs(title = "Year: {frame_time}")