table2のほうがマシ table2はyearでgroup_byしてしまえばいいが、table4だとそうはいかない
countryとyearでgroup_byする
Carefully consider the following example:
# Round-trip check: spread() the years into columns, then gather() them
# back into key/value form. `year` is built from numeric literals here, yet
# the printed result below reports it as <chr> after the round trip.
stocks <- tibble(
  year = rep(c(2015, 2016), each = 2),
  half = rep(c(1, 2), times = 2),
  return = c(1.88, 0.59, 0.92, 0.17)
)
gather(
  spread(stocks, key = year, value = return),
  key = "year",
  value = "return",
  `2015`:`2016`
)
## # A tibble: 4 x 3
## half year return
## <dbl> <chr> <dbl>
## 1 1 2015 1.88
## 2 2 2015 0.59
## 3 1 2016 0.92
## 4 2 2016 0.17
spread()でyearの値は列名になり、gather()で戻すときに列名は文字列として扱われるため、もとはdbl型だったyearがchr型になってしまう(gather()にconvert = TRUEを指定すれば型を推測し直せる)
# Intentionally failing call: the column names are written as bare numbers
# (1999, 2000) rather than backtick-quoted names. The "Position must be
# between 0 and n" error recorded below suggests they are being treated as
# column positions, not as names.
table4a %>%
gather(1999, 2000, key = "year", value = "cases")
> Error in inds_combine(.vars, ind_list) : Position must be between 0 and n
# Working version: backtick-quote the non-syntactic column names so that
# they are selected by name.
gather(table4a, key = "year", value = "cases", `1999`, `2000`)
## # A tibble: 6 x 3
## country year cases
## <chr> <chr> <int>
## 1 Afghanistan 1999 745
## 2 Brazil 1999 37737
## 3 China 1999 212258
## 4 Afghanistan 2000 2666
## 5 Brazil 2000 80488
## 6 China 2000 213766
# Key/value records for two people. "Phillip Woods" has two "age" rows
# (45 and 50), so the combination of name and key does not identify a
# single value.
people <- tribble(
~name, ~key, ~value,
#-----------------|--------|------
"Phillip Woods", "age", 45,
"Phillip Woods", "height", 186,
"Phillip Woods", "age", 50,
"Jessica Cordero", "age", 37,
"Jessica Cordero", "height", 156
)
# Intentionally failing call: spread() rejects the duplicated keys (see the
# error recorded below, which points at rows 1 and 3).
people %>%
spread(key = key, value = value)
> Error: Each row of output must be identified by a unique combination of keys. Keys are shared for 2 rows: * 1, 3 Do you need to create unique ID with tibble::rowid_to_column()?
Phillip Woodsは何歳かわからない
# Values cross-classified by pregnancy status (rows) and sex (columns);
# the pregnant/male cell is missing.
preg <- tibble(
  pregnant = c("yes", "no"),
  male = c(NA, 20),
  female = c(10, 12)
)
# Reshape so that each row is one sex and each pregnancy status becomes its
# own column, then give those columns descriptive names.
preg %>%
  gather(key = sex, value = population, male, female) %>%
  spread(key = pregnant, value = population) %>%
  rename(not_preg = no, preg = yes)
## # A tibble: 2 x 3
## sex not_preg preg
## <chr> <dbl> <dbl>
## 1 female 12 10
## 2 male 20 NA
# extra = "merge": when a value has more pieces than target columns, the
# surplus stays attached to the last column (row 2 ends in "f,g" below).
separate(
  tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")),
  col = x,
  into = c("one", "two", "three"),
  extra = "merge"
)
## # A tibble: 3 x 3
## one two three
## <chr> <chr> <chr>
## 1 a b c
## 2 d e f,g
## 3 h i j
# extra = "drop": when a value has more pieces than target columns, the
# surplus piece is discarded (the "g" of row 2 is gone below).
separate(
  tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")),
  col = x,
  into = c("one", "two", "three"),
  extra = "drop"
)
## # A tibble: 3 x 3
## one two three
## <chr> <chr> <chr>
## 1 a b c
## 2 d e f
## 3 h i j
# fill = "right": when a value has too few pieces, the missing ones are
# padded with NA on the right (row 2 becomes d / e / NA below).
separate(
  tibble(x = c("a,b,c", "d,e", "f,g,i")),
  col = x,
  into = c("one", "two", "three"),
  fill = "right"
)
## # A tibble: 3 x 3
## one two three
## <chr> <chr> <chr>
## 1 a b c
## 2 d e <NA>
## 3 f g i
# fill = "left": when a value has too few pieces, the missing ones are
# padded with NA on the left (row 2 becomes NA / d / e below).
separate(
  tibble(x = c("a,b,c", "d,e", "f,g,i")),
  col = x,
  into = c("one", "two", "three"),
  fill = "left"
)
## # A tibble: 3 x 3
## one two three
## <chr> <chr> <chr>
## 1 a b c
## 2 <NA> d e
## 3 f g i
# remove = FALSE keeps the original column `x` next to the new columns
# (the output below has four columns). Spelled out as FALSE rather than F,
# because F is an ordinary variable that can be reassigned.
tibble(x = c("a,b,c", "d,e,f", "g,h,i")) %>%
  separate(x, c("one", "two", "three"), remove = FALSE)
## # A tibble: 3 x 4
## x one two three
## <chr> <chr> <chr> <chr>
## 1 a,b,c a b c
## 2 d,e,f d e f
## 3 g,h,i g h i
extract() は正規表現のキャプチャグループで文字列を列に分けるのに対し、separate() は区切り文字(正規表現)または文字位置で分ける。分割する関数に複数の方式があるのに unite() が一つしかないのは、一つの文字列を区切る方法は複数あるが、複数の列をまとめる方法は一つしかないから。
complete()は変数の組み合わせをすべて列挙することで非明示的な欠損値を明示的なNAにするが、fill()は明示的な欠損値を隣接するセルの値で埋める
どの方向に接しているセルの値で欠損値を埋めるかを指定する。
na.rm = TRUE にしてしまうとexplicitだった欠損値がimplicitになってしまう。
# Tidy `who` WITHOUT first rewriting "newrel" to "new_rel", to see what goes
# wrong: keys such as "newrel_f65" contain only one "_", so separate() finds
# two pieces instead of three and emits the warning recorded below.
# na.rm is spelled TRUE (not T), since T is a reassignable variable.
who4 <- who %>%
  gather(
    new_sp_m014:newrel_f65,
    key = "key", value = "cases", na.rm = TRUE
  ) %>%
  separate(key, c("new", "type", "sexage"), sep = "_")
## Warning: Expected 3 pieces. Missing pieces filled with `NA` in 2580 rows
## [73467, 73468, 73469, 73470, 73471, 73472, 73473, 73474, 73475, 73476,
## 73477, 73478, 73479, 73480, 73481, 73482, 73483, 73484, 73485, 73486, ...].
# The row numbers in the warning above index the gathered and separated
# data (`who4`), not the original `who` table. Indexing `who` itself at
# 73467 only printed a single all-NA row, which says nothing about the
# warning, so look at the corresponding row of `who4` instead.
who4[73467, ]
newrelを含む行ではsexageにNAが挿入されてしまっている
# Check that country, iso2 and iso3 are redundant identifiers. If the number
# of distinct (country, iso2, iso3) combinations equals the number of
# distinct values of each column on its own, the three map one-to-one.
# The gathered identifier columns are computed once and reused for all four
# counts instead of repeating the same pipeline each time.
who_ids <- who %>%
  gather(
    new_sp_m014:newrel_f65,
    key = "key", value = "cases", na.rm = TRUE
  ) %>%
  select(country, iso2, iso3)

# Distinct combinations of all three identifiers.
who_ids %>%
  distinct() %>%
  nrow()
## [1] 219
# Distinct country names.
who_ids %>%
  distinct(country) %>%
  nrow()
## [1] 219
# Distinct iso2 codes.
who_ids %>%
  distinct(iso2) %>%
  nrow()
## [1] 219
# Distinct iso3 codes.
who_ids %>%
  distinct(iso3) %>%
  nrow()
## [1] 219
country, iso2, iso3は全て対応している
# Total cases per country (iso3), drawn on a world map.
# The summary column is given an explicit name (`sum_cases`) rather than
# the auto-generated, non-syntactic "sum(cases)", so the name passed to
# nameColumnToPlot is stable and matches the other summaries in this file.
# NOTE(review): joinCountryData2Map() / mapCountryData() look like rworldmap
# functions; the package is not loaded in the visible part of the file, so
# confirm it is attached elsewhere.
who %>%
  gather(
    new_sp_m014:newrel_f65,
    key = "key", value = "cases", na.rm = TRUE
  ) %>%
  mutate(key = stringr::str_replace(key, "newrel", "new_rel")) %>%
  separate(key, c("new", "type", "sexage"), sep = "_") %>%
  select(-new, -iso2) %>%
  separate(sexage, c("sex", "age"), sep = 1) %>%
  group_by(iso3) %>%
  summarise(sum_cases = sum(cases)) %>%
  joinCountryData2Map(joinCode = "ISO3", nameJoinColumn = "iso3") %>%
  mapCountryData(
    nameColumnToPlot = "sum_cases",
    catMethod = "fixedWidth",
    mapTitle = "TB cases of the World",
    addLegend = TRUE
  )
## 215 codes from your data successfully matched countries in the map
## 4 codes from your data failed to match with a country code in the map
## 28 codes from the map weren't represented in your data
# Tidy `who` once and reuse the result for the three plots below, instead
# of repeating the identical gather/mutate/separate pipeline for each plot.
# Steps: gather the case-count columns into key/cases (dropping NA cases),
# rewrite "newrel" to "new_rel" so every key has three "_"-separated parts,
# split the key, drop the redundant `new` and `iso2` columns, then split
# `sexage` after its first character into `sex` and `age`.
who_tidy <- who %>%
  gather(
    new_sp_m014:newrel_f65,
    key = "key", value = "cases", na.rm = TRUE
  ) %>%
  mutate(key = stringr::str_replace(key, "newrel", "new_rel")) %>%
  separate(key, c("new", "type", "sexage"), sep = "_") %>%
  select(-new, -iso2) %>%
  separate(sexage, c("sex", "age"), sep = 1)

# Total cases per year.
who_tidy %>%
  group_by(year) %>%
  summarise(sum_cases = sum(cases)) %>%
  ggplot(mapping = aes(x = year, y = sum_cases)) +
  geom_line()

# Number of distinct countries with at least one non-missing count per year.
who_tidy %>%
  group_by(year) %>%
  summarise(countries = n_distinct(country)) %>%
  ggplot(mapping = aes(x = year, y = countries)) +
  geom_line()

# Total cases per sex.
who_tidy %>%
  group_by(sex) %>%
  summarise(sum_cases = sum(cases)) %>%
  ggplot(mapping = aes(x = sex, y = sum_cases)) +
  geom_point()