library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1     √ purrr   0.2.4
## √ tibble  1.3.4     √ dplyr   0.7.4
## √ tidyr   0.7.2     √ stringr 1.2.0
## √ readr   1.1.1     √ forcats 0.2.0
## Warning: package 'stringr' was built under R version 3.4.3
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
table1
## # A tibble: 6 x 4
##       country  year  cases population
##         <chr> <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3      Brazil  1999  37737  172006362
## 4      Brazil  2000  80488  174504898
## 5       China  1999 212258 1272915272
## 6       China  2000 213766 1280428583
table2
## # A tibble: 12 x 4
##        country  year       type      count
##          <chr> <int>      <chr>      <int>
##  1 Afghanistan  1999      cases        745
##  2 Afghanistan  1999 population   19987071
##  3 Afghanistan  2000      cases       2666
##  4 Afghanistan  2000 population   20595360
##  5      Brazil  1999      cases      37737
##  6      Brazil  1999 population  172006362
##  7      Brazil  2000      cases      80488
##  8      Brazil  2000 population  174504898
##  9       China  1999      cases     212258
## 10       China  1999 population 1272915272
## 11       China  2000      cases     213766
## 12       China  2000 population 1280428583
table3
## # A tibble: 6 x 3
##       country  year              rate
## *       <chr> <int>             <chr>
## 1 Afghanistan  1999      745/19987071
## 2 Afghanistan  2000     2666/20595360
## 3      Brazil  1999   37737/172006362
## 4      Brazil  2000   80488/174504898
## 5       China  1999 212258/1272915272
## 6       China  2000 213766/1280428583
table4a
## # A tibble: 3 x 3
##       country `1999` `2000`
## *       <chr>  <int>  <int>
## 1 Afghanistan    745   2666
## 2      Brazil  37737  80488
## 3       China 212258 213766
table4b
## # A tibble: 3 x 3
##       country     `1999`     `2000`
## *       <chr>      <int>      <int>
## 1 Afghanistan   19987071   20595360
## 2      Brazil  172006362  174504898
## 3       China 1272915272 1280428583
table1 %>% 
  mutate(rate = cases / population * 10000)
## # A tibble: 6 x 5
##       country  year  cases population     rate
##         <chr> <int>  <int>      <int>    <dbl>
## 1 Afghanistan  1999    745   19987071 0.372741
## 2 Afghanistan  2000   2666   20595360 1.294466
## 3      Brazil  1999  37737  172006362 2.193930
## 4      Brazil  2000  80488  174504898 4.612363
## 5       China  1999 212258 1272915272 1.667495
## 6       China  2000 213766 1280428583 1.669488
table1 %>% 
  group_by(year) %>% 
  summarise(mean_cases = mean(cases), sum_case = sum(cases))
## # A tibble: 2 x 3
##    year mean_cases sum_case
##   <int>      <dbl>    <int>
## 1  1999   83580.00   250740
## 2  2000   98973.33   296920
table1 %>% 
  count(year, wt=cases)
## # A tibble: 2 x 2
##    year      n
##   <int>  <int>
## 1  1999 250740
## 2  2000 296920
library(ggplot2)
ggplot(data=table1, aes(x=year, y=cases)) +
  geom_line(aes(group=country), color="grey50") +
  geom_point(aes(color = country))

table2
## # A tibble: 12 x 4
##        country  year       type      count
##          <chr> <int>      <chr>      <int>
##  1 Afghanistan  1999      cases        745
##  2 Afghanistan  1999 population   19987071
##  3 Afghanistan  2000      cases       2666
##  4 Afghanistan  2000 population   20595360
##  5      Brazil  1999      cases      37737
##  6      Brazil  1999 population  172006362
##  7      Brazil  2000      cases      80488
##  8      Brazil  2000 population  174504898
##  9       China  1999      cases     212258
## 10       China  1999 population 1272915272
## 11       China  2000      cases     213766
## 12       China  2000 population 1280428583
table2 %>% 
  filter(type == "cases") %>% 
  group_by(year,country) %>% 
  summarise(sum_cnt = sum(count))
## # A tibble: 6 x 3
## # Groups:   year [?]
##    year     country sum_cnt
##   <int>       <chr>   <int>
## 1  1999 Afghanistan     745
## 2  1999      Brazil   37737
## 3  1999       China  212258
## 4  2000 Afghanistan    2666
## 5  2000      Brazil   80488
## 6  2000       China  213766
tbl2_pop <- table2 %>% 
  filter(type == "population") %>% 
  group_by(year,country) %>% 
  summarise(population = sum(count))

tbl2_case <- table2 %>% 
  filter(type == "cases") %>% 
  group_by(year,country) %>% 
  summarise(cases = sum(count))

tbl2_pop
## # A tibble: 6 x 3
## # Groups:   year [?]
##    year     country population
##   <int>       <chr>      <int>
## 1  1999 Afghanistan   19987071
## 2  1999      Brazil  172006362
## 3  1999       China 1272915272
## 4  2000 Afghanistan   20595360
## 5  2000      Brazil  174504898
## 6  2000       China 1280428583
tbl2_case
## # A tibble: 6 x 3
## # Groups:   year [?]
##    year     country  cases
##   <int>       <chr>  <int>
## 1  1999 Afghanistan    745
## 2  1999      Brazil  37737
## 3  1999       China 212258
## 4  2000 Afghanistan   2666
## 5  2000      Brazil  80488
## 6  2000       China 213766
tbl2_join <- left_join(tbl2_pop, tbl2_case)
## Joining, by = c("year", "country")
tbl2_join
## # A tibble: 6 x 4
## # Groups:   year [?]
##    year     country population  cases
##   <int>       <chr>      <int>  <int>
## 1  1999 Afghanistan   19987071    745
## 2  1999      Brazil  172006362  37737
## 3  1999       China 1272915272 212258
## 4  2000 Afghanistan   20595360   2666
## 5  2000      Brazil  174504898  80488
## 6  2000       China 1280428583 213766
tbl2_join %>% 
  mutate(rate = cases/population * 10000)
## # A tibble: 6 x 5
## # Groups:   year [2]
##    year     country population  cases     rate
##   <int>       <chr>      <int>  <int>    <dbl>
## 1  1999 Afghanistan   19987071    745 0.372741
## 2  1999      Brazil  172006362  37737 2.193930
## 3  1999       China 1272915272 212258 1.667495
## 4  2000 Afghanistan   20595360   2666 1.294466
## 5  2000      Brazil  174504898  80488 4.612363
## 6  2000       China 1280428583 213766 1.669488
ggplot(data=tbl2_join, aes(year,cases,group=country)) +
  geom_line() +
  geom_point(aes(color = country))

tidy4a <-  table4a %>% 
  gather(`1999`,`2000`, key="year", value="cases")
tidy4b <-  table4b %>% 
  gather(`1999`,`2000`, key="year", value="population")
left_join(tidy4a, tidy4b)
## Joining, by = c("country", "year")
## # A tibble: 6 x 4
##       country  year  cases population
##         <chr> <chr>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2      Brazil  1999  37737  172006362
## 3       China  1999 212258 1272915272
## 4 Afghanistan  2000   2666   20595360
## 5      Brazil  2000  80488  174504898
## 6       China  2000 213766 1280428583
spread(table2, key=type, value=count)
## # A tibble: 6 x 4
##       country  year  cases population
## *       <chr> <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3      Brazil  1999  37737  172006362
## 4      Brazil  2000  80488  174504898
## 5       China  1999 212258 1272915272
## 6       China  2000 213766 1280428583
stocks <- tibble(
  year = c(2015,2015,2016,2016),
  half = c(1,2,1,2),
  return = c(1.88, 0.59, 0.92, 0.17)
)

stocks
## # A tibble: 4 x 3
##    year  half return
##   <dbl> <dbl>  <dbl>
## 1  2015     1   1.88
## 2  2015     2   0.59
## 3  2016     1   0.92
## 4  2016     2   0.17
stocks %>% 
  spread(year, return)
## # A tibble: 2 x 3
##    half `2015` `2016`
## * <dbl>  <dbl>  <dbl>
## 1     1   1.88   0.92
## 2     2   0.59   0.17
stocks %>% 
  spread(year, return) %>% 
  gather("year", "return", `2015`:`2016`)
## # A tibble: 4 x 3
##    half  year return
##   <dbl> <chr>  <dbl>
## 1     1  2015   1.88
## 2     2  2015   0.59
## 3     1  2016   0.92
## 4     2  2016   0.17
table4a %>% 
  gather(`1999`,`2000`, key="year", value = "cases")
## # A tibble: 6 x 3
##       country  year  cases
##         <chr> <chr>  <int>
## 1 Afghanistan  1999    745
## 2      Brazil  1999  37737
## 3       China  1999 212258
## 4 Afghanistan  2000   2666
## 5      Brazil  2000  80488
## 6       China  2000 213766
people <- tribble(
  ~name,             ~keyc,       ~valuec,
  #-----------------|-----------|------
  "Phillip Woods",   "age",       45,
  "Phillip Woods",   "height",   186,
  #"Phillip Woods",   "age",       50,
  "Jessica Cordero", "age",       37,
  "Jessica Cordero", "height",   156
)

spread(people, key=keyc, value=valuec)
## # A tibble: 2 x 3
##              name   age height
## *           <chr> <dbl>  <dbl>
## 1 Jessica Cordero    37    156
## 2   Phillip Woods    45    186
people <- tribble(
  ~name,            ~sn,  ~keyc,       ~valuec,
  #-----------------|---|------------|------
  "Phillip Woods", "jr.",   "age",       45,
  "Phillip Woods", "",      "height",   186,
  "Phillip Woods", "" ,     "age",       50,
  "Jessica Cordero","" ,    "age",       37,
  "Jessica Cordero","",     "height",   156
)

spread(people, key=keyc, value=valuec)
## # A tibble: 3 x 4
##              name    sn   age height
## *           <chr> <chr> <dbl>  <dbl>
## 1 Jessica Cordero          37    156
## 2   Phillip Woods          50    186
## 3   Phillip Woods   jr.    45     NA
preg <- tribble(
  ~pregnant, ~male, ~female,
  "yes",     NA,    10,
  "no",      20,    12
)

table3
## # A tibble: 6 x 3
##       country  year              rate
## *       <chr> <int>             <chr>
## 1 Afghanistan  1999      745/19987071
## 2 Afghanistan  2000     2666/20595360
## 3      Brazil  1999   37737/172006362
## 4      Brazil  2000   80488/174504898
## 5       China  1999 212258/1272915272
## 6       China  2000 213766/1280428583
table5
## # A tibble: 6 x 4
##       country century  year              rate
## *       <chr>   <chr> <chr>             <chr>
## 1 Afghanistan      19    99      745/19987071
## 2 Afghanistan      20    00     2666/20595360
## 3      Brazil      19    99   37737/172006362
## 4      Brazil      20    00   80488/174504898
## 5       China      19    99 212258/1272915272
## 6       China      20    00 213766/1280428583
table3 %>% 
  separate(
    rate, 
    into = c("cases", "population"),
    convert = T)
## # A tibble: 6 x 4
##       country  year  cases population
## *       <chr> <int>  <int>      <int>
## 1 Afghanistan  1999    745   19987071
## 2 Afghanistan  2000   2666   20595360
## 3      Brazil  1999  37737  172006362
## 4      Brazil  2000  80488  174504898
## 5       China  1999 212258 1272915272
## 6       China  2000 213766 1280428583
table3 %>% 
  separate(
    year,
    into = c("century", "year"),
    convert = T,
    sep = 2
  )
## # A tibble: 6 x 4
##       country century  year              rate
## *       <chr>   <int> <int>             <chr>
## 1 Afghanistan      19    99      745/19987071
## 2 Afghanistan      20     0     2666/20595360
## 3      Brazil      19    99   37737/172006362
## 4      Brazil      20     0   80488/174504898
## 5       China      19    99 212258/1272915272
## 6       China      20     0 213766/1280428583
table5 %>% 
  unite(new, century, year, sep = "")
## # A tibble: 6 x 3
##       country   new              rate
## *       <chr> <chr>             <chr>
## 1 Afghanistan  1999      745/19987071
## 2 Afghanistan  2000     2666/20595360
## 3      Brazil  1999   37737/172006362
## 4      Brazil  2000   80488/174504898
## 5       China  1999 212258/1272915272
## 6       China  2000 213766/1280428583
?separate
## starting httpd help server ...
##  done
tibble(x = c("a,b,c", "d,e,f,g", "h,i,j")) %>% 
  separate(x, c("one","two","three"), extra='merge')
## # A tibble: 3 x 3
##     one   two three
## * <chr> <chr> <chr>
## 1     a     b     c
## 2     d     e   f,g
## 3     h     i     j
tibble(x = c("a,b,c", "d,e", "f,g,i")) %>% 
  separate(x, c("one","two","three"), fill="left")
## # A tibble: 3 x 3
##     one   two three
## * <chr> <chr> <chr>
## 1     a     b     c
## 2  <NA>     d     e
## 3     f     g     i
?unite

table5 %>% 
  unite(new, century, year, sep = "", remove = F)
## # A tibble: 6 x 5
##       country   new century  year              rate
## *       <chr> <chr>   <chr> <chr>             <chr>
## 1 Afghanistan  1999      19    99      745/19987071
## 2 Afghanistan  2000      20    00     2666/20595360
## 3      Brazil  1999      19    99   37737/172006362
## 4      Brazil  2000      20    00   80488/174504898
## 5       China  1999      19    99 212258/1272915272
## 6       China  2000      20    00 213766/1280428583
tibble(x = c("a,b,c", "d,e", "f,g,i")) %>% 
  separate(x, c("one","two","three"), remove = F)
## Warning: Too few values at 1 locations: 2
## # A tibble: 3 x 4
##       x   one   two three
## * <chr> <chr> <chr> <chr>
## 1 a,b,c     a     b     c
## 2   d,e     d     e  <NA>
## 3 f,g,i     f     g     i
table3 %>% 
  separate(
    year,
    into = c("century", "year"),
    convert = T,
    sep = 2,
    remove = F
  )
## # A tibble: 6 x 5
##       country  year century  year              rate
## *       <chr> <int>   <int> <int>             <chr>
## 1 Afghanistan  1999      19    99      745/19987071
## 2 Afghanistan  2000      20     0     2666/20595360
## 3      Brazil  1999      19    99   37737/172006362
## 4      Brazil  2000      20     0   80488/174504898
## 5       China  1999      19    99 212258/1272915272
## 6       China  2000      20     0 213766/1280428583
table3 %>% 
  extract(
    rate,
    "ext_col"
  )
## # A tibble: 6 x 3
##       country  year ext_col
## *       <chr> <int>   <chr>
## 1 Afghanistan  1999     745
## 2 Afghanistan  2000    2666
## 3      Brazil  1999   37737
## 4      Brazil  2000   80488
## 5       China  1999  212258
## 6       China  2000  213766
library(dplyr)
df <- data.frame(x = c(NA, "a-b", "a-d", "b-c", "d-e"))
df
##      x
## 1 <NA>
## 2  a-b
## 3  a-d
## 4  b-c
## 5  d-e
df %>% extract(x, "A")
##      A
## 1 <NA>
## 2    a
## 3    a
## 4    b
## 5    d
df %>% extract(x, c("A", "B"), "([[:alnum:]]+)-([[:alnum:]]+)")
##      A    B
## 1 <NA> <NA>
## 2    a    b
## 3    a    d
## 4    b    c
## 5    d    e
# If no match, NA:
df %>% extract(x, c("A", "B"), "([a-d]+)-([a-d]+)")
##      A    B
## 1 <NA> <NA>
## 2    a    b
## 3    a    d
## 4    b    c
## 5 <NA> <NA>
stocks <- tibble(
  year   = c(2015,2015,2015,2015,2016,2016,2016),
  qtr    = c(   1,   2,   3,   4,   2,   3,   4),
  return = c(1.88,0.59,0.35,  NA,0.92,0.17,2.66)
)
stocks %>% 
  spread(year, return)
## # A tibble: 4 x 3
##     qtr `2015` `2016`
## * <dbl>  <dbl>  <dbl>
## 1     1   1.88     NA
## 2     2   0.59   0.92
## 3     3   0.35   0.17
## 4     4     NA   2.66
stocks %>% 
  spread(year, return) %>% 
  gather(year, return, `2015`:`2016`, na.rm = T)
## # A tibble: 6 x 3
##     qtr  year return
## * <dbl> <chr>  <dbl>
## 1     1  2015   1.88
## 2     2  2015   0.59
## 3     3  2015   0.35
## 4     2  2016   0.92
## 5     3  2016   0.17
## 6     4  2016   2.66
stocks %>% 
  complete(year, qtr)
## # A tibble: 8 x 3
##    year   qtr return
##   <dbl> <dbl>  <dbl>
## 1  2015     1   1.88
## 2  2015     2   0.59
## 3  2015     3   0.35
## 4  2015     4     NA
## 5  2016     1     NA
## 6  2016     2   0.92
## 7  2016     3   0.17
## 8  2016     4   2.66
who
## # A tibble: 7,240 x 60
##        country  iso2  iso3  year new_sp_m014 new_sp_m1524 new_sp_m2534
##          <chr> <chr> <chr> <int>       <int>        <int>        <int>
##  1 Afghanistan    AF   AFG  1980          NA           NA           NA
##  2 Afghanistan    AF   AFG  1981          NA           NA           NA
##  3 Afghanistan    AF   AFG  1982          NA           NA           NA
##  4 Afghanistan    AF   AFG  1983          NA           NA           NA
##  5 Afghanistan    AF   AFG  1984          NA           NA           NA
##  6 Afghanistan    AF   AFG  1985          NA           NA           NA
##  7 Afghanistan    AF   AFG  1986          NA           NA           NA
##  8 Afghanistan    AF   AFG  1987          NA           NA           NA
##  9 Afghanistan    AF   AFG  1988          NA           NA           NA
## 10 Afghanistan    AF   AFG  1989          NA           NA           NA
## # ... with 7,230 more rows, and 53 more variables: new_sp_m3544 <int>,
## #   new_sp_m4554 <int>, new_sp_m5564 <int>, new_sp_m65 <int>,
## #   new_sp_f014 <int>, new_sp_f1524 <int>, new_sp_f2534 <int>,
## #   new_sp_f3544 <int>, new_sp_f4554 <int>, new_sp_f5564 <int>,
## #   new_sp_f65 <int>, new_sn_m014 <int>, new_sn_m1524 <int>,
## #   new_sn_m2534 <int>, new_sn_m3544 <int>, new_sn_m4554 <int>,
## #   new_sn_m5564 <int>, new_sn_m65 <int>, new_sn_f014 <int>,
## #   new_sn_f1524 <int>, new_sn_f2534 <int>, new_sn_f3544 <int>,
## #   new_sn_f4554 <int>, new_sn_f5564 <int>, new_sn_f65 <int>,
## #   new_ep_m014 <int>, new_ep_m1524 <int>, new_ep_m2534 <int>,
## #   new_ep_m3544 <int>, new_ep_m4554 <int>, new_ep_m5564 <int>,
## #   new_ep_m65 <int>, new_ep_f014 <int>, new_ep_f1524 <int>,
## #   new_ep_f2534 <int>, new_ep_f3544 <int>, new_ep_f4554 <int>,
## #   new_ep_f5564 <int>, new_ep_f65 <int>, newrel_m014 <int>,
## #   newrel_m1524 <int>, newrel_m2534 <int>, newrel_m3544 <int>,
## #   newrel_m4554 <int>, newrel_m5564 <int>, newrel_m65 <int>,
## #   newrel_f014 <int>, newrel_f1524 <int>, newrel_f2534 <int>,
## #   newrel_f3544 <int>, newrel_f4554 <int>, newrel_f5564 <int>,
## #   newrel_f65 <int>