Load gói dplyr (`library(dplyr)`); một số ví dụ bên dưới còn dùng hàm của stringr và tidyr.

Lấy data có sẵn

# Load the starwars dataset and list its column names
data("starwars")
colnames(starwars)
##  [1] "name"       "height"     "mass"       "hair_color" "skin_color"
##  [6] "eye_color"  "birth_year" "gender"     "homeworld"  "species"   
## [11] "films"      "vehicles"   "starships"
# Preview the first six rows
starwars %>% head()
## # A tibble: 6 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 Luke Sky~    172    77 blond      fair       blue            19   male  
## 2 C-3PO        167    75 <NA>       gold       yellow         112   <NA>  
## 3 R2-D2         96    32 <NA>       white, bl~ red             33   <NA>  
## 4 Darth Va~    202   136 none       white      yellow          41.9 male  
## 5 Leia Org~    150    49 brown      light      brown           19   female
## 6 Owen Lars    178   120 brown, gr~ light      blue            52   male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

Nhóm lệnh liên quan đến tập hợp

# First operand for the set examples: hair colour and gender of rows 1-5
x1 <- starwars %>%
  slice(1:5) %>%
  select(hair_color, gender)
x1
## # A tibble: 5 x 2
##   hair_color gender
##   <chr>      <chr> 
## 1 blond      male  
## 2 <NA>       <NA>  
## 3 <NA>       <NA>  
## 4 none       male  
## 5 brown      female
# Second operand for the set examples: hair colour and gender of rows 6-10
x2 <- starwars %>%
  slice(6:10) %>%
  select(hair_color, gender)
x2
## # A tibble: 5 x 2
##   hair_color    gender
##   <chr>         <chr> 
## 1 brown, grey   male  
## 2 brown         female
## 3 <NA>          <NA>  
## 4 black         male  
## 5 auburn, white male

Tập giao (có trong cả 2 tập hợp)

# Rows that occur in both x1 and x2
intersect(x1, x2)
## # A tibble: 2 x 2
##   hair_color gender
##   <chr>      <chr> 
## 1 brown      female
## 2 <NA>       <NA>

Tập hiệu (có ở tập này mà không có ở tập kia)

# Rows found in x1 but not in x2
setdiff(x1, x2)
## # A tibble: 2 x 2
##   hair_color gender
##   <chr>      <chr> 
## 1 blond      male  
## 2 none       male
# Rows found in x2 but not in x1
setdiff(x2, x1)
## # A tibble: 3 x 2
##   hair_color    gender
##   <chr>         <chr> 
## 1 brown, grey   male  
## 2 black         male  
## 3 auburn, white male

Tập hợp (có ở x1 hoặc ở x2)

# Distinct rows that occur in x1 or in x2
union(x1, x2)
## # A tibble: 7 x 2
##   hair_color    gender
##   <chr>         <chr> 
## 1 <NA>          <NA>  
## 2 black         male  
## 3 blond         male  
## 4 brown, grey   male  
## 5 none          male  
## 6 brown         female
## 7 auburn, white male
# Alternative: build both operands inline instead of storing them first
union(
  starwars %>% slice(1:5) %>% select(hair_color, gender),
  starwars %>% slice(6:10) %>% select(hair_color, gender)
)
## # A tibble: 7 x 2
##   hair_color    gender
##   <chr>         <chr> 
## 1 <NA>          <NA>  
## 2 black         male  
## 3 blond         male  
## 4 brown, grey   male  
## 5 none          male  
## 6 brown         female
## 7 auburn, white male

Tạo biến số thứ tự dòng

# Number the rows in their current order
mutate(x1, id = row_number())
## # A tibble: 5 x 3
##   hair_color gender    id
##   <chr>      <chr>  <int>
## 1 blond      male       1
## 2 <NA>       <NA>       2
## 3 <NA>       <NA>       3
## 4 none       male       4
## 5 brown      female     5
# Number the rows by the ordering of gender; rows with missing gender get NA
mutate(x1, id = row_number(gender))
## # A tibble: 5 x 3
##   hair_color gender    id
##   <chr>      <chr>  <int>
## 1 blond      male       2
## 2 <NA>       <NA>      NA
## 3 <NA>       <NA>      NA
## 4 none       male       3
## 5 brown      female     1

Xếp hạng các quan sát

Hạng bằng giá trị min nếu các quan sát cùng hạng

# Rebind x1 to rows 10-20, now including height, for the ranking examples
x1 <- starwars %>%
  slice(10:20) %>%
  select(hair_color, gender, height)

# Ties share the smallest rank of their group
mutate(x1, rk = min_rank(gender))
## # A tibble: 11 x 4
##    hair_color    gender        height    rk
##    <chr>         <chr>          <int> <int>
##  1 auburn, white male             182     2
##  2 blond         male             188     2
##  3 auburn, grey  male             180     2
##  4 brown         male             228     2
##  5 brown         male             180     2
##  6 <NA>          male             173     2
##  7 <NA>          hermaphrodite    175     1
##  8 brown         male             170     2
##  9 brown         male             180     2
## 10 white         male              66     2
## 11 grey          male             170     2
# Same ranking on hair_color; missing values are ranked NA
mutate(x1, rk = min_rank(hair_color))
## # A tibble: 11 x 4
##    hair_color    gender        height    rk
##    <chr>         <chr>          <int> <int>
##  1 auburn, white male             182     2
##  2 blond         male             188     3
##  3 auburn, grey  male             180     1
##  4 brown         male             228     4
##  5 brown         male             180     4
##  6 <NA>          male             173    NA
##  7 <NA>          hermaphrodite    175    NA
##  8 brown         male             170     4
##  9 brown         male             180     4
## 10 white         male              66     9
## 11 grey          male             170     8
# Same ranking on the numeric height column
mutate(x1, rk = min_rank(height))
## # A tibble: 11 x 4
##    hair_color    gender        height    rk
##    <chr>         <chr>          <int> <int>
##  1 auburn, white male             182     9
##  2 blond         male             188    10
##  3 auburn, grey  male             180     6
##  4 brown         male             228    11
##  5 brown         male             180     6
##  6 <NA>          male             173     4
##  7 <NA>          hermaphrodite    175     5
##  8 brown         male             170     2
##  9 brown         male             180     6
## 10 white         male              66     1
## 11 grey          male             170     2

Hạng theo mật độ dense_rank

# Dense ranking: ties share a rank and no rank numbers are skipped
mutate(x1, rk = dense_rank(gender))
## # A tibble: 11 x 4
##    hair_color    gender        height    rk
##    <chr>         <chr>          <int> <int>
##  1 auburn, white male             182     2
##  2 blond         male             188     2
##  3 auburn, grey  male             180     2
##  4 brown         male             228     2
##  5 brown         male             180     2
##  6 <NA>          male             173     2
##  7 <NA>          hermaphrodite    175     1
##  8 brown         male             170     2
##  9 brown         male             180     2
## 10 white         male              66     2
## 11 grey          male             170     2
# Dense ranking on hair_color; missing values are ranked NA
mutate(x1, rk = dense_rank(hair_color))
## # A tibble: 11 x 4
##    hair_color    gender        height    rk
##    <chr>         <chr>          <int> <int>
##  1 auburn, white male             182     2
##  2 blond         male             188     3
##  3 auburn, grey  male             180     1
##  4 brown         male             228     4
##  5 brown         male             180     4
##  6 <NA>          male             173    NA
##  7 <NA>          hermaphrodite    175    NA
##  8 brown         male             170     4
##  9 brown         male             180     4
## 10 white         male              66     6
## 11 grey          male             170     5

Hạng theo phần trăm percent_rank

# Rank of height rescaled to the range 0-1
mutate(x1, prk = percent_rank(height))
## # A tibble: 11 x 4
##    hair_color    gender        height   prk
##    <chr>         <chr>          <int> <dbl>
##  1 auburn, white male             182   0.8
##  2 blond         male             188   0.9
##  3 auburn, grey  male             180   0.5
##  4 brown         male             228   1  
##  5 brown         male             180   0.5
##  6 <NA>          male             173   0.3
##  7 <NA>          hermaphrodite    175   0.4
##  8 brown         male             170   0.1
##  9 brown         male             180   0.5
## 10 white         male              66   0  
## 11 grey          male             170   0.1
# Percent rank plus its column total; mutate() evaluates its arguments in
# order, so sprk can refer to the prk column created just before it
x1 %>% mutate(prk = percent_rank(height), sprk = sum(prk))
## # A tibble: 11 x 5
##    hair_color    gender        height   prk  sprk
##    <chr>         <chr>          <int> <dbl> <dbl>
##  1 auburn, white male             182   0.8   5.1
##  2 blond         male             188   0.9   5.1
##  3 auburn, grey  male             180   0.5   5.1
##  4 brown         male             228   1     5.1
##  5 brown         male             180   0.5   5.1
##  6 <NA>          male             173   0.3   5.1
##  7 <NA>          hermaphrodite    175   0.4   5.1
##  8 brown         male             170   0.1   5.1
##  9 brown         male             180   0.5   5.1
## 10 white         male              66   0     5.1
## 11 grey          male             170   0.1   5.1
# Cumulative distribution: share of heights less than or equal to each value
mutate(x1, prk = cume_dist(height))
## # A tibble: 11 x 4
##    hair_color    gender        height    prk
##    <chr>         <chr>          <int>  <dbl>
##  1 auburn, white male             182 0.818 
##  2 blond         male             188 0.909 
##  3 auburn, grey  male             180 0.727 
##  4 brown         male             228 1     
##  5 brown         male             180 0.727 
##  6 <NA>          male             173 0.364 
##  7 <NA>          hermaphrodite    175 0.455 
##  8 brown         male             170 0.273 
##  9 brown         male             180 0.727 
## 10 white         male              66 0.0909
## 11 grey          male             170 0.273
# Cumulative distribution plus its column total; sprk reuses the prk column
# created earlier in the same mutate() call
x1 %>% mutate(prk = cume_dist(height), sprk = sum(prk))
## # A tibble: 11 x 5
##    hair_color    gender        height    prk  sprk
##    <chr>         <chr>          <int>  <dbl> <dbl>
##  1 auburn, white male             182 0.818   6.36
##  2 blond         male             188 0.909   6.36
##  3 auburn, grey  male             180 0.727   6.36
##  4 brown         male             228 1       6.36
##  5 brown         male             180 0.727   6.36
##  6 <NA>          male             173 0.364   6.36
##  7 <NA>          hermaphrodite    175 0.455   6.36
##  8 brown         male             170 0.273   6.36
##  9 brown         male             180 0.727   6.36
## 10 white         male              66 0.0909  6.36
## 11 grey          male             170 0.273   6.36

Chọn biến: select

# Keep only the numeric columns. The predicate is passed directly because
# funs() is deprecated since dplyr 0.8.0.
select_if(starwars, is.numeric)
## # A tibble: 87 x 3
##    height  mass birth_year
##     <int> <dbl>      <dbl>
##  1    172    77       19  
##  2    167    75      112  
##  3     96    32       33  
##  4    202   136       41.9
##  5    150    49       19  
##  6    178   120       52  
##  7    165    75       47  
##  8     97    32       NA  
##  9    183    84       24  
## 10    182    77       57  
## # ... with 77 more rows
# Keep the columns whose names are listed in vars()
starwars %>% select_at(vars("name", "height"))
## # A tibble: 87 x 2
##    name               height
##    <chr>               <int>
##  1 Luke Skywalker        172
##  2 C-3PO                 167
##  3 R2-D2                  96
##  4 Darth Vader           202
##  5 Leia Organa           150
##  6 Owen Lars             178
##  7 Beru Whitesun lars    165
##  8 R5-D4                  97
##  9 Biggs Darklighter     183
## 10 Obi-Wan Kenobi        182
## # ... with 77 more rows

Đổi tên biến: rename

# Rename the gender column to sex
rename(starwars, sex = gender)
## # A tibble: 87 x 13
##    name     height  mass hair_color  skin_color eye_color birth_year sex  
##    <chr>     <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr>
##  1 Luke Sk~    172    77 blond       fair       blue            19   male 
##  2 C-3PO       167    75 <NA>        gold       yellow         112   <NA> 
##  3 R2-D2        96    32 <NA>        white, bl~ red             33   <NA> 
##  4 Darth V~    202   136 none        white      yellow          41.9 male 
##  5 Leia Or~    150    49 brown       light      brown           19   fema~
##  6 Owen La~    178   120 brown, grey light      blue            52   male 
##  7 Beru Wh~    165    75 brown       light      blue            47   fema~
##  8 R5-D4        97    32 <NA>        white, red red             NA   <NA> 
##  9 Biggs D~    183    84 black       light      brown           24   male 
## 10 Obi-Wan~    182    77 auburn, wh~ fair       blue-gray       57   male 
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Prefix every column name with "sw_". A formula lambda replaces funs(),
# which is deprecated since dplyr 0.8.0.
starwars %>% rename_all(~ paste0("sw_", .))
## # A tibble: 87 x 13
##    sw_name      sw_height sw_mass sw_hair_color sw_skin_color sw_eye_color
##    <chr>            <int>   <dbl> <chr>         <chr>         <chr>       
##  1 Luke Skywal~       172      77 blond         fair          blue        
##  2 C-3PO              167      75 <NA>          gold          yellow      
##  3 R2-D2               96      32 <NA>          white, blue   red         
##  4 Darth Vader        202     136 none          white         yellow      
##  5 Leia Organa        150      49 brown         light         brown       
##  6 Owen Lars          178     120 brown, grey   light         blue        
##  7 Beru Whites~       165      75 brown         light         blue        
##  8 R5-D4               97      32 <NA>          white, red    red         
##  9 Biggs Darkl~       183      84 black         light         brown       
## 10 Obi-Wan Ken~       182      77 auburn, white fair          blue-gray   
## # ... with 77 more rows, and 7 more variables: sw_birth_year <dbl>,
## #   sw_gender <chr>, sw_homeworld <chr>, sw_species <chr>,
## #   sw_films <list>, sw_vehicles <list>, sw_starships <list>
# Upper-case the names of the numeric columns only. The predicate and the
# renaming function are passed directly; funs() is deprecated since dplyr 0.8.0.
starwars %>% rename_if(is.numeric, str_to_upper)
## # A tibble: 87 x 13
##    name     HEIGHT  MASS hair_color skin_color eye_color BIRTH_YEAR gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk~    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl~ red             33   <NA>  
##  4 Darth V~    202   136 none       white      yellow          41.9 male  
##  5 Leia Or~    150    49 brown      light      brown           19   female
##  6 Owen La~    178   120 brown, gr~ light      blue            52   male  
##  7 Beru Wh~    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D~    183    84 black      light      brown           24   male  
## 10 Obi-Wan~    182    77 auburn, w~ fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Upper-case the names of the listed columns. The function is passed
# directly; funs() is deprecated since dplyr 0.8.0.
starwars %>% rename_at(vars("name", "height"), str_to_upper)
## # A tibble: 87 x 13
##    NAME     HEIGHT  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk~    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl~ red             33   <NA>  
##  4 Darth V~    202   136 none       white      yellow          41.9 male  
##  5 Leia Or~    150    49 brown      light      brown           19   female
##  6 Owen La~    178   120 brown, gr~ light      blue            52   male  
##  7 Beru Wh~    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D~    183    84 black      light      brown           24   male  
## 10 Obi-Wan~    182    77 auburn, w~ fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>

Chọn quan sát: filter

# Keep rows where every *color* column equals "brown"
filter_at(starwars, vars(contains("color")), all_vars(. == "brown"))
## # A tibble: 1 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 Wicket S~     88    20 brown      brown      brown              8 male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep rows where at least one *color* column equals "brown"
filter_at(starwars, vars(contains("color")), any_vars(. == "brown"))
## # A tibble: 31 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Leia Or~    150  49   brown      light      brown           19   female
##  2 Beru Wh~    165  75   brown      light      blue            47   female
##  3 Biggs D~    183  84   black      light      brown           24   male  
##  4 Chewbac~    228 112   brown      unknown    blue           200   male  
##  5 Han Solo    180  80   brown      fair       brown           29   male  
##  6 Wedge A~    170  77   brown      fair       hazel           21   male  
##  7 Jek Ton~    180 110   brown      fair       blue            NA   male  
##  8 Yoda         66  17   white      green      brown          896   male  
##  9 Boba Fe~    183  78.2 black      fair       brown           31.5 male  
## 10 Lando C~    177  79   black      dark       brown           31   male  
## # ... with 21 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Keep rows where every numeric column is greater than 100
filter_if(starwars, is.numeric, all_vars(. > 100))
## # A tibble: 2 x 13
##   name     height  mass hair_color skin_color  eye_color birth_year gender
##   <chr>     <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
## 1 Chewbac~    228   112 brown      unknown     blue             200 male  
## 2 Jabba D~    175  1358 <NA>       green-tan,~ orange           600 herma~
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

Tạo biến mới: mutate

# Dense rank of hair_color over the whole dataset
mutate(select(starwars, hair_color), rk = dense_rank(hair_color))
## # A tibble: 87 x 2
##    hair_color       rk
##    <chr>         <int>
##  1 blond             5
##  2 <NA>             NA
##  3 <NA>             NA
##  4 none             10
##  5 brown             7
##  6 brown, grey       8
##  7 brown             7
##  8 <NA>             NA
##  9 black             4
## 10 auburn, white     3
## # ... with 77 more rows
# Convert every column to character
mutate_all(starwars, as.character)
## # A tibble: 87 x 13
##    name     height mass  hair_color skin_color eye_color birth_year gender
##    <chr>    <chr>  <chr> <chr>      <chr>      <chr>     <chr>      <chr> 
##  1 Luke Sk~ 172    77    blond      fair       blue      19         male  
##  2 C-3PO    167    75    <NA>       gold       yellow    112        <NA>  
##  3 R2-D2    96     32    <NA>       white, bl~ red       33         <NA>  
##  4 Darth V~ 202    136   none       white      yellow    41.9       male  
##  5 Leia Or~ 150    49    brown      light      brown     19         female
##  6 Owen La~ 178    120   brown, gr~ light      blue      52         male  
##  7 Beru Wh~ 165    75    brown      light      blue      47         female
##  8 R5-D4    97     32    <NA>       white, red red       <NA>       <NA>  
##  9 Biggs D~ 183    84    black      light      brown     24         male  
## 10 Obi-Wan~ 182    77    auburn, w~ fair       blue-gray 57         male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <chr>, vehicles <chr>, starships <chr>
# Convert every character column to a factor. The predicate and the function
# are passed directly; funs() is deprecated since dplyr 0.8.0.
starwars %>% mutate_if(is.character, as.factor)
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <fct>     <int> <dbl> <fct>      <fct>      <fct>          <dbl> <fct> 
##  1 Luke Sk~    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl~ red             33   <NA>  
##  4 Darth V~    202   136 none       white      yellow          41.9 male  
##  5 Leia Or~    150    49 brown      light      brown           19   female
##  6 Owen La~    178   120 brown, gr~ light      blue            52   male  
##  7 Beru Wh~    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D~    183    84 black      light      brown           24   male  
## 10 Obi-Wan~    182    77 auburn, w~ fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <fct>,
## #   species <fct>, films <list>, vehicles <list>, starships <list>
# Divide height by 10 in place. A formula lambda replaces funs(), which is
# deprecated since dplyr 0.8.0.
starwars %>% mutate_at(vars("height"), ~ . / 10)
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <dbl> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk~   17.2    77 blond      fair       blue            19   male  
##  2 C-3PO      16.7    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2       9.6    32 <NA>       white, bl~ red             33   <NA>  
##  4 Darth V~   20.2   136 none       white      yellow          41.9 male  
##  5 Leia Or~   15      49 brown      light      brown           19   female
##  6 Owen La~   17.8   120 brown, gr~ light      blue            52   male  
##  7 Beru Wh~   16.5    75 brown      light      blue            47   female
##  8 R5-D4       9.7    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D~   18.3    84 black      light      brown           24   male  
## 10 Obi-Wan~   18.2    77 auburn, w~ fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>

Nhóm các quan sát: group_by

# Append the overall height total to every row. The new column is unnamed, so
# its name is taken from the expression text.
starwars %>%
  # Keep columns 1 to 3
  select(1:3) %>%
  # Sum of height over all rows, ignoring missing values
  mutate(sum(height, na.rm=T))
## # A tibble: 87 x 4
##    name               height  mass `sum(height, na.rm = T)`
##    <chr>               <int> <dbl>                    <int>
##  1 Luke Skywalker        172    77                    14123
##  2 C-3PO                 167    75                    14123
##  3 R2-D2                  96    32                    14123
##  4 Darth Vader           202   136                    14123
##  5 Leia Organa           150    49                    14123
##  6 Owen Lars             178   120                    14123
##  7 Beru Whitesun lars    165    75                    14123
##  8 R5-D4                  97    32                    14123
##  9 Biggs Darklighter     183    84                    14123
## 10 Obi-Wan Kenobi        182    77                    14123
## # ... with 77 more rows
# Append one grand total to every row. The new column is unnamed, so its name
# is taken from the expression text.
starwars %>%
  select(1:3) %>%
  # Sum of all values of both columns together, ignoring missing values
  mutate(sum(height, mass, na.rm=T)) 
## # A tibble: 87 x 4
##    name               height  mass `sum(height, mass, na.rm = T)`
##    <chr>               <int> <dbl>                          <dbl>
##  1 Luke Skywalker        172    77                         19864.
##  2 C-3PO                 167    75                         19864.
##  3 R2-D2                  96    32                         19864.
##  4 Darth Vader           202   136                         19864.
##  5 Leia Organa           150    49                         19864.
##  6 Owen Lars             178   120                         19864.
##  7 Beru Whitesun lars    165    75                         19864.
##  8 R5-D4                  97    32                         19864.
##  9 Biggs Darklighter     183    84                         19864.
## 10 Obi-Wan Kenobi        182    77                         19864.
## # ... with 77 more rows
# Same sum, but computed within groups instead of over the whole table
starwars %>%
  select(1:3) %>%
  # Group by all of the selected columns
  group_by_all() %>% 
  mutate(sum(height, mass, na.rm=T))
## # A tibble: 87 x 4
## # Groups:   name, height, mass [87]
##    name               height  mass `sum(height, mass, na.rm = T)`
##    <chr>               <int> <dbl>                          <dbl>
##  1 Luke Skywalker        172    77                            249
##  2 C-3PO                 167    75                            242
##  3 R2-D2                  96    32                            128
##  4 Darth Vader           202   136                            338
##  5 Leia Organa           150    49                            199
##  6 Owen Lars             178   120                            298
##  7 Beru Whitesun lars    165    75                            240
##  8 R5-D4                  97    32                            129
##  9 Biggs Darklighter     183    84                            267
## 10 Obi-Wan Kenobi        182    77                            259
## # ... with 77 more rows
# Group by the character columns among the first three, then count the rows
# in each group
select(starwars, 1:3) %>%
  group_by_if(is.character) %>%
  count()
## # A tibble: 87 x 2
## # Groups:   name [87]
##    name                    n
##    <chr>               <int>
##  1 Ackbar                  1
##  2 Adi Gallia              1
##  3 Anakin Skywalker        1
##  4 Arvel Crynyd            1
##  5 Ayla Secura             1
##  6 Bail Prestor Organa     1
##  7 Barriss Offee           1
##  8 BB8                     1
##  9 Ben Quadinaros          1
## 10 Beru Whitesun lars      1
## # ... with 77 more rows
# Group by two named columns (no select() needed first) and count each group
starwars %>%
  group_by_at(vars("eye_color", "hair_color")) %>%
  count()
## # A tibble: 35 x 3
## # Groups:   eye_color, hair_color [35]
##    eye_color hair_color       n
##    <chr>     <chr>        <int>
##  1 black     none             9
##  2 black     <NA>             1
##  3 blue      auburn           1
##  4 blue      auburn, grey     1
##  5 blue      black            2
##  6 blue      blond            3
##  7 blue      brown            7
##  8 blue      brown, grey      1
##  9 blue      none             3
## 10 blue      white            1
## # ... with 25 more rows

Mô tả biến: summarise

summarise_all

# Count missing values per column, then keep only the columns that have any.
# A formula lambda replaces funs() (deprecated since dplyr 0.8.0). A plain
# predicate replaces any_vars(), which is meant for the scoped filter verbs,
# not for select_if().
starwars %>%
  summarise_all(~ sum(is.na(.))) %>%
  select_if(~ any(. > 0))
## # A tibble: 1 x 7
##   height  mass hair_color birth_year gender homeworld species
##    <int> <int>      <int>      <int>  <int>     <int>   <int>
## 1      6    28          5         44      3        10       5

summarise_if

# Mean of every numeric column, ignoring missing values. The function is
# passed directly (funs() is deprecated since dplyr 0.8.0) and TRUE is spelled
# out because T can be reassigned.
starwars %>%
  # Keep the numeric columns, then summarise them
  select_if(is.numeric) %>%
  summarise_all(mean, na.rm = TRUE)
## # A tibble: 1 x 3
##   height  mass birth_year
##    <dbl> <dbl>      <dbl>
## 1   174.  97.3       87.6
# Five summary statistics for every numeric column, ignoring missing values.
# A named list replaces funs() (deprecated since dplyr 0.8.0); the list names
# become the column suffixes, e.g. height_min.
starwars %>%
  summarise_if(
    is.numeric,
    list(min = min, median = median, mean = mean, sd = sd, max = max),
    na.rm = TRUE
  )
## # A tibble: 1 x 15
##   height_min mass_min birth_year_min height_median mass_median
##        <dbl>    <dbl>          <dbl>         <int>       <dbl>
## 1         66       15              8           180          79
## # ... with 10 more variables: birth_year_median <dbl>, height_mean <dbl>,
## #   mass_mean <dbl>, birth_year_mean <dbl>, height_sd <dbl>,
## #   mass_sd <dbl>, birth_year_sd <dbl>, height_max <dbl>, mass_max <dbl>,
## #   birth_year_max <dbl>

summarise_at

# Sum and mean of height and mass, ignoring missing values. A named list
# replaces funs() (deprecated since dplyr 0.8.0); the list names become the
# column suffixes, e.g. height_sum.
starwars %>%
  summarise_at(
    vars("height", "mass"),
    list(sum = sum, mean = mean),
    na.rm = TRUE
  )
## # A tibble: 1 x 4
##   height_sum mass_sum height_mean mass_mean
##        <int>    <dbl>       <dbl>     <dbl>
## 1      14123    5741.        174.      97.3

Chuyển dạng dữ liệu: Reshape

# Summary statistics for the numeric columns, reshaped to one row per
# statistic. A named list replaces funs() (deprecated since dplyr 0.8.0), and
# pivot_longer() replaces the superseded gather(); the key/value column names
# are kept so the result has the same shape.
starwars %>%
  summarise_if(
    is.numeric,
    list(min = min, median = median, mean = mean, sd = sd, max = max),
    na.rm = TRUE
  ) %>%
  # Convert to long format
  pivot_longer(everything(), names_to = "key", values_to = "value") %>%
  arrange(key)
## # A tibble: 15 x 2
##    key                value
##    <chr>              <dbl>
##  1 birth_year_max     896  
##  2 birth_year_mean     87.6
##  3 birth_year_median   52  
##  4 birth_year_min       8  
##  5 birth_year_sd      155. 
##  6 height_max         264  
##  7 height_mean        174. 
##  8 height_median      180  
##  9 height_min          66  
## 10 height_sd           34.8
## 11 mass_max          1358  
## 12 mass_mean           97.3
## 13 mass_median         79  
## 14 mass_min            15  
## 15 mass_sd            169.