1. Purpose.

The purpose of this notebook is to illustrate how the dplyr package can be used to perform basic data manipulation tasks.

2. Load libraries and view practice dataset.

library(tidyverse)
# Print the practice dataset: a tibble with 87 rows and 13 columns.
starwars
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl… red             33   <NA>  
##  4 Darth V…    202   136 none       white      yellow          41.9 male  
##  5 Leia Or…    150    49 brown      light      brown           19   female
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  
##  7 Beru Wh…    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D…    183    84 black      light      brown           24   male  
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>

3. Select columns.

# Print the full tibble as a baseline for the column selections that follow.
starwars
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl… red             33   <NA>  
##  4 Darth V…    202   136 none       white      yellow          41.9 male  
##  5 Leia Or…    150    49 brown      light      brown           19   female
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  
##  7 Beru Wh…    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D…    183    84 black      light      brown           24   male  
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Keep only the name and eye_color columns.
select(starwars, name, eye_color)
## # A tibble: 87 x 2
##    name               eye_color
##    <chr>              <chr>    
##  1 Luke Skywalker     blue     
##  2 C-3PO              yellow   
##  3 R2-D2              red      
##  4 Darth Vader        yellow   
##  5 Leia Organa        brown    
##  6 Owen Lars          blue     
##  7 Beru Whitesun lars blue     
##  8 R5-D4              red      
##  9 Biggs Darklighter  brown    
## 10 Obi-Wan Kenobi     blue-gray
## # ... with 77 more rows
# Keep the contiguous run of columns from mass through eye_color, plus
# starships.
select(starwars, mass:eye_color, starships)
## # A tibble: 87 x 5
##     mass hair_color    skin_color  eye_color starships
##    <dbl> <chr>         <chr>       <chr>     <list>   
##  1    77 blond         fair        blue      <chr [2]>
##  2    75 <NA>          gold        yellow    <chr [0]>
##  3    32 <NA>          white, blue red       <chr [0]>
##  4   136 none          white       yellow    <chr [1]>
##  5    49 brown         light       brown     <chr [0]>
##  6   120 brown, grey   light       blue      <chr [0]>
##  7    75 brown         light       blue      <chr [0]>
##  8    32 <NA>          white, red  red       <chr [0]>
##  9    84 black         light       brown     <chr [1]>
## 10    77 auburn, white fair        blue-gray <chr [5]>
## # ... with 77 more rows
# Select by position: columns 1 to 3 and column 7 (birth_year), plus
# starships by name.
select(starwars, 1:3, 7, starships)
## # A tibble: 87 x 5
##    name               height  mass birth_year starships
##    <chr>               <int> <dbl>      <dbl> <list>   
##  1 Luke Skywalker        172    77       19   <chr [2]>
##  2 C-3PO                 167    75      112   <chr [0]>
##  3 R2-D2                  96    32       33   <chr [0]>
##  4 Darth Vader           202   136       41.9 <chr [1]>
##  5 Leia Organa           150    49       19   <chr [0]>
##  6 Owen Lars             178   120       52   <chr [0]>
##  7 Beru Whitesun lars    165    75       47   <chr [0]>
##  8 R5-D4                  97    32       NA   <chr [0]>
##  9 Biggs Darklighter     183    84       24   <chr [1]>
## 10 Obi-Wan Kenobi        182    77       57   <chr [5]>
## # ... with 77 more rows
# Drop eye_color and keep every other column.
select(starwars, -eye_color)
## # A tibble: 87 x 12
##    name     height  mass hair_color skin_color birth_year gender homeworld
##    <chr>     <int> <dbl> <chr>      <chr>           <dbl> <chr>  <chr>    
##  1 Luke Sk…    172    77 blond      fair             19   male   Tatooine 
##  2 C-3PO       167    75 <NA>       gold            112   <NA>   Tatooine 
##  3 R2-D2        96    32 <NA>       white, bl…       33   <NA>   Naboo    
##  4 Darth V…    202   136 none       white            41.9 male   Tatooine 
##  5 Leia Or…    150    49 brown      light            19   female Alderaan 
##  6 Owen La…    178   120 brown, gr… light            52   male   Tatooine 
##  7 Beru Wh…    165    75 brown      light            47   female Tatooine 
##  8 R5-D4        97    32 <NA>       white, red       NA   <NA>   Tatooine 
##  9 Biggs D…    183    84 black      light            24   male   Tatooine 
## 10 Obi-Wan…    182    77 auburn, w… fair             57   male   Stewjon  
## # ... with 77 more rows, and 4 more variables: species <chr>,
## #   films <list>, vehicles <list>, starships <list>
# Keep the columns whose names start with "h".
select(starwars, starts_with("h"))
## # A tibble: 87 x 3
##    height hair_color    homeworld
##     <int> <chr>         <chr>    
##  1    172 blond         Tatooine 
##  2    167 <NA>          Tatooine 
##  3     96 <NA>          Naboo    
##  4    202 none          Tatooine 
##  5    150 brown         Alderaan 
##  6    178 brown, grey   Tatooine 
##  7    165 brown         Tatooine 
##  8     97 <NA>          Tatooine 
##  9    183 black         Tatooine 
## 10    182 auburn, white Stewjon  
## # ... with 77 more rows
# Keep the columns whose names end with "r".
select(starwars, ends_with("r"))
## # A tibble: 87 x 5
##    hair_color    skin_color  eye_color birth_year gender
##    <chr>         <chr>       <chr>          <dbl> <chr> 
##  1 blond         fair        blue            19   male  
##  2 <NA>          gold        yellow         112   <NA>  
##  3 <NA>          white, blue red             33   <NA>  
##  4 none          white       yellow          41.9 male  
##  5 brown         light       brown           19   female
##  6 brown, grey   light       blue            52   male  
##  7 brown         light       blue            47   female
##  8 <NA>          white, red  red             NA   <NA>  
##  9 black         light       brown           24   male  
## 10 auburn, white fair        blue-gray       57   male  
## # ... with 77 more rows
# Keep name plus every column whose name contains "color".
select(starwars, name, contains("color"))
## # A tibble: 87 x 4
##    name               hair_color    skin_color  eye_color
##    <chr>              <chr>         <chr>       <chr>    
##  1 Luke Skywalker     blond         fair        blue     
##  2 C-3PO              <NA>          gold        yellow   
##  3 R2-D2              <NA>          white, blue red      
##  4 Darth Vader        none          white       yellow   
##  5 Leia Organa        brown         light       brown    
##  6 Owen Lars          brown, grey   light       blue     
##  7 Beru Whitesun lars brown         light       blue     
##  8 R5-D4              <NA>          white, red  red      
##  9 Biggs Darklighter  black         light       brown    
## 10 Obi-Wan Kenobi     auburn, white fair        blue-gray
## # ... with 77 more rows
# Move name and species to the front; everything() appends the remaining
# columns in their existing order.
select(starwars, name, species, everything())
## # A tibble: 87 x 13
##    name    species height  mass hair_color skin_color eye_color birth_year
##    <chr>   <chr>    <int> <dbl> <chr>      <chr>      <chr>          <dbl>
##  1 Luke S… Human      172    77 blond      fair       blue            19  
##  2 C-3PO   Droid      167    75 <NA>       gold       yellow         112  
##  3 R2-D2   Droid       96    32 <NA>       white, bl… red             33  
##  4 Darth … Human      202   136 none       white      yellow          41.9
##  5 Leia O… Human      150    49 brown      light      brown           19  
##  6 Owen L… Human      178   120 brown, gr… light      blue            52  
##  7 Beru W… Human      165    75 brown      light      blue            47  
##  8 R5-D4   Droid       97    32 <NA>       white, red red             NA  
##  9 Biggs … Human      183    84 black      light      brown           24  
## 10 Obi-Wa… Human      182    77 auburn, w… fair       blue-gray       57  
## # ... with 77 more rows, and 5 more variables: gender <chr>,
## #   homeworld <chr>, films <list>, vehicles <list>, starships <list>
# Renaming inside select() returns only the renamed column (gender, as sex).
select(starwars, sex = gender)
## # A tibble: 87 x 1
##    sex   
##    <chr> 
##  1 male  
##  2 <NA>  
##  3 <NA>  
##  4 male  
##  5 female
##  6 male  
##  7 female
##  8 <NA>  
##  9 male  
## 10 male  
## # ... with 77 more rows
# rename() changes gender to sex and keeps all the other columns.
rename(starwars, sex = gender)
## # A tibble: 87 x 13
##    name     height  mass hair_color  skin_color eye_color birth_year sex  
##    <chr>     <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr>
##  1 Luke Sk…    172    77 blond       fair       blue            19   male 
##  2 C-3PO       167    75 <NA>        gold       yellow         112   <NA> 
##  3 R2-D2        96    32 <NA>        white, bl… red             33   <NA> 
##  4 Darth V…    202   136 none        white      yellow          41.9 male 
##  5 Leia Or…    150    49 brown       light      brown           19   fema…
##  6 Owen La…    178   120 brown, grey light      blue            52   male 
##  7 Beru Wh…    165    75 brown       light      blue            47   fema…
##  8 R5-D4        97    32 <NA>        white, red red             NA   <NA> 
##  9 Biggs D…    183    84 black       light      brown           24   male 
## 10 Obi-Wan…    182    77 auburn, wh… fair       blue-gray       57   male 
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>

4. Filter rows.

# Keep the rows where eye_color is exactly "red".
filter(starwars, eye_color == "red")
## # A tibble: 5 x 13
##   name     height  mass hair_color skin_color  eye_color birth_year gender
##   <chr>     <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
## 1 R2-D2        96    32 <NA>       white, blue red               33 <NA>  
## 2 R5-D4        97    32 <NA>       white, red  red               NA <NA>  
## 3 IG-88       200   140 none       metal       red               15 none  
## 4 Bossk       190   113 none       green       red               53 male  
## 5 Nute Gu…    191    90 none       mottled gr… red               NA male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows where eye_color is exactly "red" (same call as the previous
# example).
filter(starwars, eye_color == "red")
## # A tibble: 5 x 13
##   name     height  mass hair_color skin_color  eye_color birth_year gender
##   <chr>     <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
## 1 R2-D2        96    32 <NA>       white, blue red               33 <NA>  
## 2 R5-D4        97    32 <NA>       white, red  red               NA <NA>  
## 3 IG-88       200   140 none       metal       red               15 none  
## 4 Bossk       190   113 none       green       red               53 male  
## 5 Nute Gu…    191    90 none       mottled gr… red               NA male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows where eye_color is anything other than "blue".
filter(starwars, eye_color != "blue")
## # A tibble: 68 x 13
##    name    height  mass hair_color  skin_color eye_color birth_year gender
##    <chr>    <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr> 
##  1 C-3PO      167    75 <NA>        gold       yellow         112   <NA>  
##  2 R2-D2       96    32 <NA>        white, bl… red             33   <NA>  
##  3 Darth …    202   136 none        white      yellow          41.9 male  
##  4 Leia O…    150    49 brown       light      brown           19   female
##  5 R5-D4       97    32 <NA>        white, red red             NA   <NA>  
##  6 Biggs …    183    84 black       light      brown           24   male  
##  7 Obi-Wa…    182    77 auburn, wh… fair       blue-gray       57   male  
##  8 Han So…    180    80 brown       fair       brown           29   male  
##  9 Greedo     173    74 <NA>        green      black           44   male  
## 10 Jabba …    175  1358 <NA>        green-tan… orange         600   herma…
## # ... with 58 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Keep the rows whose eye_color is either "red" or "yellow".
filter(starwars, eye_color %in% c("red", "yellow"))
## # A tibble: 16 x 13
##    name    height  mass hair_color skin_color  eye_color birth_year gender
##    <chr>    <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
##  1 C-3PO      167    75 <NA>       gold        yellow         112   <NA>  
##  2 R2-D2       96    32 <NA>       white, blue red             33   <NA>  
##  3 Darth …    202   136 none       white       yellow          41.9 male  
##  4 R5-D4       97    32 <NA>       white, red  red             NA   <NA>  
##  5 Palpat…    170    75 grey       pale        yellow          82   male  
##  6 IG-88      200   140 none       metal       red             15   none  
##  7 Bossk      190   113 none       green       red             53   male  
##  8 Nute G…    191    90 none       mottled gr… red             NA   male  
##  9 Watto      137    NA black      blue, grey  yellow          NA   male  
## 10 Darth …    175    80 none       red         yellow          54   male  
## 11 Dud Bo…     94    45 none       blue, grey  yellow          NA   male  
## 12 Ki-Adi…    198    82 white      pale        yellow          92   male  
## 13 Yarael…    264    NA none       white       yellow          NA   male  
## 14 Poggle…    183    80 none       green       yellow          NA   male  
## 15 Zam We…    168    55 blonde     fair, gree… yellow          NA   female
## 16 Dexter…    198   102 none       brown       yellow          NA   male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Negate %in% to keep the rows whose eye_color is neither "red" nor "yellow".
filter(starwars, !eye_color %in% c("red", "yellow"))
## # A tibble: 71 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  
##  2 Leia Or…    150    49 brown      light      brown           19   female
##  3 Owen La…    178   120 brown, gr… light      blue            52   male  
##  4 Beru Wh…    165    75 brown      light      blue            47   female
##  5 Biggs D…    183    84 black      light      brown           24   male  
##  6 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  
##  7 Anakin …    188    84 blond      fair       blue            41.9 male  
##  8 Wilhuff…    180    NA auburn, g… fair       blue            64   male  
##  9 Chewbac…    228   112 brown      unknown    blue           200   male  
## 10 Han Solo    180    80 brown      fair       brown           29   male  
## # ... with 61 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Keep the rows with height below 100.
filter(starwars, height < 100)
## # A tibble: 7 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 R2-D2         96    32 <NA>       white, bl… red               33 <NA>  
## 2 R5-D4         97    32 <NA>       white, red red               NA <NA>  
## 3 Yoda          66    17 white      green      brown            896 male  
## 4 Wicket S…     88    20 brown      brown      brown              8 male  
## 5 Dud Bolt      94    45 none       blue, grey yellow            NA male  
## 6 Ratts Ty…     79    15 none       grey, blue unknown           NA male  
## 7 R4-P17        96    NA none       silver, r… red, blue         NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows with height below 100 (same call as the previous example).
filter(starwars, height < 100)
## # A tibble: 7 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 R2-D2         96    32 <NA>       white, bl… red               33 <NA>  
## 2 R5-D4         97    32 <NA>       white, red red               NA <NA>  
## 3 Yoda          66    17 white      green      brown            896 male  
## 4 Wicket S…     88    20 brown      brown      brown              8 male  
## 5 Dud Bolt      94    45 none       blue, grey yellow            NA male  
## 6 Ratts Ty…     79    15 none       grey, blue unknown           NA male  
## 7 R4-P17        96    NA none       silver, r… red, blue         NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows where hair_color is missing (NA).
filter(starwars, is.na(hair_color))
## # A tibble: 5 x 13
##   name     height  mass hair_color skin_color  eye_color birth_year gender
##   <chr>     <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
## 1 C-3PO       167    75 <NA>       gold        yellow           112 <NA>  
## 2 R2-D2        96    32 <NA>       white, blue red               33 <NA>  
## 3 R5-D4        97    32 <NA>       white, red  red               NA <NA>  
## 4 Greedo      173    74 <NA>       green       black             44 male  
## 5 Jabba D…    175  1358 <NA>       green-tan,… orange           600 herma…
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep the rows where hair_color is not missing.
filter(starwars, !is.na(hair_color))
## # A tibble: 82 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  
##  2 Darth V…    202   136 none       white      yellow          41.9 male  
##  3 Leia Or…    150    49 brown      light      brown           19   female
##  4 Owen La…    178   120 brown, gr… light      blue            52   male  
##  5 Beru Wh…    165    75 brown      light      blue            47   female
##  6 Biggs D…    183    84 black      light      brown           24   male  
##  7 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  
##  8 Anakin …    188    84 blond      fair       blue            41.9 male  
##  9 Wilhuff…    180    NA auburn, g… fair       blue            64   male  
## 10 Chewbac…    228   112 brown      unknown    blue           200   male  
## # ... with 72 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Comma-separated conditions are combined with AND: height below 100 and
# red eyes.
filter(starwars, height < 100, eye_color == "red")
## # A tibble: 2 x 13
##   name  height  mass hair_color skin_color  eye_color birth_year gender
##   <chr>  <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
## 1 R2-D2     96    32 <NA>       white, blue red               33 <NA>  
## 2 R5-D4     97    32 <NA>       white, red  red               NA <NA>  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Use | for OR: height below 100 or red eyes.
filter(starwars, height < 100 | eye_color == "red")
## # A tibble: 10 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 R2-D2        96    32 <NA>       white, bl… red               33 <NA>  
##  2 R5-D4        97    32 <NA>       white, red red               NA <NA>  
##  3 Yoda         66    17 white      green      brown            896 male  
##  4 IG-88       200   140 none       metal      red               15 none  
##  5 Bossk       190   113 none       green      red               53 male  
##  6 Wicket …     88    20 brown      brown      brown              8 male  
##  7 Nute Gu…    191    90 none       mottled g… red               NA male  
##  8 Dud Bolt     94    45 none       blue, grey yellow            NA male  
##  9 Ratts T…     79    15 none       grey, blue unknown           NA male  
## 10 R4-P17       96    NA none       silver, r… red, blue         NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Remove duplicate rows; none are dropped here (the result still has 87 rows).
# The list columns (films, vehicles, starships) trigger the warning shown in
# the output.
distinct(starwars)
## Warning: distinct() does not fully support columns of type `list`.
## List elements are compared by reference, see ?distinct for details.
## This affects the following columns:
## - `films`, `vehicles`, `starships`
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Luke Sk…    172    77 blond      fair       blue            19   male  
##  2 C-3PO       167    75 <NA>       gold       yellow         112   <NA>  
##  3 R2-D2        96    32 <NA>       white, bl… red             33   <NA>  
##  4 Darth V…    202   136 none       white      yellow          41.9 male  
##  5 Leia Or…    150    49 brown      light      brown           19   female
##  6 Owen La…    178   120 brown, gr… light      blue            52   male  
##  7 Beru Wh…    165    75 brown      light      blue            47   female
##  8 R5-D4        97    32 <NA>       white, red red             NA   <NA>  
##  9 Biggs D…    183    84 black      light      brown           24   male  
## 10 Obi-Wan…    182    77 auburn, w… fair       blue-gray       57   male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
set.seed(123456789) # Fix the RNG seed so the random samples are reproducible.
# Draw 10 rows at random.
sample_n(starwars, 10)
## # A tibble: 10 x 13
##    name    height  mass hair_color skin_color  eye_color birth_year gender
##    <chr>    <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
##  1 Lumina…    170  56.2 black      yellow      blue              58 female
##  2 Cordé      157  NA   brown      light       brown             NA female
##  3 Mas Am…    196  NA   none       blue        blue              NA male  
##  4 Padmé …    165  45   brown      light       brown             46 female
##  5 Grievo…    216 159   none       brown, whi… green, y…         NA male  
##  6 Wat Ta…    193  48   none       green, grey unknown           NA male  
##  7 Palpat…    170  75   grey       pale        yellow            82 male  
##  8 San Hi…    191  NA   none       grey        gold              NA male  
##  9 IG-88      200 140   none       metal       red               15 none  
## 10 Rugor …    206  NA   none       green       orange            NA male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Draw a random 5% of the rows (4 of the 87 rows here).
sample_frac(starwars, 0.05)
## # A tibble: 4 x 13
##   name    height  mass hair_color  skin_color  eye_color birth_year gender
##   <chr>    <int> <dbl> <chr>       <chr>       <chr>          <dbl> <chr> 
## 1 Owen L…    178   120 brown, grey light       blue              52 male  
## 2 R4-P17      96    NA none        silver, red red, blue         NA female
## 3 Ben Qu…    163    65 none        grey, gree… orange            NA male  
## 4 Chewba…    228   112 brown       unknown     blue             200 male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep rows 1 through 3 by position.
slice(starwars, 1:3)
## # A tibble: 3 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 Luke Sky…    172    77 blond      fair       blue              19 male  
## 2 C-3PO        167    75 <NA>       gold       yellow           112 <NA>  
## 3 R2-D2         96    32 <NA>       white, bl… red               33 <NA>  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>
# Keep rows 1 and 3 by position.
slice(starwars, c(1, 3))
## # A tibble: 2 x 13
##   name      height  mass hair_color skin_color eye_color birth_year gender
##   <chr>      <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
## 1 Luke Sky…    172    77 blond      fair       blue              19 male  
## 2 R2-D2         96    32 <NA>       white, bl… red               33 <NA>  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

5. Arrange rows.

# Sort the rows by height, smallest first.
arrange(starwars, height)
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Yoda         66    17 white      green      brown            896 male  
##  2 Ratts T…     79    15 none       grey, blue unknown           NA male  
##  3 Wicket …     88    20 brown      brown      brown              8 male  
##  4 Dud Bolt     94    45 none       blue, grey yellow            NA male  
##  5 R2-D2        96    32 <NA>       white, bl… red               33 <NA>  
##  6 R4-P17       96    NA none       silver, r… red, blue         NA female
##  7 R5-D4        97    32 <NA>       white, red red               NA <NA>  
##  8 Sebulba     112    40 none       grey, red  orange            NA male  
##  9 Gasgano     122    NA none       white, bl… black             NA male  
## 10 Watto       137    NA black      blue, grey yellow            NA male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Sort the rows by mass, breaking ties by height (both ascending).
arrange(starwars, mass, height)
## # A tibble: 87 x 13
##    name     height  mass hair_color skin_color eye_color birth_year gender
##    <chr>     <int> <dbl> <chr>      <chr>      <chr>          <dbl> <chr> 
##  1 Ratts T…     79    15 none       grey, blue unknown           NA male  
##  2 Yoda         66    17 white      green      brown            896 male  
##  3 Wicket …     88    20 brown      brown      brown              8 male  
##  4 R2-D2        96    32 <NA>       white, bl… red               33 <NA>  
##  5 R5-D4        97    32 <NA>       white, red red               NA <NA>  
##  6 Sebulba     112    40 none       grey, red  orange            NA male  
##  7 Dud Bolt     94    45 none       blue, grey yellow            NA male  
##  8 Padmé A…    165    45 brown      light      brown             46 female
##  9 Sly Moo…    178    48 none       pale       white             NA female
## 10 Wat Tam…    193    48 none       green, gr… unknown           NA male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>
# Sort the rows by height, largest first.
arrange(starwars, desc(height))
## # A tibble: 87 x 13
##    name    height  mass hair_color skin_color  eye_color birth_year gender
##    <chr>    <int> <dbl> <chr>      <chr>       <chr>          <dbl> <chr> 
##  1 Yarael…    264    NA none       white       yellow          NA   male  
##  2 Tarfful    234   136 brown      brown       blue            NA   male  
##  3 Lama Su    229    88 none       grey        black           NA   male  
##  4 Chewba…    228   112 brown      unknown     blue           200   male  
##  5 Roos T…    224    82 none       grey        orange          NA   male  
##  6 Grievo…    216   159 none       brown, whi… green, y…       NA   male  
##  7 Taun We    213    NA none       grey        black           NA   female
##  8 Rugor …    206    NA none       green       orange          NA   male  
##  9 Tion M…    206    80 none       grey        black           NA   male  
## 10 Darth …    202   136 none       white       yellow          41.9 male  
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## #   species <chr>, films <list>, vehicles <list>, starships <list>

6. Create new columns.

# Build a four-column working subset for the mutate() examples, then print it.
starwars_subset <- select(starwars, name, mass, height, eye_color)
starwars_subset
## # A tibble: 87 x 4
##    name                mass height eye_color
##    <chr>              <dbl>  <int> <chr>    
##  1 Luke Skywalker        77    172 blue     
##  2 C-3PO                 75    167 yellow   
##  3 R2-D2                 32     96 red      
##  4 Darth Vader          136    202 yellow   
##  5 Leia Organa           49    150 brown    
##  6 Owen Lars            120    178 blue     
##  7 Beru Whitesun lars    75    165 blue     
##  8 R5-D4                 32     97 red      
##  9 Biggs Darklighter     84    183 brown    
## 10 Obi-Wan Kenobi        77    182 blue-gray
## # ... with 77 more rows
# Add a mass_height_ratio column alongside the existing columns.
mutate(starwars_subset, mass_height_ratio = mass / height)
## # A tibble: 87 x 5
##    name                mass height eye_color mass_height_ratio
##    <chr>              <dbl>  <int> <chr>                 <dbl>
##  1 Luke Skywalker        77    172 blue                  0.448
##  2 C-3PO                 75    167 yellow                0.449
##  3 R2-D2                 32     96 red                   0.333
##  4 Darth Vader          136    202 yellow                0.673
##  5 Leia Organa           49    150 brown                 0.327
##  6 Owen Lars            120    178 blue                  0.674
##  7 Beru Whitesun lars    75    165 blue                  0.455
##  8 R5-D4                 32     97 red                   0.330
##  9 Biggs Darklighter     84    183 brown                 0.459
## 10 Obi-Wan Kenobi        77    182 blue-gray             0.423
## # ... with 77 more rows
# transmute() computes the same ratio but returns only the new column.
transmute(starwars_subset, mass_height_ratio = mass / height)
## # A tibble: 87 x 1
##    mass_height_ratio
##                <dbl>
##  1             0.448
##  2             0.449
##  3             0.333
##  4             0.673
##  5             0.327
##  6             0.674
##  7             0.455
##  8             0.330
##  9             0.459
## 10             0.423
## # ... with 77 more rows
# Add a bmi column: mass divided by the square of (height / 100).
mutate(starwars_subset, bmi = mass / ((height / 100)^2))
## # A tibble: 87 x 5
##    name                mass height eye_color   bmi
##    <chr>              <dbl>  <int> <chr>     <dbl>
##  1 Luke Skywalker        77    172 blue       26.0
##  2 C-3PO                 75    167 yellow     26.9
##  3 R2-D2                 32     96 red        34.7
##  4 Darth Vader          136    202 yellow     33.3
##  5 Leia Organa           49    150 brown      21.8
##  6 Owen Lars            120    178 blue       37.9
##  7 Beru Whitesun lars    75    165 blue       27.5
##  8 R5-D4                 32     97 red        34.0
##  9 Biggs Darklighter     84    183 brown      25.1
## 10 Obi-Wan Kenobi        77    182 blue-gray  23.2
## # ... with 77 more rows
# Add a logical column that is TRUE when height is below 100.
mutate(starwars_subset, short = height < 100)
## # A tibble: 87 x 5
##    name                mass height eye_color short
##    <chr>              <dbl>  <int> <chr>     <lgl>
##  1 Luke Skywalker        77    172 blue      FALSE
##  2 C-3PO                 75    167 yellow    FALSE
##  3 R2-D2                 32     96 red       TRUE 
##  4 Darth Vader          136    202 yellow    FALSE
##  5 Leia Organa           49    150 brown     FALSE
##  6 Owen Lars            120    178 blue      FALSE
##  7 Beru Whitesun lars    75    165 blue      FALSE
##  8 R5-D4                 32     97 red       TRUE 
##  9 Biggs Darklighter     84    183 brown     FALSE
## 10 Obi-Wan Kenobi        77    182 blue-gray FALSE
## # ... with 77 more rows
# Add a logical column that is TRUE when eye_color is "red".
mutate(starwars_subset, red_eyes = eye_color == "red")
## # A tibble: 87 x 5
##    name                mass height eye_color red_eyes
##    <chr>              <dbl>  <int> <chr>     <lgl>   
##  1 Luke Skywalker        77    172 blue      FALSE   
##  2 C-3PO                 75    167 yellow    FALSE   
##  3 R2-D2                 32     96 red       TRUE    
##  4 Darth Vader          136    202 yellow    FALSE   
##  5 Leia Organa           49    150 brown     FALSE   
##  6 Owen Lars            120    178 blue      FALSE   
##  7 Beru Whitesun lars    75    165 blue      FALSE   
##  8 R5-D4                 32     97 red       TRUE    
##  9 Biggs Darklighter     84    183 brown     FALSE   
## 10 Obi-Wan Kenobi        77    182 blue-gray FALSE   
## # ... with 77 more rows
# Flag eye colours other than "blue" or "brown". `%in%` already yields a
# logical vector, so no if_else() wrapper is needed to produce TRUE/FALSE.
mutate(starwars_subset, eyes_unusual = !(eye_color %in% c("blue", "brown")))
## # A tibble: 87 x 5
##    name                mass height eye_color eyes_unusual
##    <chr>              <dbl>  <int> <chr>     <lgl>       
##  1 Luke Skywalker        77    172 blue      FALSE       
##  2 C-3PO                 75    167 yellow    TRUE        
##  3 R2-D2                 32     96 red       TRUE        
##  4 Darth Vader          136    202 yellow    TRUE        
##  5 Leia Organa           49    150 brown     FALSE       
##  6 Owen Lars            120    178 blue      FALSE       
##  7 Beru Whitesun lars    75    165 blue      FALSE       
##  8 R5-D4                 32     97 red       TRUE        
##  9 Biggs Darklighter     84    183 brown     FALSE       
## 10 Obi-Wan Kenobi        77    182 blue-gray TRUE        
## # ... with 77 more rows
# Bucket each character into a height band. The conditions are tried in
# order, so "medium" only applies to heights above 100.
starwars_subset %>%
  mutate(
    type = case_when(
      height <= 100 ~ "short",
      height <= 150 ~ "medium",
      height > 150 ~ "tall"
    )
  )
## # A tibble: 87 x 5
##    name                mass height eye_color type  
##    <chr>              <dbl>  <int> <chr>     <chr> 
##  1 Luke Skywalker        77    172 blue      tall  
##  2 C-3PO                 75    167 yellow    tall  
##  3 R2-D2                 32     96 red       short 
##  4 Darth Vader          136    202 yellow    tall  
##  5 Leia Organa           49    150 brown     medium
##  6 Owen Lars            120    178 blue      tall  
##  7 Beru Whitesun lars    75    165 blue      tall  
##  8 R5-D4                 32     97 red       short 
##  9 Biggs Darklighter     84    183 brown     tall  
## 10 Obi-Wan Kenobi        77    182 blue-gray tall  
## # ... with 77 more rows

7. Use the pipe (%>%) to perform lots of operations in one go.

# Humans only, tallest first, keeping just the name, species and height.
starwars %>%
  filter(species == "Human") %>%
  select(name, species, height) %>%
  arrange(desc(height))
## # A tibble: 35 x 3
##    name                species height
##    <chr>               <chr>    <int>
##  1 Darth Vader         Human      202
##  2 Qui-Gon Jinn        Human      193
##  3 Dooku               Human      193
##  4 Bail Prestor Organa Human      191
##  5 Anakin Skywalker    Human      188
##  6 Mace Windu          Human      188
##  7 Raymus Antilles     Human      188
##  8 Gregar Typho        Human      185
##  9 Biggs Darklighter   Human      183
## 10 Boba Fett           Human      183
## # ... with 25 more rows
# Fix the RNG seed so the random draw below is reproducible.
set.seed(123456789)
# Draw one random character per eye colour; the result stays grouped.
starwars %>%
  group_by(eye_color) %>%
  sample_n(size = 1)
## # A tibble: 15 x 13
## # Groups:   eye_color [15]
##    name    height  mass hair_color  skin_color eye_color birth_year gender
##    <chr>    <int> <dbl> <chr>       <chr>      <chr>          <dbl> <chr> 
##  1 Taun We    213    NA none        grey       black             NA female
##  2 Adi Ga…    184    50 none        dark       blue              NA female
##  3 Obi-Wa…    182    77 auburn, wh… fair       blue-gray         57 male  
##  4 Dooku      193    80 white       fair       brown            102 male  
##  5 Finn        NA    NA black       dark       dark              NA male  
##  6 San Hi…    191    NA none        grey       gold              NA male  
##  7 Grievo…    216   159 none        brown, wh… green, y…         NA male  
##  8 Rey         NA    NA brown       light      hazel             NA female
##  9 Jar Ja…    196    66 none        orange     orange            52 male  
## 10 Bib Fo…    180    NA none        pale       pink              NA male  
## 11 R2-D2       96    32 <NA>        white, bl… red               33 <NA>  
## 12 R4-P17      96    NA none        silver, r… red, blue         NA female
## 13 Wat Ta…    193    48 none        green, gr… unknown           NA male  
## 14 Sly Mo…    178    48 none        pale       white             NA female
## 15 Darth …    175    80 none        red        yellow            54 male  
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## #   vehicles <list>, starships <list>

8. Calculate summary statistics.

# Mean height across all characters, ignoring missing values.
summarise(starwars, height_mean = mean(height, na.rm = TRUE))
## # A tibble: 1 x 1
##   height_mean
##         <dbl>
## 1        174.
# Maximum height across all characters, ignoring missing values.
summarise(starwars, height_max = max(height, na.rm = TRUE))
## # A tibble: 1 x 1
##   height_max
##        <int>
## 1        264
# Mean height within each gender, ignoring missing values.
starwars %>%
  group_by(gender) %>%
  summarise(height_mean = mean(height, na.rm = TRUE))
## # A tibble: 5 x 2
##   gender        height_mean
##   <chr>               <dbl>
## 1 female               165.
## 2 hermaphrodite        175 
## 3 male                 179.
## 4 none                 200 
## 5 <NA>                 120
# Mean height for every gender / eye-colour combination, ignoring missing
# values. Combinations with no recorded height come out as NaN, and the
# result remains grouped by gender.
starwars %>%
  group_by(gender, eye_color) %>%
  summarise(height_mean = mean(height, na.rm = TRUE))
## # A tibble: 26 x 3
## # Groups:   gender [?]
##    gender        eye_color height_mean
##    <chr>         <chr>           <dbl>
##  1 female        black            196.
##  2 female        blue             167 
##  3 female        brown            160 
##  4 female        hazel            178 
##  5 female        red, blue         96 
##  6 female        unknown          NaN 
##  7 female        white            178 
##  8 female        yellow           168 
##  9 hermaphrodite orange           175 
## 10 male          black            182 
## # ... with 16 more rows
# Count the rows in each gender group with n().
starwars %>%
  group_by(gender) %>%
  summarise(n = n())
## # A tibble: 5 x 2
##   gender            n
##   <chr>         <int>
## 1 female           19
## 2 hermaphrodite     1
## 3 male             62
## 4 none              2
## 5 <NA>              3
# Per-gender row counts via count(); here the result stays grouped by gender.
starwars %>%
  group_by(gender) %>%
  count()
## # A tibble: 5 x 2
## # Groups:   gender [5]
##   gender            n
##   <chr>         <int>
## 1 female           19
## 2 hermaphrodite     1
## 3 male             62
## 4 none              2
## 5 <NA>              3
# Mean mass within each gender, ignoring missing values.
starwars %>%
  group_by(gender) %>%
  summarise(mean_mass = mean(mass, na.rm = TRUE))
## # A tibble: 5 x 2
##   gender        mean_mass
##   <chr>             <dbl>
## 1 female             54.0
## 2 hermaphrodite    1358  
## 3 male               81.0
## 4 none              140  
## 5 <NA>               46.3

9. Convert a data.frame into a print-friendly tbl.

# Print iris while it is still a plain data.frame: all 150 rows are written out.
iris
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
# Inspect the class of iris before converting it.
class(iris)
## [1] "data.frame"
# Convert iris to a tibble, then inspect its class vector again.
iris <- iris %>% as_tibble()
class(iris)
## [1] "tbl_df"     "tbl"        "data.frame"
# Print iris as a tibble: only the first 10 rows are shown, with column types.
iris
## # A tibble: 150 x 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ... with 140 more rows