# dplyr training 1
#
# The purpose of this notebook is to illustrate how the dplyr package can be
# used to perform basic data manipulation tasks.
library(tidyverse)
# Print the starwars tibble; the console shows the first 10 rows and as many
# columns as fit.
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# NOTE(review): repeats the previous statement with identical output; possibly
# a duplicated chunk. Confirm whether it is intentional.
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Keep only the name and eye_color columns.
select(starwars, name, eye_color)
## # A tibble: 87 x 2
## name eye_color
## <chr> <chr>
## 1 Luke Skywalker blue
## 2 C-3PO yellow
## 3 R2-D2 red
## 4 Darth Vader yellow
## 5 Leia Organa brown
## 6 Owen Lars blue
## 7 Beru Whitesun lars blue
## 8 R5-D4 red
## 9 Biggs Darklighter brown
## 10 Obi-Wan Kenobi blue-gray
## # ... with 77 more rows
# Select the contiguous range of columns from mass through eye_color, plus
# starships.
select(starwars, mass:eye_color, starships)
## # A tibble: 87 x 5
## mass hair_color skin_color eye_color starships
## <dbl> <chr> <chr> <chr> <list>
## 1 77 blond fair blue <chr [2]>
## 2 75 <NA> gold yellow <chr [0]>
## 3 32 <NA> white, blue red <chr [0]>
## 4 136 none white yellow <chr [1]>
## 5 49 brown light brown <chr [0]>
## 6 120 brown, grey light blue <chr [0]>
## 7 75 brown light blue <chr [0]>
## 8 32 <NA> white, red red <chr [0]>
## 9 84 black light brown <chr [1]>
## 10 77 auburn, white fair blue-gray <chr [5]>
## # ... with 77 more rows
# Select columns by position (1 to 3 and 7) together with a column name.
select(starwars, 1:3, 7, starships)
## # A tibble: 87 x 5
## name height mass birth_year starships
## <chr> <int> <dbl> <dbl> <list>
## 1 Luke Skywalker 172 77 19 <chr [2]>
## 2 C-3PO 167 75 112 <chr [0]>
## 3 R2-D2 96 32 33 <chr [0]>
## 4 Darth Vader 202 136 41.9 <chr [1]>
## 5 Leia Organa 150 49 19 <chr [0]>
## 6 Owen Lars 178 120 52 <chr [0]>
## 7 Beru Whitesun lars 165 75 47 <chr [0]>
## 8 R5-D4 97 32 NA <chr [0]>
## 9 Biggs Darklighter 183 84 24 <chr [1]>
## 10 Obi-Wan Kenobi 182 77 57 <chr [5]>
## # ... with 77 more rows
# Drop eye_color and keep every other column.
select(starwars, -eye_color)
## # A tibble: 87 x 12
## name height mass hair_color skin_color birth_year gender homeworld
## <chr> <int> <dbl> <chr> <chr> <dbl> <chr> <chr>
## 1 Luke Sk… 172 77 blond fair 19 male Tatooine
## 2 C-3PO 167 75 <NA> gold 112 <NA> Tatooine
## 3 R2-D2 96 32 <NA> white, bl… 33 <NA> Naboo
## 4 Darth V… 202 136 none white 41.9 male Tatooine
## 5 Leia Or… 150 49 brown light 19 female Alderaan
## 6 Owen La… 178 120 brown, gr… light 52 male Tatooine
## 7 Beru Wh… 165 75 brown light 47 female Tatooine
## 8 R5-D4 97 32 <NA> white, red NA <NA> Tatooine
## 9 Biggs D… 183 84 black light 24 male Tatooine
## 10 Obi-Wan… 182 77 auburn, w… fair 57 male Stewjon
## # ... with 77 more rows, and 4 more variables: species <chr>,
## # films <list>, vehicles <list>, starships <list>
# Keep the columns whose names start with "h".
select(starwars, starts_with("h"))
## # A tibble: 87 x 3
## height hair_color homeworld
## <int> <chr> <chr>
## 1 172 blond Tatooine
## 2 167 <NA> Tatooine
## 3 96 <NA> Naboo
## 4 202 none Tatooine
## 5 150 brown Alderaan
## 6 178 brown, grey Tatooine
## 7 165 brown Tatooine
## 8 97 <NA> Tatooine
## 9 183 black Tatooine
## 10 182 auburn, white Stewjon
## # ... with 77 more rows
# Keep the columns whose names end with "r".
select(starwars, ends_with("r"))
## # A tibble: 87 x 5
## hair_color skin_color eye_color birth_year gender
## <chr> <chr> <chr> <dbl> <chr>
## 1 blond fair blue 19 male
## 2 <NA> gold yellow 112 <NA>
## 3 <NA> white, blue red 33 <NA>
## 4 none white yellow 41.9 male
## 5 brown light brown 19 female
## 6 brown, grey light blue 52 male
## 7 brown light blue 47 female
## 8 <NA> white, red red NA <NA>
## 9 black light brown 24 male
## 10 auburn, white fair blue-gray 57 male
## # ... with 77 more rows
# Keep name plus every column whose name contains "color".
select(starwars, name, contains("color"))
## # A tibble: 87 x 4
## name hair_color skin_color eye_color
## <chr> <chr> <chr> <chr>
## 1 Luke Skywalker blond fair blue
## 2 C-3PO <NA> gold yellow
## 3 R2-D2 <NA> white, blue red
## 4 Darth Vader none white yellow
## 5 Leia Organa brown light brown
## 6 Owen Lars brown, grey light blue
## 7 Beru Whitesun lars brown light blue
## 8 R5-D4 <NA> white, red red
## 9 Biggs Darklighter black light brown
## 10 Obi-Wan Kenobi auburn, white fair blue-gray
## # ... with 77 more rows
# Move name and species to the front; everything() keeps the remaining columns
# in their existing order.
select(starwars, name, species, everything())
## # A tibble: 87 x 13
## name species height mass hair_color skin_color eye_color birth_year
## <chr> <chr> <int> <dbl> <chr> <chr> <chr> <dbl>
## 1 Luke S… Human 172 77 blond fair blue 19
## 2 C-3PO Droid 167 75 <NA> gold yellow 112
## 3 R2-D2 Droid 96 32 <NA> white, bl… red 33
## 4 Darth … Human 202 136 none white yellow 41.9
## 5 Leia O… Human 150 49 brown light brown 19
## 6 Owen L… Human 178 120 brown, gr… light blue 52
## 7 Beru W… Human 165 75 brown light blue 47
## 8 R5-D4 Droid 97 32 <NA> white, red red NA
## 9 Biggs … Human 183 84 black light brown 24
## 10 Obi-Wa… Human 182 77 auburn, w… fair blue-gray 57
## # ... with 77 more rows, and 5 more variables: gender <chr>,
## # homeworld <chr>, films <list>, vehicles <list>, starships <list>
# Renaming inside select() returns only the renamed column (gender as sex).
select(starwars, sex = gender)
## # A tibble: 87 x 1
## sex
## <chr>
## 1 male
## 2 <NA>
## 3 <NA>
## 4 male
## 5 female
## 6 male
## 7 female
## 8 <NA>
## 9 male
## 10 male
## # ... with 77 more rows
# rename() changes gender to sex and keeps all the other columns.
rename(starwars, sex = gender)
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year sex
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 fema…
## 6 Owen La… 178 120 brown, grey light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 fema…
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, wh… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Keep the rows where eye_color is exactly "red".
filter(starwars, eye_color == "red")
## # A tibble: 5 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, blue red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## 3 IG-88 200 140 none metal red 15 none
## 4 Bossk 190 113 none green red 53 male
## 5 Nute Gu… 191 90 none mottled gr… red NA male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# NOTE(review): repeats the previous statement with identical output; possibly
# a duplicated chunk. Confirm whether it is intentional.
filter(starwars, eye_color == "red")
## # A tibble: 5 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, blue red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## 3 IG-88 200 140 none metal red 15 none
## 4 Bossk 190 113 none green red 53 male
## 5 Nute Gu… 191 90 none mottled gr… red NA male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows where eye_color is not "blue".
filter(starwars, eye_color != "blue")
## # A tibble: 68 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 2 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 3 Darth … 202 136 none white yellow 41.9 male
## 4 Leia O… 150 49 brown light brown 19 female
## 5 R5-D4 97 32 <NA> white, red red NA <NA>
## 6 Biggs … 183 84 black light brown 24 male
## 7 Obi-Wa… 182 77 auburn, wh… fair blue-gray 57 male
## 8 Han So… 180 80 brown fair brown 29 male
## 9 Greedo 173 74 <NA> green black 44 male
## 10 Jabba … 175 1358 <NA> green-tan… orange 600 herma…
## # ... with 58 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Keep the rows whose eye_color is either "red" or "yellow".
filter(starwars, eye_color %in% c("red", "yellow"))
## # A tibble: 16 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 2 R2-D2 96 32 <NA> white, blue red 33 <NA>
## 3 Darth … 202 136 none white yellow 41.9 male
## 4 R5-D4 97 32 <NA> white, red red NA <NA>
## 5 Palpat… 170 75 grey pale yellow 82 male
## 6 IG-88 200 140 none metal red 15 none
## 7 Bossk 190 113 none green red 53 male
## 8 Nute G… 191 90 none mottled gr… red NA male
## 9 Watto 137 NA black blue, grey yellow NA male
## 10 Darth … 175 80 none red yellow 54 male
## 11 Dud Bo… 94 45 none blue, grey yellow NA male
## 12 Ki-Adi… 198 82 white pale yellow 92 male
## 13 Yarael… 264 NA none white yellow NA male
## 14 Poggle… 183 80 none green yellow NA male
## 15 Zam We… 168 55 blonde fair, gree… yellow NA female
## 16 Dexter… 198 102 none brown yellow NA male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Negate %in% to keep the rows whose eye_color is neither "red" nor "yellow".
filter(starwars, !eye_color %in% c("red", "yellow"))
## # A tibble: 71 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 Leia Or… 150 49 brown light brown 19 female
## 3 Owen La… 178 120 brown, gr… light blue 52 male
## 4 Beru Wh… 165 75 brown light blue 47 female
## 5 Biggs D… 183 84 black light brown 24 male
## 6 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## 7 Anakin … 188 84 blond fair blue 41.9 male
## 8 Wilhuff… 180 NA auburn, g… fair blue 64 male
## 9 Chewbac… 228 112 brown unknown blue 200 male
## 10 Han Solo 180 80 brown fair brown 29 male
## # ... with 61 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Keep the rows with a height below 100.
filter(starwars, height < 100)
## # A tibble: 7 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## 3 Yoda 66 17 white green brown 896 male
## 4 Wicket S… 88 20 brown brown brown 8 male
## 5 Dud Bolt 94 45 none blue, grey yellow NA male
## 6 Ratts Ty… 79 15 none grey, blue unknown NA male
## 7 R4-P17 96 NA none silver, r… red, blue NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# NOTE(review): repeats the previous statement with identical output; possibly
# a duplicated chunk. Confirm whether it is intentional.
filter(starwars, height < 100)
## # A tibble: 7 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## 3 Yoda 66 17 white green brown 896 male
## 4 Wicket S… 88 20 brown brown brown 8 male
## 5 Dud Bolt 94 45 none blue, grey yellow NA male
## 6 Ratts Ty… 79 15 none grey, blue unknown NA male
## 7 R4-P17 96 NA none silver, r… red, blue NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows where hair_color is missing (NA).
filter(starwars, is.na(hair_color))
## # A tibble: 5 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 2 R2-D2 96 32 <NA> white, blue red 33 <NA>
## 3 R5-D4 97 32 <NA> white, red red NA <NA>
## 4 Greedo 173 74 <NA> green black 44 male
## 5 Jabba D… 175 1358 <NA> green-tan,… orange 600 herma…
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep the rows where hair_color is not missing.
filter(starwars, !is.na(hair_color))
## # A tibble: 82 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 Darth V… 202 136 none white yellow 41.9 male
## 3 Leia Or… 150 49 brown light brown 19 female
## 4 Owen La… 178 120 brown, gr… light blue 52 male
## 5 Beru Wh… 165 75 brown light blue 47 female
## 6 Biggs D… 183 84 black light brown 24 male
## 7 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## 8 Anakin … 188 84 blond fair blue 41.9 male
## 9 Wilhuff… 180 NA auburn, g… fair blue 64 male
## 10 Chewbac… 228 112 brown unknown blue 200 male
## # ... with 72 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Conditions separated by commas must all hold (logical AND).
filter(starwars, height < 100, eye_color == "red")
## # A tibble: 2 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, blue red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Use | to keep the rows that satisfy either condition (logical OR).
filter(starwars, height < 100 | eye_color == "red")
## # A tibble: 10 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 2 R5-D4 97 32 <NA> white, red red NA <NA>
## 3 Yoda 66 17 white green brown 896 male
## 4 IG-88 200 140 none metal red 15 none
## 5 Bossk 190 113 none green red 53 male
## 6 Wicket … 88 20 brown brown brown 8 male
## 7 Nute Gu… 191 90 none mottled g… red NA male
## 8 Dud Bolt 94 45 none blue, grey yellow NA male
## 9 Ratts T… 79 15 none grey, blue unknown NA male
## 10 R4-P17 96 NA none silver, r… red, blue NA female
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep only unique rows (all 87 rows are retained here). The list columns
# trigger the warning printed below.
distinct(starwars)
## Warning: distinct() does not fully support columns of type `list`.
## List elements are compared by reference, see ?distinct for details.
## This affects the following columns:
## - `films`, `vehicles`, `starships`
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 4 Darth V… 202 136 none white yellow 41.9 male
## 5 Leia Or… 150 49 brown light brown 19 female
## 6 Owen La… 178 120 brown, gr… light blue 52 male
## 7 Beru Wh… 165 75 brown light blue 47 female
## 8 R5-D4 97 32 <NA> white, red red NA <NA>
## 9 Biggs D… 183 84 black light brown 24 male
## 10 Obi-Wan… 182 77 auburn, w… fair blue-gray 57 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
set.seed(123456789) # ensures random sampling is reproducible
# Draw 10 rows at random.
sample_n(starwars, 10)
## # A tibble: 10 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Lumina… 170 56.2 black yellow blue 58 female
## 2 Cordé 157 NA brown light brown NA female
## 3 Mas Am… 196 NA none blue blue NA male
## 4 Padmé … 165 45 brown light brown 46 female
## 5 Grievo… 216 159 none brown, whi… green, y… NA male
## 6 Wat Ta… 193 48 none green, grey unknown NA male
## 7 Palpat… 170 75 grey pale yellow 82 male
## 8 San Hi… 191 NA none grey gold NA male
## 9 IG-88 200 140 none metal red 15 none
## 10 Rugor … 206 NA none green orange NA male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Draw a random 5% of the rows (4 rows here).
sample_frac(starwars, 0.05)
## # A tibble: 4 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Owen L… 178 120 brown, grey light blue 52 male
## 2 R4-P17 96 NA none silver, red red, blue NA female
## 3 Ben Qu… 163 65 none grey, gree… orange NA male
## 4 Chewba… 228 112 brown unknown blue 200 male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep rows 1 to 3 by position.
slice(starwars, 1:3)
## # A tibble: 3 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sky… 172 77 blond fair blue 19 male
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Keep rows 1 and 3 by position.
slice(starwars, c(1, 3))
## # A tibble: 2 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sky… 172 77 blond fair blue 19 male
## 2 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Sort the rows by height, smallest first.
arrange(starwars, height)
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Yoda 66 17 white green brown 896 male
## 2 Ratts T… 79 15 none grey, blue unknown NA male
## 3 Wicket … 88 20 brown brown brown 8 male
## 4 Dud Bolt 94 45 none blue, grey yellow NA male
## 5 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 6 R4-P17 96 NA none silver, r… red, blue NA female
## 7 R5-D4 97 32 <NA> white, red red NA <NA>
## 8 Sebulba 112 40 none grey, red orange NA male
## 9 Gasgano 122 NA none white, bl… black NA male
## 10 Watto 137 NA black blue, grey yellow NA male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Sort by mass, using height to order rows that share the same mass.
arrange(starwars, mass, height)
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Ratts T… 79 15 none grey, blue unknown NA male
## 2 Yoda 66 17 white green brown 896 male
## 3 Wicket … 88 20 brown brown brown 8 male
## 4 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 5 R5-D4 97 32 <NA> white, red red NA <NA>
## 6 Sebulba 112 40 none grey, red orange NA male
## 7 Dud Bolt 94 45 none blue, grey yellow NA male
## 8 Padmé A… 165 45 brown light brown 46 female
## 9 Sly Moo… 178 48 none pale white NA female
## 10 Wat Tam… 193 48 none green, gr… unknown NA male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Sort the rows by height, largest first.
arrange(starwars, desc(height))
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Yarael… 264 NA none white yellow NA male
## 2 Tarfful 234 136 brown brown blue NA male
## 3 Lama Su 229 88 none grey black NA male
## 4 Chewba… 228 112 brown unknown blue 200 male
## 5 Roos T… 224 82 none grey orange NA male
## 6 Grievo… 216 159 none brown, whi… green, y… NA male
## 7 Taun We 213 NA none grey black NA female
## 8 Rugor … 206 NA none green orange NA male
## 9 Tion M… 206 80 none grey black NA male
## 10 Darth … 202 136 none white yellow 41.9 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Store a four-column subset; the mutate()/transmute() examples that follow
# operate on it.
starwars_subset <- select(starwars, name, mass, height, eye_color)
starwars_subset
## # A tibble: 87 x 4
## name mass height eye_color
## <chr> <dbl> <int> <chr>
## 1 Luke Skywalker 77 172 blue
## 2 C-3PO 75 167 yellow
## 3 R2-D2 32 96 red
## 4 Darth Vader 136 202 yellow
## 5 Leia Organa 49 150 brown
## 6 Owen Lars 120 178 blue
## 7 Beru Whitesun lars 75 165 blue
## 8 R5-D4 32 97 red
## 9 Biggs Darklighter 84 183 brown
## 10 Obi-Wan Kenobi 77 182 blue-gray
## # ... with 77 more rows
# Add a column computed from existing columns; all original columns are kept.
mutate(starwars_subset, mass_height_ratio = mass / height)
## # A tibble: 87 x 5
## name mass height eye_color mass_height_ratio
## <chr> <dbl> <int> <chr> <dbl>
## 1 Luke Skywalker 77 172 blue 0.448
## 2 C-3PO 75 167 yellow 0.449
## 3 R2-D2 32 96 red 0.333
## 4 Darth Vader 136 202 yellow 0.673
## 5 Leia Organa 49 150 brown 0.327
## 6 Owen Lars 120 178 blue 0.674
## 7 Beru Whitesun lars 75 165 blue 0.455
## 8 R5-D4 32 97 red 0.330
## 9 Biggs Darklighter 84 183 brown 0.459
## 10 Obi-Wan Kenobi 77 182 blue-gray 0.423
## # ... with 77 more rows
# transmute() computes the new column and returns only that column.
transmute(starwars_subset, mass_height_ratio = mass / height)
## # A tibble: 87 x 1
## mass_height_ratio
## <dbl>
## 1 0.448
## 2 0.449
## 3 0.333
## 4 0.673
## 5 0.327
## 6 0.674
## 7 0.455
## 8 0.330
## 9 0.459
## 10 0.423
## # ... with 77 more rows
# Add a BMI-style column: mass divided by the square of height / 100.
# NOTE(review): presumably height is in cm and mass in kg — confirm units.
mutate(starwars_subset, bmi = mass / ((height / 100)^2))
## # A tibble: 87 x 5
## name mass height eye_color bmi
## <chr> <dbl> <int> <chr> <dbl>
## 1 Luke Skywalker 77 172 blue 26.0
## 2 C-3PO 75 167 yellow 26.9
## 3 R2-D2 32 96 red 34.7
## 4 Darth Vader 136 202 yellow 33.3
## 5 Leia Organa 49 150 brown 21.8
## 6 Owen Lars 120 178 blue 37.9
## 7 Beru Whitesun lars 75 165 blue 27.5
## 8 R5-D4 32 97 red 34.0
## 9 Biggs Darklighter 84 183 brown 25.1
## 10 Obi-Wan Kenobi 77 182 blue-gray 23.2
## # ... with 77 more rows
# Add a logical column that is TRUE where height is below 100.
mutate(starwars_subset, short = height < 100)
## # A tibble: 87 x 5
## name mass height eye_color short
## <chr> <dbl> <int> <chr> <lgl>
## 1 Luke Skywalker 77 172 blue FALSE
## 2 C-3PO 75 167 yellow FALSE
## 3 R2-D2 32 96 red TRUE
## 4 Darth Vader 136 202 yellow FALSE
## 5 Leia Organa 49 150 brown FALSE
## 6 Owen Lars 120 178 blue FALSE
## 7 Beru Whitesun lars 75 165 blue FALSE
## 8 R5-D4 32 97 red TRUE
## 9 Biggs Darklighter 84 183 brown FALSE
## 10 Obi-Wan Kenobi 77 182 blue-gray FALSE
## # ... with 77 more rows
# Add a logical column that is TRUE where eye_color is "red".
mutate(starwars_subset, red_eyes = eye_color == "red")
## # A tibble: 87 x 5
## name mass height eye_color red_eyes
## <chr> <dbl> <int> <chr> <lgl>
## 1 Luke Skywalker 77 172 blue FALSE
## 2 C-3PO 75 167 yellow FALSE
## 3 R2-D2 32 96 red TRUE
## 4 Darth Vader 136 202 yellow FALSE
## 5 Leia Organa 49 150 brown FALSE
## 6 Owen Lars 120 178 blue FALSE
## 7 Beru Whitesun lars 75 165 blue FALSE
## 8 R5-D4 32 97 red TRUE
## 9 Biggs Darklighter 84 183 brown FALSE
## 10 Obi-Wan Kenobi 77 182 blue-gray FALSE
## # ... with 77 more rows
# Flag eye colours other than blue or brown with if_else(). The condition is
# already logical, so if_else() returns the same values as the condition
# itself. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
mutate(
  starwars_subset,
  eyes_unusual = if_else(!eye_color %in% c("blue", "brown"), TRUE, FALSE)
)
## # A tibble: 87 x 5
## name mass height eye_color eyes_unusual
## <chr> <dbl> <int> <chr> <lgl>
## 1 Luke Skywalker 77 172 blue FALSE
## 2 C-3PO 75 167 yellow TRUE
## 3 R2-D2 32 96 red TRUE
## 4 Darth Vader 136 202 yellow TRUE
## 5 Leia Organa 49 150 brown FALSE
## 6 Owen Lars 120 178 blue FALSE
## 7 Beru Whitesun lars 75 165 blue FALSE
## 8 R5-D4 32 97 red TRUE
## 9 Biggs Darklighter 84 183 brown FALSE
## 10 Obi-Wan Kenobi 77 182 blue-gray TRUE
## # ... with 77 more rows
# Bucket heights into three labels. case_when() uses the first condition that
# is TRUE for each row, so "medium" covers heights above 100 and up to 150.
# Rows with a missing height match no condition and get NA.
mutate(
  starwars_subset,
  type = case_when(
    height <= 100 ~ "short",
    height <= 150 ~ "medium",
    height > 150 ~ "tall"
  )
)
## # A tibble: 87 x 5
## name mass height eye_color type
## <chr> <dbl> <int> <chr> <chr>
## 1 Luke Skywalker 77 172 blue tall
## 2 C-3PO 75 167 yellow tall
## 3 R2-D2 32 96 red short
## 4 Darth Vader 136 202 yellow tall
## 5 Leia Organa 49 150 brown medium
## 6 Owen Lars 120 178 blue tall
## 7 Beru Whitesun lars 75 165 blue tall
## 8 R5-D4 32 97 red short
## 9 Biggs Darklighter 84 183 brown tall
## 10 Obi-Wan Kenobi 77 182 blue-gray tall
## # ... with 77 more rows
# Use the pipe (%>%) to perform lots of operations in one go.
# Each step receives the result of the previous one as its first argument:
# pick three columns, keep the humans, then sort tallest first.
starwars %>%
  select(name, species, height) %>%
  filter(species == "Human") %>%
  arrange(desc(height))
## # A tibble: 35 x 3
## name species height
## <chr> <chr> <int>
## 1 Darth Vader Human 202
## 2 Qui-Gon Jinn Human 193
## 3 Dooku Human 193
## 4 Bail Prestor Organa Human 191
## 5 Anakin Skywalker Human 188
## 6 Mace Windu Human 188
## 7 Raymus Antilles Human 188
## 8 Gregar Typho Human 185
## 9 Biggs Darklighter Human 183
## 10 Boba Fett Human 183
## # ... with 25 more rows
# Fix the seed, then draw one random row per eye_color group (15 groups, so
# 15 rows). The result stays grouped by eye_color.
set.seed(123456789)
starwars %>%
group_by(eye_color) %>%
sample_n(1)
## # A tibble: 15 x 13
## # Groups: eye_color [15]
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Taun We 213 NA none grey black NA female
## 2 Adi Ga… 184 50 none dark blue NA female
## 3 Obi-Wa… 182 77 auburn, wh… fair blue-gray 57 male
## 4 Dooku 193 80 white fair brown 102 male
## 5 Finn NA NA black dark dark NA male
## 6 San Hi… 191 NA none grey gold NA male
## 7 Grievo… 216 159 none brown, wh… green, y… NA male
## 8 Rey NA NA brown light hazel NA female
## 9 Jar Ja… 196 66 none orange orange 52 male
## 10 Bib Fo… 180 NA none pale pink NA male
## 11 R2-D2 96 32 <NA> white, bl… red 33 <NA>
## 12 R4-P17 96 NA none silver, r… red, blue NA female
## 13 Wat Ta… 193 48 none green, gr… unknown NA male
## 14 Sly Mo… 178 48 none pale white NA female
## 15 Darth … 175 80 none red yellow 54 male
## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Mean height over all rows, ignoring missing heights. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
summarise(starwars, height_mean = mean(height, na.rm = TRUE))
## # A tibble: 1 x 1
## height_mean
## <dbl>
## 1 174.
# Maximum height over all rows, ignoring missing heights. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
summarise(starwars, height_max = max(height, na.rm = TRUE))
## # A tibble: 1 x 1
## height_max
## <int>
## 1 264
# Mean height per gender, ignoring missing heights; one output row per group
# (missing gender forms its own group).
starwars %>%
  group_by(gender) %>%
  summarise(height_mean = mean(height, na.rm = TRUE))
## # A tibble: 5 x 2
## gender height_mean
## <chr> <dbl>
## 1 female 165.
## 2 hermaphrodite 175
## 3 male 179.
## 4 none 200
## 5 <NA> 120
# Mean height for every gender / eye_color combination. summarise() removes
# only the last grouping level, so the result is still grouped by gender.
# A group whose heights are all missing yields NaN.
starwars %>%
  group_by(gender, eye_color) %>%
  summarise(height_mean = mean(height, na.rm = TRUE))
## # A tibble: 26 x 3
## # Groups: gender [?]
## gender eye_color height_mean
## <chr> <chr> <dbl>
## 1 female black 196.
## 2 female blue 167
## 3 female brown 160
## 4 female hazel 178
## 5 female red, blue 96
## 6 female unknown NaN
## 7 female white 178
## 8 female yellow 168
## 9 hermaphrodite orange 175
## 10 male black 182
## # ... with 16 more rows
# Count the rows in each gender group with n().
starwars %>%
  group_by(gender) %>%
  summarise(n = n())
## # A tibble: 5 x 2
## gender n
## <chr> <int>
## 1 female 19
## 2 hermaphrodite 1
## 3 male 62
## 4 none 2
## 5 <NA> 3
# count() on a grouped tibble gives the same per-group counts as
# summarise(n = n()), but the result stays grouped by gender.
starwars %>%
group_by(gender) %>%
count()
## # A tibble: 5 x 2
## # Groups: gender [5]
## gender n
## <chr> <int>
## 1 female 19
## 2 hermaphrodite 1
## 3 male 62
## 4 none 2
## 5 <NA> 3
# Mean mass per gender, ignoring missing masses. TRUE is spelled out because
# T is an ordinary variable that can be reassigned.
starwars %>%
  group_by(gender) %>%
  summarise(mean_mass = mean(mass, na.rm = TRUE))
## # A tibble: 5 x 2
## gender mean_mass
## <chr> <dbl>
## 1 female 54.0
## 2 hermaphrodite 1358
## 3 male 81.0
## 4 none 140
## 5 <NA> 46.3
# as_tibble() converts a data.frame into a print-friendly tbl.
# Printing the plain data.frame first shows every one of its 150 rows.
iris
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Before conversion the object has the single class "data.frame".
class(iris)
## [1] "data.frame"
# Convert to a tibble. The assignment creates a variable named iris that
# masks the built-in dataset in the current environment.
iris <- as_tibble(iris)
# The tibble classes are added in front of "data.frame".
class(iris)
## [1] "tbl_df" "tbl" "data.frame"
# Printing the tibble shows only the first 10 rows, with each column's type.
iris
## # A tibble: 150 x 5
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <dbl> <dbl> <dbl> <dbl> <fct>
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## # ... with 140 more rows