The arrow (<-) is used to create and “assign” new variables.
2 * 3
## [1] 6
# Nothing was saved above -- the result was only printed.
# To keep a result, give it a name with the arrow:
myVariable <- 2 * 3
# Naming tips: pick one style and stick with it, and avoid names that are too short.
data <- 2 * 3 # <- this is an example of a bad name!!
# The style I use is camelCase:
camelCaseLooksLikeThis <- 4 * 8
camelCaseLooksLikeThis
## [1] 32
Let’s use the built-in “iris” dataset to practice choosing variables.
R has lots of included data that we can use for practice.
# The assignment arrow <- also works with built-in data, which is handy for practice.
# To focus -only- on the species, pull that column out into its own variable:
mySpecies <- iris[["Species"]]
# Same idea, this time saving the Sepal Width column:
mySepalWidth <- iris[["Sepal.Width"]]
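The filter() command does exactly what it sounds like: it keeps only the rows that match a condition! (filter() and the %>% pipe used below come from the dplyr package, so make sure library(dplyr) has been run first.)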
# Keep only the rows of iris whose Species is virginica:
filter(iris, Species == "virginica") #<----------------------
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 6.3 3.3 6.0 2.5 virginica
## 2 5.8 2.7 5.1 1.9 virginica
## 3 7.1 3.0 5.9 2.1 virginica
## 4 6.3 2.9 5.6 1.8 virginica
## 5 6.5 3.0 5.8 2.2 virginica
## 6 7.6 3.0 6.6 2.1 virginica
## 7 4.9 2.5 4.5 1.7 virginica
## 8 7.3 2.9 6.3 1.8 virginica
## 9 6.7 2.5 5.8 1.8 virginica
## 10 7.2 3.6 6.1 2.5 virginica
## 11 6.5 3.2 5.1 2.0 virginica
## 12 6.4 2.7 5.3 1.9 virginica
## 13 6.8 3.0 5.5 2.1 virginica
## 14 5.7 2.5 5.0 2.0 virginica
## 15 5.8 2.8 5.1 2.4 virginica
## 16 6.4 3.2 5.3 2.3 virginica
## 17 6.5 3.0 5.5 1.8 virginica
## 18 7.7 3.8 6.7 2.2 virginica
## 19 7.7 2.6 6.9 2.3 virginica
## 20 6.0 2.2 5.0 1.5 virginica
## 21 6.9 3.2 5.7 2.3 virginica
## 22 5.6 2.8 4.9 2.0 virginica
## 23 7.7 2.8 6.7 2.0 virginica
## 24 6.3 2.7 4.9 1.8 virginica
## 25 6.7 3.3 5.7 2.1 virginica
## 26 7.2 3.2 6.0 1.8 virginica
## 27 6.2 2.8 4.8 1.8 virginica
## 28 6.1 3.0 4.9 1.8 virginica
## 29 6.4 2.8 5.6 2.1 virginica
## 30 7.2 3.0 5.8 1.6 virginica
## 31 7.4 2.8 6.1 1.9 virginica
## 32 7.9 3.8 6.4 2.0 virginica
## 33 6.4 2.8 5.6 2.2 virginica
## 34 6.3 2.8 5.1 1.5 virginica
## 35 6.1 2.6 5.6 1.4 virginica
## 36 7.7 3.0 6.1 2.3 virginica
## 37 6.3 3.4 5.6 2.4 virginica
## 38 6.4 3.1 5.5 1.8 virginica
## 39 6.0 3.0 4.8 1.8 virginica
## 40 6.9 3.1 5.4 2.1 virginica
## 41 6.7 3.1 5.6 2.4 virginica
## 42 6.9 3.1 5.1 2.3 virginica
## 43 5.8 2.7 5.1 1.9 virginica
## 44 6.8 3.2 5.9 2.3 virginica
## 45 6.7 3.3 5.7 2.5 virginica
## 46 6.7 3.0 5.2 2.3 virginica
## 47 6.3 2.5 5.0 1.9 virginica
## 48 6.5 3.0 5.2 2.0 virginica
## 49 6.2 3.4 5.4 2.3 virginica
## 50 5.9 3.0 5.1 1.8 virginica
# |
# A nicer way to write it: "pipes". Exactly the same result! | These two are identical!
# |
iris %>%
  filter(Species == "virginica") #<-------------------
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 6.3 3.3 6.0 2.5 virginica
## 2 5.8 2.7 5.1 1.9 virginica
## 3 7.1 3.0 5.9 2.1 virginica
## 4 6.3 2.9 5.6 1.8 virginica
## 5 6.5 3.0 5.8 2.2 virginica
## 6 7.6 3.0 6.6 2.1 virginica
## 7 4.9 2.5 4.5 1.7 virginica
## 8 7.3 2.9 6.3 1.8 virginica
## 9 6.7 2.5 5.8 1.8 virginica
## 10 7.2 3.6 6.1 2.5 virginica
## 11 6.5 3.2 5.1 2.0 virginica
## 12 6.4 2.7 5.3 1.9 virginica
## 13 6.8 3.0 5.5 2.1 virginica
## 14 5.7 2.5 5.0 2.0 virginica
## 15 5.8 2.8 5.1 2.4 virginica
## 16 6.4 3.2 5.3 2.3 virginica
## 17 6.5 3.0 5.5 1.8 virginica
## 18 7.7 3.8 6.7 2.2 virginica
## 19 7.7 2.6 6.9 2.3 virginica
## 20 6.0 2.2 5.0 1.5 virginica
## 21 6.9 3.2 5.7 2.3 virginica
## 22 5.6 2.8 4.9 2.0 virginica
## 23 7.7 2.8 6.7 2.0 virginica
## 24 6.3 2.7 4.9 1.8 virginica
## 25 6.7 3.3 5.7 2.1 virginica
## 26 7.2 3.2 6.0 1.8 virginica
## 27 6.2 2.8 4.8 1.8 virginica
## 28 6.1 3.0 4.9 1.8 virginica
## 29 6.4 2.8 5.6 2.1 virginica
## 30 7.2 3.0 5.8 1.6 virginica
## 31 7.4 2.8 6.1 1.9 virginica
## 32 7.9 3.8 6.4 2.0 virginica
## 33 6.4 2.8 5.6 2.2 virginica
## 34 6.3 2.8 5.1 1.5 virginica
## 35 6.1 2.6 5.6 1.4 virginica
## 36 7.7 3.0 6.1 2.3 virginica
## 37 6.3 3.4 5.6 2.4 virginica
## 38 6.4 3.1 5.5 1.8 virginica
## 39 6.0 3.0 4.8 1.8 virginica
## 40 6.9 3.1 5.4 2.1 virginica
## 41 6.7 3.1 5.6 2.4 virginica
## 42 6.9 3.1 5.1 2.3 virginica
## 43 5.8 2.7 5.1 1.9 virginica
## 44 6.8 3.2 5.9 2.3 virginica
## 45 6.7 3.3 5.7 2.5 virginica
## 46 6.7 3.0 5.2 2.3 virginica
## 47 6.3 2.5 5.0 1.9 virginica
## 48 6.5 3.0 5.2 2.0 virginica
## 49 6.2 3.4 5.4 2.3 virginica
## 50 5.9 3.0 5.1 1.8 virginica
# The %>% symbol is the pipe: it feeds the output on its left into the input on its right.
# That makes it easy to chain lots of data operations one after another.
# For example:
iris %>%
  filter(Species == "virginica") %>% # keep only virginicas
  filter(Sepal.Length > 6) %>% # ...with Sepal Length bigger than 6
  filter(Petal.Length < 5.8) # ...and Petal Length less than 5.8
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 6.3 2.9 5.6 1.8 virginica
## 2 6.5 3.2 5.1 2.0 virginica
## 3 6.4 2.7 5.3 1.9 virginica
## 4 6.8 3.0 5.5 2.1 virginica
## 5 6.4 3.2 5.3 2.3 virginica
## 6 6.5 3.0 5.5 1.8 virginica
## 7 6.9 3.2 5.7 2.3 virginica
## 8 6.3 2.7 4.9 1.8 virginica
## 9 6.7 3.3 5.7 2.1 virginica
## 10 6.2 2.8 4.8 1.8 virginica
## 11 6.1 3.0 4.9 1.8 virginica
## 12 6.4 2.8 5.6 2.1 virginica
## 13 6.4 2.8 5.6 2.2 virginica
## 14 6.3 2.8 5.1 1.5 virginica
## 15 6.1 2.6 5.6 1.4 virginica
## 16 6.3 3.4 5.6 2.4 virginica
## 17 6.4 3.1 5.5 1.8 virginica
## 18 6.9 3.1 5.4 2.1 virginica
## 19 6.7 3.1 5.6 2.4 virginica
## 20 6.9 3.1 5.1 2.3 virginica
## 21 6.7 3.3 5.7 2.5 virginica
## 22 6.7 3.0 5.2 2.3 virginica
## 23 6.3 2.5 5.0 1.9 virginica
## 24 6.5 3.0 5.2 2.0 virginica
## 25 6.2 3.4 5.4 2.3 virginica
# One more example: setosa plants with petal length longer than 5.
iris %>%
  filter(Species == "setosa") %>%
  filter(Petal.Length > 5)
## [1] Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <0 rows> (or 0-length row.names)
# Who are the women of Star Wars?
# Note: dplyr 1.0.0 recoded starwars$gender to "feminine"/"masculine" (the old
# "female"/"male" values moved to a new `sex` column), so comparing against
# "female" alone silently returns zero rows on current dplyr. Matching both
# spellings works on old and new dplyr versions alike.
starwars %>%
  filter(gender %in% c("female", "feminine"))
## # A tibble: 19 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Leia… 150 49 brown light brown 19 female
## 2 Beru… 165 75 brown light blue 47 female
## 3 Mon … 150 NA auburn fair blue 48 female
## 4 Shmi… 163 NA black fair brown 72 female
## 5 Ayla… 178 55 none blue hazel 48 female
## 6 Adi … 184 50 none dark blue NA female
## 7 Cordé 157 NA brown light brown NA female
## 8 Lumi… 170 56.2 black yellow blue 58 female
## 9 Barr… 166 50 black yellow blue 40 female
## 10 Dormé 165 NA brown light brown NA female
## 11 Zam … 168 55 blonde fair, gre… yellow NA female
## 12 Taun… 213 NA none grey black NA female
## 13 Joca… 167 NA white fair blue NA female
## 14 R4-P… 96 NA none silver, r… red, blue NA female
## 15 Shaa… 178 57 none red, blue… black NA female
## 16 Sly … 178 48 none pale white NA female
## 17 Rey NA NA brown light hazel NA female
## 18 Capt… NA NA unknown unknown unknown NA female
## 19 Padm… 165 45 brown light brown 46 female
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# Find all women in Star Wars from Tatooine.
# gender is matched against both "female" (dplyr < 1.0.0) and "feminine"
# (dplyr >= 1.0.0, where the column was recoded); comparing against "female"
# alone silently returns zero rows on current dplyr.
starwars %>%
  filter(gender %in% c("female", "feminine")) %>%
  filter(homeworld == "Tatooine")
## # A tibble: 2 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Beru… 165 75 brown light blue 47 female
## 2 Shmi… 163 NA black fair brown 72 female
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# What about all women from Tatooine with birth year later than 50?
# gender is matched against both "female" (dplyr < 1.0.0) and "feminine"
# (dplyr >= 1.0.0, where the column was recoded); comparing against "female"
# alone silently returns zero rows on current dplyr.
starwars %>%
  filter(gender %in% c("female", "feminine")) %>%
  filter(homeworld == "Tatooine") %>%
  filter(birth_year > 50)
## # A tibble: 1 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Shmi… 163 NA black fair brown 72 female
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
# The example below applies the same three filters (women, from Tatooine,
# birth year later than 50), but nests the calls instead of using pipes.
# I know which one I prefer to work with!
# gender is matched against both "female" (dplyr < 1.0.0) and "feminine"
# (dplyr >= 1.0.0, where the column was recoded); comparing against "female"
# alone silently returns zero rows on current dplyr.
filter(filter(filter(starwars, gender %in% c("female", "feminine")), homeworld == "Tatooine"), birth_year > 50)
## # A tibble: 1 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Shmi… 163 NA black fair brown 72 female
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
## # vehicles <list>, starships <list>
#aah! it's so gross! the above is terrible code, even though it works!