dplyr has a number of functions which are useful for manipulating data.frames.
Here, I will look at:
Our data is a 12 item subset of the iris dataset
data(iris)
iris2 <- head(iris,12)
iris2
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
Return rows with matching conditions.
filter(iris2, Sepal.Length > 5)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 5.4 3.9 1.7 0.4 setosa
## 3 5.4 3.7 1.5 0.2 setosa
Select variables by name.
select(iris2, Sepal.Length)
## Sepal.Length
## 1 5.1
## 2 4.9
## 3 4.7
## 4 4.6
## 5 5.0
## 6 5.4
## 7 4.6
## 8 5.0
## 9 4.4
## 10 4.9
## 11 5.4
## 12 4.8
select(iris2, contains("etal"))
## Petal.Length Petal.Width
## 1 1.4 0.2
## 2 1.4 0.2
## 3 1.3 0.2
## 4 1.5 0.2
## 5 1.4 0.2
## 6 1.7 0.4
## 7 1.4 0.3
## 8 1.5 0.2
## 9 1.4 0.2
## 10 1.5 0.1
## 11 1.5 0.2
## 12 1.6 0.2
select(iris2, ends_with("Width"))
## Sepal.Width Petal.Width
## 1 3.5 0.2
## 2 3.0 0.2
## 3 3.2 0.2
## 4 3.1 0.2
## 5 3.6 0.2
## 6 3.9 0.4
## 7 3.4 0.3
## 8 3.4 0.2
## 9 2.9 0.2
## 10 3.1 0.1
## 11 3.7 0.2
## 12 3.4 0.2
Change the name of a variable and keep the rest.
rename(iris2, Type=Species)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Type
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
Add new variables and preserve existing variables.
mutate(iris2, Sepal.Measure = (Sepal.Length+Sepal.Width)/2)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Measure
## 1 5.1 3.5 1.4 0.2 setosa 4.30
## 2 4.9 3.0 1.4 0.2 setosa 3.95
## 3 4.7 3.2 1.3 0.2 setosa 3.95
## 4 4.6 3.1 1.5 0.2 setosa 3.85
## 5 5.0 3.6 1.4 0.2 setosa 4.30
## 6 5.4 3.9 1.7 0.4 setosa 4.65
## 7 4.6 3.4 1.4 0.3 setosa 4.00
## 8 5.0 3.4 1.5 0.2 setosa 4.20
## 9 4.4 2.9 1.4 0.2 setosa 3.65
## 10 4.9 3.1 1.5 0.1 setosa 4.00
## 11 5.4 3.7 1.5 0.2 setosa 4.55
## 12 4.8 3.4 1.6 0.2 setosa 4.10
Add new variables and drop existing variables.
transmute(iris2, Petal.Measure = (Petal.Length + Petal.Width) / 2)
## Petal.Measure
## 1 0.80
## 2 0.80
## 3 0.75
## 4 0.85
## 5 0.80
## 6 1.05
## 7 0.85
## 8 0.85
## 9 0.80
## 10 0.80
## 11 0.85
## 12 0.90
Select distinct rows
# Duplicate the final row for the purpose of our example
iris2 <- bind_rows(iris2, iris2[12,])
iris2
## Source: local data frame [13 x 5]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## (dbl) (dbl) (dbl) (dbl) (fctr)
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.4 1.6 0.2 setosa
distinct(iris2)
## Source: local data frame [12 x 5]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## (dbl) (dbl) (dbl) (dbl) (fctr)
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
Arrange rows by variables
arrange(iris2, Sepal.Length, Sepal.Width)
## Source: local data frame [13 x 5]
##
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## (dbl) (dbl) (dbl) (dbl) (fctr)
## 1 4.4 2.9 1.4 0.2 setosa
## 2 4.6 3.1 1.5 0.2 setosa
## 3 4.6 3.4 1.4 0.3 setosa
## 4 4.7 3.2 1.3 0.2 setosa
## 5 4.8 3.4 1.6 0.2 setosa
## 6 4.8 3.4 1.6 0.2 setosa
## 7 4.9 3.0 1.4 0.2 setosa
## 8 4.9 3.1 1.5 0.1 setosa
## 9 5.0 3.4 1.5 0.2 setosa
## 10 5.0 3.6 1.4 0.2 setosa
## 11 5.1 3.5 1.4 0.2 setosa
## 12 5.4 3.7 1.5 0.2 setosa
## 13 5.4 3.9 1.7 0.4 setosa
Group by - take an existing table and convert to grouped table where operations are performed by group.
Summarise - Summarise multiple values to a single value.
group_by(iris2, Sepal.Length>5) %>%
summarise(mean(Sepal.Width))
## Source: local data frame [2 x 2]
##
## Sepal.Length > 5 mean(Sepal.Width)
## (lgl) (dbl)
## 1 FALSE 3.25
## 2 TRUE 3.70