dplyr is one of the most powerful package in R in terms
of data handling and it will be more powerful if we can combine
dplyr with other visualization package for exploring data
such as ggplot2 or echarts4r.
dplyr functions
does%>% to chain together multiple
dplyr functionslibrary(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd('~/Asdos/VDE')
penguins <- read.csv('penguins_size.csv')
head(penguins)
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 39.1 18.7 181
## 2 Adelie Torgersen 39.5 17.4 186
## 3 Adelie Torgersen 40.3 18.0 195
## 4 Adelie Torgersen NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193
## 6 Adelie Torgersen 39.3 20.6 190
## body_mass_g sex
## 1 3750 MALE
## 2 3800 FEMALE
## 3 3250 FEMALE
## 4 NA <NA>
## 5 3450 FEMALE
## 6 3650 MALE
glimpse(penguins)
## Rows: 344
## Columns: 7
## $ species <chr> "Adelie", "Adelie", "Adelie", "Adelie", "Adelie", "A…
## $ island <chr> "Torgersen", "Torgersen", "Torgersen", "Torgersen", …
## $ culmen_length_mm <dbl> 39.1, 39.5, 40.3, NA, 36.7, 39.3, 38.9, 39.2, 34.1, …
## $ culmen_depth_mm <dbl> 18.7, 17.4, 18.0, NA, 19.3, 20.6, 17.8, 19.6, 18.1, …
## $ flipper_length_mm <int> 181, 186, 195, NA, 193, 190, 181, 195, 193, 190, 186…
## $ body_mass_g <int> 3750, 3800, 3250, NA, 3450, 3650, 3625, 4675, 3475, …
## $ sex <chr> "MALE", "FEMALE", "FEMALE", NA, "FEMALE", "MALE", "F…
summary(penguins)
## species island culmen_length_mm culmen_depth_mm
## Length:344 Length:344 Min. :32.10 Min. :13.10
## Class :character Class :character 1st Qu.:39.23 1st Qu.:15.60
## Mode :character Mode :character Median :44.45 Median :17.30
## Mean :43.92 Mean :17.15
## 3rd Qu.:48.50 3rd Qu.:18.70
## Max. :59.60 Max. :21.50
## NA's :2 NA's :2
## flipper_length_mm body_mass_g sex
## Min. :172.0 Min. :2700 Length:344
## 1st Qu.:190.0 1st Qu.:3550 Class :character
## Median :197.0 Median :4050 Mode :character
## Mean :200.9 Mean :4202
## 3rd Qu.:213.0 3rd Qu.:4750
## Max. :231.0 Max. :6300
## NA's :2 NA's :2
#Col names of dataframe
names(penguins)
## [1] "species" "island" "culmen_length_mm"
## [4] "culmen_depth_mm" "flipper_length_mm" "body_mass_g"
## [7] "sex"
arrange()filter()select()mutate()summarise()R code logic : Take the penguins dataset and then filter for all penguins that live on Torgersen Island
penguins%>%filter(island=='Torgersen')penguins %>%
filter(island == 'Torgersen')
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 39.1 18.7 181
## 2 Adelie Torgersen 39.5 17.4 186
## 3 Adelie Torgersen 40.3 18.0 195
## 4 Adelie Torgersen NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193
## 6 Adelie Torgersen 39.3 20.6 190
## 7 Adelie Torgersen 38.9 17.8 181
## 8 Adelie Torgersen 39.2 19.6 195
## 9 Adelie Torgersen 34.1 18.1 193
## 10 Adelie Torgersen 42.0 20.2 190
## 11 Adelie Torgersen 37.8 17.1 186
## 12 Adelie Torgersen 37.8 17.3 180
## 13 Adelie Torgersen 41.1 17.6 182
## 14 Adelie Torgersen 38.6 21.2 191
## 15 Adelie Torgersen 34.6 21.1 198
## 16 Adelie Torgersen 36.6 17.8 185
## 17 Adelie Torgersen 38.7 19.0 195
## 18 Adelie Torgersen 42.5 20.7 197
## 19 Adelie Torgersen 34.4 18.4 184
## 20 Adelie Torgersen 46.0 21.5 194
## 21 Adelie Torgersen 35.9 16.6 190
## 22 Adelie Torgersen 41.8 19.4 198
## 23 Adelie Torgersen 33.5 19.0 190
## 24 Adelie Torgersen 39.7 18.4 190
## 25 Adelie Torgersen 39.6 17.2 196
## 26 Adelie Torgersen 45.8 18.9 197
## 27 Adelie Torgersen 35.5 17.5 190
## 28 Adelie Torgersen 42.8 18.5 195
## 29 Adelie Torgersen 40.9 16.8 191
## 30 Adelie Torgersen 37.2 19.4 184
## 31 Adelie Torgersen 36.2 16.1 187
## 32 Adelie Torgersen 42.1 19.1 195
## 33 Adelie Torgersen 34.6 17.2 189
## 34 Adelie Torgersen 42.9 17.6 196
## 35 Adelie Torgersen 36.7 18.8 187
## 36 Adelie Torgersen 35.1 19.4 193
## 37 Adelie Torgersen 38.6 17.0 188
## 38 Adelie Torgersen 37.3 20.5 199
## 39 Adelie Torgersen 35.7 17.0 189
## 40 Adelie Torgersen 41.1 18.6 189
## 41 Adelie Torgersen 36.2 17.2 187
## 42 Adelie Torgersen 37.7 19.8 198
## 43 Adelie Torgersen 40.2 17.0 176
## 44 Adelie Torgersen 41.4 18.5 202
## 45 Adelie Torgersen 35.2 15.9 186
## 46 Adelie Torgersen 40.6 19.0 199
## 47 Adelie Torgersen 38.8 17.6 191
## 48 Adelie Torgersen 41.5 18.3 195
## 49 Adelie Torgersen 39.0 17.1 191
## 50 Adelie Torgersen 44.1 18.0 210
## 51 Adelie Torgersen 38.5 17.9 190
## 52 Adelie Torgersen 43.1 19.2 197
## body_mass_g sex
## 1 3750 MALE
## 2 3800 FEMALE
## 3 3250 FEMALE
## 4 NA <NA>
## 5 3450 FEMALE
## 6 3650 MALE
## 7 3625 FEMALE
## 8 4675 MALE
## 9 3475 <NA>
## 10 4250 <NA>
## 11 3300 <NA>
## 12 3700 <NA>
## 13 3200 FEMALE
## 14 3800 MALE
## 15 4400 MALE
## 16 3700 FEMALE
## 17 3450 FEMALE
## 18 4500 MALE
## 19 3325 FEMALE
## 20 4200 MALE
## 21 3050 FEMALE
## 22 4450 MALE
## 23 3600 FEMALE
## 24 3900 MALE
## 25 3550 FEMALE
## 26 4150 MALE
## 27 3700 FEMALE
## 28 4250 MALE
## 29 3700 FEMALE
## 30 3900 MALE
## 31 3550 FEMALE
## 32 4000 MALE
## 33 3200 FEMALE
## 34 4700 MALE
## 35 3800 FEMALE
## 36 4200 MALE
## 37 2900 FEMALE
## 38 3775 MALE
## 39 3350 FEMALE
## 40 3325 MALE
## 41 3150 FEMALE
## 42 3500 MALE
## 43 3450 FEMALE
## 44 3875 MALE
## 45 3050 FEMALE
## 46 4000 MALE
## 47 3275 FEMALE
## 48 4300 MALE
## 49 3050 FEMALE
## 50 4000 MALE
## 51 3325 FEMALE
## 52 3500 MALE
# Use head() to show only the first 6 row of data
penguins %>%
filter(island == 'Torgersen') %>%
head()
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 39.1 18.7 181
## 2 Adelie Torgersen 39.5 17.4 186
## 3 Adelie Torgersen 40.3 18.0 195
## 4 Adelie Torgersen NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193
## 6 Adelie Torgersen 39.3 20.6 190
## body_mass_g sex
## 1 3750 MALE
## 2 3800 FEMALE
## 3 3250 FEMALE
## 4 NA <NA>
## 5 3450 FEMALE
## 6 3650 MALE
Applying arrange()
# Ascending order
penguins %>%
filter(island == 'Torgersen') %>%
arrange(culmen_length_mm) %>%
head()
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 33.5 19.0 190
## 2 Adelie Torgersen 34.1 18.1 193
## 3 Adelie Torgersen 34.4 18.4 184
## 4 Adelie Torgersen 34.6 21.1 198
## 5 Adelie Torgersen 34.6 17.2 189
## 6 Adelie Torgersen 35.1 19.4 193
## body_mass_g sex
## 1 3600 FEMALE
## 2 3475 <NA>
## 3 3325 FEMALE
## 4 4400 MALE
## 5 3200 FEMALE
## 6 4200 MALE
# Descending order
penguins %>%
filter(island == 'Torgersen') %>%
arrange(desc(culmen_length_mm)) %>%
head()
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 46.0 21.5 194
## 2 Adelie Torgersen 45.8 18.9 197
## 3 Adelie Torgersen 44.1 18.0 210
## 4 Adelie Torgersen 43.1 19.2 197
## 5 Adelie Torgersen 42.9 17.6 196
## 6 Adelie Torgersen 42.8 18.5 195
## body_mass_g sex
## 1 4200 MALE
## 2 4150 MALE
## 3 4000 MALE
## 4 3500 MALE
## 5 4700 MALE
## 6 4250 MALE
# You can also arrange the categorical column
penguins %>%
filter(island == 'Torgersen') %>%
arrange(desc(species)) %>%
head()
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Torgersen 39.1 18.7 181
## 2 Adelie Torgersen 39.5 17.4 186
## 3 Adelie Torgersen 40.3 18.0 195
## 4 Adelie Torgersen NA NA NA
## 5 Adelie Torgersen 36.7 19.3 193
## 6 Adelie Torgersen 39.3 20.6 190
## body_mass_g sex
## 1 3750 MALE
## 2 3800 FEMALE
## 3 3250 FEMALE
## 4 NA <NA>
## 5 3450 FEMALE
## 6 3650 MALE
Creating a subset
set.seed(42)
penguins_subset <- penguins %>%
sample_n(15)
penguins_subset
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Dream 36.0 17.9 190
## 2 Gentoo Biscoe 48.5 15.0 219
## 3 Chinstrap Dream 46.5 17.9 192
## 4 Adelie Torgersen 45.8 18.9 197
## 5 Gentoo Biscoe 46.7 15.3 219
## 6 Adelie Dream 39.0 18.7 185
## 7 Adelie Torgersen 37.7 19.8 198
## 8 Gentoo Biscoe 49.9 16.1 213
## 9 Adelie Torgersen 41.5 18.3 195
## 10 Gentoo Biscoe 47.4 14.6 212
## 11 Adelie Biscoe 38.2 18.1 185
## 12 Gentoo Biscoe 41.7 14.7 210
## 13 Adelie Dream 38.3 19.2 189
## 14 Chinstrap Dream 47.0 17.3 185
## 15 Adelie Biscoe 43.2 19.0 197
## body_mass_g sex
## 1 3450 FEMALE
## 2 4850 FEMALE
## 3 3500 FEMALE
## 4 4150 MALE
## 5 5200 MALE
## 6 3650 MALE
## 7 3500 MALE
## 8 5400 MALE
## 9 4300 MALE
## 10 4725 FEMALE
## 11 3950 MALE
## 12 4700 FEMALE
## 13 3950 MALE
## 14 3700 FEMALE
## 15 4775 MALE
# Arrange the species in descending order
penguins_subset %>%
arrange(desc(species)) %>%
head()
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Gentoo Biscoe 48.5 15.0 219
## 2 Gentoo Biscoe 46.7 15.3 219
## 3 Gentoo Biscoe 49.9 16.1 213
## 4 Gentoo Biscoe 47.4 14.6 212
## 5 Gentoo Biscoe 41.7 14.7 210
## 6 Chinstrap Dream 46.5 17.9 192
## body_mass_g sex
## 1 4850 FEMALE
## 2 5200 MALE
## 3 5400 MALE
## 4 4725 FEMALE
## 5 4700 FEMALE
## 6 3500 FEMALE
Applying Filter with Condition
# Filter with one condition
penguins_subset %>%
filter(culmen_depth_mm > 16)
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Dream 36.0 17.9 190
## 2 Chinstrap Dream 46.5 17.9 192
## 3 Adelie Torgersen 45.8 18.9 197
## 4 Adelie Dream 39.0 18.7 185
## 5 Adelie Torgersen 37.7 19.8 198
## 6 Gentoo Biscoe 49.9 16.1 213
## 7 Adelie Torgersen 41.5 18.3 195
## 8 Adelie Biscoe 38.2 18.1 185
## 9 Adelie Dream 38.3 19.2 189
## 10 Chinstrap Dream 47.0 17.3 185
## 11 Adelie Biscoe 43.2 19.0 197
## body_mass_g sex
## 1 3450 FEMALE
## 2 3500 FEMALE
## 3 4150 MALE
## 4 3650 MALE
## 5 3500 MALE
## 6 5400 MALE
## 7 4300 MALE
## 8 3950 MALE
## 9 3950 MALE
## 10 3700 FEMALE
## 11 4775 MALE
# Filter between values
penguins_subset %>%
filter(between(culmen_depth_mm,16,18))
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Dream 36.0 17.9 190
## 2 Chinstrap Dream 46.5 17.9 192
## 3 Gentoo Biscoe 49.9 16.1 213
## 4 Chinstrap Dream 47.0 17.3 185
## body_mass_g sex
## 1 3450 FEMALE
## 2 3500 FEMALE
## 3 5400 MALE
## 4 3700 FEMALE
Applying Select (pick which column we want to look at)
penguins_subset %>%
select(species, flipper_length_mm,sex)
## species flipper_length_mm sex
## 1 Adelie 190 FEMALE
## 2 Gentoo 219 FEMALE
## 3 Chinstrap 192 FEMALE
## 4 Adelie 197 MALE
## 5 Gentoo 219 MALE
## 6 Adelie 185 MALE
## 7 Adelie 198 MALE
## 8 Gentoo 213 MALE
## 9 Adelie 195 MALE
## 10 Gentoo 212 FEMALE
## 11 Adelie 185 MALE
## 12 Gentoo 210 FEMALE
## 13 Adelie 189 MALE
## 14 Chinstrap 185 FEMALE
## 15 Adelie 197 MALE
# Select all character data
penguins_subset %>%
select(where(is.character))
## species island sex
## 1 Adelie Dream FEMALE
## 2 Gentoo Biscoe FEMALE
## 3 Chinstrap Dream FEMALE
## 4 Adelie Torgersen MALE
## 5 Gentoo Biscoe MALE
## 6 Adelie Dream MALE
## 7 Adelie Torgersen MALE
## 8 Gentoo Biscoe MALE
## 9 Adelie Torgersen MALE
## 10 Gentoo Biscoe FEMALE
## 11 Adelie Biscoe MALE
## 12 Gentoo Biscoe FEMALE
## 13 Adelie Dream MALE
## 14 Chinstrap Dream FEMALE
## 15 Adelie Biscoe MALE
# Select all numeric data
penguins_subset %>%
select(where(is.numeric))
## culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g
## 1 36.0 17.9 190 3450
## 2 48.5 15.0 219 4850
## 3 46.5 17.9 192 3500
## 4 45.8 18.9 197 4150
## 5 46.7 15.3 219 5200
## 6 39.0 18.7 185 3650
## 7 37.7 19.8 198 3500
## 8 49.9 16.1 213 5400
## 9 41.5 18.3 195 4300
## 10 47.4 14.6 212 4725
## 11 38.2 18.1 185 3950
## 12 41.7 14.7 210 4700
## 13 38.3 19.2 189 3950
## 14 47.0 17.3 185 3700
## 15 43.2 19.0 197 4775
Math with Mutate()
# Convert gram to pound
penguins_subset %>%
mutate(body_weight_pounds = body_mass_g/453.59237)
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Dream 36.0 17.9 190
## 2 Gentoo Biscoe 48.5 15.0 219
## 3 Chinstrap Dream 46.5 17.9 192
## 4 Adelie Torgersen 45.8 18.9 197
## 5 Gentoo Biscoe 46.7 15.3 219
## 6 Adelie Dream 39.0 18.7 185
## 7 Adelie Torgersen 37.7 19.8 198
## 8 Gentoo Biscoe 49.9 16.1 213
## 9 Adelie Torgersen 41.5 18.3 195
## 10 Gentoo Biscoe 47.4 14.6 212
## 11 Adelie Biscoe 38.2 18.1 185
## 12 Gentoo Biscoe 41.7 14.7 210
## 13 Adelie Dream 38.3 19.2 189
## 14 Chinstrap Dream 47.0 17.3 185
## 15 Adelie Biscoe 43.2 19.0 197
## body_mass_g sex body_weight_pounds
## 1 3450 FEMALE 7.605948
## 2 4850 FEMALE 10.692420
## 3 3500 FEMALE 7.716179
## 4 4150 MALE 9.149184
## 5 5200 MALE 11.464038
## 6 3650 MALE 8.046873
## 7 3500 MALE 7.716179
## 8 5400 MALE 11.904962
## 9 4300 MALE 9.479877
## 10 4725 FEMALE 10.416842
## 11 3950 MALE 8.708259
## 12 4700 FEMALE 10.361726
## 13 3950 MALE 8.708259
## 14 3700 FEMALE 8.157104
## 15 4775 MALE 10.527073
# Combine mutate with select
penguins_subset %>%
mutate(body_weight_pounds = body_mass_g/453.59237) %>%
select(everything())
## species island culmen_length_mm culmen_depth_mm flipper_length_mm
## 1 Adelie Dream 36.0 17.9 190
## 2 Gentoo Biscoe 48.5 15.0 219
## 3 Chinstrap Dream 46.5 17.9 192
## 4 Adelie Torgersen 45.8 18.9 197
## 5 Gentoo Biscoe 46.7 15.3 219
## 6 Adelie Dream 39.0 18.7 185
## 7 Adelie Torgersen 37.7 19.8 198
## 8 Gentoo Biscoe 49.9 16.1 213
## 9 Adelie Torgersen 41.5 18.3 195
## 10 Gentoo Biscoe 47.4 14.6 212
## 11 Adelie Biscoe 38.2 18.1 185
## 12 Gentoo Biscoe 41.7 14.7 210
## 13 Adelie Dream 38.3 19.2 189
## 14 Chinstrap Dream 47.0 17.3 185
## 15 Adelie Biscoe 43.2 19.0 197
## body_mass_g sex body_weight_pounds
## 1 3450 FEMALE 7.605948
## 2 4850 FEMALE 10.692420
## 3 3500 FEMALE 7.716179
## 4 4150 MALE 9.149184
## 5 5200 MALE 11.464038
## 6 3650 MALE 8.046873
## 7 3500 MALE 7.716179
## 8 5400 MALE 11.904962
## 9 4300 MALE 9.479877
## 10 4725 FEMALE 10.416842
## 11 3950 MALE 8.708259
## 12 4700 FEMALE 10.361726
## 13 3950 MALE 8.708259
## 14 3700 FEMALE 8.157104
## 15 4775 MALE 10.527073
Summarise with summarise(), with help from group_by()
penguins_subset %>%
summarise(avg_body_mass = mean(body_mass_g))
## avg_body_mass
## 1 4253.333
# Summarise the non_subset data
penguins %>%
summarise(avg_body_mass = mean(body_mass_g))
## avg_body_mass
## 1 NA
# Ignore the NA
penguins %>%
summarise(avg_body_mass = mean(body_mass_g, na.rm = TRUE))
## avg_body_mass
## 1 4201.754
# Summarise the mean for each species group
penguins %>%
group_by(species) %>%
summarise(avg_species_body_mass = mean(body_mass_g, na.rm = TRUE))
## # A tibble: 3 × 2
## species avg_species_body_mass
## <chr> <dbl>
## 1 Adelie 3701.
## 2 Chinstrap 3733.
## 3 Gentoo 5076.