This tutorial is part of the dplyr training series. Here is the YouTube Video link

Why dplyr

dplyr is a great tool to use in R. The commands may look long and overwhelming to someone not using dplyr but that is not the case. Once you learn the basics then it is very intuitive. It is just like making a long sentence by using different words of any language.

Audience

For beginners or experienced R users wanting to learn various commands of dplyr.

DPLYR : Mutate

In this tutorial we cover all practical aspects of the dplyr::mutate command. It is part of a series of tutorials on all practical aspects of dplyr. All YouTube videos are available in a single playlist on YouTube.

https://www.youtube.com/playlist?list=PLkHcMTpvAaXVJzyRSytUn3nSK92TJphxR

Here is the link to the YouTube video: https://youtu.be/RddRtTaaQ2s

Create sample dataset

We will be using the built-in dataset called starwars in this tutorial.

library(ggplot2)
library(dplyr)  
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Working copy of the starwars tibble that ships with dplyr.
d <- dplyr::starwars

Have a look at the sample dataset

# Print the full sample dataset for reference.
d

# Append the overall mean mass (NAs ignored) as a constant column beside
# each character's own mass.
d1 <- d %>%
  dplyr::select(c(name, mass, species)) %>%
  dplyr::mutate(mean = mean(x = mass, na.rm = TRUE))
d1
# Build two columns in one mutate() call: the overall mean mass, then each
# character's mass relative to it. A later expression inside mutate() can
# refer to a column created by an earlier one.
d2 <- d %>%
  dplyr::select(c(name, mass, species)) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    normalisedmean = mass / mean
  )

d2
# Grouped mutate: because the data is grouped by species, mean() is evaluated
# once per species and repeated on every row of that species.
d3 <- d %>%
  dplyr::select(c(mass, species)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(mean = mean(x = mass, na.rm = TRUE))
d3
# Same per-species mean, but `.after` places the new column immediately to
# the right of `mass` instead of at the end.
d4 <- d %>%
  dplyr::select(c(mass, species)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .after = mass
  )
d4
# Same per-species mean, but `.before` places the new column immediately to
# the left of `mass`.
d5 <- d %>%
  dplyr::select(c(mass, species)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .before = mass
  )
d5
# No select() here: every original column is kept and the per-species mean
# mass is appended as the last column.
d6 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(mean = mean(x = mass, na.rm = TRUE))
d6
# `.keep = "all"` retains every input column alongside the new one
# (this is also what mutate() does when `.keep` is not given).
d7 <- d %>%
  dplyr::select(c(mass, species, hair_color)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .keep = "all"
  )
d7
# `.keep = "unused"` drops the columns that were consumed to build the new
# one (`mass` here) and keeps the rest; grouping columns are always kept.
d8 <- d %>%
  dplyr::select(c(mass, species, hair_color)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .keep = "unused"
  )
d8
# `.keep = "used"` keeps only the columns that fed the new one (`mass`),
# plus the grouping column and the new column itself.
d9 <- d %>%
  dplyr::select(c(mass, species, hair_color)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .keep = "used"
  )
d9
# transmute() returns only the newly created column; the grouping column
# (`species`) is carried along as well.
d10 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::transmute(mean = mean(x = mass, na.rm = TRUE))
d10
# mutate() with `.keep = "none"`: only the grouping column and the new
# column survive.
d11 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(x = mass, na.rm = TRUE),
    .keep = "none"
  )
d11
# Rank characters by mass within each species (lightest gets rank 1), then
# sort the rows by species and mass so the ranks read top to bottom.
d12 <- d %>%
  dplyr::select(c(species, mass)) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(rankByMass = dplyr::min_rank(mass)) %>%
  dplyr::arrange(species, mass)
d12
# Descending rank within each species (heaviest gets rank 1). transmute()
# keeps only the rank column together with the grouping column.
d13 <- d %>%
  dplyr::select(c(species, mass)) %>%
  dplyr::group_by(species) %>%
  dplyr::transmute(rankByMass = dplyr::min_rank(dplyr::desc(mass)))
d13
# New style syntax: across() applies one function to several named columns.
# Here the three colour columns are converted to upper case in place.
d14 <- d %>%
  dplyr::mutate(
    across(c(hair_color, skin_color, eye_color), toupper)
  )

d14
# Old style syntax using mutate_at(): upper-case every column whose name
# ends with "color".
d15 <- d %>%
  dplyr::mutate_at(dplyr::vars(dplyr::ends_with("color")), toupper)
d15
# New style syntax
# Same transformation as the earlier `d14` example; running it again
# overwrites `d14` with an identical result.
d14 <- d %>%
  dplyr::mutate(
    across(c(hair_color, skin_color, eye_color), ~ toupper(.x))
  )

d14
# Strip leading/trailing whitespace from every character column.
# Note: this reuses the name `d15`, so it replaces the mutate_at() result
# assigned to `d15` earlier in the script.
d15 <- d %>%
  dplyr::mutate(across(where(is.character), function(x) trimws(x)))

d15
# Convert every character column to a factor; where() selects columns by a
# predicate on their contents rather than by name.
d16 <- d %>%
  dplyr::mutate(across(where(is.character), function(x) as.factor(x)))

d16
# Derive a BMI column, then scale several numeric columns at once with
# across() and a formula-style lambda.
#
# starwars stores `height` in centimetres and `mass` in kilograms, while BMI
# is defined as kg / m^2, so height is converted to metres before squaring.
# (Dividing by the raw centimetre value squared gives results 10,000x too
# small.)
d17 <- d %>%
  dplyr::select(mass, height) %>%
  dplyr::mutate(bmi = mass / (height / 100)^2) %>%
  # Multiply each of the three listed columns by 3.
  dplyr::mutate(across(c(mass, height, bmi), ~ .x * 3))

d17
# Derive a BMI column, then scale several numeric columns at once with
# across() and an ordinary anonymous function.
#
# starwars stores `height` in centimetres and `mass` in kilograms, while BMI
# is defined as kg / m^2, so height is converted to metres before squaring.
# (Dividing by the raw centimetre value squared gives results 10,000x too
# small.)
d18 <- d %>%
  dplyr::select(mass, height) %>%
  dplyr::mutate(bmi = mass / (height / 100)^2) %>%
  # Multiply each of the three listed columns by 0.1.
  dplyr::mutate(across(c(mass, height, bmi), function(x) x * 0.1))

d18
# Count rows per manufacturer with group_by() + tally(), then overwrite the
# count column `n` with each manufacturer's percentage of all rows.
d19 <- mpg %>%
  dplyr::group_by(manufacturer) %>%
  dplyr::tally() %>%
  dplyr::mutate(n = n / sum(n) * 100)
d19
# count() is the one-step equivalent of group_by() + tally(). Here the raw
# count `n` is kept and the percentage goes into a separate `pct` column.
d20 <- mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = 100 * prop.table(n))
d20
library(scales)

# Bar chart of each manufacturer's share of rows in mpg. `pct` is kept as a
# 0-1 proportion so that scales::percent can format the axis labels.
mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = n / sum(n)) %>%
  ggplot(aes(x = manufacturer, y = pct)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent)

# Same chart with the bars ordered from largest to smallest share:
# reorder() sorts the manufacturers by the negated proportion.
mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = prop.table(n)) %>%
  ggplot(aes(x = reorder(manufacturer, -pct), y = pct)) +
  geom_col() +
  scale_y_continuous(labels = scales::percent)