This tutorial is part of the dplyr training series. Here is the YouTube video link
dplyr is a great tool to use in R. The commands may look long and overwhelming to someone who has not used dplyr, but that is not the case. Once you learn the basics, it is very intuitive. It is just like making a long sentence by combining the words of a language.
This tutorial is for beginners and for experienced R users who want to learn the various commands of dplyr.
We will be covering all practical aspects of the dplyr::mutate command in this tutorial. This tutorial is part of a series of tutorials on all practical aspects of dplyr. All YouTube videos are available in a single playlist on YouTube:
https://www.youtube.com/playlist?list=PLkHcMTpvAaXVJzyRSytUn3nSK92TJphxR
Here is the link to the YouTube video: https://youtu.be/RddRtTaaQ2s
We will be using the built-in dataset called starwars in this tutorial.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Working copy of the starwars data set that every example below starts from.
d <- dplyr::starwars
d

# mutate() with a summary function: the single overall mean of `mass`
# (missing values ignored) is repeated on every row as a new `mean` column.
d1 <- d %>%
  dplyr::select(name, mass, species) %>%
  dplyr::mutate(mean = mean(mass, na.rm = TRUE))
d1

# A column created inside mutate() can be used by a later expression in the
# same call, so `normalisedmean` is derived from the freshly added `mean`.
d2 <- d %>%
  dplyr::select(name, mass, species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    normalisedmean = mass / mean
  )
d2
# Grouped mutate(): after group_by(species) the mean is computed separately
# for each species and attached to every row of that species.
d3 <- d %>%
  dplyr::select(mass, species) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(mean = mean(mass, na.rm = TRUE))
d3

# `.after` controls placement: the new column is inserted right after `mass`.
d4 <- d %>%
  dplyr::select(mass, species) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .after = mass
  )
d4

# `.before` is the counterpart: the new column is inserted just before `mass`.
d5 <- d %>%
  dplyr::select(mass, species) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .before = mass
  )
d5

# Without a preceding select() all original columns are kept and the new
# column is appended.
d6 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(mean = mean(mass, na.rm = TRUE))
d6
# The `.keep` argument of mutate() decides which of the existing columns
# survive alongside the newly created ones.

# .keep = "all": every input column is retained.
d7 <- d %>%
  dplyr::select(mass, species, hair_color) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .keep = "all"
  )
d7

# .keep = "unused": columns that were read to build the new column (here
# `mass`) are dropped; the remaining ones are retained.
d8 <- d %>%
  dplyr::select(mass, species, hair_color) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .keep = "unused"
  )
d8

# .keep = "used": only the columns that were read to build the new column
# are retained (grouping columns are always kept).
d9 <- d %>%
  dplyr::select(mass, species, hair_color) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .keep = "used"
  )
d9

# transmute() returns just the new column together with the grouping column.
d10 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::transmute(mean = mean(mass, na.rm = TRUE))
d10

# The same selection written with mutate(): .keep = "none" drops every input
# column except the grouping column.
d11 <- d %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(
    mean = mean(mass, na.rm = TRUE),
    .keep = "none"
  )
d11
# Rank rows by mass within each species (smallest mass gets rank 1), then
# sort the output by species and mass so the ranks read in order.
d12 <- d %>%
  dplyr::select(species, mass) %>%
  dplyr::group_by(species) %>%
  dplyr::mutate(rankByMass = min_rank(mass)) %>%
  dplyr::arrange(species, mass)
d12

# Descending variant: desc() flips the ordering so the heaviest member of
# each species gets rank 1. transmute() keeps only the rank column plus the
# grouping column.
d13 <- d %>%
  dplyr::select(species, mass) %>%
  dplyr::group_by(species) %>%
  dplyr::transmute(rankByMass = min_rank(desc(mass)))
d13
# New style syntax: across() applies one function to several named columns.
d14 <- d %>%
  dplyr::mutate(
    across(c(hair_color, skin_color, eye_color), toupper)
  )
d14

# Old style syntax using mutate_at: the columns are picked with vars() and a
# selection helper, here every column whose name ends in "color".
d15 <- d %>%
  dplyr::mutate_at(vars(ends_with("color")), toupper)
d15

# New style syntax, repeated as the starting point for the across() examples
# that follow.
d14 <- d %>%
  dplyr::mutate(
    across(c(hair_color, skin_color, eye_color), toupper)
  )
d14

# where() selects columns by a predicate: strip leading/trailing whitespace
# from every character column.
# NOTE: this reuses the name d15 and replaces the mutate_at() result above.
d15 <- d %>%
  dplyr::mutate(across(where(is.character), trimws))
d15

# Same selection, converting every character column to a factor.
d16 <- d %>%
  dplyr::mutate(across(where(is.character), as.factor))
d16
# across() with a purrr-style lambda (`~ .x`): derive a body-mass index and
# then scale several numeric columns at once.
#
# BMI is defined as kg / m^2. `mass` is in kilograms but `height` in the
# starwars data is recorded in centimetres, so height is converted to metres
# before squaring; dividing by the squared centimetre value would give a
# result 10,000 times too small.
d17 <- d %>%
  dplyr::select(mass, height) %>%
  dplyr::mutate(bmi = mass / (height / 100)^2) %>%
  dplyr::mutate(across(c(mass, height, bmi), ~ .x * 3))
d17

# The same pipeline with an ordinary anonymous function instead of the
# formula shorthand; each selected column is multiplied by 0.1.
d18 <- d %>%
  dplyr::select(mass, height) %>%
  dplyr::mutate(bmi = mass / (height / 100)^2) %>%
  dplyr::mutate(across(c(mass, height, bmi), function(x) x * 0.1))
d18
# Share of rows per manufacturer, long form: group, count with tally(), then
# overwrite the count column `n` with its percentage of the total.
d19 <- mpg %>%
  dplyr::group_by(manufacturer) %>%
  dplyr::tally() %>%
  dplyr::mutate(n = prop.table(n) * 100)
d19

# Short form: count() does the grouping and counting in one step; the
# percentage goes into a separate `pct` column so the raw count `n` is kept.
d20 <- mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = prop.table(n) * 100)
d20
library(scales)
# Bar chart of each manufacturer's share of rows. `pct` is kept as a
# proportion (0-1) so scales::percent can format the axis labels.
mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = prop.table(n)) %>%
  ggplot2::ggplot(ggplot2::aes(x = manufacturer, y = pct)) +
  ggplot2::geom_col() +
  ggplot2::scale_y_continuous(labels = scales::percent)

# Same chart with the bars ordered from largest to smallest share:
# reorder() sorts the manufacturers by negated `pct`.
mpg %>%
  dplyr::count(manufacturer) %>%
  dplyr::mutate(pct = prop.table(n)) %>%
  ggplot2::ggplot(ggplot2::aes(x = reorder(manufacturer, -pct), y = pct)) +
  ggplot2::geom_col() +
  ggplot2::scale_y_continuous(labels = scales::percent)