library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Four equivalent ways of asking for the first 4 rows of `iris`; every call
# below prints the same output.

# 1. Plain function call.
head(iris, n = 4)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
# 2. Piped call with the `.` placeholder written out, marking where the
#    left-hand side (`iris`) is inserted.
iris %>% head(. , n = 4)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
# 3. Piped call with the placeholder omitted; the output is unchanged, so the
#    left-hand side is supplied as the first argument.
iris %>% head(n = 4)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
# 4. Here the piped value is the number 4; `.` places it in the second
#    position of head(), so it acts as the row count.
4 %>% head(iris, .)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
# Wide-format grade table: one row per student (`names`) and one numeric
# column per rubric (Talleres, Quices, Parciales).
notas <- data.frame(
  names     = c("Beimar", "Johanna", "Adriana"),
  Talleres  = c(50, 50, 10),
  Quices    = c(25, 30, 35),
  Parciales = c(10, 12, 45)
)

# Show the table.
notas
## names Talleres Quices Parciales
## 1 Beimar 50 25 10
## 2 Johanna 50 30 12
## 3 Adriana 10 35 45
# Long-format version of the grade table: one row per (student, rubric) pair
# with the grade in `note`. Rows are ordered rubric by rubric, and within each
# rubric the three students appear in the same order.
NOTAS <- data.frame(
  # The three students, repeated once for each rubric.
  names = rep(c("Beimar", "Johanna", "Adriana"), times = 3),
  # Each rubric label repeated for the three students.
  rubric = rep(c("Talleres", "Quices", "Parciales"), each = 3),
  note = c(
    50, 50, 10, # Talleres
    25, 30, 35, # Quices
    10, 12, 45  # Parciales
  )
)

# Show the table.
NOTAS
## names rubric note
## 1 Beimar Talleres 50
## 2 Johanna Talleres 50
## 3 Adriana Talleres 10
## 4 Beimar Quices 25
## 5 Johanna Quices 30
## 6 Adriana Quices 35
## 7 Beimar Parciales 10
## 8 Johanna Parciales 12
## 9 Adriana Parciales 45
# Reshape the wide table `notas` to long format: columns 2 to 4 are stacked
# into a `rubric` column (the former column name) and a `note` column (the
# value). The result matches the hand-built NOTAS table.
# Note the spelling: `NOTAs` (lower-case "s") is a different object from NOTAS.
# gather() still works but is superseded by pivot_longer() in tidyr.
NOTAs <- gather(notas, key = rubric, value = note, 2:4)

# Show the reshaped table.
NOTAs
## names rubric note
## 1 Beimar Talleres 50
## 2 Johanna Talleres 50
## 3 Adriana Talleres 10
## 4 Beimar Quices 25
## 5 Johanna Quices 30
## 6 Adriana Quices 35
## 7 Beimar Parciales 10
## 8 Johanna Parciales 12
## 9 Adriana Parciales 45
# Reshape the long table NOTAS back to wide format: each distinct `rubric`
# value becomes a column holding the matching `note`. This overwrites `notas`.
# In the output below, rows and rubric columns come out in alphabetical order
# rather than in the order used when `notas` was first built.
# spread() still works but is superseded by pivot_wider() in tidyr.
notas <- spread(NOTAS, key = rubric, value = note)

# Show the reshaped table.
notas
## names Parciales Quices Talleres
## 1 Adriana 45 35 10
## 2 Beimar 10 25 50
## 3 Johanna 12 30 50
# Rebuild `notas` with first name and surname packed into a single
# underscore-separated column, `name_surname`, next to the three grade columns.
notas <- data.frame(
  name_surname = c("Beimar_Rodríguez", "Johanna_Vanegas", "Adriana_Guerrero"),
  Talleres     = c(50, 50, 10),
  Quices       = c(25, 30, 35),
  Parciales    = c(10, 12, 45)
)

# Show the table.
notas
## name_surname Talleres Quices Parciales
## 1 Beimar_Rodríguez 50 25 10
## 2 Johanna_Vanegas 50 30 12
## 3 Adriana_Guerrero 10 35 45
# Split `name_surname` at the underscore into two columns, `name` and
# `surname`; the grade columns are carried over as they are.
Notas <- separate(
  notas,
  col = name_surname,
  into = c("name", "surname"),
  sep = "_"
)

# Show the result.
Notas
## name surname Talleres Quices Parciales
## 1 Beimar Rodríguez 50 25 10
## 2 Johanna Vanegas 50 30 12
## 3 Adriana Guerrero 10 35 45
# Inverse of the split: paste `name` and `surname` back together with an
# underscore into a single `name_surname` column. This overwrites `notas`.
notas <- unite(Notas, col = name_surname, name:surname, sep = "_")

# Show the result.
notas
## name_surname Talleres Quices Parciales
## 1 Beimar_Rodríguez 50 25 10
## 2 Johanna_Vanegas 50 30 12
## 3 Adriana_Guerrero 10 35 45
# Descriptive statistics of `note` for each rubric: mean, median, standard
# deviation and interquartile range.
# The aggregates are unnamed, so the result columns are labelled with the
# expressions themselves (e.g. `mean(note)`).
NOTAS %>%
  group_by(rubric) %>%
  summarise(
    mean(note),
    median(note),
    sd(note),
    IQR(note)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 5
## rubric `mean(note)` `median(note)` `sd(note)` `IQR(note)`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parciales 22.3 12 19.7 17.5
## 2 Quices 30 30 5 5
## 3 Talleres 36.7 50 23.1 20
# Per-rubric summary statistics of `note`, computed with the scoped verb
# summarise_at(): vars() selects the column, the list supplies the functions.
#
# The functions are passed as a named list instead of through funs(), which
# is deprecated as of dplyr 0.8.0 and emits a deprecation warning. Because a
# single column is selected, the list names become the output column names,
# so the result is the same table without the warning.
NOTAS %>%
  group_by(rubric) %>%
  summarise_at(
    vars(note),
    list(mean = mean, median = median, sd = sd, IQR = IQR)
  )
## # A tibble: 3 x 5
## rubric mean median sd IQR
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parciales 22.3 12 19.7 17.5
## 2 Quices 30 30 5 5
## 3 Talleres 36.7 50 23.1 20
# Per-rubric summary statistics of every numeric column, computed with the
# scoped verb summarise_if(): the predicate is.numeric selects the columns
# (here only `note`), the list supplies the functions.
#
# The functions are passed as a named list instead of through funs(), which
# is deprecated as of dplyr 0.8.0. With a single matching column the list
# names become the output column names, so the result is unchanged.
NOTAS %>%
  group_by(rubric) %>%
  summarise_if(
    is.numeric,
    list(mean = mean, median = median, sd = sd, IQR = IQR)
  )
## # A tibble: 3 x 5
## rubric mean median sd IQR
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Parciales 22.3 12 19.7 17.5
## 2 Quices 30 30 5 5
## 3 Talleres 36.7 50 23.1 20
# Preview the three example tables used by the join examples that follow.
# They are not defined in the visible source, so they appear to come from an
# attached package. Each has only 3 rows, so head() prints the whole table.

# Band membership: one row per person (`name`) with their `band`.
head(band_members)
## # A tibble: 3 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
## 2 John Beatles
## 3 Paul Beatles
# Instruments: one row per person (`name`) with the instrument they play.
# Keith appears here but not in band_members; Mick appears only there.
head(band_instruments)
## # A tibble: 3 x 2
## name plays
## <chr> <chr>
## 1 John guitar
## 2 Paul bass
## 3 Keith guitar
# Same rows as band_instruments, but the person column is called `artist`
# instead of `name`.
head(band_instruments2)
## # A tibble: 3 x 2
## artist plays
## <chr> <chr>
## 1 John guitar
## 2 Paul bass
## 3 Keith guitar
inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Inner join on "name": the result keeps only John and Paul, the names present
# in both tables, with the columns of both tables side by side.
inner_join(band_members, band_instruments, by = "name")
## # A tibble: 2 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
# Piped form without `by`: the join column is picked automatically, as the
# `Joining, by = "name"` message reports; the result is the same.
band_members %>% inner_join(band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Left join on "name": every row of band_members (the left table) is kept.
# Mick has no match in band_instruments, so his `plays` is NA.
left_join(band_members, band_instruments, by = "name")
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
# Piped form without `by`: the join column is reported in the
# `Joining, by = "name"` message; the result is the same.
band_members %>% left_join(band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
right_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Right join on "name": every row of band_instruments (the right table) is
# kept. Keith has no match in band_members, so his `band` is NA.
right_join(band_members, band_instruments, by = "name")
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
## 3 Keith <NA> guitar
# Piped form without `by`: the join column is reported in the
# `Joining, by = "name"` message; the result is the same.
band_members %>% right_join(band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band plays
## <chr> <chr> <chr>
## 1 John Beatles guitar
## 2 Paul Beatles bass
## 3 Keith <NA> guitar
full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Full join on "name": rows from both tables are kept (4 names in total).
# Unmatched cells are NA: Mick has no `plays`, Keith has no `band`.
full_join(band_members, band_instruments, by = "name")
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
# Piped form without `by`: the join column is reported in the
# `Joining, by = "name"` message; the result is the same.
band_members %>% full_join(band_instruments)
## Joining, by = "name"
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Full join when the key columns have different names: the named vector
# c("name" = "artist") matches `name` in band_members against `artist` in
# band_instruments2. The result keeps the key under the left-hand name, `name`.
full_join(band_members, band_instruments2, by = c("name" = "artist"))
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
`copy` - if the two datasets come from different data sources and `copy = TRUE`, then `y` is copied into the data source where `x` is located.
`suffix` - if a variable name occurs in both datasets and is not used as part of the join, a suffix is added to keep the variable names unique. By default ".x" and ".y" are appended to the variable names, but other suffixes can be specified.
x %>% full_join(y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
# Pipe form of the full join: band_members is passed in as the first table.
# No `by` is given, so the join column is the one reported in the
# `Joining, by = "name"` message.
band_members %>% full_join(band_instruments)
## Joining, by = "name"
## # A tibble: 4 x 3
## name band plays
## <chr> <chr> <chr>
## 1 Mick Stones <NA>
## 2 John Beatles guitar
## 3 Paul Beatles bass
## 4 Keith <NA> guitar
semi_join(x, y, by = NULL, copy = FALSE, ...)
# Semi join: keeps the rows of band_members whose `name` has a match in
# band_instruments (John and Paul). Only the band_members columns are
# returned; no `plays` column is added.
band_members %>% semi_join(band_instruments)
## Joining, by = "name"
## # A tibble: 2 x 2
## name band
## <chr> <chr>
## 1 John Beatles
## 2 Paul Beatles
anti_join(x, y, by = NULL, copy = FALSE, ...)
# Anti join: keeps the rows of band_members whose `name` has NO match in
# band_instruments. Only Mick qualifies; no columns are added.
band_members %>% anti_join(band_instruments)
## Joining, by = "name"
## # A tibble: 1 x 2
## name band
## <chr> <chr>
## 1 Mick Stones
nest_join(x, y, by = NULL, copy = FALSE, keep = FALSE, name = NULL, ...)
# Nest join: every row of band_members is kept and a list column, named
# `band_instruments` in the output, holds the matching rows as a small tibble.
# Mick gets an empty tibble (0 rows); John and Paul each get one with 1 row.
band_members %>% nest_join(band_instruments)
## Joining, by = "name"
## # A tibble: 3 x 3
## name band band_instruments
## <chr> <chr> <list>
## 1 Mick Stones <tibble [0 × 1]>
## 2 John Beatles <tibble [1 × 1]>
## 3 Paul Beatles <tibble [1 × 1]>