# Load package
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Import Data

# Download the TidyTuesday (2023-02-14) age-gaps CSV and parse it into a tibble.
age_gaps <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-02-14/age_gaps.csv"
)
## Rows: 1155 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): movie_name, director, actor_1_name, actor_2_name, character_1_gend...
## dbl  (5): release_year, age_difference, couple_number, actor_1_age, actor_2_age
## date (2): actor_1_birthdate, actor_2_birthdate
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fix the RNG seed so the random sample below is reproducible.
set.seed(123)

# Keep the four columns of interest, then draw 10 rows at random.
# slice_sample() is used because dplyr has superseded sample_n().
data_small <- age_gaps %>%
  select(movie_name, release_year, director, age_difference) %>%
  slice_sample(n = 10)

data_small
## # A tibble: 10 × 4
##    movie_name                release_year director            age_difference
##    <chr>                            <dbl> <chr>                        <dbl>
##  1 A Star Is Born                    2018 Bradley Cooper                  11
##  2 You Only Live Twice               1967 Lewis Gilbert                   11
##  3 Firewall                          2006 Richard Loncraine               19
##  4 Don't Mess with the Zohan         2008 Dennis Dugan                     9
##  5 A Single Man                      2009 Tom Ford                        18
##  6 Tag                               2018 Jeff Tomsic                      3
##  7 Red Riding Hood                   2011 Catherine Hardwicke              0
##  8 The Vow                           2012 Michael Sucsy                    2
##  9 Love Actually                     2003 Richard Curtis                   7
## 10 Sorry to Bother You               2018 Boots Riley                      8

Pivoting

Long to wide form

# Widen the sample: every distinct movie_name becomes its own column holding
# that movie's age_difference; cells with no matching row are filled with NA.
data_small_wide <- pivot_wider(
  data_small,
  names_from = movie_name,
  values_from = age_difference
)

data_small_wide
## # A tibble: 10 × 12
##    release_year director         `A Star Is Born` `You Only Live Twice` Firewall
##           <dbl> <chr>                       <dbl>                 <dbl>    <dbl>
##  1         2018 Bradley Cooper                 11                    NA       NA
##  2         1967 Lewis Gilbert                  NA                    11       NA
##  3         2006 Richard Loncrai…               NA                    NA       19
##  4         2008 Dennis Dugan                   NA                    NA       NA
##  5         2009 Tom Ford                       NA                    NA       NA
##  6         2018 Jeff Tomsic                    NA                    NA       NA
##  7         2011 Catherine Hardw…               NA                    NA       NA
##  8         2012 Michael Sucsy                  NA                    NA       NA
##  9         2003 Richard Curtis                 NA                    NA       NA
## 10         2018 Boots Riley                    NA                    NA       NA
## # ℹ 7 more variables: `Don't Mess with the Zohan` <dbl>, `A Single Man` <dbl>,
## #   Tag <dbl>, `Red Riding Hood` <dbl>, `The Vow` <dbl>, `Love Actually` <dbl>,
## #   `Sorry to Bother You` <dbl>

Wide to long form

# Reverse the widening: stack every movie column back into a
# movie_name / age_difference pair and drop the NA padding cells.
# The movie columns are selected as "everything except the id columns" rather
# than by the first and last movie title, so this keeps working if the random
# sample (and therefore the set of movie columns) changes.
data_small_wide %>%
  pivot_longer(
    cols = !c(release_year, director),
    names_to = "movie_name",
    values_to = "age_difference",
    values_drop_na = TRUE
  ) %>%
  # Put movie_name first again to match the column order of data_small.
  select(movie_name, everything())
## # A tibble: 10 × 4
##    movie_name                release_year director            age_difference
##    <chr>                            <dbl> <chr>                        <dbl>
##  1 A Star Is Born                    2018 Bradley Cooper                  11
##  2 You Only Live Twice               1967 Lewis Gilbert                   11
##  3 Firewall                          2006 Richard Loncraine               19
##  4 Don't Mess with the Zohan         2008 Dennis Dugan                     9
##  5 A Single Man                      2009 Tom Ford                        18
##  6 Tag                               2018 Jeff Tomsic                      3
##  7 Red Riding Hood                   2011 Catherine Hardwicke              0
##  8 The Vow                           2012 Michael Sucsy                    2
##  9 Love Actually                     2003 Richard Curtis                   7
## 10 Sorry to Bother You               2018 Boots Riley                      8

Separating and Uniting

Separate Column

# Split director into first-name and last-name columns at the space.
# separate_wider_delim() is used because tidyr 1.3.0 has superseded separate().
# too_many = "merge" keeps any extra name parts in l_name instead of failing
# on directors whose names contain more than one space.
data_small %>%
  separate_wider_delim(
    cols = director,
    delim = " ",
    names = c("f_name", "l_name"),
    too_many = "merge"
  )
## # A tibble: 10 × 5
##    movie_name                release_year f_name    l_name    age_difference
##    <chr>                            <dbl> <chr>     <chr>              <dbl>
##  1 A Star Is Born                    2018 Bradley   Cooper                11
##  2 You Only Live Twice               1967 Lewis     Gilbert               11
##  3 Firewall                          2006 Richard   Loncraine             19
##  4 Don't Mess with the Zohan         2008 Dennis    Dugan                  9
##  5 A Single Man                      2009 Tom       Ford                  18
##  6 Tag                               2018 Jeff      Tomsic                 3
##  7 Red Riding Hood                   2011 Catherine Hardwicke              0
##  8 The Vow                           2012 Michael   Sucsy                  2
##  9 Love Actually                     2003 Richard   Curtis                 7
## 10 Sorry to Bother You               2018 Boots     Riley                  8

Unite two columns

# Paste movie_name and release_year together into a single "movie_year"
# column, joined by "/"; the two source columns are replaced by the new one.
data_unite <- unite(
  data_small,
  col = "movie_year",
  movie_name, release_year,
  sep = "/"
)

data_unite
## # A tibble: 10 × 3
##    movie_year                     director            age_difference
##    <chr>                          <chr>                        <dbl>
##  1 A Star Is Born/2018            Bradley Cooper                  11
##  2 You Only Live Twice/1967       Lewis Gilbert                   11
##  3 Firewall/2006                  Richard Loncraine               19
##  4 Don't Mess with the Zohan/2008 Dennis Dugan                     9
##  5 A Single Man/2009              Tom Ford                        18
##  6 Tag/2018                       Jeff Tomsic                      3
##  7 Red Riding Hood/2011           Catherine Hardwicke              0
##  8 The Vow/2012                   Michael Sucsy                    2
##  9 Love Actually/2003             Richard Curtis                   7
## 10 Sorry to Bother You/2018       Boots Riley                      8

Missing Values

# Expand to every director x release_year combination; combinations that do
# not occur in data_small appear as rows with NA in the remaining columns.
complete(data_small, director, release_year)
## # A tibble: 80 × 4
##    director       release_year movie_name          age_difference
##    <chr>                 <dbl> <chr>                        <dbl>
##  1 Boots Riley            1967 <NA>                            NA
##  2 Boots Riley            2003 <NA>                            NA
##  3 Boots Riley            2006 <NA>                            NA
##  4 Boots Riley            2008 <NA>                            NA
##  5 Boots Riley            2009 <NA>                            NA
##  6 Boots Riley            2011 <NA>                            NA
##  7 Boots Riley            2012 <NA>                            NA
##  8 Boots Riley            2018 Sorry to Bother You              8
##  9 Bradley Cooper         1967 <NA>                            NA
## 10 Bradley Cooper         2003 <NA>                            NA
## # ℹ 70 more rows