Import your data

# Load the Apply_1 workbook into `data`; the path is relative to the
# current working directory.
data <- read_excel(path = "../05_module8/Apply_1.xlsx")

Pivoting

wide to long form

# Stack the two character-gender columns into a name/value pair.
# Note: the new name column is called "character_1_gender" and the new value
# column "character_2_gender", i.e. they reuse the names of the pivoted
# columns rather than introducing fresh ones.
data_long <- pivot_longer(
  data,
  cols = c(character_1_gender, character_2_gender),
  names_to = "character_1_gender",
  values_to = "character_2_gender"
)

long to wide form

# Spread the name/value pair back out: one column per distinct value found in
# character_1_gender, filled with the matching character_2_gender values.
pivot_wider(
  data_long,
  names_from = character_1_gender,
  values_from = character_2_gender
)
## # A tibble: 1,155 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 1,145 more rows
## # ℹ 7 more variables: actor_2_name <chr>, actor_1_birthdate <chr>,
## #   actor_2_birthdate <chr>, actor_1_age <dbl>, actor_2_age <dbl>,
## #   character_1_gender <chr>, character_2_gender <chr>

Separating and Uniting

Separate a column

# Split actor_1_birthdate on "-" into three columns: year, month and day.
data_sep <- separate(
  data,
  col = actor_1_birthdate,
  into = c("year", "month", "day"),
  sep = "-"
)

Unite two columns

# Join year, month and day back into a single actor_1_birthdate column,
# using "-" as the separator.
unite(data_sep, col = "actor_1_birthdate", year, month, day, sep = "-")
## # A tibble: 1,155 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 1,145 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Missing Values

# Small four-row example table, written out row by row.
tribble(
  ~release_year, ~age_difference, ~actor_1_age,
  2002,          49,              69,
  2003,          35,              66,
  2004,          34,              56,
  2005,          34,              60
)
## # A tibble: 4 × 3
##   release_year age_difference actor_1_age
##          <dbl>          <dbl>       <dbl>
## 1         2002             49          69
## 2         2003             35          66
## 3         2004             34          56
## 4         2005             34          60