Import data

# Load the Excel workbook into `data`, then print it for a quick look
data <- read_excel(path = "Apply_1.xlsx")
data
## # A tibble: 1,155 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 1,145 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Apply the following dplyr verbs to your data

Filter rows

# Keep rows where BOTH conditions hold: actor 1 is 75 and actor 2 is 23
data %>%
  filter(actor_1_age == 75 & actor_2_age == 23)
## # A tibble: 1 × 13
##   movie_name     release_year director age_difference couple_number actor_1_name
##   <chr>                 <dbl> <chr>             <dbl>         <dbl> <chr>       
## 1 Harold and Ma…         1971 Hal Ash…             52             1 Ruth Gordon 
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>
# Keep rows where EITHER condition holds: actor 1 is 75 or actor 2 is 23
data %>%
  filter(actor_1_age == 75 | actor_2_age == 23)
## # A tibble: 58 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  3 Indiana Jone…         1989 Steven …             36             1 Sean Connery
##  4 Fort Apache,…         1981 Daniel …             33             1 Paul Newman 
##  5 The Private …         2009 Rebecca…             32             1 Alan Arkin  
##  6 High Noon             1952 Fred Zi…             28             1 Gary Cooper 
##  7 The Squid an…         2005 Noah Ba…             27             1 Jeff Daniels
##  8 Die Another …         2002 Lee Tam…             26             1 Pierce Bros…
##  9 Dark Passage          1947 Delmer …             25             1 Humphrey Bo…
## 10 State of the…         1948 Frank C…             25             1 Spencer Tra…
## # ℹ 48 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Arrange rows

# Sort by actor 1's age (oldest first); ties are broken by actor 2's age,
# also oldest first
data %>%
  arrange(
    desc(actor_1_age),
    desc(actor_2_age)
  )
## # A tibble: 1,155 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Beginners             2010 Mike Mi…             43             1 Christopher…
##  2 A Walk in th…         2015 Ken Kwa…             23             1 Robert Redf…
##  3 Magnolia              1999 Paul Th…             38             1 Jason Robar…
##  4 The Private …         2009 Rebecca…             32             1 Alan Arkin  
##  5 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  6 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  7 The Royal Te…         2001 Wes And…             21             2 Gene Hackman
##  8 The Prom              2020 Ryan Mu…             22             1 Meryl Streep
##  9 And So It Go…         2014 Rob Rei…              2             1 Michael Dou…
## 10 Behind the C…         2013 Steven …             26             1 Michael Dou…
## # ℹ 1,145 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>
# Sort rows from the largest age difference to the smallest
data %>%
  arrange(desc(age_difference))
## # A tibble: 1,155 × 13
##    movie_name    release_year director age_difference couple_number actor_1_name
##    <chr>                <dbl> <chr>             <dbl>         <dbl> <chr>       
##  1 Harold and M…         1971 Hal Ash…             52             1 Ruth Gordon 
##  2 Venus                 2006 Roger M…             50             1 Peter O'Too…
##  3 The Quiet Am…         2002 Phillip…             49             1 Michael Cai…
##  4 The Big Lebo…         1998 Joel Co…             45             1 David Huddl…
##  5 Beginners             2010 Mike Mi…             43             1 Christopher…
##  6 Poison Ivy            1992 Katt Sh…             42             1 Tom Skerritt
##  7 Whatever Wor…         2009 Woody A…             40             1 Larry David 
##  8 Entrapment            1999 Jon Ami…             39             1 Sean Connery
##  9 Husbands and…         1992 Woody A…             38             1 Woody Allen 
## 10 Magnolia              1999 Paul Th…             38             1 Jason Robar…
## # ℹ 1,145 more rows
## # ℹ 7 more variables: actor_2_name <chr>, character_1_gender <chr>,
## #   character_2_gender <chr>, actor_1_birthdate <chr>, actor_2_birthdate <chr>,
## #   actor_1_age <dbl>, actor_2_age <dbl>

Select columns

# Keep only the release year, director, and age difference columns
data %>%
  select(release_year, director, age_difference)
## # A tibble: 1,155 × 3
##    release_year director             age_difference
##           <dbl> <chr>                         <dbl>
##  1         1971 Hal Ashby                        52
##  2         2006 Roger Michell                    50
##  3         2002 Phillip Noyce                    49
##  4         1998 Joel Coen                        45
##  5         2010 Mike Mills                       43
##  6         1992 Katt Shea                        42
##  7         2009 Woody Allen                      40
##  8         1999 Jon Amiel                        39
##  9         1992 Woody Allen                      38
## 10         1999 Paul Thomas Anderson             38
## # ℹ 1,145 more rows

Add columns

# Add `agediff` (actor 1's age minus actor 2's age), then keep only the two
# age columns and the new difference column
data %>%
  mutate(agediff = actor_1_age - actor_2_age) %>%
  select(actor_1_age, actor_2_age, agediff)
## # A tibble: 1,155 × 3
##    actor_1_age actor_2_age agediff
##          <dbl>       <dbl>   <dbl>
##  1          75          23      52
##  2          74          24      50
##  3          69          20      49
##  4          68          23      45
##  5          81          38      43
##  6          59          17      42
##  7          62          22      40
##  8          69          30      39
##  9          57          19      38
## 10          77          39      38
## # ℹ 1,145 more rows

Summarize data

# Collapse the whole table to one row: the mean age difference across all
# rows (no grouping is applied), ignoring missing values
data %>%
  summarise(avgagediff = mean(age_difference, na.rm = TRUE))
## # A tibble: 1 × 1
##   avgagediff
##        <dbl>
## 1       10.4