Import data

# Load the movie data set from the Excel workbook. The first column of the
# sheet has an empty header, so it is given the name `...1` on import.
data <- read_excel(path = "../00_data/MyData.xlsx")
## New names:
## • `` -> `...1`
# Show the imported tibble.
data
## # A tibble: 3,401 × 9
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1     1 6/22/2007    Evan Alm…         175000000      100289690       174131329
##  2     2 7/28/1995    Waterwor…         175000000       88246220       264246220
##  3     3 5/12/2017    King Art…         175000000       39175066       139950708
##  4     4 12/25/2013   47 Ronin          175000000       38362475       151716815
##  5     5 6/22/2018    Jurassic…         170000000      416769345      1304866322
##  6     6 8/1/2014     Guardian…         170000000      333172112       771051335
##  7     7 5/7/2010     Iron Man…         170000000      312433331       621156389
##  8     8 4/4/2014     Captain …         170000000      259746958       714401889
##  9     9 7/11/2014    Dawn of …         170000000      208545589       710644566
## 10    10 11/10/2004   The Pola…         170000000      186493587       310634169
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>

Apply the following dplyr verbs to your data

Filter rows

# Keep only the movies whose MPAA rating is "G".
data %>%
  filter(mpaa_rating == "G")
## # A tibble: 85 × 9
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1    10 11/10/2004   The Pola…         170000000      186493587       310634169
##  2    46 6/29/2007    Ratatoui…         150000000      206445654       626549695
##  3    74 6/16/1999    Tarzan            145000000      171091819       448191819
##  4   113 4/11/2014    Rio 2             130000000      131538435       492846291
##  5   151 11/2/2001    Monsters…         115000000      289423425       559757719
##  6   185 11/25/2009   The Prin…         105000000      104400899       270997378
##  7   208 6/21/1996    The Hunc…         100000000      100138851       325500000
##  8   209 12/15/2000   The Empe…         100000000       89296573       169296573
##  9   230 11/6/2015    The Pean…          99000000      130178411       250091610
## 10   244 5/30/2003    Finding …          94000000      380529370       936429370
## # ℹ 75 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>

Arrange rows

# Order rows by release date, breaking ties by genre and then MPAA rating.
# NOTE(review): release_date is a character column, so this ordering follows
# the text of the date ("1/1/1946" < "1/1/1967" < ...), not the calendar.
data %>%
  arrange(release_date, genre, mpaa_rating)
## # A tibble: 3,401 × 9
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1  3027 1/1/1946     It’s a W…           3180000        6600000        10768908
##  2  2999 1/1/1967     In Cold …           3500000       13000000        13007551
##  3  3270 1/1/1970     Beyond t…           1000000        9000000         9000000
##  4  1649 1/1/1970     Darling …          22000000        5000000         5000000
##  5  2364 1/1/1970     The Moll…          11000000        2200000         2200000
##  6  2349 1/1/1975     Barry Ly…          11000000       20000000        20169934
##  7  2984 1/1/1976     Network             3800000       23689877        23689877
##  8  2171 1/1/1978     Caravans           14000000        1000000         1000000
##  9  3034 1/1/1978     Coming H…           3000000       32653000        32653000
## 10  2041 1/1/1979     The Deer…          15000000       50000000        50009253
## # ℹ 3,391 more rows
## # ℹ 3 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>

Select columns

# Reduce the table to the movie title and its production budget.
data %>%
  select(movie, production_budget)
## # A tibble: 3,401 × 2
##    movie                               production_budget
##    <chr>                                           <dbl>
##  1 Evan Almighty                               175000000
##  2 Waterworld                                  175000000
##  3 King Arthur: Legend of the Sword            175000000
##  4 47 Ronin                                    175000000
##  5 Jurassic World: Fallen Kingdom              170000000
##  6 Guardians of the Galaxy                     170000000
##  7 Iron Man 2                                  170000000
##  8 Captain America: The Winter Soldier         170000000
##  9 Dawn of the Planet of the Apes              170000000
## 10 The Polar Express                           170000000
## # ℹ 3,391 more rows

Add columns

# Add a profit column: total box-office revenue minus the production budget.
# Only worldwide_gross is used as revenue. Several rows have domestic_gross
# equal to worldwide_gross, which indicates that the worldwide figure already
# contains the domestic take; adding both would count domestic revenue twice.
mutate(data, profit = worldwide_gross - production_budget)
## # A tibble: 3,401 × 10
##     ...1 release_date movie     production_budget domestic_gross worldwide_gross
##    <dbl> <chr>        <chr>                 <dbl>          <dbl>           <dbl>
##  1     1 6/22/2007    Evan Alm…         175000000      100289690       174131329
##  2     2 7/28/1995    Waterwor…         175000000       88246220       264246220
##  3     3 5/12/2017    King Art…         175000000       39175066       139950708
##  4     4 12/25/2013   47 Ronin          175000000       38362475       151716815
##  5     5 6/22/2018    Jurassic…         170000000      416769345      1304866322
##  6     6 8/1/2014     Guardian…         170000000      333172112       771051335
##  7     7 5/7/2010     Iron Man…         170000000      312433331       621156389
##  8     8 4/4/2014     Captain …         170000000      259746958       714401889
##  9     9 7/11/2014    Dawn of …         170000000      208545589       710644566
## 10    10 11/10/2004   The Pola…         170000000      186493587       310634169
## # ℹ 3,391 more rows
## # ℹ 4 more variables: distributor <chr>, mpaa_rating <chr>, genre <chr>,
## #   profit <dbl>

Summarize by groups

# Group the movies by genre (the grouped tibble keeps the name `by_day`),
# then report each genre's mean production budget, ignoring missing values.
by_day <- data %>% group_by(genre)
by_day %>%
  summarise(Avg_Production_Budget = mean(production_budget, na.rm = TRUE))
## # A tibble: 5 × 2
##   genre     Avg_Production_Budget
##   <chr>                     <dbl>
## 1 Action                55870572.
## 2 Adventure             61111682.
## 3 Comedy                24289354.
## 4 Drama                 21774116.
## 5 Horror                17224168.