library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)

# Read the movies CSV from the public gist into a data frame.
# Base read.csv() is used here, so the column names appear in the
# syntactic form seen in the output below (e.g. Rotten.Tomatoes..).
movies <- read.csv(
  file = "https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv"
)

1. rename(): (4 points)

Rename the “Film” column to “movie_title” and the “Year” column to “release_year”.

# Give the title and year columns descriptive snake_case names;
# all other columns are carried over unchanged.
q1 <- rename(movies, movie_title = Film, release_year = Year)

# Preview the first rows of the renamed data.
head(q1)
##                          movie_title   Genre           Lead.Studio
## 1         Zack and Miri Make a Porno Romance The Weinstein Company
## 2                    Youth in Revolt  Comedy The Weinstein Company
## 3 You Will Meet a Tall Dark Stranger  Comedy           Independent
## 4                       When in Rome  Comedy                Disney
## 5              What Happens in Vegas  Comedy                   Fox
## 6                Water For Elephants   Drama      20th Century Fox
##   Audience.score.. Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1               70      1.747542                64         $41.94          2008
## 2               52      1.090000                68         $19.62          2010
## 3               35      1.211818                43         $26.66          2010
## 4               44      0.000000                15         $43.04          2010
## 5               72      6.267647                28        $219.37          2008
## 6               72      3.081421                60        $117.09          2011

2. select(): (4 points)

Create a new dataframe with only the columns: movie_title, release_year, Genre, Profitability.

Select only the movie_title, release_year, Genre, and Profitability columns.

# Keep only the four requested columns, in the order listed here.
q2 <- select(q1, movie_title, release_year, Genre, Profitability)

# Preview the first rows of the narrowed data.
head(q2)
##                          movie_title release_year   Genre Profitability
## 1         Zack and Miri Make a Porno         2008 Romance      1.747542
## 2                    Youth in Revolt         2010  Comedy      1.090000
## 3 You Will Meet a Tall Dark Stranger         2010  Comedy      1.211818
## 4                       When in Rome         2010  Comedy      0.000000
## 5              What Happens in Vegas         2008  Comedy      6.267647
## 6                Water For Elephants         2011   Drama      3.081421

3. filter(): (4 points)

Filter the dataset to include only movies released after 2000 with a Rotten Tomatoes % higher than 80.

# Keep rows that satisfy both conditions: released after 2000 and a
# Rotten Tomatoes score above 80.
q3 <- filter(q1, release_year > 2000 & Rotten.Tomatoes.. > 80)

# Preview the first rows of the filtered data.
head(q3)
##              movie_title     Genre           Lead.Studio Audience.score..
## 1                 WALL-E Animation                Disney               89
## 2               Waitress   Romance           Independent               67
## 3                Tangled Animation                Disney               88
## 4 Rachel Getting Married     Drama           Independent               61
## 5   My Week with Marilyn     Drama The Weinstein Company               84
## 6      Midnight in Paris   Romence                  Sony               84
##   Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1      2.896019                96        $521.28          2008
## 2     11.089742                89         $22.18          2007
## 3      1.365692                89        $355.01          2010
## 4      1.384167                85         $16.61          2008
## 5      0.825800                83          $8.26          2011
## 6      8.744706                93        $148.66          2011

4. mutate(): (4 points)

Add a new column called “Profitability_millions” that converts the Profitability to millions of dollars.

# Add Profitability_millions as a rescaled copy of Profitability.
# The previous version assigned Profitability unchanged, so nothing was
# converted; dividing by 1e6 matches the combined pipeline in question 6.
q4 <- q1 %>%
  mutate(Profitability_millions = Profitability / 1e6)

# Preview the first rows, including the new column.
head(q4)
##                          movie_title   Genre           Lead.Studio
## 1         Zack and Miri Make a Porno Romance The Weinstein Company
## 2                    Youth in Revolt  Comedy The Weinstein Company
## 3 You Will Meet a Tall Dark Stranger  Comedy           Independent
## 4                       When in Rome  Comedy                Disney
## 5              What Happens in Vegas  Comedy                   Fox
## 6                Water For Elephants   Drama      20th Century Fox
##   Audience.score.. Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1               70      1.747542                64         $41.94          2008
## 2               52      1.090000                68         $19.62          2010
## 3               35      1.211818                43         $26.66          2010
## 4               44      0.000000                15         $43.04          2010
## 5               72      6.267647                28        $219.37          2008
## 6               72      3.081421                60        $117.09          2011
##   Profitability_millions
## 1           1.747542e-06
## 2           1.090000e-06
## 3           1.211818e-06
## 4           0.000000e+00
## 5           6.267647e-06
## 6           3.081421e-06

5. arrange(): (3 points)

Sort the filtered dataset by Rotten Tomatoes % in descending order, and then by Profitability in descending order. (Earlier draft, not valid R because the column name contains spaces: `five <- four %>% arrange(desc(Rotten Tomatoes %) , desc(Profitability_millions))`.)

# Sort the filtered data (q3, from question 3) as the task asks, rather
# than the unfiltered q1: highest Rotten Tomatoes score first, with ties
# broken by highest Profitability.
q5 <- q3 %>%
  arrange(desc(Rotten.Tomatoes..), desc(Profitability))

# Preview the top-ranked rows.
head(q5)
##         movie_title     Genre Lead.Studio Audience.score.. Profitability
## 1            WALL-E Animation      Disney               89      2.896019
## 2 Midnight in Paris   Romence        Sony               84      8.744706
## 3         Enchanted    Comedy      Disney               80      4.005737
## 4        Knocked Up    Comedy   Universal               83      6.636402
## 5          Waitress   Romance Independent               67     11.089742
## 6     A Serious Man     Drama   Universal               64      4.382857
##   Rotten.Tomatoes.. Worldwide.Gross release_year
## 1                96        $521.28          2008
## 2                93        $148.66          2011
## 3                93        $340.49          2007
## 4                91           $219          2007
## 5                89         $22.18          2007
## 6                89         $30.68          2009

6. Combining functions: (3 points)

Use the pipe operator (%>%) to chain these operations together, starting with the original dataset and ending with a final dataframe that incorporates all the above transformations.

# Full pipeline from the raw data: rename, select, filter, mutate, arrange.
q6 <- movies %>%
  # Descriptive names for the title and year columns.
  rename(
    movie_title = Film,
    release_year = Year
  ) %>%
  # Rotten.Tomatoes.. is kept because the filter and sort steps use it.
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    Rotten.Tomatoes..
  ) %>%
  # Both conditions must hold for a row to be kept.
  filter(release_year > 2000, Rotten.Tomatoes.. > 80) %>%
  # Rescaled copy of Profitability.
  mutate(Profitability_millions = Profitability / 1e6) %>%
  # Best-reviewed first; ties broken by higher Profitability.
  arrange(
    desc(Rotten.Tomatoes..),
    desc(Profitability)
  )

7. Interpret question 6 (1 point)