# Packages used by this script: dplyr supplies the data-manipulation verbs
# and the %>% pipe, readr supplies read_csv().
library(dplyr)
library(readr)

# Load the movies dataset ----

# Read the CSV straight from the gist URL. The URL must be wrapped in plain
# ASCII double quotes; typographic ("curly") quotes are not string delimiters
# in R.
movies <- read_csv("https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv")

# 1. rename() ----

# Rename Film -> movie_title and Year -> release_year (new_name = old_name).
one <- movies %>%
  rename(movie_title = Film, release_year = Year)

# Preview the first rows of the renamed data.
head(one)

# 2. select() ----

# Keep only the five columns used in the later steps.
# `Rotten Tomatoes %` contains spaces and a percent sign, so it is not a
# syntactic R name and has to be quoted with backticks.
two <- one %>%
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    `Rotten Tomatoes %`
  )

# Preview the first rows of the reduced data.
head(two)

# 3. filter() ----

# Keep rows that satisfy both conditions: release_year greater than 2000 and
# a `Rotten Tomatoes %` value greater than 80. The column name needs backticks
# because it is not a syntactic R name.
three <- two %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80)

# Preview the first rows of the filtered data.
head(three)

# 4. mutate() ----

# Add Profitability_millions, computed as Profitability divided by 1e6.
# NOTE(review): this assumes Profitability is stored in raw currency units;
# confirm against the dataset before relying on the "millions" label.
four <- three %>%
  mutate(Profitability_millions = Profitability / 1e6)

# Preview the first rows including the new column.
head(four)

# 5. arrange() ----

# Sort by `Rotten Tomatoes %` in descending order, breaking ties by
# Profitability_millions, also descending. The rating column must be
# backtick-quoted because it is not a syntactic R name.
five <- four %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# Preview the first rows of the sorted data.
head(five)

# 6. Combined pipeline ----

# The five steps above chained into a single pipeline, one verb per line:
# rename -> select -> filter -> mutate -> arrange.
# `Rotten Tomatoes %` is backtick-quoted everywhere because it is not a
# syntactic R name.
final_df <- movies %>%
  rename(
    movie_title = Film,
    release_year = Year
  ) %>%
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    `Rotten Tomatoes %`
  ) %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  mutate(Profitability_millions = Profitability / 1e6) %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

# Preview the first rows of the final result.
head(final_df)

# EXTRA CREDIT ----

# Per-genre summary of final_df: the mean `Rotten Tomatoes %` and the mean
# Profitability_millions, ignoring missing values in both (na.rm = TRUE).
# The rating column is backtick-quoted because it is not a syntactic R name.
summary_df <- final_df %>%
  group_by(Genre) %>%
  summarize(
    avg_rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    avg_profit = mean(Profitability_millions, na.rm = TRUE)
  )

# Preview the first rows of the summary.
head(summary_df)