1. rename(): (4 points), Rename the “Film” column to “movie_title” and “Year” to “release_year”.

q1 <- movies %>% 
  rename(movie_title = Film,
         release_year = Year)

knitr::kable(head(q1, 6))
movie_title Genre Lead Studio Audience score % Profitability Rotten Tomatoes % Worldwide Gross release_year
Zack and Miri Make a Porno Romance The Weinstein Company 70 1.747542 64 $41.94 2008
Youth in Revolt Comedy The Weinstein Company 52 1.090000 68 $19.62 2010
You Will Meet a Tall Dark Stranger Comedy Independent 35 1.211818 43 $26.66 2010
When in Rome Comedy Disney 44 0.000000 15 $43.04 2010
What Happens in Vegas Comedy Fox 72 6.267647 28 $219.37 2008
Water For Elephants Drama 20th Century Fox 72 3.081421 60 $117.09 2011

2. select(): (4 points), Create a new dataframe with only the columns: movie_title, release_year, Genre, Profitability,

q2 <- q1 %>%
  select(movie_title, release_year, Genre, Profitability)

knitr::kable(head(q2, 6))
movie_title release_year Genre Profitability
Zack and Miri Make a Porno 2008 Romance 1.747542
Youth in Revolt 2010 Comedy 1.090000
You Will Meet a Tall Dark Stranger 2010 Comedy 1.211818
When in Rome 2010 Comedy 0.000000
What Happens in Vegas 2008 Comedy 6.267647
Water For Elephants 2011 Drama 3.081421

3. filter(): (4 points), Filter the dataset to include only movies released after 2000 with a Rotten Tomatoes % higher than 80.

q3 <- q2 %>%
  left_join(
    q1 %>% select(movie_title, release_year, `Rotten Tomatoes %`),
    by = c("movie_title", "release_year")
  ) %>%
  filter(release_year > 2000,
         `Rotten Tomatoes %` > 80)

knitr::kable(head(q3, 6))
movie_title release_year Genre Profitability Rotten Tomatoes %
WALL-E 2008 Animation 2.896019 96
Waitress 2007 Romance 11.089742 89
Tangled 2010 Animation 1.365692 89
Rachel Getting Married 2008 Drama 1.384167 85
My Week with Marilyn 2011 Drama 0.825800 83
Midnight in Paris 2011 Romence 8.744706 93

4. mutate(): (4 points), Add a new column called “Profitability_millions” that converts the Profitability to millions of dollars.

q4 <- q3 %>%
  mutate(Profitability_millions = Profitability / 1e6)

knitr::kable(head(q4, 6))
movie_title release_year Genre Profitability Rotten Tomatoes % Profitability_millions
WALL-E 2008 Animation 2.896019 96 2.90e-06
Waitress 2007 Romance 11.089742 89 1.11e-05
Tangled 2010 Animation 1.365692 89 1.40e-06
Rachel Getting Married 2008 Drama 1.384167 85 1.40e-06
My Week with Marilyn 2011 Drama 0.825800 83 8.00e-07
Midnight in Paris 2011 Romence 8.744706 93 8.70e-06

5. arrange(): (3 points), Sort the filtered dataset by Rotten Tomatoes % in descending order, and then by Profitability in descending order. five <- four %>% arrange(desc(Rotten Tomatoes %) , desc(Profitability_millions))

q5 <- q4 %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

knitr::kable(head(q5 %>% select(movie_title, `Rotten Tomatoes %`, Profitability_millions),6))
movie_title Rotten Tomatoes % Profitability_millions
WALL-E 96 2.90e-06
Midnight in Paris 93 8.70e-06
Enchanted 93 4.00e-06
Knocked Up 91 6.60e-06
Waitress 89 1.11e-05
A Serious Man 89 4.40e-06

6. Combining functions: (3 points), Use the pipe operator (%>%) to chain these operations together, starting with the original dataset and ending with a final dataframe that incorporates all the above transformations.

final_movies <- movies %>%
  rename(movie_title = Film,
         release_year = Year) %>%
  select(movie_title,
         release_year,
         Genre,
         Profitability,
         `Rotten Tomatoes %`) %>%
  filter(release_year > 2000,
         `Rotten Tomatoes %` > 80) %>%
  mutate(Profitability_millions = Profitability / 1e6) %>%
  arrange(desc(`Rotten Tomatoes %`),
          desc(Profitability_millions))

knitr::kable(head(final_movies, 6))
movie_title release_year Genre Profitability Rotten Tomatoes % Profitability_millions
WALL-E 2008 Animation 2.896019 96 2.90e-06
Midnight in Paris 2011 Romence 8.744706 93 8.70e-06
Enchanted 2007 Comedy 4.005737 93 4.00e-06
Knocked Up 2007 Comedy 6.636402 91 6.60e-06
Waitress 2007 Romance 11.089742 89 1.11e-05
A Serious Man 2009 Drama 4.382857 89 4.40e-06

7. Interpret question 6 (1 point)

EXTRA CREDIT (4 points)

Create a summary dataframe that shows the average rating and Profitability_millions for movies by Genre. Hint: You’ll need to use group_by() and summarize().

genre_summary <- final_movies %>%
  mutate(
    Genre = recode(Genre,
                   "Comdy"   = "Comedy",
                   "comedy"  = "Comedy",
                   "Romnace" = "Romance",
                   "Romence" = "Romance")
  ) %>%
  group_by(Genre) %>%
  summarize(
    avg_rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    avg_profit_millions = mean(Profitability_millions, na.rm = TRUE),
    .groups = "drop"
  )

knitr::kable(head(genre_summary, 6))
Genre avg_rating avg_profit_millions
Animation 92.50000 2.1e-06
Comedy 88.75000 5.8e-06
Drama 85.66667 2.2e-06
Romance 89.00000 6.6e-06