Question 1

# Rename the Film and Year columns of movies to movie_title and release_year.
q1 <- rename(
  movies,
  movie_title = Film,
  release_year = Year
)
# Show the first six rows of the renamed table.
print(head(q1, n = 6))
## # A tibble: 6 × 8
##   movie_title               Genre `Lead Studio` `Audience score %` Profitability
##   <chr>                     <chr> <chr>                      <dbl>         <dbl>
## 1 Zack and Miri Make a Por… Roma… The Weinstei…                 70          1.75
## 2 Youth in Revolt           Come… The Weinstei…                 52          1.09
## 3 You Will Meet a Tall Dar… Come… Independent                   35          1.21
## 4 When in Rome              Come… Disney                        44          0   
## 5 What Happens in Vegas     Come… Fox                           72          6.27
## 6 Water For Elephants       Drama 20th Century…                 72          3.08
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## #   release_year <dbl>

Question 2

# Keep only the title, release year, genre and profitability columns of q1.
q2 <- select(q1, movie_title, release_year, Genre, Profitability)
# Show the first six rows of the reduced table.
print(head(q2, n = 6))
## # A tibble: 6 × 4
##   movie_title                        release_year Genre   Profitability
##   <chr>                                     <dbl> <chr>           <dbl>
## 1 Zack and Miri Make a Porno                 2008 Romance          1.75
## 2 Youth in Revolt                            2010 Comedy           1.09
## 3 You Will Meet a Tall Dark Stranger         2010 Comedy           1.21
## 4 When in Rome                               2010 Comedy           0   
## 5 What Happens in Vegas                      2008 Comedy           6.27
## 6 Water For Elephants                        2011 Drama            3.08

Question 3

# Keep rows of q1 whose release_year is above 2000 and whose
# Rotten Tomatoes score is above 80. Separate filter() conditions are
# combined with a logical AND.
q3 <- q1 %>%
  filter(
    release_year > 2000,
    `Rotten Tomatoes %` > 80
  )
head(q3, n = 6)
## # A tibble: 6 × 8
##   movie_title            Genre    `Lead Studio` `Audience score %` Profitability
##   <chr>                  <chr>    <chr>                      <dbl>         <dbl>
## 1 WALL-E                 Animati… Disney                        89         2.90 
## 2 Waitress               Romance  Independent                   67        11.1  
## 3 Tangled                Animati… Disney                        88         1.37 
## 4 Rachel Getting Married Drama    Independent                   61         1.38 
## 5 My Week with Marilyn   Drama    The Weinstei…                 84         0.826
## 6 Midnight in Paris      Romence  Sony                          84         8.74 
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## #   release_year <dbl>

Question 4

# Add Profitability_millions: the Profitability column multiplied by 1e6.
# NOTE(review): the units of Profitability are not visible here; confirm
# that scaling up by one million is what the column name is meant to convey.
q4 <- mutate(q3, Profitability_millions = Profitability * 1e6)
head(q4, n = 6)
## # A tibble: 6 × 9
##   movie_title            Genre    `Lead Studio` `Audience score %` Profitability
##   <chr>                  <chr>    <chr>                      <dbl>         <dbl>
## 1 WALL-E                 Animati… Disney                        89         2.90 
## 2 Waitress               Romance  Independent                   67        11.1  
## 3 Tangled                Animati… Disney                        88         1.37 
## 4 Rachel Getting Married Drama    Independent                   61         1.38 
## 5 My Week with Marilyn   Drama    The Weinstei…                 84         0.826
## 6 Midnight in Paris      Romence  Sony                          84         8.74 
## # ℹ 4 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## #   release_year <dbl>, Profitability_millions <dbl>

Question 5

# Sort q4 from highest to lowest Rotten Tomatoes score; ties are ordered
# from highest to lowest Profitability_millions.
q5 <- arrange(
  q4,
  desc(`Rotten Tomatoes %`),
  desc(Profitability_millions)
)
head(q5, n = 6)
## # A tibble: 6 × 9
##   movie_title       Genre     `Lead Studio` `Audience score %` Profitability
##   <chr>             <chr>     <chr>                      <dbl>         <dbl>
## 1 WALL-E            Animation Disney                        89          2.90
## 2 Midnight in Paris Romence   Sony                          84          8.74
## 3 Enchanted         Comedy    Disney                        80          4.01
## 4 Knocked Up        Comedy    Universal                     83          6.64
## 5 Waitress          Romance   Independent                   67         11.1 
## 6 A Serious Man     Drama     Universal                     64          4.38
## # ℹ 4 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## #   release_year <dbl>, Profitability_millions <dbl>

Question 6

# Chain the rename, select, filter, mutate and arrange steps into a single
# pipeline that starts from the movies table.
q6 <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  select(
    movie_title, release_year, Genre, Profitability, `Rotten Tomatoes %`
  ) %>%
  # Separate filter() conditions are combined with a logical AND.
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  mutate(Profitability_millions = Profitability * 1e6) %>%
  # Highest rating first; ties broken by highest Profitability_millions.
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))

Question 7

# The most profitable movies are not the most popular ones, as the data shows. When the data is filtered to show the most profitable movies, they do not have the highest Rotten Tomatoes ratings. The same goes for worldwide gross: the movies with the highest gross are not the ones with the highest Rotten Tomatoes ratings.

Extra Credit

# Average Rotten Tomatoes rating and average profitability per genre.
#
# Genre labels are cleaned before grouping: the raw data spells the same
# genre several ways ("Comdy", "comedy", "Romence", "romance"), which would
# otherwise produce separate groups for what is a single genre.
# Profitability is multiplied by 1e6 before averaging so that
# Avg_Profitability_millions is on the same scale as the
# Profitability_millions column computed in the earlier questions.
# NOTE: any printed output captured before these fixes is stale; re-run
# this chunk to refresh it.
summary_df <- movies %>%
  mutate(
    # Values not listed here (including NA) are left unchanged by recode().
    Genre = dplyr::recode(
      Genre,
      "Comdy" = "Comedy",
      "comedy" = "Comedy",
      "Romence" = "Romance",
      "romance" = "Romance"
    ),
    Profitability_millions = Profitability * 1e6
  ) %>%
  group_by(Genre) %>%
  summarize(
    Avg_Rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    Avg_Profitability_millions = mean(Profitability_millions, na.rm = TRUE)
  )
print(summary_df)
## # A tibble: 10 × 3
##    Genre     Avg_Rating Avg_Profitability_millions
##    <chr>          <dbl>                      <dbl>
##  1 Action          11                        1.25 
##  2 Animation       74.2                      3.76 
##  3 Comdy           13                        2.65 
##  4 Comedy          42.7                      3.78 
##  5 Drama           51.5                      8.41 
##  6 Fantasy         73                        1.78 
##  7 Romance         42.1                      3.98 
##  8 Romence         93                        8.74 
##  9 comedy          87                        8.10 
## 10 romance         54                        0.653