Original Dataset

# Load the raw movies table from the hosted CSV (needs network access).
movies <- utils::read.csv(
  file = "https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv"
)

Question 1- Rename

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Rename `Film` and `Year` to descriptive snake_case names; every other
# column keeps its original name and position.
Renamed_movies <- rename(
  movies,
  movie_title = Film,
  release_year = Year
)
# Show the first six rows to confirm the new column names.
print(head(Renamed_movies, n = 6))
##                          movie_title   Genre           Lead.Studio
## 1         Zack and Miri Make a Porno Romance The Weinstein Company
## 2                    Youth in Revolt  Comedy The Weinstein Company
## 3 You Will Meet a Tall Dark Stranger  Comedy           Independent
## 4                       When in Rome  Comedy                Disney
## 5              What Happens in Vegas  Comedy                   Fox
## 6                Water For Elephants   Drama      20th Century Fox
##   Audience.score.. Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1               70      1.747542                64         $41.94          2008
## 2               52      1.090000                68         $19.62          2010
## 3               35      1.211818                43         $26.66          2010
## 4               44      0.000000                15         $43.04          2010
## 5               72      6.267647                28        $219.37          2008
## 6               72      3.081421                60        $117.09          2011

Question 2- Select

# Keep only the four columns of interest, in this order.
Select_movies <- select(
  Renamed_movies,
  movie_title,
  release_year,
  Genre,
  Profitability
)
# Show the first six rows of the narrowed table.
print(head(Select_movies, n = 6))
##                          movie_title release_year   Genre Profitability
## 1         Zack and Miri Make a Porno         2008 Romance      1.747542
## 2                    Youth in Revolt         2010  Comedy      1.090000
## 3 You Will Meet a Tall Dark Stranger         2010  Comedy      1.211818
## 4                       When in Rome         2010  Comedy      0.000000
## 5              What Happens in Vegas         2008  Comedy      6.267647
## 6                Water For Elephants         2011   Drama      3.081421

Question 3- Filter

# Keep movies released after 2000 whose Rotten Tomatoes score is above 80.
# Comma-separated conditions in filter() must all hold (same as `&`).
Filter_movies <- Renamed_movies %>%
  filter(
    release_year > 2000,
    Rotten.Tomatoes.. > 80
  )
# Print every remaining row.
print(Filter_movies)
##               movie_title     Genre           Lead.Studio Audience.score..
## 1                  WALL-E Animation                Disney               89
## 2                Waitress   Romance           Independent               67
## 3                 Tangled Animation                Disney               88
## 4  Rachel Getting Married     Drama           Independent               61
## 5    My Week with Marilyn     Drama The Weinstein Company               84
## 6       Midnight in Paris   Romence                  Sony               84
## 7              Knocked Up    Comedy             Universal               83
## 8               Jane Eyre   Romance             Universal               77
## 9               Enchanted    Comedy                Disney               80
## 10              Beginners    Comedy           Independent               80
## 11          A Serious Man     Drama             Universal               64
## 12   (500) Days of Summer    comedy                   Fox               81
##    Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1       2.896019                96        $521.28          2008
## 2      11.089742                89         $22.18          2007
## 3       1.365692                89        $355.01          2010
## 4       1.384167                85         $16.61          2008
## 5       0.825800                83          $8.26          2011
## 6       8.744706                93        $148.66          2011
## 7       6.636402                91           $219          2007
## 8       0.000000                85         $30.15          2011
## 9       4.005737                93        $340.49          2007
## 10      4.471875                84         $14.31          2011
## 11      4.382857                89         $30.68          2009
## 12      8.096000                87         $60.72          2009

Question 4- Mutate

# Add a column holding Profitability scaled by one million.
# NOTE(review): the column name says "in Millions", but the value is
# Profitability multiplied by 1e6 -- confirm the intended units.
Mutate_movies <- mutate(
  Filter_movies,
  Profitability_in_Millions = Profitability * 1e6
)
# Print only the newly created column.
print(Mutate_movies %>% select(Profitability_in_Millions))
##    Profitability_in_Millions
## 1                    2896019
## 2                   11089742
## 3                    1365692
## 4                    1384167
## 5                     825800
## 6                    8744706
## 7                    6636402
## 8                          0
## 9                    4005737
## 10                   4471875
## 11                   4382857
## 12                   8096000

Question 5- Arrange

# Order rows by Rotten Tomatoes score, highest first; ties are broken by
# the scaled profitability, also highest first.
sorted_movies <- arrange(
  Mutate_movies,
  desc(Rotten.Tomatoes..),
  desc(Profitability_in_Millions)
)
# Print just the two sort keys so the ordering is easy to verify.
print(
  sorted_movies %>%
    select(Rotten.Tomatoes.., Profitability_in_Millions)
)
##    Rotten.Tomatoes.. Profitability_in_Millions
## 1                 96                   2896019
## 2                 93                   8744706
## 3                 93                   4005737
## 4                 91                   6636402
## 5                 89                  11089742
## 6                 89                   4382857
## 7                 89                   1365692
## 8                 87                   8096000
## 9                 85                   1384167
## 10                85                         0
## 11                84                   4471875
## 12                83                    825800

Question 6 & 7- Combine & Interpret

# Full pipeline in one chain: rename -> select -> filter -> mutate -> arrange.
Combine_Movies <- movies %>%
  # Descriptive names for the title and year columns.
  rename(
    movie_title = Film,
    release_year = Year
  ) %>%
  # Keep only the columns used below.
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    Rotten.Tomatoes..
  ) %>%
  # Released after 2000 and rated above 80 (both conditions must hold).
  filter(
    release_year > 2000,
    Rotten.Tomatoes.. > 80
  ) %>%
  # Profitability scaled by one million.
  mutate(Profitability_in_Millions = Profitability * 1e6) %>%
  # Best-rated first; ties broken by scaled profitability, highest first.
  arrange(
    desc(Rotten.Tomatoes..),
    desc(Profitability_in_Millions)
  )
# Show the top six rows of the result.
print(head(Combine_Movies, n = 6))
##         movie_title release_year     Genre Profitability Rotten.Tomatoes..
## 1            WALL-E         2008 Animation      2.896019                96
## 2 Midnight in Paris         2011   Romence      8.744706                93
## 3         Enchanted         2007    Comedy      4.005737                93
## 4        Knocked Up         2007    Comedy      6.636402                91
## 5          Waitress         2007   Romance     11.089742                89
## 6     A Serious Man         2009     Drama      4.382857                89
##   Profitability_in_Millions
## 1                   2896019
## 2                   8744706
## 3                   4005737
## 4                   6636402
## 5                  11089742
## 6                   4382857
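# The highest-rated movies are not always the most profitable. For example,
# WALL-E has the top Rotten Tomatoes score (96) but a profitability of only
# about 2.9, while Waitress scores lower (89) yet has a profitability of
# about 11.1. Differences like this are likely due to advertising, target
# audience, and other factors.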

Extra Credit

# Per-genre averages of the Rotten Tomatoes score and the scaled
# profitability, computed over the filtered movies.
# NOTE(review): Genre is grouped exactly as spelled in the data, so the
# variants "Romence" and "comedy" form their own groups in the output
# instead of merging with "Romance" and "Comedy" -- consider normalising
# the labels before grouping.
movies_by_genre <- group_by(Mutate_movies, Genre)
summaryMutate_movies <- summarise(
  movies_by_genre,
  avg_Rotten.Tomatoes.. = mean(Rotten.Tomatoes..),
  avg_ProfitabilityinMillions = mean(Profitability_in_Millions)
)
rm(movies_by_genre)
print(summaryMutate_movies)
## # A tibble: 6 × 3
##   Genre     avg_Rotten.Tomatoes.. avg_ProfitabilityinMillions
##   <chr>                     <dbl>                       <dbl>
## 1 Animation                  92.5                    2130856.
## 2 Comedy                     89.3                    5038005.
## 3 Drama                      85.7                    2197608.
## 4 Romance                    87                      5544871.
## 5 Romence                    93                      8744706.
## 6 comedy                     87                      8096000