library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Read the movies CSV from the hosted gist into a base data.frame.
# read.csv() rewrites non-syntactic headers, which is where column names
# such as `Rotten.Tomatoes..` used further down come from.
movies <- read.csv(
  file = "https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv"
)
# Q1: give the title and year columns descriptive snake_case names
# (Film -> movie_title, Year -> release_year); all other columns are kept.
q1 <- rename(movies, movie_title = Film, release_year = Year)
# Preview the first rows of the renamed data.
head(q1)
## movie_title Genre Lead.Studio
## 1 Zack and Miri Make a Porno Romance The Weinstein Company
## 2 Youth in Revolt Comedy The Weinstein Company
## 3 You Will Meet a Tall Dark Stranger Comedy Independent
## 4 When in Rome Comedy Disney
## 5 What Happens in Vegas Comedy Fox
## 6 Water For Elephants Drama 20th Century Fox
## Audience.score.. Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1 70 1.747542 64 $41.94 2008
## 2 52 1.090000 68 $19.62 2010
## 3 35 1.211818 43 $26.66 2010
## 4 44 0.000000 15 $43.04 2010
## 5 72 6.267647 28 $219.37 2008
## 6 72 3.081421 60 $117.09 2011
# Q2: keep only the title, year, genre and profitability columns,
# in that order.
q2 <- select(q1, movie_title, release_year, Genre, Profitability)
# Preview the first rows of the reduced data.
head(q2)
## movie_title release_year Genre Profitability
## 1 Zack and Miri Make a Porno 2008 Romance 1.747542
## 2 Youth in Revolt 2010 Comedy 1.090000
## 3 You Will Meet a Tall Dark Stranger 2010 Comedy 1.211818
## 4 When in Rome 2010 Comedy 0.000000
## 5 What Happens in Vegas 2008 Comedy 6.267647
## 6 Water For Elephants 2011 Drama 3.081421
# Q3: keep rows whose release_year is greater than 2000 and whose
# Rotten.Tomatoes.. value is greater than 80 (both conditions must hold).
q3 <- filter(q1, release_year > 2000 & Rotten.Tomatoes.. > 80)
# Preview the first rows that pass both conditions.
head(q3)
## movie_title Genre Lead.Studio Audience.score..
## 1 WALL-E Animation Disney 89
## 2 Waitress Romance Independent 67
## 3 Tangled Animation Disney 88
## 4 Rachel Getting Married Drama Independent 61
## 5 My Week with Marilyn Drama The Weinstein Company 84
## 6 Midnight in Paris Romence Sony 84
## Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1 2.896019 96 $521.28 2008
## 2 11.089742 89 $22.18 2007
## 3 1.365692 89 $355.01 2010
## 4 1.384167 85 $16.61 2008
## 5 0.825800 83 $8.26 2011
## 6 8.744706 93 $148.66 2011
# Q4: add Profitability_millions, computed as Profitability divided by 1e6.
# This is the same conversion the combined q6 pipeline applies, so the
# step-by-step result and the chained result agree.
# NOTE(review): confirm that dividing by 1e6 is the intended conversion.
q4 <- q1 %>%
  mutate(Profitability_millions = Profitability / 1e6)
# Preview the first rows, including the new column.
head(q4)
## movie_title Genre Lead.Studio
## 1 Zack and Miri Make a Porno Romance The Weinstein Company
## 2 Youth in Revolt Comedy The Weinstein Company
## 3 You Will Meet a Tall Dark Stranger Comedy Independent
## 4 When in Rome Comedy Disney
## 5 What Happens in Vegas Comedy Fox
## 6 Water For Elephants Drama 20th Century Fox
## Audience.score.. Profitability Rotten.Tomatoes.. Worldwide.Gross release_year
## 1 70 1.747542 64 $41.94 2008
## 2 52 1.090000 68 $19.62 2010
## 3 35 1.211818 43 $26.66 2010
## 4 44 0.000000 15 $43.04 2010
## 5 72 6.267647 28 $219.37 2008
## 6 72 3.081421 60 $117.09 2011
## Profitability_millions
## 1 1.747542e-06
## 2 1.090000e-06
## 3 1.211818e-06
## 4 0.000000e+00
## 5 6.267647e-06
## 6 3.081421e-06
# Q5: order rows from highest to lowest Rotten.Tomatoes.., breaking ties
# by highest Profitability first.
q5 <- arrange(q1, desc(Rotten.Tomatoes..), desc(Profitability))
# Preview the top-ranked rows.
head(q5)
## movie_title Genre Lead.Studio Audience.score.. Profitability
## 1 WALL-E Animation Disney 89 2.896019
## 2 Midnight in Paris Romence Sony 84 8.744706
## 3 Enchanted Comedy Disney 80 4.005737
## 4 Knocked Up Comedy Universal 83 6.636402
## 5 Waitress Romance Independent 67 11.089742
## 6 A Serious Man Drama Universal 64 4.382857
## Rotten.Tomatoes.. Worldwide.Gross release_year
## 1 96 $521.28 2008
## 2 93 $148.66 2011
## 3 93 $340.49 2007
## 4 91 $219 2007
## 5 89 $22.18 2007
## 6 89 $30.68 2009
# Q6: the previous steps combined into a single pipeline on the raw data.
q6 <- movies %>%
  # Pick the five columns of interest, renaming Film and Year on the way.
  select(
    movie_title = Film,
    release_year = Year,
    Genre,
    Profitability,
    Rotten.Tomatoes..
  ) %>%
  # Keep rows with release_year above 2000 and Rotten.Tomatoes.. above 80.
  filter(release_year > 2000, Rotten.Tomatoes.. > 80) %>%
  # Add the scaled profitability column.
  mutate(Profitability_millions = Profitability / 1e6) %>%
  # Highest Rotten.Tomatoes.. first, ties broken by highest Profitability.
  arrange(desc(Rotten.Tomatoes..), desc(Profitability))
Yes, the best movies are the most popular, based on the relationship between high Rotten Tomatoes scores and high levels of profitability. # EXTRA CREDIT (4 points) # Create a summary dataframe that shows the average rating and Profitability_millions for movies by Genre. Hint: You’ll need to use group_by() and summarize().