# Give the Film and Year columns of `movies` snake_case names; every other
# column is carried over unchanged.
q1 <- rename(movies, movie_title = Film, release_year = Year)

# Preview the first rows of the renamed table.
q1 %>%
  head()
## # A tibble: 6 × 8
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Zack and Miri Make a Por… Roma… The Weinstei… 70 1.75
## 2 Youth in Revolt Come… The Weinstei… 52 1.09
## 3 You Will Meet a Tall Dar… Come… Independent 35 1.21
## 4 When in Rome Come… Disney 44 0
## 5 What Happens in Vegas Come… Fox 72 6.27
## 6 Water For Elephants Drama 20th Century… 72 3.08
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>
# Narrow q1 down to the title, release year, genre and profitability columns.
q2 <- select(
  q1,
  movie_title,
  release_year,
  Genre,
  Profitability
)

# Preview the first rows of the narrowed table.
q2 %>%
  head()
## # A tibble: 6 × 4
## movie_title release_year Genre Profitability
## <chr> <dbl> <chr> <dbl>
## 1 Zack and Miri Make a Porno 2008 Romance 1.75
## 2 Youth in Revolt 2010 Comedy 1.09
## 3 You Will Meet a Tall Dark Stranger 2010 Comedy 1.21
## 4 When in Rome 2010 Comedy 0
## 5 What Happens in Vegas 2008 Comedy 6.27
## 6 Water For Elephants 2011 Drama 3.08
# Keep only the rows of q1 released after 2000 whose Rotten Tomatoes score is
# above 80. Both conditions must hold for a row to be kept.
q3 <- filter(
  q1,
  release_year > 2000 & `Rotten Tomatoes %` > 80
)

# Preview the first rows of the filtered table.
q3 %>%
  head()
## # A tibble: 6 × 8
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 WALL-E Animati… Disney 89 2.90
## 2 Waitress Romance Independent 67 11.1
## 3 Tangled Animati… Disney 88 1.37
## 4 Rachel Getting Married Drama Independent 61 1.38
## 5 My Week with Marilyn Drama The Weinstei… 84 0.826
## 6 Midnight in Paris Romence Sony 84 8.74
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>
# Add Profitability_millions: the Profitability column multiplied by one
# million.
# NOTE(review): the name suggests a value *in* millions, while the code scales
# Profitability *up by* a million -- confirm the intended unit.
q4 <- mutate(q3, Profitability_millions = Profitability * 1e6)

# Show the new column next to the one it was derived from.
q4 %>%
  select(Profitability_millions, Profitability) %>%
  head()
## # A tibble: 6 × 2
## Profitability_millions Profitability
## <dbl> <dbl>
## 1 2896019. 2.90
## 2 11089742. 11.1
## 3 1365692. 1.37
## 4 1384167. 1.38
## 5 825800 0.826
## 6 8744706. 8.74
# Order q4 from highest to lowest Rotten Tomatoes score; rows with the same
# score are ordered from highest to lowest Profitability.
q5 <- arrange(
  q4,
  desc(`Rotten Tomatoes %`),
  desc(Profitability)
)

# Preview just the two sort keys.
q5 %>%
  select(`Rotten Tomatoes %`, Profitability) %>%
  head()
## # A tibble: 6 × 2
## `Rotten Tomatoes %` Profitability
## <dbl> <dbl>
## 1 96 2.90
## 2 93 8.74
## 3 93 4.01
## 4 91 6.64
## 5 89 11.1
## 6 89 4.38
# The rename / filter / select / mutate / arrange steps combined into a single
# pipeline that starts from the raw `movies` table.
q6 <- movies %>%
  # Snake_case names for the title and year columns.
  rename(movie_title = Film, release_year = Year) %>%
  # Released after 2000 with a Rotten Tomatoes score above 80.
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  # Keep only the columns used below.
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    `Rotten Tomatoes %`
  ) %>%
  # Profitability multiplied by one million.
  mutate(Profitability_millions = Profitability * 1e6) %>%
  # Highest Rotten Tomatoes score first, ties broken by higher Profitability.
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability))

# Preview the first rows of the result.
q6 %>%
  head()
## # A tibble: 6 × 6
## movie_title release_year Genre Profitability `Rotten Tomatoes %`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 WALL-E 2008 Animation 2.90 96
## 2 Midnight in Paris 2011 Romence 8.74 93
## 3 Enchanted 2007 Comedy 4.01 93
## 4 Knocked Up 2007 Comedy 6.64 91
## 5 Waitress 2007 Romance 11.1 89
## 6 A Serious Man 2009 Drama 4.38 89
## # ℹ 1 more variable: Profitability_millions <dbl>
Based on the results from question 6, the best movies are not necessarily the most popular. If "best" is defined by profitability and "most popular" by Rotten Tomatoes rating, then the movies with the highest Rotten Tomatoes ratings do not always have the highest profitability. For example, WALL-E has the highest Rotten Tomatoes rating (96%) but a lower profitability (2.90) than Midnight in Paris (8.74), which has a Rotten Tomatoes rating of 93%. This suggests that the best movies (those with the highest profitability) are not necessarily the most popular ones (those with the highest Rotten Tomatoes ratings).
# Per-genre summary of the q4 subset: mean Rotten Tomatoes score and mean
# Profitability_millions, ignoring missing values.
extra_credit <- q4 %>%
  # Fold inconsistent genre labels into their canonical spelling before
  # grouping. Without this, "Romence" and "comedy" are summarised as their own
  # genres instead of counting toward Romance and Comedy. Any printed output
  # rendered before this change still lists them as separate rows; re-run to
  # refresh it.
  mutate(
    Genre = case_when(
      Genre == "Romence" ~ "Romance",
      Genre == "comedy" ~ "Comedy",
      TRUE ~ Genre
    )
  ) %>%
  group_by(Genre) %>%
  summarize(
    avg_rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    avg_profitability = mean(Profitability_millions, na.rm = TRUE)
  )

# One row per (normalized) genre.
print(extra_credit)
## # A tibble: 6 × 3
## Genre avg_rating avg_profitability
## <chr> <dbl> <dbl>
## 1 Animation 92.5 2130856.
## 2 Comedy 89.3 5038005.
## 3 Drama 85.7 2197608.
## 4 Romance 87 5544871.
## 5 Romence 93 8744706.
## 6 comedy 87 8096000