library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
movies <- read_csv("https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv")
## Rows: 77 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Film, Genre, Lead Studio, Worldwide Gross
## dbl (4): Audience score %, Profitability, Rotten Tomatoes %, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
q1 <- movies %>%
rename(movie_title = Film , release_year = Year)
(q1)
## # A tibble: 77 × 8
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Zack and Miri Make a Po… Roma… The Weinstei… 70 1.75
## 2 Youth in Revolt Come… The Weinstei… 52 1.09
## 3 You Will Meet a Tall Da… Come… Independent 35 1.21
## 4 When in Rome Come… Disney 44 0
## 5 What Happens in Vegas Come… Fox 72 6.27
## 6 Water For Elephants Drama 20th Century… 72 3.08
## 7 WALL-E Anim… Disney 89 2.90
## 8 Waitress Roma… Independent 67 11.1
## 9 Waiting For Forever Roma… Independent 53 0.005
## 10 Valentine's Day Come… Warner Bros. 54 4.18
## # ℹ 67 more rows
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>
q2 <- q1 %>%
select(movie_title, release_year, Genre, Profitability)
(q2)
## # A tibble: 77 × 4
## movie_title release_year Genre Profitability
## <chr> <dbl> <chr> <dbl>
## 1 Zack and Miri Make a Porno 2008 Romance 1.75
## 2 Youth in Revolt 2010 Comedy 1.09
## 3 You Will Meet a Tall Dark Stranger 2010 Comedy 1.21
## 4 When in Rome 2010 Comedy 0
## 5 What Happens in Vegas 2008 Comedy 6.27
## 6 Water For Elephants 2011 Drama 3.08
## 7 WALL-E 2008 Animation 2.90
## 8 Waitress 2007 Romance 11.1
## 9 Waiting For Forever 2011 Romance 0.005
## 10 Valentine's Day 2010 Comedy 4.18
## # ℹ 67 more rows
q3 <- q1 %>%
filter(release_year > 2000 & 'Rotten Tomatoes %' > 80)
(q3)
## # A tibble: 77 × 8
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Zack and Miri Make a Po… Roma… The Weinstei… 70 1.75
## 2 Youth in Revolt Come… The Weinstei… 52 1.09
## 3 You Will Meet a Tall Da… Come… Independent 35 1.21
## 4 When in Rome Come… Disney 44 0
## 5 What Happens in Vegas Come… Fox 72 6.27
## 6 Water For Elephants Drama 20th Century… 72 3.08
## 7 WALL-E Anim… Disney 89 2.90
## 8 Waitress Roma… Independent 67 11.1
## 9 Waiting For Forever Roma… Independent 53 0.005
## 10 Valentine's Day Come… Warner Bros. 54 4.18
## # ℹ 67 more rows
## # ℹ 3 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>
q4 <- q3 %>%
mutate(Profitability_millions = Profitability*1e6)
(q4)
## # A tibble: 77 × 9
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Zack and Miri Make a Po… Roma… The Weinstei… 70 1.75
## 2 Youth in Revolt Come… The Weinstei… 52 1.09
## 3 You Will Meet a Tall Da… Come… Independent 35 1.21
## 4 When in Rome Come… Disney 44 0
## 5 What Happens in Vegas Come… Fox 72 6.27
## 6 Water For Elephants Drama 20th Century… 72 3.08
## 7 WALL-E Anim… Disney 89 2.90
## 8 Waitress Roma… Independent 67 11.1
## 9 Waiting For Forever Roma… Independent 53 0.005
## 10 Valentine's Day Come… Warner Bros. 54 4.18
## # ℹ 67 more rows
## # ℹ 4 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>, Profitability_millions <dbl>
q5 <- q4 %>%
arrange(desc('Rotten Tomatoes %'), desc(Profitability_millions))
(q5)
## # A tibble: 77 × 9
## movie_title Genre `Lead Studio` `Audience score %` Profitability
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Fireproof Drama Independent 51 66.9
## 2 High School Musical 3: … Come… Disney 76 22.9
## 3 The Twilight Saga: New … Drama Summit 78 14.2
## 4 Waitress Roma… Independent 67 11.1
## 5 Twilight Roma… Summit 82 10.2
## 6 Mamma Mia! Come… Universal 76 9.23
## 7 Mamma Mia! Come… Universal 76 9.23
## 8 Midnight in Paris Rome… Sony 84 8.74
## 9 (500) Days of Summer come… Fox 81 8.10
## 10 The Proposal Come… Disney 74 7.87
## # ℹ 67 more rows
## # ℹ 4 more variables: `Rotten Tomatoes %` <dbl>, `Worldwide Gross` <chr>,
## # release_year <dbl>, Profitability_millions <dbl>
q6 <- movies %>%
rename(movie_title = Film, release_year = Year) %>%
select(movie_title, release_year, Genre, Profitability, 'Rotten Tomatoes %') %>%
filter(release_year > 2000 & 'Rotten Tomatoes %' > 80) %>%
mutate(Profitability_millions = Profitability * 1e6) %>%
arrange(desc('Rotten Tomatoes %'), desc(Profitability_millions))
(q6)
## # A tibble: 77 × 6
## movie_title release_year Genre Profitability `Rotten Tomatoes %`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 Fireproof 2008 Drama 66.9 40
## 2 High School Musical 3: … 2008 Come… 22.9 65
## 3 The Twilight Saga: New … 2009 Drama 14.2 27
## 4 Waitress 2007 Roma… 11.1 89
## 5 Twilight 2008 Roma… 10.2 49
## 6 Mamma Mia! 2008 Come… 9.23 53
## 7 Mamma Mia! 2008 Come… 9.23 53
## 8 Midnight in Paris 2011 Rome… 8.74 93
## 9 (500) Days of Summer 2009 come… 8.10 87
## 10 The Proposal 2009 Come… 7.87 43
## # ℹ 67 more rows
## # ℹ 1 more variable: Profitability_millions <dbl>
The most profitable movies are not considered the most popular as seen from the data. When filtering the data to show the most profitable movies, they weren’t always the highest rated rotten tomatoes ratings. In conclusion, the movies with the highest gross aren’t the highest ratings per rotten tomatoes.
summary_df <- movies %>%
group_by(Genre) %>%
summarize(
Avg_Rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
Avg_Profitability_millions = mean(Profitability, na.rm = TRUE)
)
print(summary_df)
## # A tibble: 10 × 3
## Genre Avg_Rating Avg_Profitability_millions
## <chr> <dbl> <dbl>
## 1 Action 11 1.25
## 2 Animation 74.2 3.76
## 3 Comdy 13 2.65
## 4 Comedy 42.7 3.78
## 5 Drama 51.5 8.41
## 6 Fantasy 73 1.78
## 7 Romance 42.1 3.98
## 8 Romence 93 8.74
## 9 comedy 87 8.10
## 10 romance 54 0.653