library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Download the movies CSV from the pinned gist revision and parse it into a
# tibble; readr guesses the column types and reports them as a message.
movies <- read_csv(
  file = "https://gist.githubusercontent.com/tiangechen/b68782efa49a16edaf07dc2cdaa855ea/raw/0c794a9717f18b094eabab2cd6a6b9a226903577/movies.csv"
)
## Rows: 77 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Film, Genre, Lead Studio, Worldwide Gross
## dbl (4): Audience score %, Profitability, Rotten Tomatoes %, Year
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Q1: give the title and year columns snake_case names
# (Film -> movie_title, Year -> release_year); all other columns are kept.
q1 <- rename(
  movies,
  movie_title = Film,
  release_year = Year
)
head(q1)
# Q2: keep only the title, release year, genre and profitability columns.
q2 <- select(
  q1,
  movie_title,
  release_year,
  Genre,
  Profitability
)
head(q2)
# Q3: keep films released after 2000 whose Rotten Tomatoes score is above 80.
# The column name contains spaces and a percent sign, so it has to be wrapped
# in backticks. Written in quotes it is a constant string, and comparing that
# string with 80 never looks at the column values, so the rating condition
# would not filter anything.
q3 <- q1 %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80)
head(q3)
# Q4: add Profitability_millions, defined as Profitability multiplied by 1e6.
q4 <- mutate(
  q3,
  Profitability_millions = Profitability * 1e6
)
head(q4)
# Q5: sort by Rotten Tomatoes score (highest first), breaking ties by
# Profitability_millions (highest first).
# The rating column must be referenced with backticks: a quoted name is a
# constant string that is identical for every row, so desc() of it does not
# order the rows by rating at all.
q5 <- q4 %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))
head(q5)
# Q6: the rename, select, filter, mutate and arrange steps combined into a
# single pipeline that starts from the raw movies data.
# `Rotten Tomatoes %` is a non-syntactic column name, so it is written with
# backticks everywhere. select() also accepts a quoted name, but inside
# filter() and arrange() a quoted name is just a constant string, which means
# the rating would be neither filtered on nor sorted by.
q6 <- movies %>%
  rename(movie_title = Film, release_year = Year) %>%
  select(
    movie_title,
    release_year,
    Genre,
    Profitability,
    `Rotten Tomatoes %`
  ) %>%
  filter(release_year > 2000, `Rotten Tomatoes %` > 80) %>%
  mutate(Profitability_millions = Profitability * 1e6) %>%
  arrange(desc(`Rotten Tomatoes %`), desc(Profitability_millions))
head(q6)
# The most profitable movies are not necessarily the most popular, as the data
# show. When the data are filtered to show the most profitable movies, those
# movies do not always have the highest Rotten Tomatoes ratings. In conclusion,
# the movies with the highest gross are not the ones with the highest Rotten
# Tomatoes ratings.
# Per-genre summary: average Rotten Tomatoes rating and average profitability
# in millions, ignoring missing values.
# The head(q7) call that used to precede this was removed: no object named q7
# is created in the visible script, so the call stops with
# "object 'q7' not found" before the summary is ever computed.
summary_df <- movies %>%
  # Use the same conversion as the Profitability_millions column built in the
  # earlier steps, so Avg_Profitability_millions really holds the converted
  # values instead of the raw Profitability figures.
  mutate(Profitability_millions = Profitability * 1e6) %>%
  group_by(Genre) %>%
  summarize(
    Avg_Rating = mean(`Rotten Tomatoes %`, na.rm = TRUE),
    Avg_Profitability_millions = mean(Profitability_millions, na.rm = TRUE)
  )
print(summary_df)