library(tidyverse)
# Read the movies CSV into `df`; every summary and plot below uses this table.
df <- read_csv(file = "data/original/Movies_Updated.csv")
# Mean IMDb rating per release year (missing ratings are ignored).
df_avgIMDb_byYear <- summarise(
  group_by(df, Year),
  mean_IMDb = mean(IMDb, na.rm = TRUE)
)
# Mean Rotten Tomatoes rating per release year (missing ratings are ignored).
df_RT_byYear <- summarise(
  group_by(df, Year),
  mean_RT = mean(`Rotten Tomatoes`, na.rm = TRUE)
)
# Bar chart of the number of movies per genre, one panel per platform.
# The genre axis uses the fixed order given in `limits` (first entry at the
# bottom after `coord_flip()`); genres not listed there are not shown.
ggplot(df, aes(x = Genre)) +
  geom_bar(aes(fill = Genre), width = 0.8, position = position_dodge(2)) +
  scale_fill_brewer(palette = "Set3") +
  # Explicit limits fully determine the axis order, so no reorder() of the
  # Genre factor is needed.
  scale_x_discrete(
    limits = c(
      "Mystery", "Sci-Fi", "Crime", "Animation", "Fantasy", "Romance",
      "Family", "Adventure", "Thriller", "Action", "Comedy", "Drama"
    )
  ) +
  coord_flip() +
  facet_wrap(~Platforms, nrow = 2) +
  # Only real labels here: `side`, `line` and `cex` are base-graphics
  # arguments and have no meaning inside labs().
  labs(
    x = "Genre",
    title = "The Number of Movies by Genre"
  )
# Bar chart of the number of movies per platform, one panel per value of `Age`.
ggplot(data = df, mapping = aes(x = Platforms)) +
  geom_bar(
    mapping = aes(fill = Platforms),
    position = position_dodge(2),
    width = 0.8
  ) +
  scale_fill_brewer(palette = "Set2") +
  # Axis tick labels are supplied positionally for the platform categories.
  scale_x_discrete(labels = c("Disney+", "Hulu", "Netflix", "Prime")) +
  facet_wrap(~Age, nrow = 2) +
  labs(title = "The Number of Movies in Streaming Platforms by Age")
# The dashed vertical line in each histogram marks the mean IMDb and Rotten Tomatoes rating, respectively.
# Histogram of IMDb ratings (bin width 0.5), one panel per platform.
# A dashed vertical line and a text label mark the mean IMDb rating computed
# over all rows of `df` (missing ratings ignored).
imdb_mean <- mean(df$IMDb, na.rm = TRUE)

ggplot(df, aes(x = IMDb)) +
  geom_histogram(binwidth = 0.5, fill = "gold2") +
  # A constant xintercept draws a single line per panel; mapping the mean
  # inside aes() repeated the identical line once for every row of `df`.
  # The default line width (0.5) is used.
  geom_vline(xintercept = imdb_mean, colour = "orange2", linetype = "dashed") +
  # annotate() draws one label per panel instead of one per data row, and the
  # label text is derived from the data rather than hard-coded.
  annotate(
    "text",
    x = imdb_mean, y = 0,
    label = sprintf("%.2f", imdb_mean),
    colour = "orange3", size = 3
  ) +
  facet_wrap(~Platforms, nrow = 4) +
  labs(
    x = "IMDb Rating",
    title = "Distribution of movies in terms of IMDb rating in different platforms"
  ) +
  scale_x_continuous(breaks = c(40, 60, 80, 100))
# Histogram of Rotten Tomatoes ratings (bin width 0.5), one panel per platform.
# A dashed vertical line and a text label mark the mean Rotten Tomatoes rating
# computed over all rows of `df` (missing ratings ignored).
rt_mean <- mean(df[["Rotten Tomatoes"]], na.rm = TRUE)

ggplot(df, aes(x = `Rotten Tomatoes`)) +
  geom_histogram(binwidth = 0.5, fill = "tomato1") +
  # A constant xintercept draws a single line per panel; mapping the mean
  # inside aes() repeated the identical line once for every row of `df`.
  # The default line width (0.5) is used.
  geom_vline(xintercept = rt_mean, colour = "brown3", linetype = "dashed") +
  # annotate() draws one label per panel instead of one per data row, and the
  # label text is derived from the data rather than hard-coded.
  annotate(
    "text",
    x = rt_mean, y = 0,
    label = sprintf("%.2f", rt_mean),
    colour = "brown4", size = 3
  ) +
  facet_wrap(~Platforms, nrow = 4) +
  labs(
    x = "Rotten Tomatoes Rating",
    title = "Distribution of movies in terms of Rotten Tomatoes rating in different platforms"
  )
# The straight line in each of the following graphs is the fitted linear regression line.
# Scatter plot of the yearly mean IMDb rating with a fitted linear regression
# line. This is base graphics: the regression line is added by a separate
# abline() call, and axis labels are set through plot() arguments (ggplot2's
# aes(), `+` and scale_x_continuous() do not apply to base plots).
plot(
  mean_IMDb ~ Year,
  data = df_avgIMDb_byYear,
  main = "Average IMDb Ratings by Year",
  xlab = "Year",
  ylab = "mean_IMDb",
  pch = 19
)
abline(lm(mean_IMDb ~ Year, data = df_avgIMDb_byYear), col = "gold3")
# Scatter plot of the yearly mean Rotten Tomatoes rating with a fitted linear
# regression line. This is base graphics: the regression line is added by a
# separate abline() call, and axis labels are set through plot() arguments
# (ggplot2's aes(), `+` and scale_x_continuous() do not apply to base plots).
plot(
  mean_RT ~ Year,
  data = df_RT_byYear,
  main = "Average Rotten Tomatoes Ratings by Year",
  xlab = "Year",
  ylab = "mean_RT",
  pch = 19
)
abline(lm(mean_RT ~ Year, data = df_RT_byYear), col = "tomato1")