Import Data

library(tidyverse)

# Load the movies dataset from the project's data directory.
df <- read_csv("data/original/Movies_Updated.csv")

# Mean IMDb rating for each Year, ignoring missing ratings.
df_avgIMDb_byYear <- summarise(
  group_by(df, Year),
  mean_IMDb = mean(IMDb, na.rm = TRUE)
)

# Mean Rotten Tomatoes rating for each Year, ignoring missing ratings.
df_RT_byYear <- summarise(
  group_by(df, Year),
  mean_RT = mean(`Rotten Tomatoes`, na.rm = TRUE)
)

Distribution of Movies in the Top 12 Genres on Streaming Platforms

# Bar chart of the number of movies per genre, one panel per platform.
# The x limits restrict the axis to the twelve listed genres; coord_flip()
# turns the bars horizontal.
ggplot(df, aes(x = reorder(factor(Genre), Genre, function(x) length(x)))) +
  geom_bar(aes(fill = Genre), width = 0.8, position = position_dodge(2)) +
  scale_fill_brewer(palette = "Set3") +
  coord_flip() +
  # labs() only accepts plot labels; the base-graphics arguments
  # (side, line, cex) previously passed here were not labels and are removed.
  labs(
    x = "Genre",
    title = "The Number of Movies by Genre"
  ) +
  facet_wrap(~Platforms, nrow = 2) +
  xlim(
    "Mystery", "Sci-Fi", "Crime", "Animation", "Fantasy", "Romance",
    "Family", "Adventure", "Thriller", "Action", "Comedy", "Drama"
  )

Distribution of Movies on Streaming Platforms by Age

# Bar chart of the number of movies per platform, one panel per Age value.
ggplot(df, aes(x = Platforms)) +
  geom_bar(
    aes(fill = Platforms),
    width = 0.8,
    position = position_dodge(2)
  ) +
  facet_wrap(~Age, nrow = 2) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "The Number of Movies in Streaming Platforms by Age") +
  # These axis labels are unnamed, so they are applied to the Platforms
  # values by position.
  scale_x_discrete(labels = c("Disney+", "Hulu", "Netflix", "Prime"))

Distribution of Movies in terms of Two Rating Systems in Different Platforms

The dashed line in each plot marks the mean IMDb rating and the mean Rotten Tomatoes rating, respectively.

# Histogram of IMDb ratings, one panel per platform, with a dashed line and a
# text label at the overall mean rating.
# The mean is computed once so the line and its label always agree with the
# data instead of relying on a hard-coded value.
imdb_mean <- mean(df$IMDb, na.rm = TRUE)

ggplot(df, aes(x = IMDb)) +
  geom_histogram(binwidth = 0.5, fill = "gold2") +
  geom_vline(
    xintercept = imdb_mean,
    color = "orange2", linetype = "dashed", size = 0.5
  ) +
  # annotate() draws the label once per panel; geom_text() with constant
  # aesthetics would redraw the same label for every row of df.
  annotate(
    "text",
    x = imdb_mean, y = 0, label = sprintf("%.2f", imdb_mean),
    color = "orange3", size = 3
  ) +
  facet_wrap(~Platforms, nrow = 4) +
  labs(
    x = "IMDb Rating",
    title = "Distribution of movies in terms of IMDb rating in different platforms"
  ) +
  scale_x_continuous(breaks = c(40, 60, 80, 100))

# Histogram of Rotten Tomatoes ratings, one panel per platform, with a dashed
# line and a text label at the overall mean rating.
# The mean is computed once so the line and its label always agree with the
# data instead of relying on a hard-coded value.
rt_mean <- mean(df$`Rotten Tomatoes`, na.rm = TRUE)

ggplot(df, aes(x = `Rotten Tomatoes`)) +
  geom_histogram(binwidth = 0.5, fill = "tomato1") +
  geom_vline(
    xintercept = rt_mean,
    color = "brown3", linetype = "dashed", size = 0.5
  ) +
  facet_wrap(~Platforms, nrow = 4) +
  # annotate() draws the label once per panel; geom_text() with constant
  # aesthetics would redraw the same label for every row of df.
  annotate(
    "text",
    x = rt_mean, y = 0, label = sprintf("%.2f", rt_mean),
    color = "brown4", size = 3
  ) +
  labs(
    x = "Rotten Tomatoes Rating",
    title = "Distribution of movies in terms of Rotten Tomatoes rating in different platforms"
  )

Average IMDb Ratings vs Rotten Tomatoes Ratings by Year

The straight line in each graph is the fitted linear regression line.

# Scatter plot of the yearly mean IMDb rating with a fitted linear regression
# line drawn on top.
# These are base-graphics calls, so they are issued as separate statements:
# plot() and abline() return NULL and cannot be combined with `+`, and ggplot2
# helpers (aes(), scale_x_continuous()) do not apply to base plots.
plot(
  mean_IMDb ~ Year,
  data = df_avgIMDb_byYear,
  main = "Average IMDb Ratings by Year",
  xlab = "Year",
  pch = 19
)
abline(lm(mean_IMDb ~ Year, data = df_avgIMDb_byYear), col = "gold3")
# Scatter plot of the yearly mean Rotten Tomatoes rating with a fitted linear
# regression line drawn on top.
# These are base-graphics calls, so they are issued as separate statements:
# plot() and abline() return NULL and cannot be combined with `+`, and ggplot2
# helpers (aes(), scale_x_continuous()) do not apply to base plots.
plot(
  mean_RT ~ Year,
  data = df_RT_byYear,
  main = "Average Rotten Tomatoes Ratings by Year",
  xlab = "Year",
  pch = 19
)
abline(lm(mean_RT ~ Year, data = df_RT_byYear), col = "tomato1")