R Markdown

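This R Markdown document explores the `movies` data set from the ggplot2movies package: it groups films by their combination of genre flags and plots rating against number of votes for the most common combinations.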

library('tidyverse')
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6     v purrr   0.3.4
## v tibble  3.1.8     v dplyr   1.0.7
## v tidyr   1.2.1     v stringr 1.4.1
## v readr   2.1.3     v forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Load the movies data set shipped with the ggplot2movies package.
data("movies", package = "ggplot2movies")

# Collapse the flag columns Action..Short into a single "_"-separated key
# called `type`, keeping the original columns alongside it.
movies <- unite(movies, col = "type", Action:Short, sep = "_", remove = FALSE)

# Count how many films fall under each flag combination.
table(type = movies$type)
## type
## 0_0_0_0_0_0_0 0_0_0_0_0_0_1 0_0_0_0_0_1_0 0_0_0_0_0_1_1 0_0_0_0_1_0_0 
##         12786          2724           537            43          2384 
## 0_0_0_0_1_0_1 0_0_0_0_1_1_0 0_0_0_0_1_1_1 0_0_0_1_0_0_0 0_0_0_1_0_0_1 
##           795             2             2         14235           858 
## 0_0_0_1_0_1_0 0_0_0_1_0_1_1 0_0_0_1_1_0_0 0_0_0_1_1_0_1 0_0_0_1_1_1_0 
##          1717            29            98            11             3 
## 0_0_0_1_1_1_1 0_0_1_0_0_0_0 0_0_1_0_0_0_1 0_0_1_0_0_1_0 0_0_1_0_0_1_1 
##             1          8237          1548          1373            60 
## 0_0_1_0_1_0_0 0_0_1_0_1_0_1 0_0_1_0_1_1_0 0_0_1_1_0_0_0 0_0_1_1_0_0_1 
##            86            21             1          2164           106 
## 0_0_1_1_0_1_0 0_0_1_1_0_1_1 0_0_1_1_1_0_0 0_0_1_1_1_1_0 0_1_0_0_0_0_0 
##           649            15             8             1           308 
## 0_1_0_0_0_0_1 0_1_0_0_0_1_0 0_1_0_0_0_1_1 0_1_0_0_1_0_0 0_1_0_0_1_0_1 
##           943             9             5             8            20 
## 0_1_0_1_0_0_0 0_1_0_1_0_0_1 0_1_0_1_0_1_0 0_1_0_1_1_0_0 0_1_0_1_1_0_1 
##            35            47             3             1             1 
## 0_1_1_0_0_0_0 0_1_1_0_0_0_1 0_1_1_0_0_1_0 0_1_1_0_0_1_1 0_1_1_0_1_0_0 
##           113          2058             7             7             2 
## 0_1_1_0_1_0_1 0_1_1_1_0_0_0 0_1_1_1_0_0_1 0_1_1_1_0_1_0 0_1_1_1_1_0_1 
##             9            17             8             3             2 
## 1_0_0_0_0_0_0 1_0_0_0_0_0_1 1_0_0_0_0_1_0 1_0_0_0_0_1_1 1_0_0_0_1_0_0 
##          2040            63            73             3             9 
## 1_0_0_0_1_0_1 1_0_0_1_0_0_0 1_0_0_1_0_0_1 1_0_0_1_0_1_0 1_0_0_1_0_1_1 
##             5          1527            15           115             2 
## 1_0_0_1_1_0_0 1_0_1_0_0_0_0 1_0_1_0_0_0_1 1_0_1_0_0_1_0 1_0_1_0_0_1_1 
##             1           533            39            56             1 
## 1_0_1_0_1_0_0 1_0_1_1_0_0_0 1_0_1_1_0_0_1 1_0_1_1_0_1_0 1_1_0_0_0_0_0 
##             1            99             1            21            35 
## 1_1_0_0_0_0_1 1_1_0_0_0_1_0 1_1_0_1_0_0_0 1_1_0_1_0_0_1 1_1_0_1_0_1_0 
##             8             3             8             3             2 
## 1_1_1_0_0_0_0 1_1_1_0_0_0_1 1_1_1_0_0_1_0 1_1_1_1_0_0_0 
##            14             5             1             5
# Keep only the flag combinations that occur in more than 1000 films.
# `m` holds the number of films sharing each row's `type`. The grouping is
# dropped afterwards so later steps on `moviesT` do not silently run per group.
moviesT <- movies %>%
  group_by(type) %>%
  mutate(m = n()) %>%
  filter(m > 1000) %>%
  ungroup()

# Check which combinations survived the filter.
with(moviesT, table(type))
## type
## 0_0_0_0_0_0_0 0_0_0_0_0_0_1 0_0_0_0_1_0_0 0_0_0_1_0_0_0 0_0_0_1_0_1_0 
##         12786          2724          2384         14235          1717 
## 0_0_1_0_0_0_0 0_0_1_0_0_0_1 0_0_1_0_0_1_0 0_0_1_1_0_0_0 0_1_1_0_0_0_1 
##          8237          1548          1373          2164          2058 
## 1_0_0_0_0_0_0 1_0_0_1_0_0_0 
##          2040          1527
# Attach a readable label to each retained flag combination. The lookup is
# keyed by the `type` string; any key not listed here yields NA.
moviesT <- moviesT %>%
  mutate(
    Type = unname(c(
      "0_0_0_0_0_0_0" = "none",
      "0_0_0_0_0_0_1" = "Short",
      "0_0_0_0_1_0_0" = "Documentary",
      "0_0_0_1_0_0_0" = "Drama",
      "0_0_0_1_0_1_0" = "Drama_Romance",
      "0_0_1_0_0_0_0" = "Comedy",
      "0_0_1_0_0_0_1" = "Comedy_Short",
      "0_0_1_0_0_1_0" = "Romcom",
      "0_0_1_1_0_0_0" = "Comedy_Drama",
      "0_1_1_0_0_0_1" = "Animation_Comedy_Short",
      "1_0_0_0_0_0_0" = "Action",
      "1_0_0_1_0_0_0" = "Action_Drama"
    )[type])
  )

# Tabulate the films per label.
table(Type = moviesT$Type)
## Type
##                 Action           Action_Drama Animation_Comedy_Short 
##                   2040                   1527                   2058 
##                 Comedy           Comedy_Drama           Comedy_Short 
##                   8237                   2164                   1548 
##            Documentary                  Drama          Drama_Romance 
##                   2384                  14235                   1717 
##                   none                 Romcom                  Short 
##                  12786                   1373                   2724
# ggthemes supplies the Tableau and colorblind colour scales.
library(ggthemes)

# Rating against number of votes, one panel per label, with a horizontal
# reference line at rating 9. The legend is hidden because the panel titles
# already identify each label.
ggplot(moviesT, aes(x = votes, y = rating)) +
  geom_point(aes(colour = Type), size = 1) +
  geom_hline(yintercept = 9) +
  labs(x = "no of votes") +
  facet_wrap(~Type) +
  scale_colour_tableau(palette = "Tableau 20") +
  theme(legend.position = "none")

# Keep only the labels that contain at least one film with more than 50000
# votes; `maxV` is the highest vote count within each label. The grouping is
# dropped afterwards so `moviesTx` is a plain (ungrouped) table.
moviesTx <- moviesT %>%
  group_by(Type) %>%
  mutate(maxV = max(votes)) %>%
  filter(maxV > 50000) %>%
  ungroup()

# Same faceted scatter as before, restricted to the heavily voted labels and
# drawn with the colorblind-friendly palette.
ggplot(moviesTx, aes(votes, rating)) +
  geom_point(size = 1, aes(colour = Type)) +
  geom_hline(yintercept = 9) +
  xlab("no of votes") +
  facet_wrap(vars(Type)) +
  scale_colour_colorblind() +
  theme(legend.position = "none")