Tidy Tuesday Week 11 2021

library(tidyverse)
library(ggridges)
library(ggplot2)
library(dplyr) # easier data wrangling 
library(viridis) # colour blind friendly palette, works in B&W also
library(lubridate) # for easy date manipulation
library(ggExtra) # because remembering ggplot theme options is beyond me
library(tidyr)

For Tidy Tuesday Week 11 I am going to break down the Bechdel Test movie data and aggregate it into a table heat map visualization. If you are wondering, the Bechdel Test measures whether a fictional work features at least two women who talk to each other about something other than a man.

raw_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/raw_bechdel.csv')
movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/movies.csv')

The genre information is locked in a column where each genre is separated by a comma. I’ll create a new table with counts of each genre each year then aggregate.

cleaned <- movies %>%
  inner_join(raw_bechdel[,3:5], by='imdb_id') %>%
  mutate(domgross_2013 = as.double(domgross_2013)) %>%
  mutate(intgross_2013 = as.double(intgross_2013)) %>%
  mutate(totalNet_2013 = domgross_2013 + intgross_2013 - budget_2013) %>%
  mutate(costNetRatio_2013 = totalNet_2013/budget_2013) %>%
  mutate(runtime = as.integer(gsub(" min", "",runtime))) 

# break down and reconstitute genre column
lst <- strsplit(as.character(cleaned$genre),", ")
lvl <- unique(unlist(lst))      
res <- data.frame(rating = cleaned$rating,
                  rated = cleaned$rated,
                  year = cleaned$year,
                  imdb_id=cleaned$imdb_id,
                  do.call(rbind,lapply(lst, function(x) table(factor(x, levels=lvl)))), 
                  stringsAsFactors=FALSE)

genreCounts <- res %>%
  gather(genreDecp,count,"Biography":"Documentary") %>%
  group_by(year,genreDecp,rating) %>%
  summarise(total=sum(count))

With everything aggregated, I filter the data to the most relevant and create the visualization.

p <- genreCounts %>%
  filter(rating!=0 
         & (genreDecp=='Drama' 
         | genreDecp=='Action' 
         | genreDecp=='Comedy' 
         | genreDecp=='Romance' 
         | genreDecp=='Thriller')) %>%
  ggplot(.,aes(rating,year,fill=total))+
  geom_tile(color= "white",size=0.1) + 
  scale_fill_viridis(name="Number of Movies",option ="C")
p <-p + facet_grid(~genreDecp)
p <-p + scale_y_continuous(breaks = unique(genreCounts$year))
#p <-p + scale_x_continuous(breaks =c(1,10,20,31))
p <-p + theme_minimal(base_size = 8)
p <-p + labs(title= paste("Bechdel Test Movie Analysis 1970-2013"), 
             subtitle = "Total number of Movies by Year, Genre, and Test Rating",
             caption = "Tidy Tuesday Week 11 2021\n SeanPJ.Com",
             x="Bechdel Rating", 
             y="")
p <-p + theme(legend.position = "bottom")+
  theme(plot.title=element_text(size = 14))+
  theme(axis.text.y=element_text(size=6)) +
  theme(strip.background = element_rect(colour="white"))+
  theme(plot.title=element_text(hjust=0))+
  theme(axis.ticks=element_blank())+
  theme(axis.text=element_text(size=7))+
  theme(legend.title=element_text(size=8))+
  theme(legend.text=element_text(size=6))+
  removeGrid()

p

Tidy Tuesday Week 11 2021

Sean Jackson

3/11/2021