library(tidyverse)
library(ggridges)
library(ggplot2)
library(dplyr) # easier data wrangling
library(viridis) # colour blind friendly palette, works in B&W also
library(lubridate) # for easy date manipulation
library(ggExtra) # because remembering ggplot theme options is beyond me
library(tidyr)
For Tidy Tuesday Week 11 I am going to break down the Bechdel Test movie data and aggregate it into a table heat map visualization. If you are wondering, the Bechdel Test measures whether a fictional work features at least two women who talk to each other about something other than a man.
raw_bechdel <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/raw_bechdel.csv')
movies <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-03-09/movies.csv')
The genre information is locked in a column where each genre is separated by a comma. I’ll create a new table with counts of each genre each year then aggregate.
cleaned <- movies %>%
inner_join(raw_bechdel[,3:5], by='imdb_id') %>%
mutate(domgross_2013 = as.double(domgross_2013)) %>%
mutate(intgross_2013 = as.double(intgross_2013)) %>%
mutate(totalNet_2013 = domgross_2013 + intgross_2013 - budget_2013) %>%
mutate(costNetRatio_2013 = totalNet_2013/budget_2013) %>%
mutate(runtime = as.integer(gsub(" min", "",runtime)))
# break down and reconstitute genre column
lst <- strsplit(as.character(cleaned$genre),", ")
lvl <- unique(unlist(lst))
res <- data.frame(rating = cleaned$rating,
rated = cleaned$rated,
year = cleaned$year,
imdb_id=cleaned$imdb_id,
do.call(rbind,lapply(lst, function(x) table(factor(x, levels=lvl)))),
stringsAsFactors=FALSE)
genreCounts <- res %>%
gather(genreDecp,count,"Biography":"Documentary") %>%
group_by(year,genreDecp,rating) %>%
summarise(total=sum(count))
With everything aggregated, I filter the data to the most relevant and create the visualization.
p <- genreCounts %>%
filter(rating!=0
& (genreDecp=='Drama'
| genreDecp=='Action'
| genreDecp=='Comedy'
| genreDecp=='Romance'
| genreDecp=='Thriller')) %>%
ggplot(.,aes(rating,year,fill=total))+
geom_tile(color= "white",size=0.1) +
scale_fill_viridis(name="Number of Movies",option ="C")
p <-p + facet_grid(~genreDecp)
p <-p + scale_y_continuous(breaks = unique(genreCounts$year))
#p <-p + scale_x_continuous(breaks =c(1,10,20,31))
p <-p + theme_minimal(base_size = 8)
p <-p + labs(title= paste("Bechdel Test Movie Analysis 1970-2013"),
subtitle = "Total number of Movies by Year, Genre, and Test Rating",
caption = "Tidy Tuesday Week 11 2021\n SeanPJ.Com",
x="Bechdel Rating",
y="")
p <-p + theme(legend.position = "bottom")+
theme(plot.title=element_text(size = 14))+
theme(axis.text.y=element_text(size=6)) +
theme(strip.background = element_rect(colour="white"))+
theme(plot.title=element_text(hjust=0))+
theme(axis.ticks=element_blank())+
theme(axis.text=element_text(size=7))+
theme(legend.title=element_text(size=8))+
theme(legend.text=element_text(size=6))+
removeGrid()
p