This is a report on biopics. A biopic is a film that dramatizes someone's life, usually the life of someone famous. This study looks at how often biopics are about people of color compared with white people, and at how often their subjects are men or women. The first graphic compares white and nonwhite subjects. The second graphic compares male and female subjects. The conclusion is that most biopics are about white people, and typically about men.

Recreated “‘Straight Outta Compton’ Is The Rare Biopic Not About White Dudes” from https://fivethirtyeight.com/features/straight-outta-compton-is-the-rare-biopic-not-about-white-dudes/

library(knitr)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggthemes)
library(fivethirtyeight)
## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')
# Load the biopics dataset (made available by the packages attached above).
data(biopics)
# Print a compact, column-by-column overview of the data.
glimpse(biopics)
## Rows: 761
## Columns: 14
## $ title              <chr> "10 Rillington Place", "12 Years a Slave", "127 Hou…
## $ site               <chr> "tt0066730", "tt2024544", "tt1542344", "tt2833074",…
## $ country            <chr> "UK", "US/UK", "US/UK", "Canada", "US", "US", "UK",…
## $ year_release       <int> 1971, 2013, 2010, 2014, 1998, 2008, 2002, 2013, 199…
## $ box_office         <dbl> NA, 5.67e+07, 1.83e+07, NA, 5.37e+05, 8.12e+07, 1.1…
## $ director           <chr> "Richard Fleischer", "Steve McQueen", "Danny Boyle"…
## $ number_of_subjects <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 3, 3, 3, 1, 1, …
## $ subject            <chr> "John Christie", "Solomon Northup", "Aron Ralston",…
## $ type_of_subject    <chr> "Criminal", "Other", "Athlete", "Other", "Other", "…
## $ race_known         <chr> "Unknown", "Known", "Unknown", "Known", "Unknown", …
## $ subject_race       <chr> NA, "African American", NA, "White", NA, "Asian Ame…
## $ person_of_color    <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE…
## $ subject_sex        <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Ma…
## $ lead_actor_actress <chr> "Richard Attenborough", "Chiwetel Ejiofor", "James …

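This step labels each film for plotting: "Unknown" when the subject's race is not known, and otherwise "White subjects" or "Nonwhite subjects". It then counts the films in each category per release year.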

# Label every film for plotting: "Unknown" when the subject's race is not
# known, otherwise "White subjects" or "Nonwhite subjects". if_else() keeps
# the result strictly character and leaves rows with missing inputs as NA.
biopics <- biopics %>%
  mutate(
    race_plotting = if_else(
      race_known == "Unknown",
      "Unknown",
      if_else(subject_race == "White", "White subjects", "Nonwhite subjects")
    )
  )

# Count films per release year and race label. `.groups = "drop"` returns an
# ungrouped tibble, so no leftover grouping leaks into later steps. The
# explicit factor levels fix the category order used by the fill scale.
race_plot <- biopics %>%
  group_by(year_release, race_plotting) %>%
  summarize(total = n(), .groups = "drop") %>%
  mutate(
    race_plotting = factor(
      race_plotting,
      levels = c("White subjects", "Nonwhite subjects", "Unknown")
    )
  )

Creating the base plot, with release year mapped to the x-axis.

# Start from an empty canvas: only the x aesthetic (release year) is mapped.
race_plot %>%
  ggplot(aes(x = year_release))

Adding a geom layer to make bars.

# Draw bars whose heights are taken directly from `total`.
race_plot %>%
  ggplot(aes(x = year_release, y = total)) +
  geom_col()

Adding colors.

# Same bars, now filled according to the race category of each count.
race_plot %>%
  ggplot(aes(x = year_release, y = total)) +
  geom_col(aes(fill = race_plotting))

Adding labels.

Getting the correct colors.

Cleaning up the graphic and adding a legend.

# Final race chart: narrower bars, title and subtitle, a manual palette, and a
# boxed legend without a title, positioned at the coordinates given below.
ggplot(data = race_plot, aes(x = year_release, y = total)) +
  geom_col(aes(fill = race_plotting), width = 0.6) +
  labs(
    title = "Biopic Subjects Are Mostly White",
    subtitle = "Number of white and nonwhite subjects in 761 biopics since 1915",
    x = NULL,
    y = NULL
  ) +
  # Name each colour so it stays tied to its category even if the factor
  # levels are reordered or a category is missing from the data.
  scale_fill_manual(
    values = c(
      "White subjects" = "#d847be",
      "Nonwhite subjects" = "#909392",
      "Unknown" = "#36bc6c"
    )
  ) +
  theme(
    legend.position = c(0.2, 0.7),
    legend.background = element_rect(
      fill = "lightgray",
      size = 0.5,
      linetype = "solid",
      colour = "darkgray"
    ),
    legend.title = element_blank()
  )

Using a preloaded theme that fivethirtyeight has available.

Getting our data by calculating the number of biopics for men and women.

# Count biopics per subject sex and release year. `.groups = "drop"` returns
# an ungrouped tibble instead of one that is still grouped by `subject_sex`.
gender_plotting <- biopics %>%
  group_by(subject_sex, year_release) %>%
  summarize(total = n(), .groups = "drop")

Creating the base plot, with release year on the x-axis and the number of biopics on the y-axis.

# Empty canvas for the gender chart: release year on x, film count on y.
gender_plotting %>%
  ggplot(aes(x = year_release, y = total))

Adding a geom layer.

# Draw bars whose heights are taken directly from `total`.
gender_plotting %>%
  ggplot(aes(x = year_release, y = total)) +
  geom_col()

Adding colors and a legend.

# Turn subject_sex into a factor with an explicit level order ("Male" first),
# which is the order the fill scale will use.
gender_plotting[["subject_sex"]] <- factor(
  gender_plotting[["subject_sex"]],
  levels = c("Male", "Female")
)

# Mapping fill to subject_sex colours the bars by sex.
gender_plotting %>%
  ggplot(aes(x = year_release, y = total, fill = subject_sex)) +
  geom_col()

Changing width of bars.

# Build the titled gender chart with narrower bars and keep it in `g` so that
# later steps can add further layers to it.
g <- gender_plotting %>%
  ggplot(aes(x = year_release, y = total, fill = subject_sex)) +
  geom_col(width = 0.6) +
  labs(
    title = "Biopic Subjects Are Mostly Male",
    subtitle = "Number of male and female subjects in 761 biopics since 1915",
    x = NULL,
    y = NULL
  )

# Display the chart.
g

Getting the correct colors.

# Apply the manual palette. The colours are named so each one stays tied to
# its category regardless of the order of the factor levels.
g <- g +
  scale_fill_manual(values = c("Male" = "#f9cd52", "Female" = "#4304a0"))

# Display the recoloured chart.
g

Adding fivethirtyeight theme.