This is a report on biopics. A biopic is a film that dramatizes
the life of a real person, usually someone famous. This study looks at
how often the subjects of biopics are people of color compared with
white people, and at which gender is portrayed more often. The first
graphic compares white and nonwhite subjects. The second graphic
compares male and female subjects. The conclusion is that most biopics
are about white people, and typically about men.
library(knitr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggthemes)
library(fivethirtyeight)
## Some larger datasets need to be installed separately, like senators and
## house_district_forecast. To install these, we recommend you install the
## fivethirtyeightdata package by running:
## install.packages('fivethirtyeightdata', repos =
## 'https://fivethirtyeightdata.github.io/drat/', type = 'source')
# Load the `biopics` data set into the workspace, then print a compact
# overview showing each column's name, type and first few values.
data("biopics")
glimpse(biopics)
## Rows: 761
## Columns: 14
## $ title <chr> "10 Rillington Place", "12 Years a Slave", "127 Hou…
## $ site <chr> "tt0066730", "tt2024544", "tt1542344", "tt2833074",…
## $ country <chr> "UK", "US/UK", "US/UK", "Canada", "US", "US", "UK",…
## $ year_release <int> 1971, 2013, 2010, 2014, 1998, 2008, 2002, 2013, 199…
## $ box_office <dbl> NA, 5.67e+07, 1.83e+07, NA, 5.37e+05, 8.12e+07, 1.1…
## $ director <chr> "Richard Fleischer", "Steve McQueen", "Danny Boyle"…
## $ number_of_subjects <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 3, 3, 3, 1, 1, …
## $ subject <chr> "John Christie", "Solomon Northup", "Aron Ralston",…
## $ type_of_subject <chr> "Criminal", "Other", "Athlete", "Other", "Other", "…
## $ race_known <chr> "Unknown", "Known", "Unknown", "Known", "Unknown", …
## $ subject_race <chr> NA, "African American", NA, "White", NA, "Asian Ame…
## $ person_of_color <lgl> FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, TRUE…
## $ subject_sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Ma…
## $ lead_actor_actress <chr> "Richard Attenborough", "Chiwetel Ejiofor", "James …
This step checks whether the subject's race is unknown and, if it is
known, classifies the subject as white or nonwhite.
# Derive the three-way category used for plotting:
#   * "Unknown"           when `race_known` is "Unknown"
#   * "White subjects"    when the race is known and `subject_race` is "White"
#   * "Nonwhite subjects" for every other known race
# A missing value in either test propagates as NA, as with nested ifelse().
biopics <- mutate(
  biopics,
  race_plotting = if_else(
    race_known == "Unknown",
    "Unknown",
    if_else(subject_race == "White", "White subjects", "Nonwhite subjects")
  )
)
# Count the biopics in every (release year, race category) combination;
# `total` is the number of rows in each group.
race_plot <- summarise(
  group_by(biopics, year_release, race_plotting),
  total = n()
)
## `summarise()` has grouped output by 'year_release'. You can override using the
## `.groups` argument.
# Turn the category into a factor with an explicit level order, so the bar
# stacking, the legend and any manual colour values follow this order rather
# than alphabetical order.
race_plot[["race_plotting"]] <- factor(
  race_plot[["race_plotting"]],
  levels = c("White subjects", "Nonwhite subjects", "Unknown")
)
Setting up the base plot by mapping release year to the x-axis.
# Start an empty plot: only the data and the x aesthetic are declared, so no
# layer is drawn yet.
ggplot(race_plot, mapping = aes(x = year_release))

Adding a geom layer to make bars.
# Draw bars whose heights come straight from `total` (ggplot does no counting
# of its own); rows that share a release year are stacked on top of each other.
ggplot(race_plot, aes(year_release, total)) +
  geom_col()

Adding colors.
# Same stacked bars, now coloured by race category. Mapping `fill` also makes
# ggplot add a legend for the categories.
ggplot(race_plot, aes(x = year_release, y = total, fill = race_plotting)) +
  geom_col()

Adding labels.

Getting the correct colors.

Cleaning up the graphic and adding a legend.
# Final race chart: narrower stacked bars, title and subtitle, no axis titles,
# hand-picked fill colours, and a boxed legend drawn inside the plotting area.
ggplot(race_plot, aes(x = year_release, y = total, fill = race_plotting)) +
  geom_col(width = 0.6) +
  ggtitle(
    label = "Biopic Subjects Are Mostly White",
    subtitle = "Number of white and nonwhite subjects in 761 biopics since 1915"
  ) +
  # NULL removes the axis titles altogether.
  labs(x = NULL, y = NULL) +
  # Colours are assigned to the fill categories in factor-level order.
  scale_fill_manual(values = c("#d847be", "#909392", "#36bc6c")) +
  theme(
    legend.title = element_blank(),
    # Relative coordinates (0-1) that place the legend inside the panel.
    legend.position = c(.2, .7),
    legend.background = element_rect(
      fill = "lightgray",
      colour = "darkgray",
      size = 0.5,
      linetype = "solid"
    )
  )

Using a preloaded theme that fivethirtyeight has available.

Getting our data by calculating the number of biopics for men and
women.
# Count the biopics in every (subject sex, release year) combination;
# `total` is the number of rows in each group.
gender_plotting <- summarise(
  group_by(biopics, subject_sex, year_release),
  total = n()
)
## `summarise()` has grouped output by 'subject_sex'. You can override using the
## `.groups` argument.
Setting up the base plot with release year on the x-axis and the count on the y-axis.
# Start an empty plot: the data and the x/y aesthetics are declared, but no
# layer is drawn yet.
ggplot(gender_plotting, mapping = aes(x = year_release, y = total))

Adding a geom layer.
# Draw bars whose heights come straight from `total`; rows that share a
# release year are stacked on top of each other.
ggplot(gender_plotting, aes(year_release, total)) +
  geom_col()

Adding colors and a legend.
# Fix the level order (Male first, then Female) so the stacking and legend
# order do not fall back to alphabetical order.
gender_plotting[["subject_sex"]] <- factor(
  gender_plotting[["subject_sex"]],
  levels = c("Male", "Female")
)

# Stacked bars coloured by subject sex; mapping `fill` also adds the legend.
ggplot(gender_plotting, aes(year_release, total)) +
  geom_col(aes(fill = subject_sex))

Changing width of bars and adding a title and subtitle.
# Build the gender chart with narrower bars, a title and subtitle, and no
# axis titles. It is stored in `g` so later steps can add to it.
g <- ggplot(gender_plotting, aes(year_release, total)) +
  geom_col(aes(fill = subject_sex), width = 0.6) +
  ggtitle(
    label = "Biopic Subjects Are Mostly Male",
    subtitle = "Number of male and female subjects in 761 biopics since 1915"
  ) +
  labs(x = NULL, y = NULL)

# Print the plot built so far.
g

Getting the correct colors.
# Replace the default fill palette with two hand-picked colours; they are
# assigned to the `subject_sex` levels in factor-level order.
g <- g + scale_fill_manual(values = c("#f9cd52", "#4304a0"))

# Print the recoloured plot.
g

Adding fivethirtyeight theme.
