This analysis explores how to use tidyverse packages to analyze the Bechdel test dataset (the Bechdel test is a simple measure of female representation in film) from FiveThirtyEight.
# Build the working table: derive a readable pass/fail label from `binary`
# and a return-on-investment ratio from the *_2013 budget and domestic gross
# columns, then keep only the columns used in the rest of the analysis.
bechdel_clean <- bechdel_df %>%
  mutate(
    pass = if_else(binary == "PASS", "Pass", "Fail"),
    roi = (domgross_2013 - budget_2013) / budget_2013
  ) %>%
  select(year, title, binary, budget_2013, domgross_2013, pass, roi)

# Preview the first rows to sanity-check the derived columns.
bechdel_clean %>%
  head()
## # A tibble: 6 × 7
## year title binary budget_2013 domgross_2013 pass roi
## <int> <chr> <chr> <int> <dbl> <chr> <dbl>
## 1 2013 21 & Over FAIL 13000000 25682380 Fail 0.976
## 2 2012 Dredd 3D PASS 45658735 13611086 Pass -0.702
## 3 2013 12 Years a Slave FAIL 20000000 53107035 Fail 1.66
## 4 2013 2 Guns FAIL 61000000 75612460 Fail 0.240
## 5 2013 42 FAIL 40000000 95020213 Fail 1.38
## 6 2013 47 Ronin FAIL 225000000 38362475 Fail -0.830
# One row per release year: number of films, share of films labelled "Pass",
# and mean ROI (rows with a missing ROI are ignored in the mean).
yearly_summary <- bechdel_clean %>%
  group_by(year) %>%
  summarise(
    total_movies = n(),
    pass_rate = mean(pass == "Pass"),
    avg_roi = mean(roi, na.rm = TRUE),
    .groups = "drop"
  )
# Yearly pass rate as a blue line, with a red LOESS trend and its confidence
# band layered on top; the y axis is formatted as percentages.
ggplot(data = yearly_summary, mapping = aes(x = year, y = pass_rate)) +
  geom_line(linewidth = 1, color = "blue") +
  geom_smooth(color = "red", method = "loess", se = TRUE) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    x = "Year",
    y = "Pass Rate",
    title = "Movies Passing the Bechdel Test (1970-2013)",
    subtitle = "Percentage of movies that pass the Bechdel test has increased over time",
    caption = "Data source: FiveThirtyEight"
  ) +
  theme_minimal()
# Compare budgets between films that pass and fail the test: mean budget,
# median budget, and number of films per outcome.
bechdel_budget <- bechdel_df %>%
  mutate(pass = if_else(binary == "PASS", "Pass", "Fail")) %>%
  # Rows without a budget are removed up front, so the aggregates below
  # never see a missing value and `count` only includes budgeted films.
  filter(!is.na(budget_2013)) %>%
  group_by(pass) %>%
  summarise(
    avg_budget = mean(budget_2013),
    median_budget = median(budget_2013),
    count = n(),
    .groups = "drop"
  )

print(bechdel_budget)
## # A tibble: 2 × 4
## pass avg_budget median_budget count
## <chr> <dbl> <int> <int>
## 1 Fail 62911555. 44016858 991
## 2 Pass 46274167. 31459218 803
# Overlay the yearly pass rate and the yearly average ROI as two coloured
# lines. Note: both series are drawn against the same percent-formatted
# y axis, so they share a single scale.
ggplot(data = yearly_summary, mapping = aes(x = year)) +
  geom_line(mapping = aes(y = pass_rate, color = "Pass Rate"), linewidth = 1) +
  geom_line(mapping = aes(y = avg_roi, color = "Average ROI"), linewidth = 1) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(
    values = c("Pass Rate" = "blue", "Average ROI" = "red")
  ) +
  labs(
    x = "Year",
    y = "Rate",
    color = "Metric",
    title = "Bechdel Test Pass Rate and ROI Over Time"
  ) +
  theme_minimal()