OBJECTIVE

This analysis explores how to use tidyverse packages to analyze FiveThirtyEight's Bechdel test dataset. The Bechdel test is a simple measure of female representation in movies.

CLEAN

# Keep only the columns this analysis uses, then derive two helper columns:
#   pass - "Pass" when `binary` equals "PASS", "Fail" for any other value
#          (a missing `binary` stays NA)
#   roi  - (domgross_2013 - budget_2013) expressed as a fraction of
#          budget_2013
bechdel_clean <- bechdel_df %>%
  select(year, title, binary, budget_2013, domgross_2013) %>%
  mutate(pass = if_else(binary == "PASS", "Pass", "Fail")) %>%
  mutate(roi = (domgross_2013 - budget_2013) / budget_2013)

# Preview the first rows of the cleaned data.
head(bechdel_clean)
## # A tibble: 6 × 7
##    year title            binary budget_2013 domgross_2013 pass     roi
##   <int> <chr>            <chr>        <int>         <dbl> <chr>  <dbl>
## 1  2013 21 & Over        FAIL      13000000      25682380 Fail   0.976
## 2  2012 Dredd 3D         PASS      45658735      13611086 Pass  -0.702
## 3  2013 12 Years a Slave FAIL      20000000      53107035 Fail   1.66 
## 4  2013 2 Guns           FAIL      61000000      75612460 Fail   0.240
## 5  2013 42               FAIL      40000000      95020213 Fail   1.38 
## 6  2013 47 Ronin         FAIL     225000000      38362475 Fail  -0.830

ANALYZE

# One row per year: how many movies there are, the share of them whose
# `pass` label is "Pass", and the mean ROI (missing ROI values are skipped).
yearly_summary <- bechdel_clean %>%
  group_by(year) %>%
  summarise(
    total_movies = n(),
    pass_rate = mean(pass == "Pass"),
    avg_roi = mean(roi, na.rm = TRUE),
    # Same outcome as the default for a single grouping variable: the
    # result is returned ungrouped.
    .groups = "drop"
  )

# Line chart of the yearly Bechdel pass rate with a LOESS trend line drawn
# on top. The year range shown in the title is read from `yearly_summary`,
# so it stays correct if the underlying data is extended or filtered.
ggplot(yearly_summary, aes(x = year, y = pass_rate)) +
  geom_line(color = "blue", linewidth = 1) +
  # Spell out the formula so geom_smooth() does not print its
  # "using formula = 'y ~ x'" message on every render; the fit is unchanged.
  geom_smooth(method = "loess", formula = y ~ x, se = TRUE, color = "red") +
  labs(
    title = paste0(
      "Movies Passing the Bechdel Test (",
      paste(range(yearly_summary$year, na.rm = TRUE), collapse = "-"),
      ")"
    ),
    subtitle = "Percentage of movies that pass the Bechdel test has increased over time",
    x = "Year",
    y = "Pass Rate",
    caption = "Data source: FiveThirtyEight"
  ) +
  theme_minimal() +
  # pass_rate is a proportion in [0, 1]; show it as a percentage.
  scale_y_continuous(labels = scales::label_percent())

COMPARE

# Budget comparison between movies that pass and fail the Bechdel test.
# Starts from bechdel_clean so the Pass/Fail label comes from the single
# definition used by the rest of the analysis instead of being re-derived
# from `binary` a second time here.
bechdel_budget <- bechdel_clean %>%
  # Drop rows without a budget up front so that `count` only includes the
  # movies that actually contribute to the mean and median below.
  filter(!is.na(budget_2013)) %>%
  group_by(pass) %>%
  summarize(
    # No na.rm needed: missing budgets were removed by the filter above.
    avg_budget = mean(budget_2013),
    median_budget = median(budget_2013),
    count = n()
  )

print(bechdel_budget)
## # A tibble: 2 × 4
##   pass  avg_budget median_budget count
##   <chr>      <dbl>         <int> <int>
## 1 Fail   62911555.      44016858   991
## 2 Pass   46274167.      31459218   803

ROI

# Overlay the yearly pass rate and the yearly average ROI as two lines on a
# single y-axis; the colour legend tells the two series apart.
# NOTE(review): both series share one percent-formatted axis. If average ROI
# is much larger than the pass rate, the pass-rate line will look flat;
# consider separate panels if that turns out to be the case.
ggplot(yearly_summary, aes(x = year)) +
  geom_line(aes(y = pass_rate, color = "Pass Rate"), linewidth = 1) +
  geom_line(aes(y = avg_roi, color = "Average ROI"), linewidth = 1) +
  scale_color_manual(
    name = "Metric",
    values = c("Pass Rate" = "blue", "Average ROI" = "red")
  ) +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(
    title = "Bechdel Test Pass Rate and ROI Over Time",
    x = "Year",
    y = "Rate"
  ) +
  theme_minimal()

METHODS

FINDINGS