# Load packages
library(bayesrules)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Importar datos 
data(bechdel, package = "bayesrules")

#resumen por año de las películas
bechdel %>% 
  tabyl(year) %>% 
  adorn_totals("row")
##   year    n      percent
##   1970    1 0.0005574136
##   1971    5 0.0027870680
##   1972    3 0.0016722408
##   1973    5 0.0027870680
##   1974    7 0.0039018952
##   1975    5 0.0027870680
##   1976    8 0.0044593088
##   1977    7 0.0039018952
##   1978    8 0.0044593088
##   1979    5 0.0027870680
##   1980   14 0.0078037904
##   1981    9 0.0050167224
##   1982   14 0.0078037904
##   1983    5 0.0027870680
##   1984   16 0.0089186176
##   1985   10 0.0055741360
##   1986   10 0.0055741360
##   1987   14 0.0078037904
##   1988   19 0.0105908584
##   1989   14 0.0078037904
##   1990   15 0.0083612040
##   1991   13 0.0072463768
##   1992   20 0.0111482720
##   1993   16 0.0089186176
##   1994   26 0.0144927536
##   1995   36 0.0200668896
##   1996   42 0.0234113712
##   1997   51 0.0284280936
##   1998   62 0.0345596433
##   1999   56 0.0312151616
##   2000   63 0.0351170569
##   2001   64 0.0356744705
##   2002   80 0.0445930881
##   2003   64 0.0356744705
##   2004   81 0.0451505017
##   2005  100 0.0557413601
##   2006   90 0.0501672241
##   2007   73 0.0406911929
##   2008  101 0.0562987737
##   2009  124 0.0691192865
##   2010  129 0.0719063545
##   2011  124 0.0691192865
##   2012   86 0.0479375697
##   2013   99 0.0551839465
##  Total 1794 1.0000000000
  1. John has a flat Beta(1, 1) prior and analyzes movies from the year 1980.
#a)John día 1 (alpha = 1, beta = 1). Películas de 1980
bechdel_1980 <- bechdel %>% 
  filter(year == 1980) 
head(bechdel_1980)
## # A tibble: 6 × 3
##    year title                    binary
##   <dbl> <chr>                    <chr> 
## 1  1980 Airplane!                FAIL  
## 2  1980 Can&#39;t Stop the Music PASS  
## 3  1980 Fame                     PASS  
## 4  1980 Flash Gordon             FAIL  
## 5  1980 Friday the 13th          FAIL  
## 6  1980 Heaven&#39;s Gate        FAIL
#porcentajes fail - pass
bechdel_1980 %>% 
 tabyl(binary) %>% 
  adorn_totals("row")
##  binary  n   percent
##    FAIL 10 0.7142857
##    PASS  4 0.2857143
##   Total 14 1.0000000
# Plot the Beta-Binomial model
plot_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14)

# Summaries of the Beta-Binomial model
summarize_beta_binomial(alpha = 1, beta = 1, y = 4, n = 14)
##       model alpha beta   mean      mode        var        sd
## 1     prior     1    1 0.5000       NaN 0.08333333 0.2886751
## 2 posterior     5   11 0.3125 0.2857143 0.01263787 0.1124183
  1. The next day, John analyzes movies from the year 1990, while building off their analysis from the previous day.
#b)John día 2 (alpha = 5, beta = 11. Películas de 1990
bechdel_1990 <- bechdel %>% 
  filter(year == 1990) 
head(bechdel_1990)
## # A tibble: 6 × 3
##    year title                        binary
##   <dbl> <chr>                        <chr> 
## 1  1990 Back to the Future Part III  FAIL  
## 2  1990 Child&#39;s Play 2           PASS  
## 3  1990 Dark Angel (I Come in Peace) FAIL  
## 4  1990 Die Hard 2                   FAIL  
## 5  1990 Edward Scissorhands          PASS  
## 6  1990 Flatliners                   PASS
#porcentajes fail - pass
bechdel_1990 %>% 
  tabyl(binary) %>% 
  adorn_totals("row")
##  binary  n percent
##    FAIL  9     0.6
##    PASS  6     0.4
##   Total 15     1.0
# Plot the Beta-Binomial model
plot_beta_binomial(alpha = 5, beta = 11, y = 6, n = 15)

# Summaries of the Beta-Binomial model
summarize_beta_binomial(alpha = 5, beta = 11, y = 6, n = 15)
##       model alpha beta      mean      mode         var         sd
## 1     prior     5   11 0.3125000 0.2857143 0.012637868 0.11241827
## 2 posterior    11   20 0.3548387 0.3448276 0.007154006 0.08458136
  1. The third day, John analyzes movies from the year 2000, while again building off of their analyses from the previous two days.
#c)John día 3 (alpha = 11, beta = 20). Películas de 2000
bechdel_2000 <- bechdel %>% 
  filter(year == 2000) 
head(bechdel_2000)
## # A tibble: 6 × 3
##    year title               binary
##   <dbl> <chr>               <chr> 
## 1  2000 28 Days             PASS  
## 2  2000 Almost Famous       PASS  
## 3  2000 American Psycho     FAIL  
## 4  2000 Beautiful Creatures PASS  
## 5  2000 Best in Show        PASS  
## 6  2000 Billy Elliot        PASS
#porcentajes fail - pass
bechdel_2000 %>% 
  tabyl(binary) %>% 
  adorn_totals("row")
##  binary  n   percent
##    FAIL 34 0.5396825
##    PASS 29 0.4603175
##   Total 63 1.0000000
# Plot the Beta-Binomial model
plot_beta_binomial(alpha = 11, beta = 20, y = 29, n = 63)

# Summaries of the Beta-Binomial model
summarize_beta_binomial(alpha = 11, beta = 20, y = 29, n = 63)
##       model alpha beta      mean      mode         var         sd
## 1     prior    11   20 0.3548387 0.3448276 0.007154006 0.08458136
## 2 posterior    40   54 0.4255319 0.4239130 0.002573205 0.05072677
  1. Jenna also starts her analysis with a \(Beta(1, 1)\) prior, but analyzes movies from 1980, 1990, 2000 all on day one.
#d)Jenna día 1 (alpha = 1, beta = 1). Películas de 1980, 1990, 2000
bechdel_80_90_20 <- bechdel %>% 
  filter(year %in% c(1980 , 1990 , 2000)) 
bechdel_80_90_20
## # A tibble: 92 × 3
##     year title               binary
##    <dbl> <chr>               <chr> 
##  1  2000 28 Days             PASS  
##  2  2000 Almost Famous       PASS  
##  3  2000 American Psycho     FAIL  
##  4  2000 Beautiful Creatures PASS  
##  5  2000 Best in Show        PASS  
##  6  2000 Billy Elliot        PASS  
##  7  2000 Boiler Room         FAIL  
##  8  2000 Bring It On         PASS  
##  9  2000 Cast Away           FAIL  
## 10  2000 Cecil B. Demented   PASS  
## # ℹ 82 more rows
#resumen por año de la película
bechdel_80_90_20 %>% 
  tabyl(year) %>% 
  adorn_totals("row")
##   year  n   percent
##   1980 14 0.1521739
##   1990 15 0.1630435
##   2000 63 0.6847826
##  Total 92 1.0000000
#porcentajes fail - pass
bechdel_80_90_20 %>% 
  tabyl(binary) %>% 
  adorn_totals("row")
##  binary  n  percent
##    FAIL 53 0.576087
##    PASS 39 0.423913
##   Total 92 1.000000
# Plot the Beta-Binomial model
plot_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92)

# Summaries of the Beta-Binomial model
summarize_beta_binomial(alpha = 1, beta = 1, y = 39, n = 92)
##       model alpha beta      mean     mode         var         sd
## 1     prior     1    1 0.5000000      NaN 0.083333333 0.28867513
## 2 posterior    40   54 0.4255319 0.423913 0.002573205 0.05072677