Init

#global options
options(
  digits = 2,
  contrasts = c("contr.treatment", "contr.treatment")
)

#packages
library(kirkegaard)

## Loading required package: tidyverse

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
## 
## 
## Attaching package: 'magrittr'
## 
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## 
## Loading required package: weights
## 
## Loading required package: Hmisc
## 
## 
## Attaching package: 'Hmisc'
## 
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## 
## 
## Loading required package: assertthat
## 
## 
## Attaching package: 'assertthat'
## 
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
## 
## 
## Loading required package: psych
## 
## 
## Attaching package: 'psych'
## 
## 
## The following object is masked from 'package:Hmisc':
## 
##     describe
## 
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## 
## 
## Attaching package: 'kirkegaard'
## 
## 
## The following object is masked from 'package:psych':
## 
##     rescale
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     are_equal
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_logical
## 
## 
## The following object is masked from 'package:base':
## 
##     +

load_packages(
  bayesplot
)

## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting

#ggplot2
theme_set(theme_bw())

Functions

Example

# Set seed for reproducibility
set.seed(3) #I p-hacked this value to get a prior with mean of ~50%

# Parameters
n <- 1000  # Sample size for each study
p_true <- 0.5  # True proportion in the population

# Simulate 5 initial studies with results around 50%
study_results <- rbinom(5, size = n, prob = p_true) / n

# Calculate prior parameters (mean of the initial studies as the prior mean)
prior_alpha <- sum(study_results) * n + 1  # Prior alpha
prior_beta <- (5 * n - sum(study_results) * n) + 1  # Prior beta

# Simulate the 6th study with an outlier result
study6_result <- rbinom(1, size = n, prob = 0.57) / n

# Evidence distribution parameters based on the 6th study alone
evidence_alpha <- study6_result * n + 1
evidence_beta <- (n - study6_result * n) + 1

# Posterior parameters after updating with 6th study result
posterior_alpha <- prior_alpha + study6_result * n
posterior_beta <- prior_beta + n - study6_result * n
posterior_mean <- posterior_alpha / (posterior_alpha + posterior_beta)

# Plot prior and posterior distributions
x_vals <- seq(0, 1, length.out = 1000)
prior_dist <- dbeta(x_vals, prior_alpha, prior_beta)
posterior_dist <- dbeta(x_vals, posterior_alpha, posterior_beta)
evidence_dist <- dbeta(x_vals, evidence_alpha, evidence_beta)

#plot it using ggplot
bind_rows(
  data.frame(
    x = x_vals, 
    y = posterior_dist, 
    type = "posterior"
    ),
  data.frame(
  x = x_vals, 
  y = prior_dist, 
  type = "prior"
  ),
  data.frame(
    x = x_vals, 
    y = evidence_dist, 
    type = "evidence"
  )
  ) %>% 
    #remove near-zero values
    filter(y > 1e-3) %>%
  ggplot(aes(x, y, color = type)) +
  geom_line() +
  coord_cartesian(xlim = c(.4, .65)) +
  theme(legend.position = "none") +
  labs(title = "Prior and posterior distributions for hypothetical polls with outlier",
       x = "Proportion",
       y = "Density") +
  #add labels for the distributions
  #use the correct color values
  annotate("text", x = mean(study_results), y = 2, label = "Prior", color = "#619CFF") +
  annotate("text", x = posterior_mean, y = 4, label = "Posterior", color = "#00BA38") +
  annotate("text", x = study6_result, y = 3, label = "Evidence", color = "#F8766D")

#save
GG_save("figs/Bayes polls example.png")

Meta

write_sessioninfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Linux Mint 21.1
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_DK.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_DK.UTF-8        LC_COLLATE=en_DK.UTF-8    
##  [5] LC_MONETARY=en_DK.UTF-8    LC_MESSAGES=en_DK.UTF-8   
##  [7] LC_PAPER=en_DK.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_DK.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Copenhagen
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bayesplot_1.11.1      kirkegaard_2024-09-27 psych_2.4.6.26       
##  [4] assertthat_0.2.1      weights_1.0.4         Hmisc_5.1-3          
##  [7] magrittr_2.0.3        lubridate_1.9.3       forcats_1.0.0        
## [10] stringr_1.5.1         dplyr_1.1.4           purrr_1.0.2          
## [13] readr_2.1.5           tidyr_1.3.1           tibble_3.2.1         
## [16] ggplot2_3.5.1         tidyverse_2.0.0      
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1  farver_2.1.2      fastmap_1.2.0     digest_0.6.37    
##  [5] rpart_4.1.23      timechange_0.3.0  lifecycle_1.0.4   cluster_2.1.6    
##  [9] survival_3.7-0    gdata_3.0.1       compiler_4.4.1    rlang_1.1.4      
## [13] sass_0.4.9        tools_4.4.1       utf8_1.2.4        yaml_2.3.10      
## [17] data.table_1.16.2 knitr_1.48        labeling_0.4.3    htmlwidgets_1.6.4
## [21] mnormt_2.1.1      withr_3.0.1       foreign_0.8-86    nnet_7.3-19      
## [25] grid_4.4.1        fansi_1.0.6       jomo_2.7-6        colorspace_2.1-1 
## [29] mice_3.16.0       scales_1.3.0      gtools_3.9.5      iterators_1.0.14 
## [33] MASS_7.3-61       cli_3.6.3         rmarkdown_2.28    ragg_1.3.3       
## [37] generics_0.1.3    rstudioapi_0.17.1 tzdb_0.4.0        minqa_1.2.8      
## [41] cachem_1.1.0      splines_4.4.1     parallel_4.4.1    base64enc_0.1-3  
## [45] vctrs_0.6.5       boot_1.3-30       glmnet_4.1-8      Matrix_1.6-5     
## [49] jsonlite_1.8.9    hms_1.1.3         mitml_0.4-5       Formula_1.2-5    
## [53] htmlTable_2.4.3   systemfonts_1.1.0 foreach_1.5.2     jquerylib_0.1.4  
## [57] glue_1.8.0        nloptr_2.1.1      pan_1.9           codetools_0.2-19 
## [61] stringi_1.8.4     gtable_0.3.6      shape_1.4.6.1     lme4_1.1-35.5    
## [65] munsell_0.5.1     pillar_1.9.0      htmltools_0.5.8.1 R6_2.5.1         
## [69] textshaping_0.4.0 evaluate_1.0.1    lattice_0.22-5    highr_0.11       
## [73] backports_1.5.0   broom_1.0.7       bslib_0.8.0       Rcpp_1.0.13      
## [77] gridExtra_2.3     nlme_3.1-165      checkmate_2.3.2   xfun_0.48        
## [81] pkgconfig_2.0.3

Herding, polls of polls and Bayes’ theorem

04 November, 2024

Init

Functions

Example

Meta