Init
#global options
options(
digits = 2,
contrasts = c("contr.treatment", "contr.treatment")
)
#packages
library(kirkegaard)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## âś” dplyr 1.1.4 âś” readr 2.1.5
## âś” forcats 1.0.0 âś” stringr 1.5.1
## âś” ggplot2 3.5.1 âś” tibble 3.2.1
## âś” lubridate 1.9.3 âś” tidyr 1.3.1
## âś” purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## âś– dplyr::filter() masks stats::filter()
## âś– dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: magrittr
##
##
## Attaching package: 'magrittr'
##
##
## The following object is masked from 'package:purrr':
##
## set_names
##
##
## The following object is masked from 'package:tidyr':
##
## extract
##
##
## Loading required package: weights
##
## Loading required package: Hmisc
##
##
## Attaching package: 'Hmisc'
##
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
##
## The following objects are masked from 'package:base':
##
## format.pval, units
##
##
## Loading required package: assertthat
##
##
## Attaching package: 'assertthat'
##
##
## The following object is masked from 'package:tibble':
##
## has_name
##
##
## Loading required package: psych
##
##
## Attaching package: 'psych'
##
##
## The following object is masked from 'package:Hmisc':
##
## describe
##
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
##
##
## Attaching package: 'kirkegaard'
##
##
## The following object is masked from 'package:psych':
##
## rescale
##
##
## The following object is masked from 'package:assertthat':
##
## are_equal
##
##
## The following object is masked from 'package:purrr':
##
## is_logical
##
##
## The following object is masked from 'package:base':
##
## +
load_packages(
bayesplot
)
## This is bayesplot version 1.11.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
## * Does _not_ affect other ggplot2 plots
## * See ?bayesplot_theme_set for details on theme setting
#ggplot2
theme_set(theme_bw())
Functions
Example
# Set seed for reproducibility
set.seed(3) #I p-hacked this value to get a prior with mean of ~50%
# Parameters
n <- 1000 # Sample size for each study
p_true <- 0.5 # True proportion in the population
# Simulate 5 initial studies with results around 50%
study_results <- rbinom(5, size = n, prob = p_true) / n
# Calculate prior parameters (mean of the initial studies as the prior mean)
prior_alpha <- sum(study_results) * n + 1 # Prior alpha
prior_beta <- (5 * n - sum(study_results) * n) + 1 # Prior beta
# Simulate the 6th study with an outlier result
study6_result <- rbinom(1, size = n, prob = 0.57) / n
# Evidence distribution parameters based on the 6th study alone
evidence_alpha <- study6_result * n + 1
evidence_beta <- (n - study6_result * n) + 1
# Posterior parameters after updating with 6th study result
posterior_alpha <- prior_alpha + study6_result * n
posterior_beta <- prior_beta + n - study6_result * n
posterior_mean <- posterior_alpha / (posterior_alpha + posterior_beta)
# Plot prior and posterior distributions
x_vals <- seq(0, 1, length.out = 1000)
prior_dist <- dbeta(x_vals, prior_alpha, prior_beta)
posterior_dist <- dbeta(x_vals, posterior_alpha, posterior_beta)
evidence_dist <- dbeta(x_vals, evidence_alpha, evidence_beta)
#plot it using ggplot
bind_rows(
data.frame(
x = x_vals,
y = posterior_dist,
type = "posterior"
),
data.frame(
x = x_vals,
y = prior_dist,
type = "prior"
),
data.frame(
x = x_vals,
y = evidence_dist,
type = "evidence"
)
) %>%
#remove near-zero values
filter(y > 1e-3) %>%
ggplot(aes(x, y, color = type)) +
geom_line() +
coord_cartesian(xlim = c(.4, .65)) +
theme(legend.position = "none") +
labs(title = "Prior and posterior distributions for hypothetical polls with outlier",
x = "Proportion",
y = "Density") +
#add labels for the distributions
#use the correct color values
annotate("text", x = mean(study_results), y = 2, label = "Prior", color = "#619CFF") +
annotate("text", x = posterior_mean, y = 4, label = "Posterior", color = "#00BA38") +
annotate("text", x = study6_result, y = 3, label = "Evidence", color = "#F8766D")

#save
GG_save("figs/Bayes polls example.png")