Purpose. This appendix displays the full analysis workflow for two studies on probability formatting (percent vs. 1‑in‑X variants). It is designed so readers can inspect, reproduce, and audit all steps. Heavy Bayesian fits are cached to avoid long re-runs.
# Core tidyverse
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(ggplot2)
# Stats & modeling
library(brms)
library(broom)
library(emmeans)
library(coin)
library(pROC)
# Viz & tables
library(ggdist)
library(scales)
library(flextable)
library(officer)# Helper: cache heavy model fits to RDS files
fit_or_load <- function(fit_expr, rds_path) {
if (file.exists(rds_path)) {
readRDS(rds_path)
} else {
fit <- eval.parent(substitute(fit_expr))
dir.create(dirname(rds_path), recursive = TRUE, showWarnings = FALSE)
saveRDS(fit, rds_path)
fit
}
}Notes on caching: - All chunk-level caching
is enabled via cache=TRUE. - Additionally, Bayesian
brm() fits are saved/loaded from
cache/models/*.rds using fit_or_load() so they
never recompute unless you delete those files. - To force a refit,
delete the RDS in cache/models/ or set a new
seed. - .csv files can be downloaded at https://osf.io/7shqn/files
# TIP: replace with here::here() if you prefer project‑relative paths
full_S1 <- read.csv("~/Desktop/Junk/Google Drive/Research/--Research -- Decimal comprehension/R&R/full_S1.csv")
full_S2 <- read.csv("~/Desktop/Junk/Google Drive/Research/--Research -- Decimal comprehension/R&R/full_S2.csv")
# Light sanity checks
stopifnot(all(c("raw_resp","format","oom") %in% names(full_S1)))
stopifnot(all(c("raw_resp","format","oom","cond_col") %in% names(full_S2)))# Function for reporting polynomial trends on a binary DV vs. OOM
trend_analysis <- function(df, outcome = "raw_resp", predictor = "oom") {
if (!all(c(outcome, predictor) %in% colnames(df))) stop("Required columns not found.")
make_formula <- function(degree) as.formula(paste0(outcome, " ~ poly(", predictor, ", ", degree, ", raw = TRUE)"))
degrees <- 1:4
results <- tibble(
model = c("linear","quadratic","cubic","quartic"),
AIC = NA_real_, BIC = NA_real_, ROC = NA_real_
)
for (i in seq_along(degrees)) {
f <- make_formula(degrees[i])
fit <- tryCatch(glm(f, data = df, family = binomial), error = function(e) NULL, warning = function(w) NULL)
if (!is.null(fit)) {
results$AIC[i] <- AIC(fit)
results$BIC[i] <- BIC(fit)
if (length(unique(df[[outcome]])) == 2) {
roc_obj <- tryCatch(roc(df[[outcome]], fitted(fit), quiet = TRUE), error = function(e) NA_real_)
results$ROC[i] <- ifelse(is.list(roc_obj) && !is.null(roc_obj$auc), as.numeric(roc_obj$auc), NA_real_)
}
}
}
results
}df_S1_digit <- full_S1 %>%
dplyr::filter(format %in% c("percent","NX")) %>%
dplyr::mutate(
condition = factor(ifelse(format == "percent", "percent", "fraction"))
) %>%
dplyr::select(raw_resp, condition)brms_S1_digit <- fit_or_load(
brm(
formula = raw_resp ~ condition,
data = df_S1_digit,
family = bernoulli(),
chains = 4, cores = 4, seed = 123
),
"cache/models/brms_S1_digit.rds"
)
summary(brms_S1_digit)## Family: bernoulli
## Links: mu = logit
## Formula: raw_resp ~ condition
## Data: df_S1_digit (Number of observations: 1367)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.42 0.10 -1.61 -1.23 1.00 3838 2815
## conditionpercent -0.77 0.16 -1.08 -0.46 1.00 2099 2133
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
df_S1_frac <- full_S1 %>% filter(format == "NX") %>% mutate(condition = "1-in-X", y = raw_resp)
df_S2_frac <- full_S2 %>% filter(grepl("^X1.in.", cond_col)) %>% mutate(condition = "1-in-2X", y = raw_resp)
NXv2X <- bind_rows(df_S1_frac, df_S2_frac) %>% mutate(condition = factor(condition))
brms_fraction_1vs2 <- fit_or_load(
brm(y ~ condition, data = NXv2X, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_1vs2.rds"
)
summary(brms_fraction_1vs2)## Family: bernoulli
## Links: mu = logit
## Formula: y ~ condition
## Data: NXv2X (Number of observations: 1906)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.07 0.07 -1.20 -0.94 1.00 4188 2686
## condition1MinMX -0.35 0.12 -0.59 -0.11 1.00 3867 2720
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
perm_test_1vs2 <- coin::independence_test(y ~ condition, data = NXv2X, distribution = approximate(nresample = 10000))
perm_test_1vs2##
## Approximative General Independence Test
##
## data: y by condition (1-in-2X, 1-in-X)
## Z = 2.9856, p-value = 0.0037
## alternative hypothesis: two.sided
NX <- full_S1 %>% filter(format == "NX") %>% mutate(condition = "1-in-X") %>% select(raw_resp, oom, condition)
NX5 <- full_S2 %>% filter(grepl("^X5\\.in", cond_col)) %>% mutate(condition = "5-in-X") %>% select(raw_resp, oom, condition)
NXv5NX <- bind_rows(NX, NX5) %>% mutate(condition = factor(condition))
brms_fraction_5vs1 <- fit_or_load(
brm(raw_resp ~ condition, data = NXv5NX, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_5vs1.rds"
)
summary(brms_fraction_5vs1)## Family: bernoulli
## Links: mu = logit
## Formula: raw_resp ~ condition
## Data: NXv5NX (Number of observations: 1701)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.42 0.10 -1.62 -1.23 1.00 3302 2542
## condition5MinMX 0.10 0.13 -0.16 0.35 1.00 3258 2429
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
perm_test_fraction_5vs1 <- coin::independence_test(raw_resp ~ condition, data = NXv5NX, distribution = approximate(nresample = 10000))
perm_test_fraction_5vs1##
## Approximative General Independence Test
##
## data: raw_resp by condition (1-in-X, 5-in-X)
## Z = -0.79846, p-value = 0.4228
## alternative hypothesis: two.sided
one_in_2X <- full_S2 %>% filter(grepl("^X1\\.", cond_col)) %>% mutate(condition = "1-in-2X") %>% select(raw_resp, oom, condition)
five_in_X <- full_S2 %>% filter(grepl("^X5\\.", cond_col)) %>% mutate(condition = "5-in-X") %>% select(raw_resp, oom, condition)
NX2v5NX <- bind_rows(one_in_2X, five_in_X) %>% mutate(condition = factor(condition))
brms_fraction_5vs2 <- fit_or_load(
brm(raw_resp ~ condition, data = NX2v5NX, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_5vs2.rds"
)
summary(brms_fraction_5vs2)## Family: bernoulli
## Links: mu = logit
## Formula: raw_resp ~ condition
## Data: NX2v5NX (Number of observations: 2253)
## Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
## total post-warmup draws = 4000
##
## Regression Coefficients:
## Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept -1.07 0.07 -1.20 -0.94 1.00 3291 2738
## condition5MinMX -0.25 0.10 -0.45 -0.06 1.00 2748 2578
##
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
perm_test_fraction_5vs2 <- coin::independence_test(raw_resp ~ condition, data = NX2v5NX, distribution = approximate(nresample = 10000))
perm_test_fraction_5vs2##
## Approximative General Independence Test
##
## data: raw_resp by condition (1-in-2X, 5-in-X)
## Z = 2.4821, p-value = 0.0145
## alternative hypothesis: two.sided
# 5% condition (percent labels containing "5")
df_5perc <- full_S2 %>% filter(grepl("5", cond_col) & grepl("perc", cond_col))
trend_analysis(df_5perc)# 1-in-2X conditions
df_1in2X <- full_S2 %>% filter(grepl("^X1\\.in\\.", cond_col)) %>% mutate(condition = cond_col) %>% select(raw_resp, oom, condition)
trend_analysis(df_1in2X)# 5-in-X conditions
df_5inX <- full_S2 %>% filter(grepl("^X5\\.in\\.", cond_col)) %>% mutate(condition = cond_col) %>% select(raw_resp, oom, condition)
trend_analysis(df_5inX)## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] officer_0.7.0 flextable_0.9.10 scales_1.4.0 ggdist_3.3.3
## [5] pROC_1.19.0.1 coin_1.4-3 survival_3.8-3 emmeans_1.11.2
## [9] broom_1.0.9 brms_2.23.0 Rcpp_1.1.0 ggplot2_3.5.2
## [13] purrr_1.1.0 stringr_1.5.1 tidyr_1.3.1 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 libcoin_1.0-10 farver_2.1.2
## [4] loo_2.8.0 fastmap_1.2.0 TH.data_1.1-4
## [7] tensorA_0.36.2.1 fontquiver_0.2.1 digest_0.6.37
## [10] estimability_1.5.1 lifecycle_1.0.4 StanHeaders_2.32.10
## [13] magrittr_2.0.3 posterior_1.6.1 compiler_4.5.1
## [16] rlang_1.1.6 sass_0.4.10 tools_4.5.1
## [19] yaml_2.3.10 data.table_1.17.8 knitr_1.50
## [22] askpass_1.2.1 bridgesampling_1.1-2 pkgbuild_1.4.8
## [25] plyr_1.8.9 xml2_1.3.8 RColorBrewer_1.1-3
## [28] multcomp_1.4-28 abind_1.4-8 withr_3.0.2
## [31] grid_4.5.1 stats4_4.5.1 gdtools_0.4.3
## [34] colorspace_2.1-1 inline_0.3.21 xtable_1.8-4
## [37] MASS_7.3-65 cli_3.6.5 mvtnorm_1.3-3
## [40] rmarkdown_2.29 ragg_1.4.0 generics_0.1.4
## [43] RcppParallel_5.1.11-1 rstudioapi_0.17.1 reshape2_1.4.4
## [46] cachem_1.1.0 rstan_2.32.7 modeltools_0.2-24
## [49] splines_4.5.1 bayesplot_1.14.0 parallel_4.5.1
## [52] matrixStats_1.5.0 vctrs_0.6.5 Matrix_1.7-3
## [55] sandwich_3.1-1 jsonlite_2.0.0 fontBitstreamVera_0.1.1
## [58] systemfonts_1.2.3 jquerylib_0.1.4 glue_1.8.0
## [61] codetools_0.2-20 distributional_0.5.0 stringi_1.8.7
## [64] gtable_0.3.6 QuickJSR_1.8.1 tibble_3.3.0
## [67] pillar_1.11.0 htmltools_0.5.8.1 Brobdingnag_1.2-9
## [70] openssl_2.3.3 R6_2.6.1 textshaping_1.0.1
## [73] evaluate_1.0.4 lattice_0.22-7 backports_1.5.0
## [76] fontLiberation_0.1.0 bslib_0.9.0 rstantools_2.5.0
## [79] zip_2.3.3 uuid_1.2-1 gridExtra_2.3
## [82] coda_0.19-4.1 nlme_3.1-168 checkmate_2.3.3
## [85] xfun_0.52 zoo_1.8-14 pkgconfig_2.0.3
full_S1.csv and full_S2.csv in the
specified paths (or edit the import chunk to your pathscache/models/*.rds after the first run.cache/models/.