Purpose. This appendix displays the full analysis workflow for two studies on probability formatting (percent vs. 1‑in‑X variants). It is designed so readers can inspect, reproduce, and audit all steps. Heavy Bayesian fits are cached to avoid long re-runs.

1 Reproducibility & Setup

# Core tidyverse
library(dplyr)
library(tidyr)
library(stringr)
library(purrr)
library(ggplot2)

# Stats & modeling
library(brms)
library(broom)
library(emmeans)
library(coin)
library(pROC)

# Viz & tables
library(ggdist)
library(scales)
library(flextable)
library(officer)

# Helper: cache heavy model fits to RDS files
fit_or_load <- function(fit_expr, rds_path) {
  if (file.exists(rds_path)) {
    readRDS(rds_path)
  } else {
    fit <- eval.parent(substitute(fit_expr))
    dir.create(dirname(rds_path), recursive = TRUE, showWarnings = FALSE)
    saveRDS(fit, rds_path)
    fit
  }
}

Notes on caching: - All chunk-level caching is enabled via cache=TRUE. - Additionally, Bayesian brm() fits are saved/loaded from cache/models/*.rds using fit_or_load() so they never recompute unless you delete those files. - To force a refit, delete the RDS in cache/models/ or set a new seed. - .csv files can be downloaded at https://osf.io/7shqn/files

2 Data Import

# TIP: replace with here::here() if you prefer project‑relative paths
full_S1 <- read.csv("~/Desktop/Junk/Google Drive/Research/--Research -- Decimal comprehension/R&R/full_S1.csv")
full_S2 <- read.csv("~/Desktop/Junk/Google Drive/Research/--Research -- Decimal comprehension/R&R/full_S2.csv")

# Light sanity checks
stopifnot(all(c("raw_resp","format","oom") %in% names(full_S1)))
stopifnot(all(c("raw_resp","format","oom","cond_col") %in% names(full_S2)))

3 Utilities

# Function for reporting polynomial trends on a binary DV vs. OOM
trend_analysis <- function(df, outcome = "raw_resp", predictor = "oom") {
  if (!all(c(outcome, predictor) %in% colnames(df))) stop("Required columns not found.")
  make_formula <- function(degree) as.formula(paste0(outcome, " ~ poly(", predictor, ", ", degree, ", raw = TRUE)"))
  degrees <- 1:4
  results <- tibble(
    model = c("linear","quadratic","cubic","quartic"),
    AIC = NA_real_, BIC = NA_real_, ROC = NA_real_
  )
  for (i in seq_along(degrees)) {
    f <- make_formula(degrees[i])
    fit <- tryCatch(glm(f, data = df, family = binomial), error = function(e) NULL, warning = function(w) NULL)
    if (!is.null(fit)) {
      results$AIC[i] <- AIC(fit)
      results$BIC[i] <- BIC(fit)
      if (length(unique(df[[outcome]])) == 2) {
        roc_obj <- tryCatch(roc(df[[outcome]], fitted(fit), quiet = TRUE), error = function(e) NA_real_)
        results$ROC[i] <- ifelse(is.list(roc_obj) && !is.null(roc_obj$auc), as.numeric(roc_obj$auc), NA_real_)
      }
    }
  }
  results
}

4 Study 1 — Percent vs. 1‑in‑X (Core Analyses)

4.1 Contrast: percent vs. 1‑in‑X

df_S1_digit <- full_S1 %>%
  dplyr::filter(format %in% c("percent","NX")) %>%
  dplyr::mutate(
    condition = factor(ifelse(format == "percent", "percent", "fraction"))
  ) %>%
  dplyr::select(raw_resp, condition)

brms_S1_digit <- fit_or_load(
  brm(
    formula = raw_resp ~ condition,
    data = df_S1_digit,
    family = bernoulli(),
    chains = 4, cores = 4, seed = 123
  ),
  "cache/models/brms_S1_digit.rds"
)
summary(brms_S1_digit)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: raw_resp ~ condition 
##    Data: df_S1_digit (Number of observations: 1367) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                  Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept           -1.42      0.10    -1.61    -1.23 1.00     3838     2815
## conditionpercent    -0.77      0.16    -1.08    -0.46 1.00     2099     2133
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

4.2 1‑in‑X (S1) vs 1‑in‑2X (S2)

df_S1_frac <- full_S1 %>% filter(format == "NX") %>% mutate(condition = "1-in-X", y = raw_resp)
df_S2_frac <- full_S2 %>% filter(grepl("^X1.in.", cond_col)) %>% mutate(condition = "1-in-2X", y = raw_resp)
NXv2X <- bind_rows(df_S1_frac, df_S2_frac) %>% mutate(condition = factor(condition))


brms_fraction_1vs2 <- fit_or_load(
brm(y ~ condition, data = NXv2X, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_1vs2.rds"
)
summary(brms_fraction_1vs2)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: y ~ condition 
##    Data: NXv2X (Number of observations: 1906) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          -1.07      0.07    -1.20    -0.94 1.00     4188     2686
## condition1MinMX    -0.35      0.12    -0.59    -0.11 1.00     3867     2720
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

perm_test_1vs2 <- coin::independence_test(y ~ condition, data = NXv2X, distribution = approximate(nresample = 10000))
perm_test_1vs2

## 
##  Approximative General Independence Test
## 
## data:  y by condition (1-in-2X, 1-in-X)
## Z = 2.9856, p-value = 0.0037
## alternative hypothesis: two.sided

4.3 5‑in‑X (S2) vs 1‑in‑X (S1)

NX <- full_S1 %>% filter(format == "NX") %>% mutate(condition = "1-in-X") %>% select(raw_resp, oom, condition)
NX5 <- full_S2 %>% filter(grepl("^X5\\.in", cond_col)) %>% mutate(condition = "5-in-X") %>% select(raw_resp, oom, condition)
NXv5NX <- bind_rows(NX, NX5) %>% mutate(condition = factor(condition))


brms_fraction_5vs1 <- fit_or_load(
brm(raw_resp ~ condition, data = NXv5NX, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_5vs1.rds"
)
summary(brms_fraction_5vs1)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: raw_resp ~ condition 
##    Data: NXv5NX (Number of observations: 1701) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          -1.42      0.10    -1.62    -1.23 1.00     3302     2542
## condition5MinMX     0.10      0.13    -0.16     0.35 1.00     3258     2429
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

perm_test_fraction_5vs1 <- coin::independence_test(raw_resp ~ condition, data = NXv5NX, distribution = approximate(nresample = 10000))
perm_test_fraction_5vs1

## 
##  Approximative General Independence Test
## 
## data:  raw_resp by condition (1-in-X, 5-in-X)
## Z = -0.79846, p-value = 0.4228
## alternative hypothesis: two.sided

4.4 5‑in‑X vs 1‑in‑2X (both S2)

one_in_2X <- full_S2 %>% filter(grepl("^X1\\.", cond_col)) %>% mutate(condition = "1-in-2X") %>% select(raw_resp, oom, condition)
five_in_X <- full_S2 %>% filter(grepl("^X5\\.", cond_col)) %>% mutate(condition = "5-in-X") %>% select(raw_resp, oom, condition)
NX2v5NX <- bind_rows(one_in_2X, five_in_X) %>% mutate(condition = factor(condition))


brms_fraction_5vs2 <- fit_or_load(
brm(raw_resp ~ condition, data = NX2v5NX, family = bernoulli(), chains = 4, cores = 4, seed = 123),
"cache/models/brms_fraction_5vs2.rds"
)
summary(brms_fraction_5vs2)

##  Family: bernoulli 
##   Links: mu = logit 
## Formula: raw_resp ~ condition 
##    Data: NX2v5NX (Number of observations: 2253) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Regression Coefficients:
##                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept          -1.07      0.07    -1.20    -0.94 1.00     3291     2738
## condition5MinMX    -0.25      0.10    -0.45    -0.06 1.00     2748     2578
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

perm_test_fraction_5vs2 <- coin::independence_test(raw_resp ~ condition, data = NX2v5NX, distribution = approximate(nresample = 10000))
perm_test_fraction_5vs2

## 
##  Approximative General Independence Test
## 
## data:  raw_resp by condition (1-in-2X, 5-in-X)
## Z = 2.4821, p-value = 0.0145
## alternative hypothesis: two.sided

5 Polynomial Trends within S2 Subsets

# 5% condition (percent labels containing "5")
df_5perc <- full_S2 %>% filter(grepl("5", cond_col) & grepl("perc", cond_col))
trend_analysis(df_5perc)

# 1-in-2X conditions
df_1in2X <- full_S2 %>% filter(grepl("^X1\\.in\\.", cond_col)) %>% mutate(condition = cond_col) %>% select(raw_resp, oom, condition)
trend_analysis(df_1in2X)

# 5-in-X conditions
df_5inX <- full_S2 %>% filter(grepl("^X5\\.in\\.", cond_col)) %>% mutate(condition = cond_col) %>% select(raw_resp, oom, condition)
trend_analysis(df_5inX)

6 Session Info

sessionInfo()

## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Chicago
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] officer_0.7.0    flextable_0.9.10 scales_1.4.0     ggdist_3.3.3    
##  [5] pROC_1.19.0.1    coin_1.4-3       survival_3.8-3   emmeans_1.11.2  
##  [9] broom_1.0.9      brms_2.23.0      Rcpp_1.1.0       ggplot2_3.5.2   
## [13] purrr_1.1.0      stringr_1.5.1    tidyr_1.3.1      dplyr_1.1.4     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1        libcoin_1.0-10          farver_2.1.2           
##  [4] loo_2.8.0               fastmap_1.2.0           TH.data_1.1-4          
##  [7] tensorA_0.36.2.1        fontquiver_0.2.1        digest_0.6.37          
## [10] estimability_1.5.1      lifecycle_1.0.4         StanHeaders_2.32.10    
## [13] magrittr_2.0.3          posterior_1.6.1         compiler_4.5.1         
## [16] rlang_1.1.6             sass_0.4.10             tools_4.5.1            
## [19] yaml_2.3.10             data.table_1.17.8       knitr_1.50             
## [22] askpass_1.2.1           bridgesampling_1.1-2    pkgbuild_1.4.8         
## [25] plyr_1.8.9              xml2_1.3.8              RColorBrewer_1.1-3     
## [28] multcomp_1.4-28         abind_1.4-8             withr_3.0.2            
## [31] grid_4.5.1              stats4_4.5.1            gdtools_0.4.3          
## [34] colorspace_2.1-1        inline_0.3.21           xtable_1.8-4           
## [37] MASS_7.3-65             cli_3.6.5               mvtnorm_1.3-3          
## [40] rmarkdown_2.29          ragg_1.4.0              generics_0.1.4         
## [43] RcppParallel_5.1.11-1   rstudioapi_0.17.1       reshape2_1.4.4         
## [46] cachem_1.1.0            rstan_2.32.7            modeltools_0.2-24      
## [49] splines_4.5.1           bayesplot_1.14.0        parallel_4.5.1         
## [52] matrixStats_1.5.0       vctrs_0.6.5             Matrix_1.7-3           
## [55] sandwich_3.1-1          jsonlite_2.0.0          fontBitstreamVera_0.1.1
## [58] systemfonts_1.2.3       jquerylib_0.1.4         glue_1.8.0             
## [61] codetools_0.2-20        distributional_0.5.0    stringi_1.8.7          
## [64] gtable_0.3.6            QuickJSR_1.8.1          tibble_3.3.0           
## [67] pillar_1.11.0           htmltools_0.5.8.1       Brobdingnag_1.2-9      
## [70] openssl_2.3.3           R6_2.6.1                textshaping_1.0.1      
## [73] evaluate_1.0.4          lattice_0.22-7          backports_1.5.0        
## [76] fontLiberation_0.1.0    bslib_0.9.0             rstantools_2.5.0       
## [79] zip_2.3.3               uuid_1.2-1              gridExtra_2.3          
## [82] coda_0.19-4.1           nlme_3.1-168            checkmate_2.3.3        
## [85] xfun_0.52               zoo_1.8-14              pkgconfig_2.0.3

7 How to Reproduce

Install the required packages listed in the libraries chunk.
Place full_S1.csv and full_S2.csv in the specified paths (or edit the import chunk to your paths
Knit this file to HTML. Heavy models will be loaded from cache/models/*.rds after the first run.
To force refits, delete files in cache/models/.

Reproducible Appendix: Decimal Comprehension Studies

Mark LaCour (CROAK Lab, UL Lafayette)