Effect Change Decompositions

Code
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)

Data Loading

Each dataset is loaded below. Variable and condition information per file:

Dataset DV column Condition column Condition values
minread_memory dv condition 1, 2
minread_obvious dv condition 1, 2
transaction_memory dv condition 1, 2
transaction_obvious dv condition 1, 2
longread_memory dv_avg condition_opposing 0, 1
longread_obvious dv_avg condition “before”, “after”
metaphor_memory dv_friday condition 1, 2
metaphor_obvious dv_friday condition 1, 2
social_memory dv condition 1, 2
social_obvious dv condition 1, 2
numeric_memory dv condition 0, 1
numeric_obvious dv condition 0, 1
Code
df_minread_memory     <- read.csv("minread_memory.csv")
df_minread_obvious    <- read.csv("minread_obvious.csv")
df_transaction_memory <- read.csv("transaction_memory.csv")
df_transaction_obvious<- read.csv("transaction_obvious.csv")
df_longread_memory    <- read.csv("longread_memory.csv")
df_longread_obvious   <- read.csv("longread_obvious.csv")
df_metaphor_memory    <- read.csv("metaphor_memory_v2.csv")
df_metaphor_obvious   <- read.csv("metaphor_obvious_v2.csv")
df_social_memory      <- read.csv("social_memory.csv")
df_social_obvious     <- read.csv("social_obvious.csv")
df_numeric_memory     <- read.csv("numeric_memory.csv")
df_numeric_obvious    <- read.csv("numeric_obvious.csv")

cat("Rows loaded:\n")
Rows loaded:
Code
cat("  minread_memory:     ", nrow(df_minread_memory), "\n")
  minread_memory:      405 
Code
cat("  minread_obvious:    ", nrow(df_minread_obvious), "\n")
  minread_obvious:     420 
Code
cat("  transaction_memory: ", nrow(df_transaction_memory), "\n")
  transaction_memory:  401 
Code
cat("  transaction_obvious:", nrow(df_transaction_obvious), "\n")
  transaction_obvious: 410 
Code
cat("  longread_memory:    ", nrow(df_longread_memory), "\n")
  longread_memory:     614 
Code
cat("  longread_obvious:   ", nrow(df_longread_obvious), "\n")
  longread_obvious:    597 
Code
cat("  metaphor_memory:    ", nrow(df_metaphor_memory), "\n")
  metaphor_memory:     409 
Code
cat("  metaphor_obvious:   ", nrow(df_metaphor_obvious), "\n")
  metaphor_obvious:    412 
Code
cat("  social_memory:      ", nrow(df_social_memory), "\n")
  social_memory:       412 
Code
cat("  social_obvious:     ", nrow(df_social_obvious), "\n")
  social_obvious:      420 
Code
cat("  numeric_memory:     ", nrow(df_numeric_memory), "\n")
  numeric_memory:      414 
Code
cat("  numeric_obvious:    ", nrow(df_numeric_obvious), "\n")
  numeric_obvious:     406 

Core Functions

Code
# Pooled SD (unpaired two-sample)
pooled_sd <- function(x1, x2) {
  x1 <- x1[!is.na(x1)]
  x2 <- x2[!is.na(x2)]
  n1 <- length(x1); n2 <- length(x2)
  sqrt(((n1 - 1) * sd(x1)^2 + (n2 - 1) * sd(x2)^2) / (n1 + n2 - 2))
}

# Compute means by condition, pre and post exclusions, and return a tidy table
compute_means <- function(data, dv_col, cond_col, cond_vals) {
  v1 <- cond_vals[1]; v2 <- cond_vals[2]

  get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]

  pre_g1  <- get_group(data, v1)
  pre_g2  <- get_group(data, v2)
  post_g1 <- get_group(data[data$att_correct == 1, ], v1)
  post_g2 <- get_group(data[data$att_correct == 1, ], v2)

  data.frame(
    sample    = c("pre (full)", "pre (full)", "post (retained)", "post (retained)"),
    condition = c(as.character(v1), as.character(v2),
                  as.character(v1), as.character(v2)),
    n         = c(sum(!is.na(pre_g1)),  sum(!is.na(pre_g2)),
                  sum(!is.na(post_g1)), sum(!is.na(post_g2))),
    mean_dv   = c(mean(pre_g1,  na.rm = TRUE), mean(pre_g2,  na.rm = TRUE),
                  mean(post_g1, na.rm = TRUE), mean(post_g2, na.rm = TRUE)),
    sd_dv     = c(sd(pre_g1,  na.rm = TRUE),   sd(pre_g2,  na.rm = TRUE),
                  sd(post_g1, na.rm = TRUE),    sd(post_g2, na.rm = TRUE))
  )
}

# Shapley decomposition of change in Cohen's d
compute_decomp <- function(data, dv_col = "dv", cond_col = "condition",
                           cond_vals = c(1, 2)) {
  v1 <- cond_vals[1]; v2 <- cond_vals[2]

  get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]

  pre_g1  <- get_group(data, v1)
  pre_g2  <- get_group(data, v2)
  post_g1 <- get_group(data[data$att_correct == 1, ], v1)
  post_g2 <- get_group(data[data$att_correct == 1, ], v2)

  mean_pre_1  <- mean(pre_g1,  na.rm = TRUE)
  mean_pre_2  <- mean(pre_g2,  na.rm = TRUE)
  mean_post_1 <- mean(post_g1, na.rm = TRUE)
  mean_post_2 <- mean(post_g2, na.rm = TRUE)

  sd_pooled_pre  <- pooled_sd(pre_g1,  pre_g2)
  sd_pooled_post <- pooled_sd(post_g1, post_g2)

  deltaM_pre  <- mean_pre_1  - mean_pre_2
  deltaM_post <- mean_post_1 - mean_post_2

  d_pre  <- deltaM_pre  / sd_pooled_pre
  d_post <- deltaM_post / sd_pooled_post
  delta_d_total <- d_post - d_pre

  # Path A: means change first, then SD
  delta_d_means_A <- (deltaM_post / sd_pooled_pre)  - (deltaM_pre  / sd_pooled_pre)
  delta_d_sd_A    <- (deltaM_post / sd_pooled_post) - (deltaM_post / sd_pooled_pre)

  # Path B: SD changes first, then means
  delta_d_sd_B    <- (deltaM_pre  / sd_pooled_post) - (deltaM_pre  / sd_pooled_pre)
  delta_d_means_B <- (deltaM_post / sd_pooled_post) - (deltaM_pre  / sd_pooled_post)

  # Shapley averages
  delta_d_means_shapley <- 0.5 * (delta_d_means_A + delta_d_means_B)
  delta_d_sd_shapley    <- 0.5 * (delta_d_sd_A    + delta_d_sd_B)

  pct_from_means <- if (abs(delta_d_total) < 1e-10) NA_real_ else
    100 * delta_d_means_shapley / delta_d_total
  pct_from_sd    <- if (abs(delta_d_total) < 1e-10) NA_real_ else
    100 * delta_d_sd_shapley / delta_d_total

  c(
    mean_pre_g1           = mean_pre_1,
    mean_pre_g2           = mean_pre_2,
    mean_post_g1          = mean_post_1,
    mean_post_g2          = mean_post_2,
    sd_pooled_pre         = sd_pooled_pre,
    sd_pooled_post        = sd_pooled_post,
    deltaM_pre            = deltaM_pre,
    deltaM_post           = deltaM_post,
    d_pre                 = d_pre,
    d_post                = d_post,
    delta_d_total         = delta_d_total,
    delta_d_means_shapley = delta_d_means_shapley,
    delta_d_sd_shapley    = delta_d_sd_shapley,
    pct_from_means        = pct_from_means,
    pct_from_sd           = pct_from_sd,
    abs_mean_minus_abs_sd = abs(delta_d_means_shapley) - abs(delta_d_sd_shapley)
  )
}

# Bootstrap wrapper
bootstrap_decomp <- function(data, dv_col, cond_col, cond_vals, B = 5000, seed = 123) {
  set.seed(seed)
  main_est <- compute_decomp(data, dv_col, cond_col, cond_vals)

  boot_mat <- replicate(B, {
    boot_dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ]
    compute_decomp(boot_dat, dv_col, cond_col, cond_vals)
  })

  boot_df <- as.data.frame(t(boot_mat))

  data.frame(
    statistic = names(main_est),
    estimate  = main_est,
    ci_lower  = apply(boot_df, 2, quantile, probs = 0.025, na.rm = TRUE),
    ci_upper  = apply(boot_df, 2, quantile, probs = 0.975, na.rm = TRUE),
    row.names = NULL
  )
}

A note on percentages outside [0%, 100%]

The Shapley decomposition guarantees that delta_d_means_shapley + delta_d_sd_shapley = delta_d_total exactly, so pct_from_means + pct_from_sd = 100% always. However, the individual percentages can fall outside [0%, 100%] whenever the mean-shift and SD-change contributions have opposite signs — i.e., one mechanism amplifies Δd while the other dampens it.

Concrete example — Longread × Obvious: The total Δd ≈ +0.009. The mean-shift contribution alone would produce Δd ≈ +0.018 (means became more different post-exclusion), but the SD increase post-exclusion partially offsets this, contributing Δd ≈ −0.009. The two partially cancel to produce the observed net Δd. This yields pct_from_means ≈ +199% and pct_from_sd ≈ −99%. This is the mathematically correct decomposition: the two forces are working against each other, and neither alone would correctly summarise the change.

In these cases, the most interpretable quantities are the signed Shapley contributions (delta_d_means_shapley and delta_d_sd_shapley) rather than the percentages.


Analyses by Dataset

1. Minread × Memory

Code
means_minread_memory <- compute_means(
  df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_memory, digits = 4, caption = "Minread × Memory: Means by condition, pre and post exclusions")
Minread × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 223 0.6413 0.4807
pre (full) 2 182 0.1813 0.3863
post (retained) 1 71 0.6620 0.4764
post (retained) 2 81 0.2593 0.4410
Code
decomp_minread_memory <- bootstrap_decomp(
  df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_memory, digits = 4,
      caption = "Minread × Memory: Shapley decomposition with 95% bootstrap CIs")
Minread × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 0.6413 0.5794 0.7054
mean_pre_g2 0.1813 0.1271 0.2373
mean_post_g1 0.6620 0.5500 0.7693
mean_post_g2 0.2593 0.1667 0.3580
sd_pooled_pre 0.4408 0.4154 0.4600
sd_pooled_post 0.4578 0.4172 0.4837
deltaM_pre 0.4599 0.3763 0.5448
deltaM_post 0.4027 0.2528 0.5446
d_pre 1.0433 0.8219 1.3081
d_post 0.8796 0.5260 1.3008
delta_d_total -0.1637 -0.4420 0.1475
delta_d_means_shapley -0.1274 -0.3775 0.1171
delta_d_sd_shapley -0.0363 -0.0774 0.0308
pct_from_means 77.8031 30.7086 119.9245
pct_from_sd 22.1969 -19.9245 69.2914
abs_mean_minus_abs_sd 0.0911 -0.0047 0.3151

2. Minread × Obvious

Code
means_minread_obvious <- compute_means(
  df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_obvious, digits = 4, caption = "Minread × Obvious: Means by condition, pre and post exclusions")
Minread × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 193 0.5855 0.4939
pre (full) 2 227 0.1674 0.3742
post (retained) 1 155 0.5097 0.5015
post (retained) 2 177 0.1921 0.3951
Code
decomp_minread_obvious <- bootstrap_decomp(
  df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_obvious, digits = 4,
      caption = "Minread × Obvious: Shapley decomposition with 95% bootstrap CIs")
Minread × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 0.5855 0.5153 0.6543
mean_pre_g2 0.1674 0.1198 0.2181
mean_post_g1 0.5097 0.4303 0.5886
mean_post_g2 0.1921 0.1358 0.2514
sd_pooled_pre 0.4333 0.4086 0.4535
sd_pooled_post 0.4479 0.4225 0.4671
deltaM_pre 0.4181 0.3330 0.5029
deltaM_post 0.3176 0.2189 0.4164
d_pre 0.9649 0.7394 1.2219
d_post 0.7090 0.4757 0.9654
delta_d_total -0.2559 -0.3530 -0.1597
delta_d_means_shapley -0.2282 -0.3193 -0.1413
delta_d_sd_shapley -0.0277 -0.0538 -0.0066
pct_from_means 89.1787 79.9773 97.1815
pct_from_sd 10.8213 2.8185 20.0227
abs_mean_minus_abs_sd 0.2005 0.1172 0.2925

3. Transaction × Memory

Code
means_transaction_memory <- compute_means(
  df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_memory, digits = 4, caption = "Transaction × Memory: Means by condition, pre and post exclusions")
Transaction × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 200 19.9671 18.9687
pre (full) 2 201 19.6474 19.6386
post (retained) 1 131 14.9828 16.5126
post (retained) 2 134 14.2546 17.7483
Code
decomp_transaction_memory <- bootstrap_decomp(
  df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_memory, digits = 4,
      caption = "Transaction × Memory: Shapley decomposition with 95% bootstrap CIs")
Transaction × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 19.9671 17.3748 22.6443
mean_pre_g2 19.6474 17.0298 22.4260
mean_post_g1 14.9828 12.1665 17.8683
mean_post_g2 14.2546 11.3876 17.3991
sd_pooled_pre 19.3074 18.3667 20.0541
sd_pooled_post 17.1486 15.4296 18.5414
deltaM_pre 0.3197 -3.4746 4.1064
deltaM_post 0.7282 -3.3924 4.8904
d_pre 0.0166 -0.1801 0.2146
d_post 0.0425 -0.1989 0.2868
delta_d_total 0.0259 -0.1389 0.2040
delta_d_means_shapley 0.0225 -0.1369 0.1945
delta_d_sd_shapley 0.0034 -0.0212 0.0323
pct_from_means 86.8135 -79.6101 252.9230
pct_from_sd 13.1865 -152.9230 179.6101
abs_mean_minus_abs_sd 0.0191 -0.0115 0.1885

4. Transaction × Obvious

Code
means_transaction_obvious <- compute_means(
  df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_obvious, digits = 4, caption = "Transaction × Obvious: Means by condition, pre and post exclusions")
Transaction × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 217 21.6877 20.1795
pre (full) 2 193 18.9179 19.7491
post (retained) 1 154 16.2938 17.8786
post (retained) 2 136 12.5329 16.7046
Code
decomp_transaction_obvious <- bootstrap_decomp(
  df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_obvious, digits = 4,
      caption = "Transaction × Obvious: Shapley decomposition with 95% bootstrap CIs")
Transaction × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 21.6877 19.0777 24.3312
mean_pre_g2 18.9179 16.2530 21.7719
mean_post_g1 16.2938 13.5347 19.1846
mean_post_g2 12.5329 9.9305 15.4650
sd_pooled_pre 19.9781 19.1196 20.6520
sd_pooled_post 17.3382 15.7319 18.6849
deltaM_pre 2.7699 -1.0943 6.5195
deltaM_post 3.7608 -0.3038 7.6647
d_pre 0.1386 -0.0550 0.3296
d_post 0.2169 -0.0182 0.4526
delta_d_total 0.0783 -0.0803 0.2474
delta_d_means_shapley 0.0534 -0.1007 0.2137
delta_d_sd_shapley 0.0249 -0.0030 0.0612
pct_from_means 68.2024 -205.1873 358.7921
pct_from_sd 31.7976 -258.7921 305.1873
abs_mean_minus_abs_sd 0.0285 -0.0330 0.1860

5. Longread × Memory

DV: dv_avg; condition column: condition_opposing (values 0, 1)

Code
means_longread_memory <- compute_means(
  df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(means_longread_memory, digits = 4, caption = "Longread × Memory: Means by condition, pre and post exclusions")
Longread × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 0 315 11.8048 3.0313
pre (full) 1 299 8.5485 5.0117
post (retained) 0 216 12.0347 2.8501
post (retained) 1 161 5.4441 4.4665
Code
decomp_longread_memory <- bootstrap_decomp(
  df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(decomp_longread_memory, digits = 4,
      caption = "Longread × Memory: Shapley decomposition with 95% bootstrap CIs")
Longread × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 11.8048 11.4670 12.1262
mean_pre_g2 8.5485 7.9821 9.1197
mean_post_g1 12.0347 11.6500 12.4000
mean_post_g2 5.4441 4.7704 6.1587
sd_pooled_pre 4.1164 3.9207 4.2986
sd_pooled_post 3.6290 3.3539 3.8703
deltaM_pre 3.2563 2.5800 3.9043
deltaM_post 6.5906 5.7788 7.3486
d_pre 0.7910 0.6284 0.9590
d_post 1.8161 1.5249 2.1555
delta_d_total 1.0251 0.8018 1.3016
delta_d_means_shapley 0.8644 0.7064 1.0394
delta_d_sd_shapley 0.1607 0.0731 0.2872
pct_from_means 84.3268 77.0908 91.3514
pct_from_sd 15.6732 8.6486 22.9092
abs_mean_minus_abs_sd 0.7038 0.5815 0.8292

6. Longread × Obvious

DV: dv_avg; condition column: condition (values “before”, “after”)

Code
means_longread_obvious <- compute_means(
  df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(means_longread_obvious, digits = 4, caption = "Longread × Obvious: Means by condition, pre and post exclusions")
Longread × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) before 297 10.3502 4.3118
pre (full) after 300 9.9417 4.4540
post (retained) before 194 9.4716 4.7924
post (retained) after 203 8.9828 4.7918
Code
decomp_longread_obvious <- bootstrap_decomp(
  df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(decomp_longread_obvious, digits = 4,
      caption = "Longread × Obvious: Shapley decomposition with 95% bootstrap CIs")
Longread × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 10.3502 9.8482 10.8389
mean_pre_g2 9.9417 9.4228 10.4365
mean_post_g1 9.4716 8.7969 10.1529
mean_post_g2 8.9828 8.3131 9.6533
sd_pooled_pre 4.3839 4.1522 4.5897
sd_pooled_post 4.7921 4.5554 4.9915
deltaM_pre 0.4085 -0.3049 1.1033
deltaM_post 0.4889 -0.4295 1.4478
d_pre 0.0932 -0.0701 0.2531
d_post 0.1020 -0.0895 0.3064
delta_d_total 0.0088 -0.0842 0.1049
delta_d_means_shapley 0.0176 -0.0846 0.1210
delta_d_sd_shapley -0.0087 -0.0247 0.0067
pct_from_means 198.6613 -164.6657 434.3307
pct_from_sd -98.6613 -334.3307 264.6657
abs_mean_minus_abs_sd 0.0088 -0.0095 0.1089

7. Metaphor × Memory

Source: metaphor_memory_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)

Code
means_metaphor_memory <- compute_means(
  df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_memory, digits = 4, caption = "Metaphor × Memory: Means by condition, pre and post exclusions")
Metaphor × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 211 0.5355 0.4999
pre (full) 2 198 0.5455 0.4992
post (retained) 1 115 0.5913 0.4937
post (retained) 2 154 0.5779 0.4955
Code
decomp_metaphor_memory <- bootstrap_decomp(
  df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_memory, digits = 4,
      caption = "Metaphor × Memory: Shapley decomposition with 95% bootstrap CIs")
Metaphor × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 0.5355 0.4686 0.6042
mean_pre_g2 0.5455 0.4778 0.6134
mean_post_g1 0.5913 0.5000 0.6803
mean_post_g2 0.5779 0.5000 0.6568
sd_pooled_pre 0.4996 0.4924 0.5011
sd_pooled_post 0.4948 0.4799 0.5008
deltaM_pre -0.0099 -0.1054 0.0859
deltaM_post 0.0134 -0.1068 0.1278
d_pre -0.0198 -0.2123 0.1723
d_post 0.0270 -0.2181 0.2612
delta_d_total 0.0469 -0.1075 0.1888
delta_d_means_shapley 0.0469 -0.1061 0.1873
delta_d_sd_shapley 0.0000 -0.0032 0.0036
pct_from_means 99.9278 84.9999 114.3187
pct_from_sd 0.0722 -14.3187 15.0001
abs_mean_minus_abs_sd 0.0468 0.0020 0.1858

8. Metaphor × Obvious

Source: metaphor_obvious_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)

Code
means_metaphor_obvious <- compute_means(
  df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_obvious, digits = 4, caption = "Metaphor × Obvious: Means by condition, pre and post exclusions")
Metaphor × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 200 0.5300 0.5004
pre (full) 2 212 0.5142 0.5010
post (retained) 1 147 0.5850 0.4944
post (retained) 2 152 0.5592 0.4981
Code
decomp_metaphor_obvious <- bootstrap_decomp(
  df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_obvious, digits = 4,
      caption = "Metaphor × Obvious: Shapley decomposition with 95% bootstrap CIs")
Metaphor × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 0.5300 0.4605 0.5990
mean_pre_g2 0.5142 0.4455 0.5806
mean_post_g1 0.5850 0.5064 0.6645
mean_post_g2 0.5592 0.4788 0.6369
sd_pooled_pre 0.5007 0.4949 0.5012
sd_pooled_post 0.4963 0.4835 0.5010
deltaM_pre 0.0158 -0.0810 0.1133
deltaM_post 0.0258 -0.0862 0.1376
d_pre 0.0317 -0.1623 0.2285
d_post 0.0520 -0.1750 0.2809
delta_d_total 0.0204 -0.1006 0.1415
delta_d_means_shapley 0.0200 -0.0999 0.1406
delta_d_sd_shapley 0.0004 -0.0019 0.0040
pct_from_means 98.1977 76.3625 117.7665
pct_from_sd 1.8023 -17.7665 23.6375
abs_mean_minus_abs_sd 0.0196 0.0011 0.1425

9. Social × Memory

Code
means_social_memory <- compute_means(
  df_social_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_social_memory, digits = 4, caption = "Social × Memory: Means by condition, pre and post exclusions")
Social × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 196 6.3673 0.8989
pre (full) 2 215 5.4930 1.3426
post (retained) 1 152 6.5658 0.7340
post (retained) 2 168 5.3690 1.4292
Code
decomp_social_memory <- bootstrap_decomp(
  df_social_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_social_memory, digits = 4,
      caption = "Social × Memory: Shapley decomposition with 95% bootstrap CIs")
Social × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 6.3673 6.2319 6.4868
mean_pre_g2 5.4930 5.3139 5.6638
mean_post_g1 6.5658 6.4458 6.6757
mean_post_g2 5.3690 5.1509 5.5818
sd_pooled_pre 1.1525 1.0463 1.2538
sd_pooled_post 1.1526 1.0354 1.2676
deltaM_pre 0.8743 0.6578 1.0916
deltaM_post 1.1967 0.9541 1.4377
d_pre 0.7586 0.5738 0.9476
d_post 1.0383 0.8418 1.2467
delta_d_total 0.2797 0.1624 0.4095
delta_d_means_shapley 0.2797 0.1869 0.3822
delta_d_sd_shapley -0.0001 -0.0341 0.0436
pct_from_means 100.0188 87.8016 118.0013
pct_from_sd -0.0188 -18.0013 12.1984
abs_mean_minus_abs_sd 0.2797 0.1622 0.3609

10. Social × Obvious

Code
means_social_obvious <- compute_means(
  df_social_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_social_obvious, digits = 4, caption = "Social × Obvious: Means by condition, pre and post exclusions")
Social × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 1 192 6.2812 0.9995
pre (full) 2 228 5.5000 1.3907
post (retained) 1 147 6.4354 0.9513
post (retained) 2 176 5.4205 1.4200
Code
decomp_social_obvious <- bootstrap_decomp(
  df_social_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_social_obvious, digits = 4,
      caption = "Social × Obvious: Shapley decomposition with 95% bootstrap CIs")
Social × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 6.2812 6.1344 6.4145
mean_pre_g2 5.5000 5.3209 5.6830
mean_post_g1 6.4354 6.2756 6.5833
mean_post_g2 5.4205 5.2105 5.6243
sd_pooled_pre 1.2275 1.1111 1.3345
sd_pooled_post 1.2292 1.0983 1.3555
deltaM_pre 0.7812 0.5532 1.0039
deltaM_post 1.0149 0.7607 1.2663
d_pre 0.6365 0.4526 0.8293
d_post 0.8257 0.6107 1.0592
delta_d_total 0.1892 0.0824 0.3177
delta_d_means_shapley 0.1902 0.0855 0.3031
delta_d_sd_shapley -0.0010 -0.0292 0.0383
pct_from_means 100.5436 81.2371 122.6541
pct_from_sd -0.5436 -22.6541 18.7629
abs_mean_minus_abs_sd 0.1892 0.0710 0.2880

11. Numeric × Memory

Code
means_numeric_memory <- compute_means(
  df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_memory, digits = 4, caption = "Numeric × Memory: Means by condition, pre and post exclusions")
Numeric × Memory: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 0 200 3.2942 4.1369
pre (full) 1 214 2.1030 1.4019
post (retained) 0 167 3.5724 4.4706
post (retained) 1 180 2.1195 1.5123
Code
decomp_numeric_memory <- bootstrap_decomp(
  df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_memory, digits = 4,
      caption = "Numeric × Memory: Shapley decomposition with 95% bootstrap CIs")
Numeric × Memory: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 3.2942 2.7838 3.8940
mean_pre_g2 2.1030 1.9256 2.3054
mean_post_g1 3.5724 2.9686 4.2717
mean_post_g2 2.1195 1.9114 2.3547
sd_pooled_pre 3.0467 1.8926 4.1435
sd_pooled_post 3.2868 2.0291 4.4658
deltaM_pre 1.1912 0.6361 1.8404
deltaM_post 1.4529 0.7949 2.2163
d_pre 0.3910 0.2665 0.5478
d_post 0.4420 0.3103 0.6182
delta_d_total 0.0510 0.0211 0.0931
delta_d_means_shapley 0.0827 0.0463 0.1329
delta_d_sd_shapley -0.0317 -0.0437 -0.0206
pct_from_means 162.1223 134.1525 239.3141
pct_from_sd -62.1223 -139.3141 -34.1525
abs_mean_minus_abs_sd 0.0510 0.0211 0.0931

12. Numeric × Obvious

Code
means_numeric_obvious <- compute_means(
  df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_obvious, digits = 4, caption = "Numeric × Obvious: Means by condition, pre and post exclusions")
Numeric × Obvious: Means by condition, pre and post exclusions
sample condition n mean_dv sd_dv
pre (full) 0 211 2.7723 1.9897
pre (full) 1 195 2.0531 1.0984
post (retained) 0 165 2.9171 2.1169
post (retained) 1 160 2.0526 1.1462
Code
decomp_numeric_obvious <- bootstrap_decomp(
  df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_obvious, digits = 4,
      caption = "Numeric × Obvious: Shapley decomposition with 95% bootstrap CIs")
Numeric × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 2.7723 2.5082 3.0516
mean_pre_g2 2.0531 1.9092 2.2110
mean_post_g1 2.9171 2.6034 3.2554
mean_post_g2 2.0526 1.8833 2.2381
sd_pooled_pre 1.6240 1.3776 1.8539
sd_pooled_post 1.7094 1.4395 1.9609
deltaM_pre 0.7192 0.4185 1.0296
deltaM_post 0.8645 0.5065 1.2409
d_pre 0.4429 0.2688 0.6105
d_post 0.5058 0.3097 0.7024
delta_d_total 0.0629 -0.0039 0.1381
delta_d_means_shapley 0.0873 0.0110 0.1684
delta_d_sd_shapley -0.0244 -0.0440 -0.0021
pct_from_means 138.7423 49.4049 317.4241
pct_from_sd -38.7423 -217.4241 50.5951
abs_mean_minus_abs_sd 0.0629 -0.0001 0.1381

Summary Across All 12 Cases

Code
# Collect key decomposition estimates into one table
decomp_list <- list(
  "Minread × Memory"       = decomp_minread_memory,
  "Minread × Obvious"      = decomp_minread_obvious,
  "Transaction × Memory"   = decomp_transaction_memory,
  "Transaction × Obvious"  = decomp_transaction_obvious,
  "Longread × Memory"      = decomp_longread_memory,
  "Longread × Obvious"     = decomp_longread_obvious,
  "Metaphor × Memory"      = decomp_metaphor_memory,
  "Metaphor × Obvious"     = decomp_metaphor_obvious,
  "Social × Memory"        = decomp_social_memory,
  "Social × Obvious"       = decomp_social_obvious,
  "Numeric × Memory"       = decomp_numeric_memory,
  "Numeric × Obvious"      = decomp_numeric_obvious
)

key_stats <- c("d_pre", "d_post", "delta_d_total",
               "delta_d_means_shapley", "delta_d_sd_shapley",
               "pct_from_means", "pct_from_sd")

summary_df <- lapply(names(decomp_list), function(nm) {
  d <- decomp_list[[nm]]
  row <- d[d$statistic %in% key_stats, c("statistic", "estimate")]
  row$case <- nm
  row
}) |> bind_rows() |>
  pivot_wider(names_from = statistic, values_from = estimate) |>
  select(case, all_of(key_stats))

kable(summary_df, digits = 3,
      caption = "Summary: Shapley decomposition across all 12 paradigm × attention-check cases",
      col.names = c("Case", "d (pre)", "d (post)", "Δd total",
                    "Δd means (Shapley)", "Δd SD (Shapley)",
                    "% from means", "% from SD"))
Summary: Shapley decomposition across all 12 paradigm × attention-check cases
Case d (pre) d (post) Δd total Δd means (Shapley) Δd SD (Shapley) % from means % from SD
Minread × Memory 1.043 0.880 -0.164 -0.127 -0.036 77.803 22.197
Minread × Obvious 0.965 0.709 -0.256 -0.228 -0.028 89.179 10.821
Transaction × Memory 0.017 0.042 0.026 0.022 0.003 86.814 13.186
Transaction × Obvious 0.139 0.217 0.078 0.053 0.025 68.202 31.798
Longread × Memory 0.791 1.816 1.025 0.864 0.161 84.327 15.673
Longread × Obvious 0.093 0.102 0.009 0.018 -0.009 198.661 -98.661
Metaphor × Memory -0.020 0.027 0.047 0.047 0.000 99.928 0.072
Metaphor × Obvious 0.032 0.052 0.020 0.020 0.000 98.198 1.802
Social × Memory 0.759 1.038 0.280 0.280 0.000 100.019 -0.019
Social × Obvious 0.636 0.826 0.189 0.190 -0.001 100.544 -0.544
Numeric × Memory 0.391 0.442 0.051 0.083 -0.032 162.122 -62.122
Numeric × Obvious 0.443 0.506 0.063 0.087 -0.024 138.742 -38.742

Visualization: Decomposition Waterfall

Code
library(patchwork)

# Build long-form data for stacked bar
plot_df <- summary_df |>
  mutate(
    paradigm = sub(" × .*", "", case),
    ac_type  = sub(".* × ", "", case),
    means_contrib = delta_d_means_shapley,
    sd_contrib    = delta_d_sd_shapley
  ) |>
  select(case, paradigm, ac_type, d_pre, d_post, delta_d_total,
         means_contrib, sd_contrib) |>
  pivot_longer(
    cols = c(means_contrib, sd_contrib),
    names_to  = "component",
    values_to = "contribution"
  ) |>
  mutate(
    component = recode(component,
      "means_contrib" = "Mean shift",
      "sd_contrib"    = "SD change"
    ),
    case = factor(case, levels = unique(summary_df$case))
  )

# Shared layer builder so both sub-plots stay visually identical
make_bars <- function(data) {
  # Anchor labels to the outer edge of the bar stack (not delta_d_total).
  # When components have opposite signs, delta_d_total falls *inside* the
  # taller bar, so placing text there buries it. Instead we use the top of
  # the positive stack (for net-positive bars) or the bottom of the negative
  # stack (for net-negative bars).
  label_df <- data |>
    group_by(case, paradigm, ac_type, delta_d_total) |>
    summarise(
      y_top = sum(contribution[contribution > 0], na.rm = TRUE),
      y_bot = sum(contribution[contribution < 0], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      y_label   = ifelse(delta_d_total >= 0, y_top, y_bot),
      vjust_val = ifelse(delta_d_total >= 0, -0.4, 1.2)
    )

  ggplot(data, aes(x = ac_type, y = contribution, fill = component)) +
    geom_col(position = "stack", width = 0.65, color = "white", linewidth = 0.3) +
    geom_hline(yintercept = 0, linewidth = 0.4, color = "grey30") +
    geom_text(
      data = label_df,
      aes(x = ac_type, y = y_label,
          label = sprintf("Δd=%.2f", delta_d_total),
          vjust = vjust_val),
      inherit.aes = FALSE,
      size = 2.8, fontface = "bold"
    ) +
    scale_fill_manual(
      values = c("Mean shift" = "#2166ac", "SD change" = "#d6604d"),
      name   = "Contribution"
    ) +
    labs(x = "Attention check type",
         y = expression(Delta * italic(d) ~ "(Shapley contribution)")) +
    theme_bw(base_size = 12) +
    theme(
      strip.background = element_rect(fill = "grey92"),
      strip.text       = element_text(face = "bold"),
      legend.position  = "bottom",
      panel.grid.minor = element_blank()
    )
}

# --- Sub-plot A: the 5 non-Longread paradigms, y from -0.32 to 0.4 ---
p_other <- make_bars(plot_df |> filter(paradigm != "Longread")) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
  coord_cartesian(ylim = c(-0.32, 0.4))

# --- Sub-plot B: Longread alone, free y scale ---
p_long <- make_bars(plot_df |> filter(paradigm == "Longread")) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
  theme(legend.position = "none")   # legend carried by p_other

# Combine: Longread panel on the left (narrow), others on the right (wide)
p_long + p_other +
  plot_layout(widths = c(1, 5), guides = "collect") +
  plot_annotation(
    title    = "Shapley Decomposition of Change in Cohen's d",
    subtitle = "Each bar = Δd (post − pre); decomposed into mean-shift and SD-change contributions.\nLongread y-axis is free; all other paradigms share y ≤ 0.4.",
    theme = theme(
      plot.title    = element_text(face = "bold", size = 13),
      plot.subtitle = element_text(size = 10, color = "grey30"),
      legend.position = "bottom"
    )
  )


Logit-Transformed DVs: Longread & Social

For bounded Likert-type DVs, inattentive respondents tend to cluster at the endpoints, which can suppress post-exclusion SD changes on the raw scale. Logit-transforming the DV “unbounds” the scale, potentially allowing SD to shift more when inattentive responders are removed.

Procedure

  1. Rescale \(x\) to the open unit interval using theoretical scale bounds: \(p = (x - x_{\min}) / (x_{\max} - x_{\min})\)
  2. Apply the Smithson & Verkuilen (2006) compression to push any exact 0/1 values away from the boundary: \(p' = \bigl(p \cdot (n - 1) + 0.5\bigr) \,/\, n\) where \(n\) is the number of non-missing observations in each dataset.
  3. Apply the logit: \(\tilde{x} = \log\!\bigl(p' / (1 - p')\bigr)\)

Scale bounds used:

Dataset Scale \(x_{\min}\) \(x_{\max}\)
longread_memory / longread_obvious dv_avg (avg of 1–15 items) 1 15
social_memory / social_obvious dv (0–7 integer) 0 7
Code
# Smithson & Verkuilen logit transformation for a bounded scale
logit_sv <- function(x, scale_min, scale_max) {
  n <- sum(!is.na(x))
  p  <- (x - scale_min) / (scale_max - scale_min)   # rescale to [0, 1]
  p2 <- (p * (n - 1) + 0.5) / n                     # S&V compression
  log(p2 / (1 - p2))                                 # logit
}

df_longread_memory_logit  <- df_longread_memory  |>
  mutate(dv_logit = logit_sv(dv_avg, 1, 15))

df_longread_obvious_logit <- df_longread_obvious |>
  mutate(dv_logit = logit_sv(dv_avg, 1, 15))

df_social_memory_logit    <- df_social_memory    |>
  mutate(dv_logit = logit_sv(dv, 0, 7))

df_social_obvious_logit   <- df_social_obvious   |>
  mutate(dv_logit = logit_sv(dv, 0, 7))

# Quick sanity check: raw vs logit SD before and after exclusions
check_sd <- function(df, raw_col, logit_col, cond_col, att_col = "att_correct") {
  raw_pre  <- sd(df[[raw_col]],   na.rm = TRUE)
  raw_post <- sd(df[[raw_col]][df[[att_col]] == 1],   na.rm = TRUE)
  log_pre  <- sd(df[[logit_col]], na.rm = TRUE)
  log_post <- sd(df[[logit_col]][df[[att_col]] == 1], na.rm = TRUE)
  data.frame(
    sd_raw_pre  = raw_pre,  sd_raw_post  = raw_post,
    sd_log_pre  = log_pre,  sd_log_post  = log_post,
    pct_change_raw   = 100 * (raw_post - raw_pre)  / raw_pre,
    pct_change_logit = 100 * (log_post - log_pre)  / log_pre
  )
}

sd_checks <- bind_rows(
  check_sd(df_longread_memory_logit,  "dv_avg", "dv_logit", "condition_opposing") |>
    mutate(dataset = "Longread × Memory"),
  check_sd(df_longread_obvious_logit, "dv_avg", "dv_logit", "condition") |>
    mutate(dataset = "Longread × Obvious"),
  check_sd(df_social_memory_logit,    "dv",     "dv_logit", "condition") |>
    mutate(dataset = "Social × Memory"),
  check_sd(df_social_obvious_logit,   "dv",     "dv_logit", "condition") |>
    mutate(dataset = "Social × Obvious")
) |> select(dataset, everything())

kable(sd_checks, digits = 4,
      caption = "Sanity check: pooled SD before and after exclusions, raw vs logit scale. A larger % change on the logit scale would indicate the transformation is 'freeing up' the tails.")
Sanity check: pooled SD before and after exclusions, raw vs logit scale. A larger % change on the logit scale would indicate the transformation is ‘freeing up’ the tails.
dataset sd_raw_pre sd_raw_post sd_log_pre sd_log_post pct_change_raw pct_change_logit
Longread × Memory 4.4239 4.8776 3.2688 3.7195 10.2553 13.7898
Longread × Obvious 4.3849 4.7923 3.2438 3.7226 9.2894 14.7590
Social × Memory 1.2639 1.2972 2.8233 2.8564 2.6338 1.1742
Social × Obvious 1.2865 1.3276 2.8194 2.8851 3.1983 2.3320

Decompositions on logit scale

Code
cat("=== Longread x Memory (logit DV) ===\n")
=== Longread x Memory (logit DV) ===
Code
decomp_longread_memory_logit <- bootstrap_decomp(
  df_longread_memory_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition_opposing",
  cond_vals = c(0, 1)
)
kable(decomp_longread_memory_logit, digits = 4,
      caption = "Longread × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs")
Longread × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 2.0716 1.7908 2.3552
mean_pre_g2 -0.1829 -0.5805 0.2222
mean_post_g1 2.3652 2.0174 2.7111
mean_post_g2 -2.0875 -2.6197 -1.5377
sd_pooled_pre 3.0706 2.8636 3.2667
sd_pooled_post 2.9991 2.7824 3.2035
deltaM_pre 2.2545 1.7604 2.7460
deltaM_post 4.4528 3.7907 5.0917
d_pre 0.7342 0.5821 0.8878
d_post 1.4847 1.2703 1.7085
delta_d_total 0.7505 0.5861 0.9280
delta_d_means_shapley 0.7245 0.5977 0.8615
delta_d_sd_shapley 0.0260 -0.0320 0.0906
pct_from_means 96.5325 89.6224 105.0317
pct_from_sd 3.4675 -5.0317 10.3776
abs_mean_minus_abs_sd 0.6984 0.5720 0.8132
Code
cat("=== Longread x Obvious (logit DV) ===\n")
=== Longread x Obvious (logit DV) ===
Code
decomp_longread_obvious_logit <- bootstrap_decomp(
  df_longread_obvious_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c("before", "after")
)
kable(decomp_longread_obvious_logit, digits = 4,
      caption = "Longread × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs")
Longread × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 0.9089 0.5453 1.2602
mean_pre_g2 0.9108 0.5201 1.2873
mean_post_g1 0.5871 0.0596 1.1073
mean_post_g2 0.5087 -0.0224 1.0365
sd_pooled_pre 3.2465 2.9938 3.4719
sd_pooled_post 3.7271 3.4332 3.9911
deltaM_pre -0.0018 -0.5222 0.5188
deltaM_post 0.0785 -0.6443 0.8243
d_pre -0.0006 -0.1597 0.1615
d_post 0.0211 -0.1726 0.2230
delta_d_total 0.0216 -0.0523 0.0998
delta_d_means_shapley 0.0231 -0.0647 0.1154
delta_d_sd_shapley -0.0015 -0.0257 0.0228
pct_from_means 107.0376 -202.5491 451.1258
pct_from_sd -7.0376 -351.1258 302.5491
abs_mean_minus_abs_sd 0.0216 -0.0089 0.0998
Code
cat("=== Social x Memory (logit DV) ===\n")
=== Social x Memory (logit DV) ===
Code
decomp_social_memory_logit <- bootstrap_decomp(
  df_social_memory_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c(1, 2)
)
kable(decomp_social_memory_logit, digits = 4,
      caption = "Social × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs")
Social × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 4.4580 4.0671 4.8135
mean_pre_g2 2.3753 2.0516 2.7153
mean_post_g1 5.0336 4.6327 5.4180
mean_post_g2 2.2619 1.8918 2.6469
sd_pooled_pre 2.5835 2.4614 2.6832
sd_pooled_post 2.5014 2.3392 2.6317
deltaM_pre 2.0828 1.5726 2.5631
deltaM_post 2.7717 2.2095 3.2960
d_pre 0.8062 0.5922 1.0333
d_post 1.1081 0.8447 1.3998
delta_d_total 0.3019 0.1696 0.4653
delta_d_means_shapley 0.2711 0.1617 0.3947
delta_d_sd_shapley 0.0308 0.0006 0.0802
pct_from_means 89.7838 81.0617 99.6981
pct_from_sd 10.2162 0.3019 18.9383
abs_mean_minus_abs_sd 0.2402 0.1499 0.3340
Code
cat("=== Social x Obvious (logit DV) ===\n")
=== Social x Obvious (logit DV) ===
Code
decomp_social_obvious_logit <- bootstrap_decomp(
  df_social_obvious_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c(1, 2)
)
kable(decomp_social_obvious_logit, digits = 4,
      caption = "Social × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs")
Social × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic estimate ci_lower ci_upper
mean_pre_g1 4.2792 3.8874 4.6593
mean_pre_g2 2.5152 2.1791 2.8537
mean_post_g1 4.7880 4.3591 5.2063
mean_post_g2 2.4237 2.0341 2.8238
sd_pooled_pre 2.6818 2.5660 2.7757
sd_pooled_post 2.6372 2.4854 2.7602
deltaM_pre 1.7640 1.2372 2.2631
deltaM_post 2.3643 1.7857 2.9211
d_pre 0.6578 0.4532 0.8701
d_post 0.8965 0.6562 1.1676
delta_d_total 0.2387 0.1110 0.3871
delta_d_means_shapley 0.2257 0.1122 0.3464
delta_d_sd_shapley 0.0130 -0.0068 0.0467
pct_from_means 94.5535 86.1882 104.8002
pct_from_sd 5.4465 -4.8002 13.8118
abs_mean_minus_abs_sd 0.2127 0.1072 0.3116

Visualization: Raw vs Logit Decomposition

Code
# Helper: extract key stats from a bootstrap_decomp result into one tidy row
extract_row <- function(decomp_result, case_label, scale_label) {
  d <- decomp_result
  row <- d[d$statistic %in% c("d_pre","d_post","delta_d_total",
                               "delta_d_means_shapley","delta_d_sd_shapley"), ]
  as.data.frame(t(setNames(row$estimate, row$statistic))) |>
    mutate(case = case_label, scale = scale_label)
}

logit_summary <- bind_rows(
  extract_row(decomp_longread_memory,       "Longread × Memory",  "Raw"),
  extract_row(decomp_longread_memory_logit, "Longread × Memory",  "Logit"),
  extract_row(decomp_longread_obvious,      "Longread × Obvious", "Raw"),
  extract_row(decomp_longread_obvious_logit,"Longread × Obvious", "Logit"),
  extract_row(decomp_social_memory,         "Social × Memory",    "Raw"),
  extract_row(decomp_social_memory_logit,   "Social × Memory",    "Logit"),
  extract_row(decomp_social_obvious,        "Social × Obvious",   "Raw"),
  extract_row(decomp_social_obvious_logit,  "Social × Obvious",   "Logit")
) |>
  mutate(scale = factor(scale, levels = c("Raw", "Logit")))

# Long form for stacked bars
logit_plot_df <- logit_summary |>
  pivot_longer(
    cols      = c(delta_d_means_shapley, delta_d_sd_shapley),
    names_to  = "component",
    values_to = "contribution"
  ) |>
  mutate(
    component = recode(component,
      "delta_d_means_shapley" = "Mean shift",
      "delta_d_sd_shapley"    = "SD change"
    )
  )

# Label anchoring (same logic as main plot)
logit_label_df <- logit_plot_df |>
  group_by(case, scale, delta_d_total) |>
  summarise(
    y_top = sum(contribution[contribution > 0], na.rm = TRUE),
    y_bot = sum(contribution[contribution < 0], na.rm = TRUE),
    .groups = "drop"
  ) |>
  mutate(
    y_label   = ifelse(delta_d_total >= 0, y_top, y_bot),
    vjust_val = ifelse(delta_d_total >= 0, -0.4, 1.2)
  )

ggplot(logit_plot_df,
       aes(x = scale, y = contribution, fill = component)) +
  geom_col(position = "stack", width = 0.6, color = "white", linewidth = 0.3) +
  geom_hline(yintercept = 0, linewidth = 0.4, color = "grey30") +
  geom_text(
    data = logit_label_df,
    aes(x = scale, y = y_label,
        label = sprintf("Δd=%.3f", delta_d_total),
        vjust = vjust_val),
    inherit.aes = FALSE,
    size = 2.7, fontface = "bold"
  ) +
  facet_wrap(~ case, nrow = 2, scales = "free_y") +
  scale_fill_manual(
    values = c("Mean shift" = "#2166ac", "SD change" = "#d6604d"),
    name   = "Contribution"
  ) +
  labs(
    title    = "Effect of Logit-Transforming the DV on Shapley Decomposition",
    subtitle = "Each bar pair shows raw vs logit-transformed DV for the same dataset.\nA taller red segment on the Logit bar = SD playing a larger role after unbounding the scale.",
    x        = NULL,
    y        = expression(Delta * italic(d) ~ "(Shapley contribution)")
  ) +
  theme_bw(base_size = 12) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold"),
    legend.position  = "bottom",
    panel.grid.minor = element_blank()
  )


Residual Variance Pre vs Post Exclusions

A complementary way to ask whether attention-check exclusions reduce noise is to regress the DV on the condition assignment and examine how much residual variance remains before and after exclusions. The residual variance is the variance left over after accounting for the experimental manipulation — it is a direct measure of within-condition noise. If ACs remove inattentive respondents who add random noise, residual variance should fall post-exclusion.

Method: For each dataset, fit lm(dv ~ factor(condition)) on the full sample (pre) and on the retained subsample (att_correct == 1, post). Residual variance is extracted as \(\hat{\sigma}^2\) (the mean squared error from summary(lm)$sigma^2). We then compute the absolute and percentage reduction.

Code
# ── Load the 24 additional AC-type files ─────────────────────────────────────
df_minread_imc         <- read.csv("minread_imc.csv")
df_minread_english     <- read.csv("minread_english.csv")
df_minread_selfreport  <- read.csv("minread_selfreport.csv")
df_minread_straightline<- read.csv("minread_straightline.csv")

df_transaction_imc          <- read.csv("transaction_imc.csv")
df_transaction_english      <- read.csv("transaction_english.csv")
df_transaction_selfreport   <- read.csv("transaction_selfreport.csv")
df_transaction_straightline <- read.csv("transaction_straightline.csv")

df_longread_imc          <- read.csv("longread_imc.csv")
df_longread_english      <- read.csv("longread_english.csv")
df_longread_selfreport   <- read.csv("longread_selfreport.csv")
df_longread_straightline <- read.csv("longread_straightline.csv")

df_metaphor_imc          <- read.csv("metaphor_imc_v2.csv")
df_metaphor_english      <- read.csv("metaphor_english_v2.csv")
df_metaphor_selfreport   <- read.csv("metaphor_selfreport_v2.csv")
df_metaphor_straightline <- read.csv("metaphor_straightline_v2.csv")

df_social_imc          <- read.csv("social_imc.csv")
df_social_english      <- read.csv("social_english.csv")
df_social_selfreport   <- read.csv("social_selfreport.csv")
df_social_straightline <- read.csv("social_straightline.csv")

df_numeric_imc          <- read.csv("numeric_imc.csv")
df_numeric_english      <- read.csv("numeric_english.csv")
df_numeric_selfreport   <- read.csv("numeric_selfreport.csv")
df_numeric_straightline <- read.csv("numeric_straightline.csv")

# ── Dataset config ─────────────────────────────────────────────────────────
# focal = TRUE  → Memory / Obvious (plotted in colour)
# focal = FALSE → the 4 background AC types (plotted in grey)

datasets_cfg <- list(
  # Minread
  list(label="Minread × Memory",            df=df_minread_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Minread × Obvious",           df=df_minread_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Minread × IMC",               df=df_minread_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × English",           df=df_minread_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × Self-report",       df=df_minread_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × Straightline",      df=df_minread_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Transaction
  list(label="Transaction × Memory",        df=df_transaction_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Transaction × Obvious",       df=df_transaction_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Transaction × IMC",           df=df_transaction_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × English",       df=df_transaction_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × Self-report",   df=df_transaction_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × Straightline",  df=df_transaction_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Longread
  list(label="Longread × Memory",           df=df_longread_memory,
       dv="dv_avg", cond="condition_opposing", focal=TRUE),
  list(label="Longread × Obvious",          df=df_longread_obvious,
       dv="dv_avg", cond="condition",          focal=TRUE),
  list(label="Longread × IMC",              df=df_longread_imc,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × English",          df=df_longread_english,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × Self-report",      df=df_longread_selfreport,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × Straightline",     df=df_longread_straightline,
       dv="dv_avg", cond="condition",          focal=FALSE),
  # Metaphor
  list(label="Metaphor × Memory",           df=df_metaphor_memory,
       dv="dv_friday", cond="condition",     focal=TRUE),
  list(label="Metaphor × Obvious",          df=df_metaphor_obvious,
       dv="dv_friday", cond="condition",     focal=TRUE),
  list(label="Metaphor × IMC",              df=df_metaphor_imc,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × English",          df=df_metaphor_english,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × Self-report",      df=df_metaphor_selfreport,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × Straightline",     df=df_metaphor_straightline,
       dv="dv_friday", cond="condition",     focal=FALSE),
  # Social
  list(label="Social × Memory",             df=df_social_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Social × Obvious",            df=df_social_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Social × IMC",                df=df_social_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × English",            df=df_social_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × Self-report",        df=df_social_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × Straightline",       df=df_social_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Numeric
  list(label="Numeric × Memory",            df=df_numeric_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Numeric × Obvious",           df=df_numeric_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Numeric × IMC",               df=df_numeric_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × English",           df=df_numeric_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × Self-report",       df=df_numeric_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × Straightline",      df=df_numeric_straightline,
       dv="dv", cond="condition",            focal=FALSE)
)

# ── Fit function ────────────────────────────────────────────────────────────
fit_resid_var <- function(cfg) {
  df       <- cfg$df
  dv_col   <- cfg$dv
  cond_col <- cfg$cond

  df_pre  <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
  df_post <- df_pre[df_pre$att_correct == 1, ]

  fmla <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))

  fit_pre  <- lm(fmla, data = df_pre)
  fit_post <- lm(fmla, data = df_post)

  sigma2_pre  <- summary(fit_pre)$sigma^2
  sigma2_post <- summary(fit_post)$sigma^2

  data.frame(
    case           = cfg$label,
    paradigm       = sub(" × .*", "", cfg$label),
    ac_type        = sub(".* × ", "", cfg$label),
    focal          = cfg$focal,
    n_pre          = nrow(df_pre),
    n_post         = nrow(df_post),
    pct_retained   = round(100 * nrow(df_post) / nrow(df_pre), 1),
    resid_var_pre  = sigma2_pre,
    resid_var_post = sigma2_post,
    resid_sd_pre   = sqrt(sigma2_pre),
    resid_sd_post  = sqrt(sigma2_post),
    abs_reduction  = sigma2_pre - sigma2_post,
    pct_reduction  = 100 * (sigma2_pre - sigma2_post) / sigma2_pre
  )
}

resid_var_df     <- lapply(datasets_cfg, fit_resid_var) |> bind_rows()
resid_var_focal  <- resid_var_df |> filter(focal)
resid_var_bg     <- resid_var_df |> filter(!focal)

Summary Table

Code
# All 36 cases, sorted by paradigm then AC type
kable(
  resid_var_df |>
    arrange(paradigm, ac_type) |>
    select(case, n_pre, n_post, pct_retained,
           resid_var_pre, resid_var_post,
           abs_reduction, pct_reduction),
  digits  = c(0, 0, 0, 1, 4, 4, 4, 2),
  caption = "Residual variance (σ²) from lm(DV ~ condition) before and after attention-check exclusions, all 6 AC types × 6 paradigms. A positive % reduction indicates that exclusions removed noise.",
  col.names = c("Case", "N (pre)", "N (post)", "% retained",
                "σ² pre", "σ² post",
                "Δσ² (abs)", "Δσ² (%)")
)
Residual variance (σ²) from lm(DV ~ condition) before and after attention-check exclusions, all 6 AC types × 6 paradigms. A positive % reduction indicates that exclusions removed noise.
Case N (pre) N (post) % retained σ² pre σ² post Δσ² (abs) Δσ² (%)
Longread × English 582 408 70.1 18.2423 18.9348 -0.6925 -3.80
Longread × IMC 609 538 88.3 18.1459 19.0699 -0.9240 -5.09
Longread × Memory 614 377 61.4 16.9449 13.1693 3.7756 22.28
Longread × Obvious 597 397 66.5 19.2182 22.9640 -3.7458 -19.49
Longread × Self-report 599 593 99.0 19.4092 19.3443 0.0649 0.33
Longread × Straightline 595 537 90.3 19.7833 20.4448 -0.6615 -3.34
Metaphor × English 399 303 75.9 0.2424 0.2376 0.0048 1.98
Metaphor × IMC 406 355 87.4 0.2508 0.2500 0.0007 0.29
Metaphor × Memory 409 269 65.8 0.2496 0.2448 0.0048 1.92
Metaphor × Obvious 412 299 72.6 0.2507 0.2463 0.0044 1.74
Metaphor × Self-report 407 401 98.5 0.2456 0.2449 0.0007 0.27
Metaphor × Straightline 406 374 92.1 0.2502 0.2496 0.0006 0.24
Minread × English 407 300 73.7 0.1943 0.1953 -0.0009 -0.48
Minread × IMC 417 378 90.6 0.1978 0.2021 -0.0043 -2.20
Minread × Memory 405 152 37.5 0.1943 0.2096 -0.0153 -7.86
Minread × Obvious 420 332 79.0 0.1877 0.2006 -0.0129 -6.86
Minread × Self-report 408 406 99.5 0.2082 0.2087 -0.0005 -0.26
Minread × Straightline 405 373 92.1 0.1692 0.1708 -0.0016 -0.94
Numeric × English 393 307 78.1 1.3838 1.5281 -0.1443 -10.43
Numeric × IMC 402 361 89.8 1.4909 1.5428 -0.0519 -3.48
Numeric × Memory 414 347 83.8 9.2822 10.8033 -1.5211 -16.39
Numeric × Obvious 406 325 80.0 2.6372 2.9219 -0.2847 -10.80
Numeric × Self-report 405 396 97.8 3.7992 3.8668 -0.0676 -1.78
Numeric × Straightline 407 388 95.3 2.8046 2.7681 0.0365 1.30
Social × English 411 295 71.8 1.3756 1.4215 -0.0459 -3.33
Social × IMC 422 373 88.4 1.2809 1.3376 -0.0567 -4.43
Social × Memory 411 320 77.9 1.3283 1.3285 -0.0002 -0.01
Social × Obvious 420 323 76.9 1.5067 1.5110 -0.0042 -0.28
Social × Self-report 401 392 97.8 1.3443 1.3499 -0.0056 -0.42
Social × Straightline 414 396 95.7 1.4798 1.5066 -0.0267 -1.81
Transaction × English 409 308 75.3 335.6572 299.4908 36.1663 10.77
Transaction × IMC 409 366 89.5 349.7239 307.3752 42.3487 12.11
Transaction × Memory 401 265 66.1 372.7749 294.0758 78.6991 21.11
Transaction × Obvious 410 290 70.7 399.1257 300.6138 98.5120 24.68
Transaction × Self-report 410 403 98.3 397.8065 396.5318 1.2747 0.32
Transaction × Straightline 419 381 90.9 350.8063 332.3446 18.4616 5.26

Visualization 1: Paired Dot Plot (pre vs post σ²)

Each line connects the pre- and post-exclusion residual variance for the same dataset. A downward-sloping line means exclusions reduced noise.

Code
# Colour palette: Memory + Obvious in colour; 4 background ACs in
# descending shades of grey (darker = more "serious" check)
ac_colors <- c(
  "Memory"      = "#1f78b4",   # blue
  "Obvious"     = "#9e3fa8",   # purple
  "IMC"         = "#252525",   # near-black grey
  "English"     = "#636363",   # dark grey
  "Self-report" = "#969696",   # mid grey
  "Straightline"= "#bdbdbd"    # light grey
)

# Line thickness: focal lines heavier so they read over the grey
ac_linewidth <- c(
  "Memory"      = 1.2,
  "Obvious"     = 1.2,
  "IMC"         = 0.65,
  "English"     = 0.65,
  "Self-report" = 0.65,
  "Straightline"= 0.65
)

ac_pointsize <- c(
  "Memory"      = 3.0,
  "Obvious"     = 3.0,
  "IMC"         = 2.0,
  "English"     = 2.0,
  "Self-report" = 2.0,
  "Straightline"= 2.0
)

# Pivot all 36 cases to long form
resid_long_all <- resid_var_df |>
  select(case, paradigm, ac_type, focal, resid_var_pre, resid_var_post) |>
  pivot_longer(
    cols      = c(resid_var_pre, resid_var_post),
    names_to  = "sample",
    values_to = "resid_var"
  ) |>
  mutate(
    sample  = recode(sample,
      "resid_var_pre"  = "Pre-exclusion",
      "resid_var_post" = "Post-exclusion"
    ),
    sample  = factor(sample, levels = c("Pre-exclusion", "Post-exclusion")),
    ac_type = factor(ac_type, levels = names(ac_colors))
  )

# ── Facet label mapping: title + citation on second line ──────────────────
# Uses plain \n so no extra packages are needed.
# If ggtext is available (requires litedown: install.packages("litedown")),
# swap element_text() for element_markdown() below and use the HTML version.
paradigm_labels <- c(
  "Minread"     = "Scale Response\n(Schwarz et al. 1985)",
  "Transaction" = "Transaction Utility\n(Thaler 1999)",
  "Longread"    = "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
  "Metaphor"    = "Metaphoric Structuring\n(Boroditsky 2000)",
  "Social"      = "Less-is-Better\n(Hsee 1998)",
  "Numeric"     = "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)"
)

resid_long_all <- resid_long_all |>
  mutate(
    paradigm = recode(paradigm, !!!paradigm_labels),
    paradigm = factor(paradigm, levels = c(
      "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
      "Less-is-Better\n(Hsee 1998)",
      "Metaphoric Structuring\n(Boroditsky 2000)",
      "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
      "Transaction Utility\n(Thaler 1999)",
      "Scale Response\n(Schwarz et al. 1985)"
    ))
  )

# Draw background (grey) ACs first, then focal (coloured) ACs on top
resid_long_bg    <- resid_long_all |> filter(!focal)
resid_long_focal <- resid_long_all |> filter(focal)

ggplot() +
  # ── Grey background ACs ────────────────────────────────────────────────
  geom_line(
    data = resid_long_bg,
    aes(x = sample, y = resid_var, color = ac_type,
        linewidth = ac_type, group = case)
  ) +
  geom_point(
    data = resid_long_bg,
    aes(x = sample, y = resid_var, color = ac_type, group = case,
        size = ac_type)
  ) +
  # ── Coloured focal ACs ─────────────────────────────────────────────────
  geom_line(
    data = resid_long_focal,
    aes(x = sample, y = resid_var, color = ac_type,
        linewidth = ac_type, group = case)
  ) +
  geom_point(
    data = resid_long_focal,
    aes(x = sample, y = resid_var, color = ac_type,
        size = ac_type, group = case)
  ) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_y") +
  scale_color_manual(values = ac_colors, name = "AC type") +
  scale_linewidth_manual(values = ac_linewidth, guide = "none") +
  scale_size_manual(values = ac_pointsize, guide = "none") +
  # Force 0 into each panel's y range without clipping data.
  expand_limits(y = 0) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.08))) +
  scale_x_discrete(expand = expansion(add = c(0.3, 0.3))) +
  labs(
    title    = "Residual Variance Before and After Attention-Check Exclusions",
    subtitle = "Y-axis starts at 0 so slope steepness reflects true proportional change.\nMemory & Obvious shown in colour; IMC, English, Self-report, Straightline in grey.",
    x        = NULL,
    y        = expression(Residual ~ variance ~ (hat(sigma)^2))
  ) +
  theme_bw(base_size = 12) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold", size = 9, lineheight = 1.2),
    legend.position  = "bottom",
    panel.grid.minor = element_blank()
  ) +
  guides(color = guide_legend(nrow = 1, override.aes = list(linewidth = 1.2)))

Visualization 2: % Change in Residual Variance

Horizontal bar chart with one column per paradigm. Bars show % change in variance (negative = variance fell post-exclusion = noise reduced = green; positive = variance rose = orange). AC types run along the y-axis within each panel.

Code
# Apply the same paradigm labels and factor order as Visualization 1
viz2_df <- resid_var_df |>
  mutate(
    # Flip sign: negative now means variance decreased (the "good" direction)
    pct_change = -pct_reduction,
    paradigm   = recode(paradigm, !!!paradigm_labels),
    paradigm   = factor(paradigm, levels = c(
      "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
      "Less-is-Better\n(Hsee 1998)",
      "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
      "Transaction Utility\n(Thaler 1999)",
      "Metaphoric Structuring\n(Boroditsky 2000)",
      "Scale Response\n(Schwarz et al. 1985)"
    )),
    ac_type = factor(ac_type, levels = rev(c(
      "Memory", "Obvious", "IMC", "English", "Self-report", "Straightline"
    )))
  )

ggplot(viz2_df,
       aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
  geom_col(width = 0.65, color = "white", linewidth = 0.25) +
  geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
  geom_text(
    aes(
      label = paste0(round(pct_change, 1), "%"),
      hjust = ifelse(pct_change < 0, 1.08, -0.08)
    ),
    size = 2.5, fontface = "bold"
  ) +
  facet_wrap(~ paradigm, ncol = 6) +
  scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
  scale_fill_manual(
    values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
    labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
    name   = NULL
  ) +
  labs(
    title    = "% Change in Residual Variance After Attention-Check Exclusions",
    subtitle = "Negative = variance fell post-exclusion (noise reduced); positive = variance rose.\nResidual variance from lm(DV ~ condition).",
    x        = "% change in σ²",
    y        = NULL
  ) +
  theme_bw(base_size = 11) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold", size = 8, lineheight = 1.2),
    legend.position  = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )

Visualization 3: Transaction Utility — Robustness to DV Transformation

The same % change in residual variance is shown for the Transaction Utility paradigm under three versions of the DV:

  • Raw — original DV as analysed above
  • Z-scored\((x - \bar{x}) / s\). Because z-scoring is a linear transformation, the % change in σ² is mathematically identical to raw (the constant scaling cancels in the ratio). It appears here as a transparency check.
  • Rank-INT — rank-based inverse-normal transformation (Blom, 1958): \(\Phi^{-1}\!\bigl((r_i - 0.5) / n\bigr)\). This is nonlinear: it stretches the tails and compresses the centre, so it can genuinely shift how much variance moves when inattentive responders are removed. If the pattern survives here, the conclusion is stronger.

If all three columns show the same pattern, attention-check performance for Transaction Utility is robust to scale assumptions.

Code
# ── Helper: rank inverse-normal (Blom) transformation ──────────────────────
rank_int <- function(x) {
  n <- sum(!is.na(x))
  qnorm((rank(x, ties.method = "average", na.last = "keep") - 0.5) / n)
}

# ── Add transformed DVs to every transaction dataframe ─────────────────────
trans_dfs <- list(
  Memory      = df_transaction_memory,
  Obvious     = df_transaction_obvious,
  IMC         = df_transaction_imc,
  English     = df_transaction_english,
  `Self-report`  = df_transaction_selfreport,
  Straightline   = df_transaction_straightline
)

trans_dfs <- lapply(trans_dfs, function(df) {
  df$dv_z   <- as.numeric(scale(df$dv))
  df$dv_int <- rank_int(df$dv)
  df
})

# ── Compute % change in residual variance for one DV column ────────────────
pct_change_resid <- function(df, dv_col, cond_col = "condition") {
  df_pre  <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
  df_post <- df_pre[df_pre$att_correct == 1, ]
  fmla    <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))
  s2_pre  <- summary(lm(fmla, data = df_pre))$sigma^2
  s2_post <- summary(lm(fmla, data = df_post))$sigma^2
  -100 * (s2_pre - s2_post) / s2_pre   # negative = variance decreased
}

# ── Build results across all AC types × 3 transformations ──────────────────
transform_results <- lapply(names(trans_dfs), function(ac) {
  df <- trans_dfs[[ac]]
  data.frame(
    ac_type    = ac,
    focal      = ac %in% c("Memory", "Obvious"),
    Raw        = pct_change_resid(df, "dv"),
    `Z-scored` = pct_change_resid(df, "dv_z"),
    `Rank-INT` = pct_change_resid(df, "dv_int"),
    check.names = FALSE
  )
}) |>
  bind_rows() |>
  pivot_longer(
    cols      = c("Raw", "Z-scored", "Rank-INT"),
    names_to  = "transformation",
    values_to = "pct_change"
  ) |>
  mutate(
    transformation = factor(transformation,
                            levels = c("Raw", "Z-scored", "Rank-INT")),
    ac_type = factor(ac_type,
                     levels = rev(c("Memory","Obvious","IMC",
                                    "English","Self-report","Straightline")))
  )

ggplot(transform_results,
       aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
  geom_col(width = 0.65, color = "white", linewidth = 0.25) +
  geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
  geom_text(
    aes(
      label = paste0(round(pct_change, 1), "%"),
      hjust = ifelse(pct_change < 0, 1.08, -0.08)
    ),
    size = 2.8, fontface = "bold"
  ) +
  facet_wrap(~ transformation, ncol = 3) +
  scale_fill_manual(
    values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
    labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
    name   = NULL
  ) +
  scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
  labs(
    title    = "Transaction Utility: % Change in Residual Variance by DV Transformation",
    subtitle = "Z-scored is identical to Raw by construction (linear transformation).\nDivergence in Rank-INT indicates the result depends on tail behaviour.",
    x        = "% change in σ²  (negative = variance fell)",
    y        = NULL
  ) +
  theme_bw(base_size = 11) +
  theme(
    strip.background  = element_rect(fill = "grey92"),
    strip.text        = element_text(face = "bold", size = 10),
    legend.position   = "bottom",
    panel.grid.minor  = element_blank(),
    panel.grid.major.y = element_blank()
  )

Visualization 4: % Retained vs % Noise Reduction (Scatter)

Does removing more people lead to more noise reduction? This scatter plots the percentage of respondents excluded (x-axis) against the percentage reduction in residual variance (y-axis), one point per paradigm × AC combination.

Code
ggplot(resid_var_df,
       aes(x = 100 - pct_retained, y = pct_reduction,
           color = paradigm, shape = ac_type, label = case)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey60") +
  geom_smooth(
    aes(group = 1), method = "lm", se = TRUE,
    color = "grey40", linewidth = 0.8, linetype = "solid", alpha = 0.15
  ) +
  geom_point(size = 4) +
  ggrepel::geom_text_repel(size = 2.6, max.overlaps = 20,
                            show.legend = FALSE) +
  scale_color_brewer(palette = "Dark2", name = "Paradigm") +
  scale_shape_manual(values = c("Memory" = 16, "Obvious" = 17),
                     name = "AC type") +
  labs(
    title    = "Exclusion Rate vs Noise Reduction",
    subtitle = "Each point = one paradigm × AC combination",
    x        = "% excluded (100 − % retained)",
    y        = "% reduction in residual σ²"
  ) +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank()
  )