Effect Change Decompositions

Code

library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)

Data Loading

Each dataset is loaded below. Variable and condition information per file:

Dataset	DV column	Condition column	Condition values
minread_memory	`dv`	`condition`	1, 2
minread_obvious	`dv`	`condition`	1, 2
transaction_memory	`dv`	`condition`	1, 2
transaction_obvious	`dv`	`condition`	1, 2
longread_memory	`dv_avg`	`condition_opposing`	0, 1
longread_obvious	`dv_avg`	`condition`	“before”, “after”
metaphor_memory	`dv_friday`	`condition`	1, 2
metaphor_obvious	`dv_friday`	`condition`	1, 2
social_memory	`dv`	`condition`	1, 2
social_obvious	`dv`	`condition`	1, 2
numeric_memory	`dv`	`condition`	0, 1
numeric_obvious	`dv`	`condition`	0, 1

Code

df_minread_memory     <- read.csv("minread_memory.csv")
df_minread_obvious    <- read.csv("minread_obvious.csv")
df_transaction_memory <- read.csv("transaction_memory.csv")
df_transaction_obvious<- read.csv("transaction_obvious.csv")
df_longread_memory    <- read.csv("longread_memory.csv")
df_longread_obvious   <- read.csv("longread_obvious.csv")
df_metaphor_memory    <- read.csv("metaphor_memory_v2.csv")
df_metaphor_obvious   <- read.csv("metaphor_obvious_v2.csv")
df_social_memory      <- read.csv("social_memory.csv")
df_social_obvious     <- read.csv("social_obvious.csv")
df_numeric_memory     <- read.csv("numeric_memory.csv")
df_numeric_obvious    <- read.csv("numeric_obvious.csv")

cat("Rows loaded:\n")

Rows loaded:

Code

cat("  minread_memory:     ", nrow(df_minread_memory), "\n")

  minread_memory:      405

Code

cat("  minread_obvious:    ", nrow(df_minread_obvious), "\n")

  minread_obvious:     420

Code

cat("  transaction_memory: ", nrow(df_transaction_memory), "\n")

  transaction_memory:  401

Code

cat("  transaction_obvious:", nrow(df_transaction_obvious), "\n")

  transaction_obvious: 410

Code

cat("  longread_memory:    ", nrow(df_longread_memory), "\n")

  longread_memory:     614

Code

cat("  longread_obvious:   ", nrow(df_longread_obvious), "\n")

  longread_obvious:    597

Code

cat("  metaphor_memory:    ", nrow(df_metaphor_memory), "\n")

  metaphor_memory:     409

Code

cat("  metaphor_obvious:   ", nrow(df_metaphor_obvious), "\n")

  metaphor_obvious:    412

Code

cat("  social_memory:      ", nrow(df_social_memory), "\n")

  social_memory:       412

Code

cat("  social_obvious:     ", nrow(df_social_obvious), "\n")

  social_obvious:      420

Code

cat("  numeric_memory:     ", nrow(df_numeric_memory), "\n")

  numeric_memory:      414

Code

cat("  numeric_obvious:    ", nrow(df_numeric_obvious), "\n")

  numeric_obvious:     406

Core Functions

Code

# Pooled SD (unpaired two-sample)
pooled_sd <- function(x1, x2) {
  x1 <- x1[!is.na(x1)]
  x2 <- x2[!is.na(x2)]
  n1 <- length(x1); n2 <- length(x2)
  sqrt(((n1 - 1) * sd(x1)^2 + (n2 - 1) * sd(x2)^2) / (n1 + n2 - 2))
}

# Compute means by condition, pre and post exclusions, and return a tidy table
compute_means <- function(data, dv_col, cond_col, cond_vals) {
  v1 <- cond_vals[1]; v2 <- cond_vals[2]

  get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]

  pre_g1  <- get_group(data, v1)
  pre_g2  <- get_group(data, v2)
  post_g1 <- get_group(data[data$att_correct == 1, ], v1)
  post_g2 <- get_group(data[data$att_correct == 1, ], v2)

  data.frame(
    sample    = c("pre (full)", "pre (full)", "post (retained)", "post (retained)"),
    condition = c(as.character(v1), as.character(v2),
                  as.character(v1), as.character(v2)),
    n         = c(sum(!is.na(pre_g1)),  sum(!is.na(pre_g2)),
                  sum(!is.na(post_g1)), sum(!is.na(post_g2))),
    mean_dv   = c(mean(pre_g1,  na.rm = TRUE), mean(pre_g2,  na.rm = TRUE),
                  mean(post_g1, na.rm = TRUE), mean(post_g2, na.rm = TRUE)),
    sd_dv     = c(sd(pre_g1,  na.rm = TRUE),   sd(pre_g2,  na.rm = TRUE),
                  sd(post_g1, na.rm = TRUE),    sd(post_g2, na.rm = TRUE))
  )
}

# Shapley decomposition of change in Cohen's d
compute_decomp <- function(data, dv_col = "dv", cond_col = "condition",
                           cond_vals = c(1, 2)) {
  v1 <- cond_vals[1]; v2 <- cond_vals[2]

  get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]

  pre_g1  <- get_group(data, v1)
  pre_g2  <- get_group(data, v2)
  post_g1 <- get_group(data[data$att_correct == 1, ], v1)
  post_g2 <- get_group(data[data$att_correct == 1, ], v2)

  mean_pre_1  <- mean(pre_g1,  na.rm = TRUE)
  mean_pre_2  <- mean(pre_g2,  na.rm = TRUE)
  mean_post_1 <- mean(post_g1, na.rm = TRUE)
  mean_post_2 <- mean(post_g2, na.rm = TRUE)

  sd_pooled_pre  <- pooled_sd(pre_g1,  pre_g2)
  sd_pooled_post <- pooled_sd(post_g1, post_g2)

  deltaM_pre  <- mean_pre_1  - mean_pre_2
  deltaM_post <- mean_post_1 - mean_post_2

  d_pre  <- deltaM_pre  / sd_pooled_pre
  d_post <- deltaM_post / sd_pooled_post
  delta_d_total <- d_post - d_pre

  # Path A: means change first, then SD
  delta_d_means_A <- (deltaM_post / sd_pooled_pre)  - (deltaM_pre  / sd_pooled_pre)
  delta_d_sd_A    <- (deltaM_post / sd_pooled_post) - (deltaM_post / sd_pooled_pre)

  # Path B: SD changes first, then means
  delta_d_sd_B    <- (deltaM_pre  / sd_pooled_post) - (deltaM_pre  / sd_pooled_pre)
  delta_d_means_B <- (deltaM_post / sd_pooled_post) - (deltaM_pre  / sd_pooled_post)

  # Shapley averages
  delta_d_means_shapley <- 0.5 * (delta_d_means_A + delta_d_means_B)
  delta_d_sd_shapley    <- 0.5 * (delta_d_sd_A    + delta_d_sd_B)

  pct_from_means <- if (abs(delta_d_total) < 1e-10) NA_real_ else
    100 * delta_d_means_shapley / delta_d_total
  pct_from_sd    <- if (abs(delta_d_total) < 1e-10) NA_real_ else
    100 * delta_d_sd_shapley / delta_d_total

  c(
    mean_pre_g1           = mean_pre_1,
    mean_pre_g2           = mean_pre_2,
    mean_post_g1          = mean_post_1,
    mean_post_g2          = mean_post_2,
    sd_pooled_pre         = sd_pooled_pre,
    sd_pooled_post        = sd_pooled_post,
    deltaM_pre            = deltaM_pre,
    deltaM_post           = deltaM_post,
    d_pre                 = d_pre,
    d_post                = d_post,
    delta_d_total         = delta_d_total,
    delta_d_means_shapley = delta_d_means_shapley,
    delta_d_sd_shapley    = delta_d_sd_shapley,
    pct_from_means        = pct_from_means,
    pct_from_sd           = pct_from_sd,
    abs_mean_minus_abs_sd = abs(delta_d_means_shapley) - abs(delta_d_sd_shapley)
  )
}

# Bootstrap wrapper
bootstrap_decomp <- function(data, dv_col, cond_col, cond_vals, B = 5000, seed = 123) {
  set.seed(seed)
  main_est <- compute_decomp(data, dv_col, cond_col, cond_vals)

  boot_mat <- replicate(B, {
    boot_dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ]
    compute_decomp(boot_dat, dv_col, cond_col, cond_vals)
  })

  boot_df <- as.data.frame(t(boot_mat))

  data.frame(
    statistic = names(main_est),
    estimate  = main_est,
    ci_lower  = apply(boot_df, 2, quantile, probs = 0.025, na.rm = TRUE),
    ci_upper  = apply(boot_df, 2, quantile, probs = 0.975, na.rm = TRUE),
    row.names = NULL
  )
}

A note on percentages outside [0%, 100%]

The Shapley decomposition guarantees that delta_d_means_shapley + delta_d_sd_shapley = delta_d_total exactly, so pct_from_means + pct_from_sd = 100% always. However, the individual percentages can fall outside [0%, 100%] whenever the mean-shift and SD-change contributions have opposite signs — i.e., one mechanism amplifies Δd while the other dampens it.

Concrete example — Longread × Obvious: The total Δd ≈ +0.009. The mean-shift contribution alone would produce Δd ≈ +0.018 (means became more different post-exclusion), but the SD increase post-exclusion partially offsets this, contributing Δd ≈ −0.009. The two partially cancel to produce the observed net Δd. This yields pct_from_means ≈ +199% and pct_from_sd ≈ −99%. This is the mathematically correct decomposition: the two forces are working against each other, and neither alone would correctly summarise the change.

In these cases, the most interpretable quantities are the signed Shapley contributions (delta_d_means_shapley and delta_d_sd_shapley) rather than the percentages.

Analyses by Dataset

1. Minread × Memory

Code

means_minread_memory <- compute_means(
  df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_memory, digits = 4, caption = "Minread × Memory: Means by condition, pre and post exclusions")

Minread × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	223	0.6413	0.4807
pre (full)	2	182	0.1813	0.3863
post (retained)	1	71	0.6620	0.4764
post (retained)	2	81	0.2593	0.4410

Code

decomp_minread_memory <- bootstrap_decomp(
  df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_memory, digits = 4,
      caption = "Minread × Memory: Shapley decomposition with 95% bootstrap CIs")

Minread × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	0.6413	0.5794	0.7054
mean_pre_g2	0.1813	0.1271	0.2373
mean_post_g1	0.6620	0.5500	0.7693
mean_post_g2	0.2593	0.1667	0.3580
sd_pooled_pre	0.4408	0.4154	0.4600
sd_pooled_post	0.4578	0.4172	0.4837
deltaM_pre	0.4599	0.3763	0.5448
deltaM_post	0.4027	0.2528	0.5446
d_pre	1.0433	0.8219	1.3081
d_post	0.8796	0.5260	1.3008
delta_d_total	-0.1637	-0.4420	0.1475
delta_d_means_shapley	-0.1274	-0.3775	0.1171
delta_d_sd_shapley	-0.0363	-0.0774	0.0308
pct_from_means	77.8031	30.7086	119.9245
pct_from_sd	22.1969	-19.9245	69.2914
abs_mean_minus_abs_sd	0.0911	-0.0047	0.3151

2. Minread × Obvious

Code

means_minread_obvious <- compute_means(
  df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_obvious, digits = 4, caption = "Minread × Obvious: Means by condition, pre and post exclusions")

Minread × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	193	0.5855	0.4939
pre (full)	2	227	0.1674	0.3742
post (retained)	1	155	0.5097	0.5015
post (retained)	2	177	0.1921	0.3951

Code

decomp_minread_obvious <- bootstrap_decomp(
  df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_obvious, digits = 4,
      caption = "Minread × Obvious: Shapley decomposition with 95% bootstrap CIs")

Minread × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	0.5855	0.5153	0.6543
mean_pre_g2	0.1674	0.1198	0.2181
mean_post_g1	0.5097	0.4303	0.5886
mean_post_g2	0.1921	0.1358	0.2514
sd_pooled_pre	0.4333	0.4086	0.4535
sd_pooled_post	0.4479	0.4225	0.4671
deltaM_pre	0.4181	0.3330	0.5029
deltaM_post	0.3176	0.2189	0.4164
d_pre	0.9649	0.7394	1.2219
d_post	0.7090	0.4757	0.9654
delta_d_total	-0.2559	-0.3530	-0.1597
delta_d_means_shapley	-0.2282	-0.3193	-0.1413
delta_d_sd_shapley	-0.0277	-0.0538	-0.0066
pct_from_means	89.1787	79.9773	97.1815
pct_from_sd	10.8213	2.8185	20.0227
abs_mean_minus_abs_sd	0.2005	0.1172	0.2925

3. Transaction × Memory

Code

means_transaction_memory <- compute_means(
  df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_memory, digits = 4, caption = "Transaction × Memory: Means by condition, pre and post exclusions")

Transaction × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	200	19.9671	18.9687
pre (full)	2	201	19.6474	19.6386
post (retained)	1	131	14.9828	16.5126
post (retained)	2	134	14.2546	17.7483

Code

decomp_transaction_memory <- bootstrap_decomp(
  df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_memory, digits = 4,
      caption = "Transaction × Memory: Shapley decomposition with 95% bootstrap CIs")

Transaction × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	19.9671	17.3748	22.6443
mean_pre_g2	19.6474	17.0298	22.4260
mean_post_g1	14.9828	12.1665	17.8683
mean_post_g2	14.2546	11.3876	17.3991
sd_pooled_pre	19.3074	18.3667	20.0541
sd_pooled_post	17.1486	15.4296	18.5414
deltaM_pre	0.3197	-3.4746	4.1064
deltaM_post	0.7282	-3.3924	4.8904
d_pre	0.0166	-0.1801	0.2146
d_post	0.0425	-0.1989	0.2868
delta_d_total	0.0259	-0.1389	0.2040
delta_d_means_shapley	0.0225	-0.1369	0.1945
delta_d_sd_shapley	0.0034	-0.0212	0.0323
pct_from_means	86.8135	-79.6101	252.9230
pct_from_sd	13.1865	-152.9230	179.6101
abs_mean_minus_abs_sd	0.0191	-0.0115	0.1885

4. Transaction × Obvious

Code

means_transaction_obvious <- compute_means(
  df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_obvious, digits = 4, caption = "Transaction × Obvious: Means by condition, pre and post exclusions")

Transaction × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	217	21.6877	20.1795
pre (full)	2	193	18.9179	19.7491
post (retained)	1	154	16.2938	17.8786
post (retained)	2	136	12.5329	16.7046

Code

decomp_transaction_obvious <- bootstrap_decomp(
  df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_obvious, digits = 4,
      caption = "Transaction × Obvious: Shapley decomposition with 95% bootstrap CIs")

Transaction × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	21.6877	19.0777	24.3312
mean_pre_g2	18.9179	16.2530	21.7719
mean_post_g1	16.2938	13.5347	19.1846
mean_post_g2	12.5329	9.9305	15.4650
sd_pooled_pre	19.9781	19.1196	20.6520
sd_pooled_post	17.3382	15.7319	18.6849
deltaM_pre	2.7699	-1.0943	6.5195
deltaM_post	3.7608	-0.3038	7.6647
d_pre	0.1386	-0.0550	0.3296
d_post	0.2169	-0.0182	0.4526
delta_d_total	0.0783	-0.0803	0.2474
delta_d_means_shapley	0.0534	-0.1007	0.2137
delta_d_sd_shapley	0.0249	-0.0030	0.0612
pct_from_means	68.2024	-205.1873	358.7921
pct_from_sd	31.7976	-258.7921	305.1873
abs_mean_minus_abs_sd	0.0285	-0.0330	0.1860

5. Longread × Memory

DV: dv_avg; condition column: condition_opposing (values 0, 1)

Code

means_longread_memory <- compute_means(
  df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(means_longread_memory, digits = 4, caption = "Longread × Memory: Means by condition, pre and post exclusions")

Longread × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	0	315	11.8048	3.0313
pre (full)	1	299	8.5485	5.0117
post (retained)	0	216	12.0347	2.8501
post (retained)	1	161	5.4441	4.4665

Code

decomp_longread_memory <- bootstrap_decomp(
  df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(decomp_longread_memory, digits = 4,
      caption = "Longread × Memory: Shapley decomposition with 95% bootstrap CIs")

Longread × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	11.8048	11.4670	12.1262
mean_pre_g2	8.5485	7.9821	9.1197
mean_post_g1	12.0347	11.6500	12.4000
mean_post_g2	5.4441	4.7704	6.1587
sd_pooled_pre	4.1164	3.9207	4.2986
sd_pooled_post	3.6290	3.3539	3.8703
deltaM_pre	3.2563	2.5800	3.9043
deltaM_post	6.5906	5.7788	7.3486
d_pre	0.7910	0.6284	0.9590
d_post	1.8161	1.5249	2.1555
delta_d_total	1.0251	0.8018	1.3016
delta_d_means_shapley	0.8644	0.7064	1.0394
delta_d_sd_shapley	0.1607	0.0731	0.2872
pct_from_means	84.3268	77.0908	91.3514
pct_from_sd	15.6732	8.6486	22.9092
abs_mean_minus_abs_sd	0.7038	0.5815	0.8292

6. Longread × Obvious

DV: dv_avg; condition column: condition (values “before”, “after”)

Code

means_longread_obvious <- compute_means(
  df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(means_longread_obvious, digits = 4, caption = "Longread × Obvious: Means by condition, pre and post exclusions")

Longread × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	before	297	10.3502	4.3118
pre (full)	after	300	9.9417	4.4540
post (retained)	before	194	9.4716	4.7924
post (retained)	after	203	8.9828	4.7918

Code

decomp_longread_obvious <- bootstrap_decomp(
  df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(decomp_longread_obvious, digits = 4,
      caption = "Longread × Obvious: Shapley decomposition with 95% bootstrap CIs")

Longread × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	10.3502	9.8482	10.8389
mean_pre_g2	9.9417	9.4228	10.4365
mean_post_g1	9.4716	8.7969	10.1529
mean_post_g2	8.9828	8.3131	9.6533
sd_pooled_pre	4.3839	4.1522	4.5897
sd_pooled_post	4.7921	4.5554	4.9915
deltaM_pre	0.4085	-0.3049	1.1033
deltaM_post	0.4889	-0.4295	1.4478
d_pre	0.0932	-0.0701	0.2531
d_post	0.1020	-0.0895	0.3064
delta_d_total	0.0088	-0.0842	0.1049
delta_d_means_shapley	0.0176	-0.0846	0.1210
delta_d_sd_shapley	-0.0087	-0.0247	0.0067
pct_from_means	198.6613	-164.6657	434.3307
pct_from_sd	-98.6613	-334.3307	264.6657
abs_mean_minus_abs_sd	0.0088	-0.0095	0.1089

7. Metaphor × Memory

Source: metaphor_memory_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)

Code

means_metaphor_memory <- compute_means(
  df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_memory, digits = 4, caption = "Metaphor × Memory: Means by condition, pre and post exclusions")

Metaphor × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	211	0.5355	0.4999
pre (full)	2	198	0.5455	0.4992
post (retained)	1	115	0.5913	0.4937
post (retained)	2	154	0.5779	0.4955

Code

decomp_metaphor_memory <- bootstrap_decomp(
  df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_memory, digits = 4,
      caption = "Metaphor × Memory: Shapley decomposition with 95% bootstrap CIs")

Metaphor × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	0.5355	0.4686	0.6042
mean_pre_g2	0.5455	0.4778	0.6134
mean_post_g1	0.5913	0.5000	0.6803
mean_post_g2	0.5779	0.5000	0.6568
sd_pooled_pre	0.4996	0.4924	0.5011
sd_pooled_post	0.4948	0.4799	0.5008
deltaM_pre	-0.0099	-0.1054	0.0859
deltaM_post	0.0134	-0.1068	0.1278
d_pre	-0.0198	-0.2123	0.1723
d_post	0.0270	-0.2181	0.2612
delta_d_total	0.0469	-0.1075	0.1888
delta_d_means_shapley	0.0469	-0.1061	0.1873
delta_d_sd_shapley	0.0000	-0.0032	0.0036
pct_from_means	99.9278	84.9999	114.3187
pct_from_sd	0.0722	-14.3187	15.0001
abs_mean_minus_abs_sd	0.0468	0.0020	0.1858

8. Metaphor × Obvious

Source: metaphor_obvious_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)

Code

means_metaphor_obvious <- compute_means(
  df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_obvious, digits = 4, caption = "Metaphor × Obvious: Means by condition, pre and post exclusions")

Metaphor × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	200	0.5300	0.5004
pre (full)	2	212	0.5142	0.5010
post (retained)	1	147	0.5850	0.4944
post (retained)	2	152	0.5592	0.4981

Code

decomp_metaphor_obvious <- bootstrap_decomp(
  df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_obvious, digits = 4,
      caption = "Metaphor × Obvious: Shapley decomposition with 95% bootstrap CIs")

Metaphor × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	0.5300	0.4605	0.5990
mean_pre_g2	0.5142	0.4455	0.5806
mean_post_g1	0.5850	0.5064	0.6645
mean_post_g2	0.5592	0.4788	0.6369
sd_pooled_pre	0.5007	0.4949	0.5012
sd_pooled_post	0.4963	0.4835	0.5010
deltaM_pre	0.0158	-0.0810	0.1133
deltaM_post	0.0258	-0.0862	0.1376
d_pre	0.0317	-0.1623	0.2285
d_post	0.0520	-0.1750	0.2809
delta_d_total	0.0204	-0.1006	0.1415
delta_d_means_shapley	0.0200	-0.0999	0.1406
delta_d_sd_shapley	0.0004	-0.0019	0.0040
pct_from_means	98.1977	76.3625	117.7665
pct_from_sd	1.8023	-17.7665	23.6375
abs_mean_minus_abs_sd	0.0196	0.0011	0.1425

9. Social × Memory

Code

means_social_memory <- compute_means(
  df_social_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_social_memory, digits = 4, caption = "Social × Memory: Means by condition, pre and post exclusions")

Social × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	196	6.3673	0.8989
pre (full)	2	215	5.4930	1.3426
post (retained)	1	152	6.5658	0.7340
post (retained)	2	168	5.3690	1.4292

Code

decomp_social_memory <- bootstrap_decomp(
  df_social_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_social_memory, digits = 4,
      caption = "Social × Memory: Shapley decomposition with 95% bootstrap CIs")

Social × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	6.3673	6.2319	6.4868
mean_pre_g2	5.4930	5.3139	5.6638
mean_post_g1	6.5658	6.4458	6.6757
mean_post_g2	5.3690	5.1509	5.5818
sd_pooled_pre	1.1525	1.0463	1.2538
sd_pooled_post	1.1526	1.0354	1.2676
deltaM_pre	0.8743	0.6578	1.0916
deltaM_post	1.1967	0.9541	1.4377
d_pre	0.7586	0.5738	0.9476
d_post	1.0383	0.8418	1.2467
delta_d_total	0.2797	0.1624	0.4095
delta_d_means_shapley	0.2797	0.1869	0.3822
delta_d_sd_shapley	-0.0001	-0.0341	0.0436
pct_from_means	100.0188	87.8016	118.0013
pct_from_sd	-0.0188	-18.0013	12.1984
abs_mean_minus_abs_sd	0.2797	0.1622	0.3609

10. Social × Obvious

Code

means_social_obvious <- compute_means(
  df_social_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_social_obvious, digits = 4, caption = "Social × Obvious: Means by condition, pre and post exclusions")

Social × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	1	192	6.2812	0.9995
pre (full)	2	228	5.5000	1.3907
post (retained)	1	147	6.4354	0.9513
post (retained)	2	176	5.4205	1.4200

Code

decomp_social_obvious <- bootstrap_decomp(
  df_social_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_social_obvious, digits = 4,
      caption = "Social × Obvious: Shapley decomposition with 95% bootstrap CIs")

Social × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	6.2812	6.1344	6.4145
mean_pre_g2	5.5000	5.3209	5.6830
mean_post_g1	6.4354	6.2756	6.5833
mean_post_g2	5.4205	5.2105	5.6243
sd_pooled_pre	1.2275	1.1111	1.3345
sd_pooled_post	1.2292	1.0983	1.3555
deltaM_pre	0.7812	0.5532	1.0039
deltaM_post	1.0149	0.7607	1.2663
d_pre	0.6365	0.4526	0.8293
d_post	0.8257	0.6107	1.0592
delta_d_total	0.1892	0.0824	0.3177
delta_d_means_shapley	0.1902	0.0855	0.3031
delta_d_sd_shapley	-0.0010	-0.0292	0.0383
pct_from_means	100.5436	81.2371	122.6541
pct_from_sd	-0.5436	-22.6541	18.7629
abs_mean_minus_abs_sd	0.1892	0.0710	0.2880

11. Numeric × Memory

Code

means_numeric_memory <- compute_means(
  df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_memory, digits = 4, caption = "Numeric × Memory: Means by condition, pre and post exclusions")

Numeric × Memory: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	0	200	3.2942	4.1369
pre (full)	1	214	2.1030	1.4019
post (retained)	0	167	3.5724	4.4706
post (retained)	1	180	2.1195	1.5123

Code

decomp_numeric_memory <- bootstrap_decomp(
  df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_memory, digits = 4,
      caption = "Numeric × Memory: Shapley decomposition with 95% bootstrap CIs")

Numeric × Memory: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	3.2942	2.7838	3.8940
mean_pre_g2	2.1030	1.9256	2.3054
mean_post_g1	3.5724	2.9686	4.2717
mean_post_g2	2.1195	1.9114	2.3547
sd_pooled_pre	3.0467	1.8926	4.1435
sd_pooled_post	3.2868	2.0291	4.4658
deltaM_pre	1.1912	0.6361	1.8404
deltaM_post	1.4529	0.7949	2.2163
d_pre	0.3910	0.2665	0.5478
d_post	0.4420	0.3103	0.6182
delta_d_total	0.0510	0.0211	0.0931
delta_d_means_shapley	0.0827	0.0463	0.1329
delta_d_sd_shapley	-0.0317	-0.0437	-0.0206
pct_from_means	162.1223	134.1525	239.3141
pct_from_sd	-62.1223	-139.3141	-34.1525
abs_mean_minus_abs_sd	0.0510	0.0211	0.0931

12. Numeric × Obvious

Code

means_numeric_obvious <- compute_means(
  df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_obvious, digits = 4, caption = "Numeric × Obvious: Means by condition, pre and post exclusions")

Numeric × Obvious: Means by condition, pre and post exclusions
sample	condition	n	mean_dv	sd_dv
pre (full)	0	211	2.7723	1.9897
pre (full)	1	195	2.0531	1.0984
post (retained)	0	165	2.9171	2.1169
post (retained)	1	160	2.0526	1.1462

Code

decomp_numeric_obvious <- bootstrap_decomp(
  df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_obvious, digits = 4,
      caption = "Numeric × Obvious: Shapley decomposition with 95% bootstrap CIs")

Numeric × Obvious: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	2.7723	2.5082	3.0516
mean_pre_g2	2.0531	1.9092	2.2110
mean_post_g1	2.9171	2.6034	3.2554
mean_post_g2	2.0526	1.8833	2.2381
sd_pooled_pre	1.6240	1.3776	1.8539
sd_pooled_post	1.7094	1.4395	1.9609
deltaM_pre	0.7192	0.4185	1.0296
deltaM_post	0.8645	0.5065	1.2409
d_pre	0.4429	0.2688	0.6105
d_post	0.5058	0.3097	0.7024
delta_d_total	0.0629	-0.0039	0.1381
delta_d_means_shapley	0.0873	0.0110	0.1684
delta_d_sd_shapley	-0.0244	-0.0440	-0.0021
pct_from_means	138.7423	49.4049	317.4241
pct_from_sd	-38.7423	-217.4241	50.5951
abs_mean_minus_abs_sd	0.0629	-0.0001	0.1381

Summary Across All 12 Cases

Code

# Collect key decomposition estimates into one table
decomp_list <- list(
  "Minread × Memory"       = decomp_minread_memory,
  "Minread × Obvious"      = decomp_minread_obvious,
  "Transaction × Memory"   = decomp_transaction_memory,
  "Transaction × Obvious"  = decomp_transaction_obvious,
  "Longread × Memory"      = decomp_longread_memory,
  "Longread × Obvious"     = decomp_longread_obvious,
  "Metaphor × Memory"      = decomp_metaphor_memory,
  "Metaphor × Obvious"     = decomp_metaphor_obvious,
  "Social × Memory"        = decomp_social_memory,
  "Social × Obvious"       = decomp_social_obvious,
  "Numeric × Memory"       = decomp_numeric_memory,
  "Numeric × Obvious"      = decomp_numeric_obvious
)

key_stats <- c("d_pre", "d_post", "delta_d_total",
               "delta_d_means_shapley", "delta_d_sd_shapley",
               "pct_from_means", "pct_from_sd")

summary_df <- lapply(names(decomp_list), function(nm) {
  d <- decomp_list[[nm]]
  row <- d[d$statistic %in% key_stats, c("statistic", "estimate")]
  row$case <- nm
  row
}) |> bind_rows() |>
  pivot_wider(names_from = statistic, values_from = estimate) |>
  select(case, all_of(key_stats))

kable(summary_df, digits = 3,
      caption = "Summary: Shapley decomposition across all 12 paradigm × attention-check cases",
      col.names = c("Case", "d (pre)", "d (post)", "Δd total",
                    "Δd means (Shapley)", "Δd SD (Shapley)",
                    "% from means", "% from SD"))

Summary: Shapley decomposition across all 12 paradigm × attention-check cases
Case	d (pre)	d (post)	Δd total	Δd means (Shapley)	Δd SD (Shapley)	% from means	% from SD
Minread × Memory	1.043	0.880	-0.164	-0.127	-0.036	77.803	22.197
Minread × Obvious	0.965	0.709	-0.256	-0.228	-0.028	89.179	10.821
Transaction × Memory	0.017	0.042	0.026	0.022	0.003	86.814	13.186
Transaction × Obvious	0.139	0.217	0.078	0.053	0.025	68.202	31.798
Longread × Memory	0.791	1.816	1.025	0.864	0.161	84.327	15.673
Longread × Obvious	0.093	0.102	0.009	0.018	-0.009	198.661	-98.661
Metaphor × Memory	-0.020	0.027	0.047	0.047	0.000	99.928	0.072
Metaphor × Obvious	0.032	0.052	0.020	0.020	0.000	98.198	1.802
Social × Memory	0.759	1.038	0.280	0.280	0.000	100.019	-0.019
Social × Obvious	0.636	0.826	0.189	0.190	-0.001	100.544	-0.544
Numeric × Memory	0.391	0.442	0.051	0.083	-0.032	162.122	-62.122
Numeric × Obvious	0.443	0.506	0.063	0.087	-0.024	138.742	-38.742

Visualization: Decomposition Waterfall

Code

library(patchwork)

# Build long-form data for stacked bar
plot_df <- summary_df |>
  mutate(
    paradigm = sub(" × .*", "", case),
    ac_type  = sub(".* × ", "", case),
    means_contrib = delta_d_means_shapley,
    sd_contrib    = delta_d_sd_shapley
  ) |>
  select(case, paradigm, ac_type, d_pre, d_post, delta_d_total,
         means_contrib, sd_contrib) |>
  pivot_longer(
    cols = c(means_contrib, sd_contrib),
    names_to  = "component",
    values_to = "contribution"
  ) |>
  mutate(
    component = recode(component,
      "means_contrib" = "Mean shift",
      "sd_contrib"    = "SD change"
    ),
    case = factor(case, levels = unique(summary_df$case))
  )

# Shared layer builder so both sub-plots stay visually identical
make_bars <- function(data) {
  # Anchor labels to the outer edge of the bar stack (not delta_d_total).
  # When components have opposite signs, delta_d_total falls *inside* the
  # taller bar, so placing text there buries it. Instead we use the top of
  # the positive stack (for net-positive bars) or the bottom of the negative
  # stack (for net-negative bars).
  label_df <- data |>
    group_by(case, paradigm, ac_type, delta_d_total) |>
    summarise(
      y_top = sum(contribution[contribution > 0], na.rm = TRUE),
      y_bot = sum(contribution[contribution < 0], na.rm = TRUE),
      .groups = "drop"
    ) |>
    mutate(
      y_label   = ifelse(delta_d_total >= 0, y_top, y_bot),
      vjust_val = ifelse(delta_d_total >= 0, -0.4, 1.2)
    )

  ggplot(data, aes(x = ac_type, y = contribution, fill = component)) +
    geom_col(position = "stack", width = 0.65, color = "white", linewidth = 0.3) +
    geom_hline(yintercept = 0, linewidth = 0.4, color = "grey30") +
    geom_text(
      data = label_df,
      aes(x = ac_type, y = y_label,
          label = sprintf("Δd=%.2f", delta_d_total),
          vjust = vjust_val),
      inherit.aes = FALSE,
      size = 2.8, fontface = "bold"
    ) +
    scale_fill_manual(
      values = c("Mean shift" = "#2166ac", "SD change" = "#d6604d"),
      name   = "Contribution"
    ) +
    labs(x = "Attention check type",
         y = expression(Delta * italic(d) ~ "(Shapley contribution)")) +
    theme_bw(base_size = 12) +
    theme(
      strip.background = element_rect(fill = "grey92"),
      strip.text       = element_text(face = "bold"),
      legend.position  = "bottom",
      panel.grid.minor = element_blank()
    )
}

# --- Sub-plot A: the 5 non-Longread paradigms, y from -0.32 to 0.4 ---
p_other <- make_bars(plot_df |> filter(paradigm != "Longread")) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
  coord_cartesian(ylim = c(-0.32, 0.4))

# --- Sub-plot B: Longread alone, free y scale ---
p_long <- make_bars(plot_df |> filter(paradigm == "Longread")) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
  theme(legend.position = "none")   # legend carried by p_other

# Combine: Longread panel on the left (narrow), others on the right (wide)
p_long + p_other +
  plot_layout(widths = c(1, 5), guides = "collect") +
  plot_annotation(
    title    = "Shapley Decomposition of Change in Cohen's d",
    subtitle = "Each bar = Δd (post − pre); decomposed into mean-shift and SD-change contributions.\nLongread y-axis is free; all other paradigms share y ≤ 0.4.",
    theme = theme(
      plot.title    = element_text(face = "bold", size = 13),
      plot.subtitle = element_text(size = 10, color = "grey30"),
      legend.position = "bottom"
    )
  )

Logit-Transformed DVs: Longread & Social

For bounded Likert-type DVs, inattentive respondents tend to cluster at the endpoints, which can suppress post-exclusion SD changes on the raw scale. Logit-transforming the DV “unbounds” the scale, potentially allowing SD to shift more when inattentive responders are removed.

Procedure

Rescale $x$ to the open unit interval using theoretical scale bounds: $p = (x - x_{\min}) / (x_{\max} - x_{\min})$
Apply the Smithson & Verkuilen (2006) compression to push any exact 0/1 values away from the boundary: $p' = \bigl(p \cdot (n - 1) + 0.5\bigr) \,/\, n$ where $n$ is the number of non-missing observations in each dataset.
Apply the logit: $\tilde{x} = \log\!\bigl(p' / (1 - p')\bigr)$

Scale bounds used:

Dataset	Scale	$x_{\min}$	$x_{\max}$
longread_memory / longread_obvious	`dv_avg` (avg of 1–15 items)	1	15
social_memory / social_obvious	`dv` (0–7 integer)	0	7

Code

# Smithson & Verkuilen logit transformation for a bounded scale
logit_sv <- function(x, scale_min, scale_max) {
  n <- sum(!is.na(x))
  p  <- (x - scale_min) / (scale_max - scale_min)   # rescale to [0, 1]
  p2 <- (p * (n - 1) + 0.5) / n                     # S&V compression
  log(p2 / (1 - p2))                                 # logit
}

df_longread_memory_logit  <- df_longread_memory  |>
  mutate(dv_logit = logit_sv(dv_avg, 1, 15))

df_longread_obvious_logit <- df_longread_obvious |>
  mutate(dv_logit = logit_sv(dv_avg, 1, 15))

df_social_memory_logit    <- df_social_memory    |>
  mutate(dv_logit = logit_sv(dv, 0, 7))

df_social_obvious_logit   <- df_social_obvious   |>
  mutate(dv_logit = logit_sv(dv, 0, 7))

# Quick sanity check: raw vs logit SD before and after exclusions
check_sd <- function(df, raw_col, logit_col, cond_col, att_col = "att_correct") {
  raw_pre  <- sd(df[[raw_col]],   na.rm = TRUE)
  raw_post <- sd(df[[raw_col]][df[[att_col]] == 1],   na.rm = TRUE)
  log_pre  <- sd(df[[logit_col]], na.rm = TRUE)
  log_post <- sd(df[[logit_col]][df[[att_col]] == 1], na.rm = TRUE)
  data.frame(
    sd_raw_pre  = raw_pre,  sd_raw_post  = raw_post,
    sd_log_pre  = log_pre,  sd_log_post  = log_post,
    pct_change_raw   = 100 * (raw_post - raw_pre)  / raw_pre,
    pct_change_logit = 100 * (log_post - log_pre)  / log_pre
  )
}

sd_checks <- bind_rows(
  check_sd(df_longread_memory_logit,  "dv_avg", "dv_logit", "condition_opposing") |>
    mutate(dataset = "Longread × Memory"),
  check_sd(df_longread_obvious_logit, "dv_avg", "dv_logit", "condition") |>
    mutate(dataset = "Longread × Obvious"),
  check_sd(df_social_memory_logit,    "dv",     "dv_logit", "condition") |>
    mutate(dataset = "Social × Memory"),
  check_sd(df_social_obvious_logit,   "dv",     "dv_logit", "condition") |>
    mutate(dataset = "Social × Obvious")
) |> select(dataset, everything())

kable(sd_checks, digits = 4,
      caption = "Sanity check: pooled SD before and after exclusions, raw vs logit scale. A larger % change on the logit scale would indicate the transformation is 'freeing up' the tails.")

Sanity check: pooled SD before and after exclusions, raw vs logit scale. A larger % change on the logit scale would indicate the transformation is ‘freeing up’ the tails.
dataset	sd_raw_pre	sd_raw_post	sd_log_pre	sd_log_post	pct_change_raw	pct_change_logit
Longread × Memory	4.4239	4.8776	3.2688	3.7195	10.2553	13.7898
Longread × Obvious	4.3849	4.7923	3.2438	3.7226	9.2894	14.7590
Social × Memory	1.2639	1.2972	2.8233	2.8564	2.6338	1.1742
Social × Obvious	1.2865	1.3276	2.8194	2.8851	3.1983	2.3320

Decompositions on logit scale

Code

cat("=== Longread x Memory (logit DV) ===\n")

=== Longread x Memory (logit DV) ===

Code

decomp_longread_memory_logit <- bootstrap_decomp(
  df_longread_memory_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition_opposing",
  cond_vals = c(0, 1)
)
kable(decomp_longread_memory_logit, digits = 4,
      caption = "Longread × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs")

Longread × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	2.0716	1.7908	2.3552
mean_pre_g2	-0.1829	-0.5805	0.2222
mean_post_g1	2.3652	2.0174	2.7111
mean_post_g2	-2.0875	-2.6197	-1.5377
sd_pooled_pre	3.0706	2.8636	3.2667
sd_pooled_post	2.9991	2.7824	3.2035
deltaM_pre	2.2545	1.7604	2.7460
deltaM_post	4.4528	3.7907	5.0917
d_pre	0.7342	0.5821	0.8878
d_post	1.4847	1.2703	1.7085
delta_d_total	0.7505	0.5861	0.9280
delta_d_means_shapley	0.7245	0.5977	0.8615
delta_d_sd_shapley	0.0260	-0.0320	0.0906
pct_from_means	96.5325	89.6224	105.0317
pct_from_sd	3.4675	-5.0317	10.3776
abs_mean_minus_abs_sd	0.6984	0.5720	0.8132

Code

cat("=== Longread x Obvious (logit DV) ===\n")

=== Longread x Obvious (logit DV) ===

Code

decomp_longread_obvious_logit <- bootstrap_decomp(
  df_longread_obvious_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c("before", "after")
)
kable(decomp_longread_obvious_logit, digits = 4,
      caption = "Longread × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs")

Longread × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	0.9089	0.5453	1.2602
mean_pre_g2	0.9108	0.5201	1.2873
mean_post_g1	0.5871	0.0596	1.1073
mean_post_g2	0.5087	-0.0224	1.0365
sd_pooled_pre	3.2465	2.9938	3.4719
sd_pooled_post	3.7271	3.4332	3.9911
deltaM_pre	-0.0018	-0.5222	0.5188
deltaM_post	0.0785	-0.6443	0.8243
d_pre	-0.0006	-0.1597	0.1615
d_post	0.0211	-0.1726	0.2230
delta_d_total	0.0216	-0.0523	0.0998
delta_d_means_shapley	0.0231	-0.0647	0.1154
delta_d_sd_shapley	-0.0015	-0.0257	0.0228
pct_from_means	107.0376	-202.5491	451.1258
pct_from_sd	-7.0376	-351.1258	302.5491
abs_mean_minus_abs_sd	0.0216	-0.0089	0.0998

Code

cat("=== Social x Memory (logit DV) ===\n")

=== Social x Memory (logit DV) ===

Code

decomp_social_memory_logit <- bootstrap_decomp(
  df_social_memory_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c(1, 2)
)
kable(decomp_social_memory_logit, digits = 4,
      caption = "Social × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs")

Social × Memory — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	4.4580	4.0671	4.8135
mean_pre_g2	2.3753	2.0516	2.7153
mean_post_g1	5.0336	4.6327	5.4180
mean_post_g2	2.2619	1.8918	2.6469
sd_pooled_pre	2.5835	2.4614	2.6832
sd_pooled_post	2.5014	2.3392	2.6317
deltaM_pre	2.0828	1.5726	2.5631
deltaM_post	2.7717	2.2095	3.2960
d_pre	0.8062	0.5922	1.0333
d_post	1.1081	0.8447	1.3998
delta_d_total	0.3019	0.1696	0.4653
delta_d_means_shapley	0.2711	0.1617	0.3947
delta_d_sd_shapley	0.0308	0.0006	0.0802
pct_from_means	89.7838	81.0617	99.6981
pct_from_sd	10.2162	0.3019	18.9383
abs_mean_minus_abs_sd	0.2402	0.1499	0.3340

Code

cat("=== Social x Obvious (logit DV) ===\n")

=== Social x Obvious (logit DV) ===

Code

decomp_social_obvious_logit <- bootstrap_decomp(
  df_social_obvious_logit,
  dv_col    = "dv_logit",
  cond_col  = "condition",
  cond_vals = c(1, 2)
)
kable(decomp_social_obvious_logit, digits = 4,
      caption = "Social × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs")

Social × Obvious — logit DV: Shapley decomposition with 95% bootstrap CIs
statistic	estimate	ci_lower	ci_upper
mean_pre_g1	4.2792	3.8874	4.6593
mean_pre_g2	2.5152	2.1791	2.8537
mean_post_g1	4.7880	4.3591	5.2063
mean_post_g2	2.4237	2.0341	2.8238
sd_pooled_pre	2.6818	2.5660	2.7757
sd_pooled_post	2.6372	2.4854	2.7602
deltaM_pre	1.7640	1.2372	2.2631
deltaM_post	2.3643	1.7857	2.9211
d_pre	0.6578	0.4532	0.8701
d_post	0.8965	0.6562	1.1676
delta_d_total	0.2387	0.1110	0.3871
delta_d_means_shapley	0.2257	0.1122	0.3464
delta_d_sd_shapley	0.0130	-0.0068	0.0467
pct_from_means	94.5535	86.1882	104.8002
pct_from_sd	5.4465	-4.8002	13.8118
abs_mean_minus_abs_sd	0.2127	0.1072	0.3116

Visualization: Raw vs Logit Decomposition

Code

# Helper: extract key stats from a bootstrap_decomp result into one tidy row
extract_row <- function(decomp_result, case_label, scale_label) {
  d <- decomp_result
  row <- d[d$statistic %in% c("d_pre","d_post","delta_d_total",
                               "delta_d_means_shapley","delta_d_sd_shapley"), ]
  as.data.frame(t(setNames(row$estimate, row$statistic))) |>
    mutate(case = case_label, scale = scale_label)
}

logit_summary <- bind_rows(
  extract_row(decomp_longread_memory,       "Longread × Memory",  "Raw"),
  extract_row(decomp_longread_memory_logit, "Longread × Memory",  "Logit"),
  extract_row(decomp_longread_obvious,      "Longread × Obvious", "Raw"),
  extract_row(decomp_longread_obvious_logit,"Longread × Obvious", "Logit"),
  extract_row(decomp_social_memory,         "Social × Memory",    "Raw"),
  extract_row(decomp_social_memory_logit,   "Social × Memory",    "Logit"),
  extract_row(decomp_social_obvious,        "Social × Obvious",   "Raw"),
  extract_row(decomp_social_obvious_logit,  "Social × Obvious",   "Logit")
) |>
  mutate(scale = factor(scale, levels = c("Raw", "Logit")))

# Long form for stacked bars
logit_plot_df <- logit_summary |>
  pivot_longer(
    cols      = c(delta_d_means_shapley, delta_d_sd_shapley),
    names_to  = "component",
    values_to = "contribution"
  ) |>
  mutate(
    component = recode(component,
      "delta_d_means_shapley" = "Mean shift",
      "delta_d_sd_shapley"    = "SD change"
    )
  )

# Label anchoring (same logic as main plot)
logit_label_df <- logit_plot_df |>
  group_by(case, scale, delta_d_total) |>
  summarise(
    y_top = sum(contribution[contribution > 0], na.rm = TRUE),
    y_bot = sum(contribution[contribution < 0], na.rm = TRUE),
    .groups = "drop"
  ) |>
  mutate(
    y_label   = ifelse(delta_d_total >= 0, y_top, y_bot),
    vjust_val = ifelse(delta_d_total >= 0, -0.4, 1.2)
  )

ggplot(logit_plot_df,
       aes(x = scale, y = contribution, fill = component)) +
  geom_col(position = "stack", width = 0.6, color = "white", linewidth = 0.3) +
  geom_hline(yintercept = 0, linewidth = 0.4, color = "grey30") +
  geom_text(
    data = logit_label_df,
    aes(x = scale, y = y_label,
        label = sprintf("Δd=%.3f", delta_d_total),
        vjust = vjust_val),
    inherit.aes = FALSE,
    size = 2.7, fontface = "bold"
  ) +
  facet_wrap(~ case, nrow = 2, scales = "free_y") +
  scale_fill_manual(
    values = c("Mean shift" = "#2166ac", "SD change" = "#d6604d"),
    name   = "Contribution"
  ) +
  labs(
    title    = "Effect of Logit-Transforming the DV on Shapley Decomposition",
    subtitle = "Each bar pair shows raw vs logit-transformed DV for the same dataset.\nA taller red segment on the Logit bar = SD playing a larger role after unbounding the scale.",
    x        = NULL,
    y        = expression(Delta * italic(d) ~ "(Shapley contribution)")
  ) +
  theme_bw(base_size = 12) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold"),
    legend.position  = "bottom",
    panel.grid.minor = element_blank()
  )

Residual Variance Pre vs Post Exclusions

A complementary way to ask whether attention-check exclusions reduce noise is to regress the DV on the condition assignment and examine how much residual variance remains before and after exclusions. The residual variance is the variance left over after accounting for the experimental manipulation — it is a direct measure of within-condition noise. If ACs remove inattentive respondents who add random noise, residual variance should fall post-exclusion.

Method: For each dataset, fit lm(dv ~ factor(condition)) on the full sample (pre) and on the retained subsample (att_correct == 1, post). Residual variance is extracted as $\hat{\sigma}^2$ (the mean squared error from summary(lm)$sigma^2). We then compute the absolute and percentage reduction.

Code

# ── Load the 24 additional AC-type files ─────────────────────────────────────
df_minread_imc         <- read.csv("minread_imc.csv")
df_minread_english     <- read.csv("minread_english.csv")
df_minread_selfreport  <- read.csv("minread_selfreport.csv")
df_minread_straightline<- read.csv("minread_straightline.csv")

df_transaction_imc          <- read.csv("transaction_imc.csv")
df_transaction_english      <- read.csv("transaction_english.csv")
df_transaction_selfreport   <- read.csv("transaction_selfreport.csv")
df_transaction_straightline <- read.csv("transaction_straightline.csv")

df_longread_imc          <- read.csv("longread_imc.csv")
df_longread_english      <- read.csv("longread_english.csv")
df_longread_selfreport   <- read.csv("longread_selfreport.csv")
df_longread_straightline <- read.csv("longread_straightline.csv")

df_metaphor_imc          <- read.csv("metaphor_imc_v2.csv")
df_metaphor_english      <- read.csv("metaphor_english_v2.csv")
df_metaphor_selfreport   <- read.csv("metaphor_selfreport_v2.csv")
df_metaphor_straightline <- read.csv("metaphor_straightline_v2.csv")

df_social_imc          <- read.csv("social_imc.csv")
df_social_english      <- read.csv("social_english.csv")
df_social_selfreport   <- read.csv("social_selfreport.csv")
df_social_straightline <- read.csv("social_straightline.csv")

df_numeric_imc          <- read.csv("numeric_imc.csv")
df_numeric_english      <- read.csv("numeric_english.csv")
df_numeric_selfreport   <- read.csv("numeric_selfreport.csv")
df_numeric_straightline <- read.csv("numeric_straightline.csv")

# ── Dataset config ─────────────────────────────────────────────────────────
# focal = TRUE  → Memory / Obvious (plotted in colour)
# focal = FALSE → the 4 background AC types (plotted in grey)

datasets_cfg <- list(
  # Minread
  list(label="Minread × Memory",            df=df_minread_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Minread × Obvious",           df=df_minread_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Minread × IMC",               df=df_minread_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × English",           df=df_minread_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × Self-report",       df=df_minread_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Minread × Straightline",      df=df_minread_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Transaction
  list(label="Transaction × Memory",        df=df_transaction_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Transaction × Obvious",       df=df_transaction_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Transaction × IMC",           df=df_transaction_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × English",       df=df_transaction_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × Self-report",   df=df_transaction_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Transaction × Straightline",  df=df_transaction_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Longread
  list(label="Longread × Memory",           df=df_longread_memory,
       dv="dv_avg", cond="condition_opposing", focal=TRUE),
  list(label="Longread × Obvious",          df=df_longread_obvious,
       dv="dv_avg", cond="condition",          focal=TRUE),
  list(label="Longread × IMC",              df=df_longread_imc,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × English",          df=df_longread_english,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × Self-report",      df=df_longread_selfreport,
       dv="dv_avg", cond="condition",          focal=FALSE),
  list(label="Longread × Straightline",     df=df_longread_straightline,
       dv="dv_avg", cond="condition",          focal=FALSE),
  # Metaphor
  list(label="Metaphor × Memory",           df=df_metaphor_memory,
       dv="dv_friday", cond="condition",     focal=TRUE),
  list(label="Metaphor × Obvious",          df=df_metaphor_obvious,
       dv="dv_friday", cond="condition",     focal=TRUE),
  list(label="Metaphor × IMC",              df=df_metaphor_imc,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × English",          df=df_metaphor_english,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × Self-report",      df=df_metaphor_selfreport,
       dv="dv_friday", cond="condition",     focal=FALSE),
  list(label="Metaphor × Straightline",     df=df_metaphor_straightline,
       dv="dv_friday", cond="condition",     focal=FALSE),
  # Social
  list(label="Social × Memory",             df=df_social_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Social × Obvious",            df=df_social_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Social × IMC",                df=df_social_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × English",            df=df_social_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × Self-report",        df=df_social_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Social × Straightline",       df=df_social_straightline,
       dv="dv", cond="condition",            focal=FALSE),
  # Numeric
  list(label="Numeric × Memory",            df=df_numeric_memory,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Numeric × Obvious",           df=df_numeric_obvious,
       dv="dv", cond="condition",            focal=TRUE),
  list(label="Numeric × IMC",               df=df_numeric_imc,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × English",           df=df_numeric_english,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × Self-report",       df=df_numeric_selfreport,
       dv="dv", cond="condition",            focal=FALSE),
  list(label="Numeric × Straightline",      df=df_numeric_straightline,
       dv="dv", cond="condition",            focal=FALSE)
)

# ── Fit function ────────────────────────────────────────────────────────────
fit_resid_var <- function(cfg) {
  df       <- cfg$df
  dv_col   <- cfg$dv
  cond_col <- cfg$cond

  df_pre  <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
  df_post <- df_pre[df_pre$att_correct == 1, ]

  fmla <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))

  fit_pre  <- lm(fmla, data = df_pre)
  fit_post <- lm(fmla, data = df_post)

  sigma2_pre  <- summary(fit_pre)$sigma^2
  sigma2_post <- summary(fit_post)$sigma^2

  data.frame(
    case           = cfg$label,
    paradigm       = sub(" × .*", "", cfg$label),
    ac_type        = sub(".* × ", "", cfg$label),
    focal          = cfg$focal,
    n_pre          = nrow(df_pre),
    n_post         = nrow(df_post),
    pct_retained   = round(100 * nrow(df_post) / nrow(df_pre), 1),
    resid_var_pre  = sigma2_pre,
    resid_var_post = sigma2_post,
    resid_sd_pre   = sqrt(sigma2_pre),
    resid_sd_post  = sqrt(sigma2_post),
    abs_reduction  = sigma2_pre - sigma2_post,
    pct_reduction  = 100 * (sigma2_pre - sigma2_post) / sigma2_pre
  )
}

resid_var_df     <- lapply(datasets_cfg, fit_resid_var) |> bind_rows()
resid_var_focal  <- resid_var_df |> filter(focal)
resid_var_bg     <- resid_var_df |> filter(!focal)

Summary Table

Code

# All 36 cases, sorted by paradigm then AC type
kable(
  resid_var_df |>
    arrange(paradigm, ac_type) |>
    select(case, n_pre, n_post, pct_retained,
           resid_var_pre, resid_var_post,
           abs_reduction, pct_reduction),
  digits  = c(0, 0, 0, 1, 4, 4, 4, 2),
  caption = "Residual variance (σ²) from lm(DV ~ condition) before and after attention-check exclusions, all 6 AC types × 6 paradigms. A positive % reduction indicates that exclusions removed noise.",
  col.names = c("Case", "N (pre)", "N (post)", "% retained",
                "σ² pre", "σ² post",
                "Δσ² (abs)", "Δσ² (%)")
)

Residual variance (σ²) from lm(DV ~ condition) before and after attention-check exclusions, all 6 AC types × 6 paradigms. A positive % reduction indicates that exclusions removed noise.
Case	N (pre)	N (post)	% retained	σ² pre	σ² post	Δσ² (abs)	Δσ² (%)
Longread × English	582	408	70.1	18.2423	18.9348	-0.6925	-3.80
Longread × IMC	609	538	88.3	18.1459	19.0699	-0.9240	-5.09
Longread × Memory	614	377	61.4	16.9449	13.1693	3.7756	22.28
Longread × Obvious	597	397	66.5	19.2182	22.9640	-3.7458	-19.49
Longread × Self-report	599	593	99.0	19.4092	19.3443	0.0649	0.33
Longread × Straightline	595	537	90.3	19.7833	20.4448	-0.6615	-3.34
Metaphor × English	399	303	75.9	0.2424	0.2376	0.0048	1.98
Metaphor × IMC	406	355	87.4	0.2508	0.2500	0.0007	0.29
Metaphor × Memory	409	269	65.8	0.2496	0.2448	0.0048	1.92
Metaphor × Obvious	412	299	72.6	0.2507	0.2463	0.0044	1.74
Metaphor × Self-report	407	401	98.5	0.2456	0.2449	0.0007	0.27
Metaphor × Straightline	406	374	92.1	0.2502	0.2496	0.0006	0.24
Minread × English	407	300	73.7	0.1943	0.1953	-0.0009	-0.48
Minread × IMC	417	378	90.6	0.1978	0.2021	-0.0043	-2.20
Minread × Memory	405	152	37.5	0.1943	0.2096	-0.0153	-7.86
Minread × Obvious	420	332	79.0	0.1877	0.2006	-0.0129	-6.86
Minread × Self-report	408	406	99.5	0.2082	0.2087	-0.0005	-0.26
Minread × Straightline	405	373	92.1	0.1692	0.1708	-0.0016	-0.94
Numeric × English	393	307	78.1	1.3838	1.5281	-0.1443	-10.43
Numeric × IMC	402	361	89.8	1.4909	1.5428	-0.0519	-3.48
Numeric × Memory	414	347	83.8	9.2822	10.8033	-1.5211	-16.39
Numeric × Obvious	406	325	80.0	2.6372	2.9219	-0.2847	-10.80
Numeric × Self-report	405	396	97.8	3.7992	3.8668	-0.0676	-1.78
Numeric × Straightline	407	388	95.3	2.8046	2.7681	0.0365	1.30
Social × English	411	295	71.8	1.3756	1.4215	-0.0459	-3.33
Social × IMC	422	373	88.4	1.2809	1.3376	-0.0567	-4.43
Social × Memory	411	320	77.9	1.3283	1.3285	-0.0002	-0.01
Social × Obvious	420	323	76.9	1.5067	1.5110	-0.0042	-0.28
Social × Self-report	401	392	97.8	1.3443	1.3499	-0.0056	-0.42
Social × Straightline	414	396	95.7	1.4798	1.5066	-0.0267	-1.81
Transaction × English	409	308	75.3	335.6572	299.4908	36.1663	10.77
Transaction × IMC	409	366	89.5	349.7239	307.3752	42.3487	12.11
Transaction × Memory	401	265	66.1	372.7749	294.0758	78.6991	21.11
Transaction × Obvious	410	290	70.7	399.1257	300.6138	98.5120	24.68
Transaction × Self-report	410	403	98.3	397.8065	396.5318	1.2747	0.32
Transaction × Straightline	419	381	90.9	350.8063	332.3446	18.4616	5.26

Visualization 1: Paired Dot Plot (pre vs post σ²)

Each line connects the pre- and post-exclusion residual variance for the same dataset. A downward-sloping line means exclusions reduced noise.

Code

# Colour palette: Memory + Obvious in colour; 4 background ACs in
# descending shades of grey (darker = more "serious" check)
ac_colors <- c(
  "Memory"      = "#1f78b4",   # blue
  "Obvious"     = "#9e3fa8",   # purple
  "IMC"         = "#252525",   # near-black grey
  "English"     = "#636363",   # dark grey
  "Self-report" = "#969696",   # mid grey
  "Straightline"= "#bdbdbd"    # light grey
)

# Line thickness: focal lines heavier so they read over the grey
ac_linewidth <- c(
  "Memory"      = 1.2,
  "Obvious"     = 1.2,
  "IMC"         = 0.65,
  "English"     = 0.65,
  "Self-report" = 0.65,
  "Straightline"= 0.65
)

ac_pointsize <- c(
  "Memory"      = 3.0,
  "Obvious"     = 3.0,
  "IMC"         = 2.0,
  "English"     = 2.0,
  "Self-report" = 2.0,
  "Straightline"= 2.0
)

# Pivot all 36 cases to long form
resid_long_all <- resid_var_df |>
  select(case, paradigm, ac_type, focal, resid_var_pre, resid_var_post) |>
  pivot_longer(
    cols      = c(resid_var_pre, resid_var_post),
    names_to  = "sample",
    values_to = "resid_var"
  ) |>
  mutate(
    sample  = recode(sample,
      "resid_var_pre"  = "Pre-exclusion",
      "resid_var_post" = "Post-exclusion"
    ),
    sample  = factor(sample, levels = c("Pre-exclusion", "Post-exclusion")),
    ac_type = factor(ac_type, levels = names(ac_colors))
  )

# ── Facet label mapping: title + citation on second line ──────────────────
# Uses plain \n so no extra packages are needed.
# If ggtext is available (requires litedown: install.packages("litedown")),
# swap element_text() for element_markdown() below and use the HTML version.
paradigm_labels <- c(
  "Minread"     = "Scale Response\n(Schwarz et al. 1985)",
  "Transaction" = "Transaction Utility\n(Thaler 1999)",
  "Longread"    = "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
  "Metaphor"    = "Metaphoric Structuring\n(Boroditsky 2000)",
  "Social"      = "Less-is-Better\n(Hsee 1998)",
  "Numeric"     = "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)"
)

resid_long_all <- resid_long_all |>
  mutate(
    paradigm = recode(paradigm, !!!paradigm_labels),
    paradigm = factor(paradigm, levels = c(
      "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
      "Less-is-Better\n(Hsee 1998)",
      "Metaphoric Structuring\n(Boroditsky 2000)",
      "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
      "Transaction Utility\n(Thaler 1999)",
      "Scale Response\n(Schwarz et al. 1985)"
    ))
  )

# Draw background (grey) ACs first, then focal (coloured) ACs on top
resid_long_bg    <- resid_long_all |> filter(!focal)
resid_long_focal <- resid_long_all |> filter(focal)

ggplot() +
  # ── Grey background ACs ────────────────────────────────────────────────
  geom_line(
    data = resid_long_bg,
    aes(x = sample, y = resid_var, color = ac_type,
        linewidth = ac_type, group = case)
  ) +
  geom_point(
    data = resid_long_bg,
    aes(x = sample, y = resid_var, color = ac_type, group = case,
        size = ac_type)
  ) +
  # ── Coloured focal ACs ─────────────────────────────────────────────────
  geom_line(
    data = resid_long_focal,
    aes(x = sample, y = resid_var, color = ac_type,
        linewidth = ac_type, group = case)
  ) +
  geom_point(
    data = resid_long_focal,
    aes(x = sample, y = resid_var, color = ac_type,
        size = ac_type, group = case)
  ) +
  facet_wrap(~ paradigm, nrow = 2, scales = "free_y") +
  scale_color_manual(values = ac_colors, name = "AC type") +
  scale_linewidth_manual(values = ac_linewidth, guide = "none") +
  scale_size_manual(values = ac_pointsize, guide = "none") +
  # Force 0 into each panel's y range without clipping data.
  expand_limits(y = 0) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.08))) +
  scale_x_discrete(expand = expansion(add = c(0.3, 0.3))) +
  labs(
    title    = "Residual Variance Before and After Attention-Check Exclusions",
    subtitle = "Y-axis starts at 0 so slope steepness reflects true proportional change.\nMemory & Obvious shown in colour; IMC, English, Self-report, Straightline in grey.",
    x        = NULL,
    y        = expression(Residual ~ variance ~ (hat(sigma)^2))
  ) +
  theme_bw(base_size = 12) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold", size = 9, lineheight = 1.2),
    legend.position  = "bottom",
    panel.grid.minor = element_blank()
  ) +
  guides(color = guide_legend(nrow = 1, override.aes = list(linewidth = 1.2)))

Visualization 2: % Change in Residual Variance

Horizontal bar chart with one column per paradigm. Bars show % change in variance (negative = variance fell post-exclusion = noise reduced = green; positive = variance rose = orange). AC types run along the y-axis within each panel.

Code

# Apply the same paradigm labels and factor order as Visualization 1
viz2_df <- resid_var_df |>
  mutate(
    # Flip sign: negative now means variance decreased (the "good" direction)
    pct_change = -pct_reduction,
    paradigm   = recode(paradigm, !!!paradigm_labels),
    paradigm   = factor(paradigm, levels = c(
      "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
      "Less-is-Better\n(Hsee 1998)",
      "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
      "Transaction Utility\n(Thaler 1999)",
      "Metaphoric Structuring\n(Boroditsky 2000)",
      "Scale Response\n(Schwarz et al. 1985)"
    )),
    ac_type = factor(ac_type, levels = rev(c(
      "Memory", "Obvious", "IMC", "English", "Self-report", "Straightline"
    )))
  )

ggplot(viz2_df,
       aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
  geom_col(width = 0.65, color = "white", linewidth = 0.25) +
  geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
  geom_text(
    aes(
      label = paste0(round(pct_change, 1), "%"),
      hjust = ifelse(pct_change < 0, 1.08, -0.08)
    ),
    size = 2.5, fontface = "bold"
  ) +
  facet_wrap(~ paradigm, ncol = 6) +
  scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
  scale_fill_manual(
    values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
    labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
    name   = NULL
  ) +
  labs(
    title    = "% Change in Residual Variance After Attention-Check Exclusions",
    subtitle = "Negative = variance fell post-exclusion (noise reduced); positive = variance rose.\nResidual variance from lm(DV ~ condition).",
    x        = "% change in σ²",
    y        = NULL
  ) +
  theme_bw(base_size = 11) +
  theme(
    strip.background = element_rect(fill = "grey92"),
    strip.text       = element_text(face = "bold", size = 8, lineheight = 1.2),
    legend.position  = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank()
  )

Visualization 3: Transaction Utility — Robustness to DV Transformation

The same % change in residual variance is shown for the Transaction Utility paradigm under three versions of the DV:

Raw — original DV as analysed above
Z-scored — $(x - \bar{x}) / s$. Because z-scoring is a linear transformation, the % change in σ² is mathematically identical to raw (the constant scaling cancels in the ratio). It appears here as a transparency check.
Rank-INT — rank-based inverse-normal transformation (Blom, 1958): $\Phi^{-1}\!\bigl((r_i - 0.5) / n\bigr)$. This is nonlinear: it stretches the tails and compresses the centre, so it can genuinely shift how much variance moves when inattentive responders are removed. If the pattern survives here, the conclusion is stronger.

If all three columns show the same pattern, attention-check performance for Transaction Utility is robust to scale assumptions.

Code

# ── Helper: rank inverse-normal (Blom) transformation ──────────────────────
rank_int <- function(x) {
  n <- sum(!is.na(x))
  qnorm((rank(x, ties.method = "average", na.last = "keep") - 0.5) / n)
}

# ── Add transformed DVs to every transaction dataframe ─────────────────────
trans_dfs <- list(
  Memory      = df_transaction_memory,
  Obvious     = df_transaction_obvious,
  IMC         = df_transaction_imc,
  English     = df_transaction_english,
  `Self-report`  = df_transaction_selfreport,
  Straightline   = df_transaction_straightline
)

trans_dfs <- lapply(trans_dfs, function(df) {
  df$dv_z   <- as.numeric(scale(df$dv))
  df$dv_int <- rank_int(df$dv)
  df
})

# ── Compute % change in residual variance for one DV column ────────────────
pct_change_resid <- function(df, dv_col, cond_col = "condition") {
  df_pre  <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
  df_post <- df_pre[df_pre$att_correct == 1, ]
  fmla    <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))
  s2_pre  <- summary(lm(fmla, data = df_pre))$sigma^2
  s2_post <- summary(lm(fmla, data = df_post))$sigma^2
  -100 * (s2_pre - s2_post) / s2_pre   # negative = variance decreased
}

# ── Build results across all AC types × 3 transformations ──────────────────
transform_results <- lapply(names(trans_dfs), function(ac) {
  df <- trans_dfs[[ac]]
  data.frame(
    ac_type    = ac,
    focal      = ac %in% c("Memory", "Obvious"),
    Raw        = pct_change_resid(df, "dv"),
    `Z-scored` = pct_change_resid(df, "dv_z"),
    `Rank-INT` = pct_change_resid(df, "dv_int"),
    check.names = FALSE
  )
}) |>
  bind_rows() |>
  pivot_longer(
    cols      = c("Raw", "Z-scored", "Rank-INT"),
    names_to  = "transformation",
    values_to = "pct_change"
  ) |>
  mutate(
    transformation = factor(transformation,
                            levels = c("Raw", "Z-scored", "Rank-INT")),
    ac_type = factor(ac_type,
                     levels = rev(c("Memory","Obvious","IMC",
                                    "English","Self-report","Straightline")))
  )

ggplot(transform_results,
       aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
  geom_col(width = 0.65, color = "white", linewidth = 0.25) +
  geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
  geom_text(
    aes(
      label = paste0(round(pct_change, 1), "%"),
      hjust = ifelse(pct_change < 0, 1.08, -0.08)
    ),
    size = 2.8, fontface = "bold"
  ) +
  facet_wrap(~ transformation, ncol = 3) +
  scale_fill_manual(
    values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
    labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
    name   = NULL
  ) +
  scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
  labs(
    title    = "Transaction Utility: % Change in Residual Variance by DV Transformation",
    subtitle = "Z-scored is identical to Raw by construction (linear transformation).\nDivergence in Rank-INT indicates the result depends on tail behaviour.",
    x        = "% change in σ²  (negative = variance fell)",
    y        = NULL
  ) +
  theme_bw(base_size = 11) +
  theme(
    strip.background  = element_rect(fill = "grey92"),
    strip.text        = element_text(face = "bold", size = 10),
    legend.position   = "bottom",
    panel.grid.minor  = element_blank(),
    panel.grid.major.y = element_blank()
  )

Visualization 4: % Retained vs % Noise Reduction (Scatter)

Does removing more people lead to more noise reduction? This scatter plots the percentage of respondents excluded (x-axis) against the percentage reduction in residual variance (y-axis), one point per paradigm × AC combination.

Code

ggplot(resid_var_df,
       aes(x = 100 - pct_retained, y = pct_reduction,
           color = paradigm, shape = ac_type, label = case)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey60") +
  geom_smooth(
    aes(group = 1), method = "lm", se = TRUE,
    color = "grey40", linewidth = 0.8, linetype = "solid", alpha = 0.15
  ) +
  geom_point(size = 4) +
  ggrepel::geom_text_repel(size = 2.6, max.overlaps = 20,
                            show.legend = FALSE) +
  scale_color_brewer(palette = "Dark2", name = "Paradigm") +
  scale_shape_manual(values = c("Memory" = 16, "Obvious" = 17),
                     name = "AC type") +
  labs(
    title    = "Exclusion Rate vs Noise Reduction",
    subtitle = "Each point = one paradigm × AC combination",
    x        = "% excluded (100 − % retained)",
    y        = "% reduction in residual σ²"
  ) +
  theme_bw(base_size = 12) +
  theme(
    legend.position  = "right",
    panel.grid.minor = element_blank()
  )