Code
library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)library(dplyr)
library(ggplot2)
library(knitr)
library(tidyr)Each dataset is loaded below. Variable and condition information per file:
| Dataset | DV column | Condition column | Condition values |
|---|---|---|---|
| minread_memory | dv |
condition |
1, 2 |
| minread_obvious | dv |
condition |
1, 2 |
| transaction_memory | dv |
condition |
1, 2 |
| transaction_obvious | dv |
condition |
1, 2 |
| longread_memory | dv_avg |
condition_opposing |
0, 1 |
| longread_obvious | dv_avg |
condition |
“before”, “after” |
| metaphor_memory | dv_friday |
condition |
1, 2 |
| metaphor_obvious | dv_friday |
condition |
1, 2 |
| social_memory | dv |
condition |
1, 2 |
| social_obvious | dv |
condition |
1, 2 |
| numeric_memory | dv |
condition |
0, 1 |
| numeric_obvious | dv |
condition |
0, 1 |
df_minread_memory <- read.csv("minread_memory.csv")
df_minread_obvious <- read.csv("minread_obvious.csv")
df_transaction_memory <- read.csv("transaction_memory.csv")
df_transaction_obvious<- read.csv("transaction_obvious.csv")
df_longread_memory <- read.csv("longread_memory.csv")
df_longread_obvious <- read.csv("longread_obvious.csv")
df_metaphor_memory <- read.csv("metaphor_memory_v2.csv")
df_metaphor_obvious <- read.csv("metaphor_obvious_v2.csv")
df_social_memory <- read.csv("social_memory.csv")
df_social_obvious <- read.csv("social_obvious.csv")
df_numeric_memory <- read.csv("numeric_memory.csv")
df_numeric_obvious <- read.csv("numeric_obvious.csv")
cat("Rows loaded:\n")Rows loaded:
cat(" minread_memory: ", nrow(df_minread_memory), "\n") minread_memory: 405
cat(" minread_obvious: ", nrow(df_minread_obvious), "\n") minread_obvious: 420
cat(" transaction_memory: ", nrow(df_transaction_memory), "\n") transaction_memory: 401
cat(" transaction_obvious:", nrow(df_transaction_obvious), "\n") transaction_obvious: 410
cat(" longread_memory: ", nrow(df_longread_memory), "\n") longread_memory: 614
cat(" longread_obvious: ", nrow(df_longread_obvious), "\n") longread_obvious: 597
cat(" metaphor_memory: ", nrow(df_metaphor_memory), "\n") metaphor_memory: 409
cat(" metaphor_obvious: ", nrow(df_metaphor_obvious), "\n") metaphor_obvious: 412
cat(" social_memory: ", nrow(df_social_memory), "\n") social_memory: 412
cat(" social_obvious: ", nrow(df_social_obvious), "\n") social_obvious: 420
cat(" numeric_memory: ", nrow(df_numeric_memory), "\n") numeric_memory: 414
cat(" numeric_obvious: ", nrow(df_numeric_obvious), "\n") numeric_obvious: 406
# Pooled SD (unpaired two-sample)
pooled_sd <- function(x1, x2) {
x1 <- x1[!is.na(x1)]
x2 <- x2[!is.na(x2)]
n1 <- length(x1); n2 <- length(x2)
sqrt(((n1 - 1) * sd(x1)^2 + (n2 - 1) * sd(x2)^2) / (n1 + n2 - 2))
}
# Compute means by condition, pre and post exclusions, and return a tidy table
compute_means <- function(data, dv_col, cond_col, cond_vals) {
v1 <- cond_vals[1]; v2 <- cond_vals[2]
get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]
pre_g1 <- get_group(data, v1)
pre_g2 <- get_group(data, v2)
post_g1 <- get_group(data[data$att_correct == 1, ], v1)
post_g2 <- get_group(data[data$att_correct == 1, ], v2)
data.frame(
sample = c("pre (full)", "pre (full)", "post (retained)", "post (retained)"),
condition = c(as.character(v1), as.character(v2),
as.character(v1), as.character(v2)),
n = c(sum(!is.na(pre_g1)), sum(!is.na(pre_g2)),
sum(!is.na(post_g1)), sum(!is.na(post_g2))),
mean_dv = c(mean(pre_g1, na.rm = TRUE), mean(pre_g2, na.rm = TRUE),
mean(post_g1, na.rm = TRUE), mean(post_g2, na.rm = TRUE)),
sd_dv = c(sd(pre_g1, na.rm = TRUE), sd(pre_g2, na.rm = TRUE),
sd(post_g1, na.rm = TRUE), sd(post_g2, na.rm = TRUE))
)
}
# Shapley decomposition of change in Cohen's d
compute_decomp <- function(data, dv_col = "dv", cond_col = "condition",
cond_vals = c(1, 2)) {
v1 <- cond_vals[1]; v2 <- cond_vals[2]
get_group <- function(df, cv) df[[dv_col]][df[[cond_col]] == cv]
pre_g1 <- get_group(data, v1)
pre_g2 <- get_group(data, v2)
post_g1 <- get_group(data[data$att_correct == 1, ], v1)
post_g2 <- get_group(data[data$att_correct == 1, ], v2)
mean_pre_1 <- mean(pre_g1, na.rm = TRUE)
mean_pre_2 <- mean(pre_g2, na.rm = TRUE)
mean_post_1 <- mean(post_g1, na.rm = TRUE)
mean_post_2 <- mean(post_g2, na.rm = TRUE)
sd_pooled_pre <- pooled_sd(pre_g1, pre_g2)
sd_pooled_post <- pooled_sd(post_g1, post_g2)
deltaM_pre <- mean_pre_1 - mean_pre_2
deltaM_post <- mean_post_1 - mean_post_2
d_pre <- deltaM_pre / sd_pooled_pre
d_post <- deltaM_post / sd_pooled_post
delta_d_total <- d_post - d_pre
# Path A: means change first, then SD
delta_d_means_A <- (deltaM_post / sd_pooled_pre) - (deltaM_pre / sd_pooled_pre)
delta_d_sd_A <- (deltaM_post / sd_pooled_post) - (deltaM_post / sd_pooled_pre)
# Path B: SD changes first, then means
delta_d_sd_B <- (deltaM_pre / sd_pooled_post) - (deltaM_pre / sd_pooled_pre)
delta_d_means_B <- (deltaM_post / sd_pooled_post) - (deltaM_pre / sd_pooled_post)
# Shapley averages
delta_d_means_shapley <- 0.5 * (delta_d_means_A + delta_d_means_B)
delta_d_sd_shapley <- 0.5 * (delta_d_sd_A + delta_d_sd_B)
pct_from_means <- if (abs(delta_d_total) < 1e-10) NA_real_ else
100 * delta_d_means_shapley / delta_d_total
pct_from_sd <- if (abs(delta_d_total) < 1e-10) NA_real_ else
100 * delta_d_sd_shapley / delta_d_total
c(
mean_pre_g1 = mean_pre_1,
mean_pre_g2 = mean_pre_2,
mean_post_g1 = mean_post_1,
mean_post_g2 = mean_post_2,
sd_pooled_pre = sd_pooled_pre,
sd_pooled_post = sd_pooled_post,
deltaM_pre = deltaM_pre,
deltaM_post = deltaM_post,
d_pre = d_pre,
d_post = d_post,
delta_d_total = delta_d_total,
delta_d_means_shapley = delta_d_means_shapley,
delta_d_sd_shapley = delta_d_sd_shapley,
pct_from_means = pct_from_means,
pct_from_sd = pct_from_sd,
abs_mean_minus_abs_sd = abs(delta_d_means_shapley) - abs(delta_d_sd_shapley)
)
}
# Bootstrap wrapper
bootstrap_decomp <- function(data, dv_col, cond_col, cond_vals, B = 5000, seed = 123) {
set.seed(seed)
main_est <- compute_decomp(data, dv_col, cond_col, cond_vals)
boot_mat <- replicate(B, {
boot_dat <- data[sample(nrow(data), nrow(data), replace = TRUE), ]
compute_decomp(boot_dat, dv_col, cond_col, cond_vals)
})
boot_df <- as.data.frame(t(boot_mat))
data.frame(
statistic = names(main_est),
estimate = main_est,
ci_lower = apply(boot_df, 2, quantile, probs = 0.025, na.rm = TRUE),
ci_upper = apply(boot_df, 2, quantile, probs = 0.975, na.rm = TRUE),
row.names = NULL
)
}The Shapley decomposition guarantees that delta_d_means_shapley + delta_d_sd_shapley = delta_d_total exactly, so pct_from_means + pct_from_sd = 100% always. However, the individual percentages can fall outside [0%, 100%] whenever the mean-shift and SD-change contributions have opposite signs — i.e., one mechanism amplifies Δd while the other dampens it.
Concrete example — Longread × Obvious: The total Δd ≈ +0.009. The mean-shift contribution alone would produce Δd ≈ +0.018 (means became more different post-exclusion), but the SD increase post-exclusion partially offsets this, contributing Δd ≈ −0.009. The two partially cancel to produce the observed net Δd. This yields pct_from_means ≈ +199% and pct_from_sd ≈ −99%. This is the mathematically correct decomposition: the two forces are working against each other, and neither alone would correctly summarise the change.
In these cases, the most interpretable quantities are the signed Shapley contributions (delta_d_means_shapley and delta_d_sd_shapley) rather than the percentages.
means_minread_memory <- compute_means(
df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_memory, digits = 4, caption = "Minread × Memory: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 223 | 0.6413 | 0.4807 |
| pre (full) | 2 | 182 | 0.1813 | 0.3863 |
| post (retained) | 1 | 71 | 0.6620 | 0.4764 |
| post (retained) | 2 | 81 | 0.2593 | 0.4410 |
decomp_minread_memory <- bootstrap_decomp(
df_minread_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_memory, digits = 4,
caption = "Minread × Memory: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 0.6413 | 0.5794 | 0.7054 |
| mean_pre_g2 | 0.1813 | 0.1271 | 0.2373 |
| mean_post_g1 | 0.6620 | 0.5500 | 0.7693 |
| mean_post_g2 | 0.2593 | 0.1667 | 0.3580 |
| sd_pooled_pre | 0.4408 | 0.4154 | 0.4600 |
| sd_pooled_post | 0.4578 | 0.4172 | 0.4837 |
| deltaM_pre | 0.4599 | 0.3763 | 0.5448 |
| deltaM_post | 0.4027 | 0.2528 | 0.5446 |
| d_pre | 1.0433 | 0.8219 | 1.3081 |
| d_post | 0.8796 | 0.5260 | 1.3008 |
| delta_d_total | -0.1637 | -0.4420 | 0.1475 |
| delta_d_means_shapley | -0.1274 | -0.3775 | 0.1171 |
| delta_d_sd_shapley | -0.0363 | -0.0774 | 0.0308 |
| pct_from_means | 77.8031 | 30.7086 | 119.9245 |
| pct_from_sd | 22.1969 | -19.9245 | 69.2914 |
| abs_mean_minus_abs_sd | 0.0911 | -0.0047 | 0.3151 |
means_minread_obvious <- compute_means(
df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_minread_obvious, digits = 4, caption = "Minread × Obvious: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 193 | 0.5855 | 0.4939 |
| pre (full) | 2 | 227 | 0.1674 | 0.3742 |
| post (retained) | 1 | 155 | 0.5097 | 0.5015 |
| post (retained) | 2 | 177 | 0.1921 | 0.3951 |
decomp_minread_obvious <- bootstrap_decomp(
df_minread_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_minread_obvious, digits = 4,
caption = "Minread × Obvious: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 0.5855 | 0.5153 | 0.6543 |
| mean_pre_g2 | 0.1674 | 0.1198 | 0.2181 |
| mean_post_g1 | 0.5097 | 0.4303 | 0.5886 |
| mean_post_g2 | 0.1921 | 0.1358 | 0.2514 |
| sd_pooled_pre | 0.4333 | 0.4086 | 0.4535 |
| sd_pooled_post | 0.4479 | 0.4225 | 0.4671 |
| deltaM_pre | 0.4181 | 0.3330 | 0.5029 |
| deltaM_post | 0.3176 | 0.2189 | 0.4164 |
| d_pre | 0.9649 | 0.7394 | 1.2219 |
| d_post | 0.7090 | 0.4757 | 0.9654 |
| delta_d_total | -0.2559 | -0.3530 | -0.1597 |
| delta_d_means_shapley | -0.2282 | -0.3193 | -0.1413 |
| delta_d_sd_shapley | -0.0277 | -0.0538 | -0.0066 |
| pct_from_means | 89.1787 | 79.9773 | 97.1815 |
| pct_from_sd | 10.8213 | 2.8185 | 20.0227 |
| abs_mean_minus_abs_sd | 0.2005 | 0.1172 | 0.2925 |
means_transaction_memory <- compute_means(
df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_memory, digits = 4, caption = "Transaction × Memory: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 200 | 19.9671 | 18.9687 |
| pre (full) | 2 | 201 | 19.6474 | 19.6386 |
| post (retained) | 1 | 131 | 14.9828 | 16.5126 |
| post (retained) | 2 | 134 | 14.2546 | 17.7483 |
decomp_transaction_memory <- bootstrap_decomp(
df_transaction_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_memory, digits = 4,
caption = "Transaction × Memory: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 19.9671 | 17.3748 | 22.6443 |
| mean_pre_g2 | 19.6474 | 17.0298 | 22.4260 |
| mean_post_g1 | 14.9828 | 12.1665 | 17.8683 |
| mean_post_g2 | 14.2546 | 11.3876 | 17.3991 |
| sd_pooled_pre | 19.3074 | 18.3667 | 20.0541 |
| sd_pooled_post | 17.1486 | 15.4296 | 18.5414 |
| deltaM_pre | 0.3197 | -3.4746 | 4.1064 |
| deltaM_post | 0.7282 | -3.3924 | 4.8904 |
| d_pre | 0.0166 | -0.1801 | 0.2146 |
| d_post | 0.0425 | -0.1989 | 0.2868 |
| delta_d_total | 0.0259 | -0.1389 | 0.2040 |
| delta_d_means_shapley | 0.0225 | -0.1369 | 0.1945 |
| delta_d_sd_shapley | 0.0034 | -0.0212 | 0.0323 |
| pct_from_means | 86.8135 | -79.6101 | 252.9230 |
| pct_from_sd | 13.1865 | -152.9230 | 179.6101 |
| abs_mean_minus_abs_sd | 0.0191 | -0.0115 | 0.1885 |
means_transaction_obvious <- compute_means(
df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_transaction_obvious, digits = 4, caption = "Transaction × Obvious: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 217 | 21.6877 | 20.1795 |
| pre (full) | 2 | 193 | 18.9179 | 19.7491 |
| post (retained) | 1 | 154 | 16.2938 | 17.8786 |
| post (retained) | 2 | 136 | 12.5329 | 16.7046 |
decomp_transaction_obvious <- bootstrap_decomp(
df_transaction_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_transaction_obvious, digits = 4,
caption = "Transaction × Obvious: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 21.6877 | 19.0777 | 24.3312 |
| mean_pre_g2 | 18.9179 | 16.2530 | 21.7719 |
| mean_post_g1 | 16.2938 | 13.5347 | 19.1846 |
| mean_post_g2 | 12.5329 | 9.9305 | 15.4650 |
| sd_pooled_pre | 19.9781 | 19.1196 | 20.6520 |
| sd_pooled_post | 17.3382 | 15.7319 | 18.6849 |
| deltaM_pre | 2.7699 | -1.0943 | 6.5195 |
| deltaM_post | 3.7608 | -0.3038 | 7.6647 |
| d_pre | 0.1386 | -0.0550 | 0.3296 |
| d_post | 0.2169 | -0.0182 | 0.4526 |
| delta_d_total | 0.0783 | -0.0803 | 0.2474 |
| delta_d_means_shapley | 0.0534 | -0.1007 | 0.2137 |
| delta_d_sd_shapley | 0.0249 | -0.0030 | 0.0612 |
| pct_from_means | 68.2024 | -205.1873 | 358.7921 |
| pct_from_sd | 31.7976 | -258.7921 | 305.1873 |
| abs_mean_minus_abs_sd | 0.0285 | -0.0330 | 0.1860 |
DV: dv_avg; condition column: condition_opposing (values 0, 1)
means_longread_memory <- compute_means(
df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(means_longread_memory, digits = 4, caption = "Longread × Memory: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 0 | 315 | 11.8048 | 3.0313 |
| pre (full) | 1 | 299 | 8.5485 | 5.0117 |
| post (retained) | 0 | 216 | 12.0347 | 2.8501 |
| post (retained) | 1 | 161 | 5.4441 | 4.4665 |
decomp_longread_memory <- bootstrap_decomp(
df_longread_memory, dv_col = "dv_avg", cond_col = "condition_opposing", cond_vals = c(0, 1)
)
kable(decomp_longread_memory, digits = 4,
caption = "Longread × Memory: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 11.8048 | 11.4670 | 12.1262 |
| mean_pre_g2 | 8.5485 | 7.9821 | 9.1197 |
| mean_post_g1 | 12.0347 | 11.6500 | 12.4000 |
| mean_post_g2 | 5.4441 | 4.7704 | 6.1587 |
| sd_pooled_pre | 4.1164 | 3.9207 | 4.2986 |
| sd_pooled_post | 3.6290 | 3.3539 | 3.8703 |
| deltaM_pre | 3.2563 | 2.5800 | 3.9043 |
| deltaM_post | 6.5906 | 5.7788 | 7.3486 |
| d_pre | 0.7910 | 0.6284 | 0.9590 |
| d_post | 1.8161 | 1.5249 | 2.1555 |
| delta_d_total | 1.0251 | 0.8018 | 1.3016 |
| delta_d_means_shapley | 0.8644 | 0.7064 | 1.0394 |
| delta_d_sd_shapley | 0.1607 | 0.0731 | 0.2872 |
| pct_from_means | 84.3268 | 77.0908 | 91.3514 |
| pct_from_sd | 15.6732 | 8.6486 | 22.9092 |
| abs_mean_minus_abs_sd | 0.7038 | 0.5815 | 0.8292 |
DV: dv_avg; condition column: condition (values “before”, “after”)
means_longread_obvious <- compute_means(
df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(means_longread_obvious, digits = 4, caption = "Longread × Obvious: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | before | 297 | 10.3502 | 4.3118 |
| pre (full) | after | 300 | 9.9417 | 4.4540 |
| post (retained) | before | 194 | 9.4716 | 4.7924 |
| post (retained) | after | 203 | 8.9828 | 4.7918 |
decomp_longread_obvious <- bootstrap_decomp(
df_longread_obvious, dv_col = "dv_avg", cond_col = "condition", cond_vals = c("before", "after")
)
kable(decomp_longread_obvious, digits = 4,
caption = "Longread × Obvious: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 10.3502 | 9.8482 | 10.8389 |
| mean_pre_g2 | 9.9417 | 9.4228 | 10.4365 |
| mean_post_g1 | 9.4716 | 8.7969 | 10.1529 |
| mean_post_g2 | 8.9828 | 8.3131 | 9.6533 |
| sd_pooled_pre | 4.3839 | 4.1522 | 4.5897 |
| sd_pooled_post | 4.7921 | 4.5554 | 4.9915 |
| deltaM_pre | 0.4085 | -0.3049 | 1.1033 |
| deltaM_post | 0.4889 | -0.4295 | 1.4478 |
| d_pre | 0.0932 | -0.0701 | 0.2531 |
| d_post | 0.1020 | -0.0895 | 0.3064 |
| delta_d_total | 0.0088 | -0.0842 | 0.1049 |
| delta_d_means_shapley | 0.0176 | -0.0846 | 0.1210 |
| delta_d_sd_shapley | -0.0087 | -0.0247 | 0.0067 |
| pct_from_means | 198.6613 | -164.6657 | 434.3307 |
| pct_from_sd | -98.6613 | -334.3307 | 264.6657 |
| abs_mean_minus_abs_sd | 0.0088 | -0.0095 | 0.1089 |
Source: metaphor_memory_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)
means_metaphor_memory <- compute_means(
df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_memory, digits = 4, caption = "Metaphor × Memory: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 211 | 0.5355 | 0.4999 |
| pre (full) | 2 | 198 | 0.5455 | 0.4992 |
| post (retained) | 1 | 115 | 0.5913 | 0.4937 |
| post (retained) | 2 | 154 | 0.5779 | 0.4955 |
decomp_metaphor_memory <- bootstrap_decomp(
df_metaphor_memory, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_memory, digits = 4,
caption = "Metaphor × Memory: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 0.5355 | 0.4686 | 0.6042 |
| mean_pre_g2 | 0.5455 | 0.4778 | 0.6134 |
| mean_post_g1 | 0.5913 | 0.5000 | 0.6803 |
| mean_post_g2 | 0.5779 | 0.5000 | 0.6568 |
| sd_pooled_pre | 0.4996 | 0.4924 | 0.5011 |
| sd_pooled_post | 0.4948 | 0.4799 | 0.5008 |
| deltaM_pre | -0.0099 | -0.1054 | 0.0859 |
| deltaM_post | 0.0134 | -0.1068 | 0.1278 |
| d_pre | -0.0198 | -0.2123 | 0.1723 |
| d_post | 0.0270 | -0.2181 | 0.2612 |
| delta_d_total | 0.0469 | -0.1075 | 0.1888 |
| delta_d_means_shapley | 0.0469 | -0.1061 | 0.1873 |
| delta_d_sd_shapley | 0.0000 | -0.0032 | 0.0036 |
| pct_from_means | 99.9278 | 84.9999 | 114.3187 |
| pct_from_sd | 0.0722 | -14.3187 | 15.0001 |
| abs_mean_minus_abs_sd | 0.0468 | 0.0020 | 0.1858 |
Source: metaphor_obvious_v2.csv; DV: dv_friday; condition column: condition (values 1, 2)
means_metaphor_obvious <- compute_means(
df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(means_metaphor_obvious, digits = 4, caption = "Metaphor × Obvious: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 1 | 200 | 0.5300 | 0.5004 |
| pre (full) | 2 | 212 | 0.5142 | 0.5010 |
| post (retained) | 1 | 147 | 0.5850 | 0.4944 |
| post (retained) | 2 | 152 | 0.5592 | 0.4981 |
decomp_metaphor_obvious <- bootstrap_decomp(
df_metaphor_obvious, dv_col = "dv_friday", cond_col = "condition", cond_vals = c(1, 2)
)
kable(decomp_metaphor_obvious, digits = 4,
caption = "Metaphor × Obvious: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 0.5300 | 0.4605 | 0.5990 |
| mean_pre_g2 | 0.5142 | 0.4455 | 0.5806 |
| mean_post_g1 | 0.5850 | 0.5064 | 0.6645 |
| mean_post_g2 | 0.5592 | 0.4788 | 0.6369 |
| sd_pooled_pre | 0.5007 | 0.4949 | 0.5012 |
| sd_pooled_post | 0.4963 | 0.4835 | 0.5010 |
| deltaM_pre | 0.0158 | -0.0810 | 0.1133 |
| deltaM_post | 0.0258 | -0.0862 | 0.1376 |
| d_pre | 0.0317 | -0.1623 | 0.2285 |
| d_post | 0.0520 | -0.1750 | 0.2809 |
| delta_d_total | 0.0204 | -0.1006 | 0.1415 |
| delta_d_means_shapley | 0.0200 | -0.0999 | 0.1406 |
| delta_d_sd_shapley | 0.0004 | -0.0019 | 0.0040 |
| pct_from_means | 98.1977 | 76.3625 | 117.7665 |
| pct_from_sd | 1.8023 | -17.7665 | 23.6375 |
| abs_mean_minus_abs_sd | 0.0196 | 0.0011 | 0.1425 |
means_numeric_memory <- compute_means(
df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_memory, digits = 4, caption = "Numeric × Memory: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 0 | 200 | 3.2942 | 4.1369 |
| pre (full) | 1 | 214 | 2.1030 | 1.4019 |
| post (retained) | 0 | 167 | 3.5724 | 4.4706 |
| post (retained) | 1 | 180 | 2.1195 | 1.5123 |
decomp_numeric_memory <- bootstrap_decomp(
df_numeric_memory, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_memory, digits = 4,
caption = "Numeric × Memory: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 3.2942 | 2.7838 | 3.8940 |
| mean_pre_g2 | 2.1030 | 1.9256 | 2.3054 |
| mean_post_g1 | 3.5724 | 2.9686 | 4.2717 |
| mean_post_g2 | 2.1195 | 1.9114 | 2.3547 |
| sd_pooled_pre | 3.0467 | 1.8926 | 4.1435 |
| sd_pooled_post | 3.2868 | 2.0291 | 4.4658 |
| deltaM_pre | 1.1912 | 0.6361 | 1.8404 |
| deltaM_post | 1.4529 | 0.7949 | 2.2163 |
| d_pre | 0.3910 | 0.2665 | 0.5478 |
| d_post | 0.4420 | 0.3103 | 0.6182 |
| delta_d_total | 0.0510 | 0.0211 | 0.0931 |
| delta_d_means_shapley | 0.0827 | 0.0463 | 0.1329 |
| delta_d_sd_shapley | -0.0317 | -0.0437 | -0.0206 |
| pct_from_means | 162.1223 | 134.1525 | 239.3141 |
| pct_from_sd | -62.1223 | -139.3141 | -34.1525 |
| abs_mean_minus_abs_sd | 0.0510 | 0.0211 | 0.0931 |
means_numeric_obvious <- compute_means(
df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(means_numeric_obvious, digits = 4, caption = "Numeric × Obvious: Means by condition, pre and post exclusions")| sample | condition | n | mean_dv | sd_dv |
|---|---|---|---|---|
| pre (full) | 0 | 211 | 2.7723 | 1.9897 |
| pre (full) | 1 | 195 | 2.0531 | 1.0984 |
| post (retained) | 0 | 165 | 2.9171 | 2.1169 |
| post (retained) | 1 | 160 | 2.0526 | 1.1462 |
decomp_numeric_obvious <- bootstrap_decomp(
df_numeric_obvious, dv_col = "dv", cond_col = "condition", cond_vals = c(0, 1)
)
kable(decomp_numeric_obvious, digits = 4,
caption = "Numeric × Obvious: Shapley decomposition with 95% bootstrap CIs")| statistic | estimate | ci_lower | ci_upper |
|---|---|---|---|
| mean_pre_g1 | 2.7723 | 2.5082 | 3.0516 |
| mean_pre_g2 | 2.0531 | 1.9092 | 2.2110 |
| mean_post_g1 | 2.9171 | 2.6034 | 3.2554 |
| mean_post_g2 | 2.0526 | 1.8833 | 2.2381 |
| sd_pooled_pre | 1.6240 | 1.3776 | 1.8539 |
| sd_pooled_post | 1.7094 | 1.4395 | 1.9609 |
| deltaM_pre | 0.7192 | 0.4185 | 1.0296 |
| deltaM_post | 0.8645 | 0.5065 | 1.2409 |
| d_pre | 0.4429 | 0.2688 | 0.6105 |
| d_post | 0.5058 | 0.3097 | 0.7024 |
| delta_d_total | 0.0629 | -0.0039 | 0.1381 |
| delta_d_means_shapley | 0.0873 | 0.0110 | 0.1684 |
| delta_d_sd_shapley | -0.0244 | -0.0440 | -0.0021 |
| pct_from_means | 138.7423 | 49.4049 | 317.4241 |
| pct_from_sd | -38.7423 | -217.4241 | 50.5951 |
| abs_mean_minus_abs_sd | 0.0629 | -0.0001 | 0.1381 |
# Collect key decomposition estimates into one table
decomp_list <- list(
"Minread × Memory" = decomp_minread_memory,
"Minread × Obvious" = decomp_minread_obvious,
"Transaction × Memory" = decomp_transaction_memory,
"Transaction × Obvious" = decomp_transaction_obvious,
"Longread × Memory" = decomp_longread_memory,
"Longread × Obvious" = decomp_longread_obvious,
"Metaphor × Memory" = decomp_metaphor_memory,
"Metaphor × Obvious" = decomp_metaphor_obvious,
"Social × Memory" = decomp_social_memory,
"Social × Obvious" = decomp_social_obvious,
"Numeric × Memory" = decomp_numeric_memory,
"Numeric × Obvious" = decomp_numeric_obvious
)
key_stats <- c("d_pre", "d_post", "delta_d_total",
"delta_d_means_shapley", "delta_d_sd_shapley",
"pct_from_means", "pct_from_sd")
summary_df <- lapply(names(decomp_list), function(nm) {
d <- decomp_list[[nm]]
row <- d[d$statistic %in% key_stats, c("statistic", "estimate")]
row$case <- nm
row
}) |> bind_rows() |>
pivot_wider(names_from = statistic, values_from = estimate) |>
select(case, all_of(key_stats))
kable(summary_df, digits = 3,
caption = "Summary: Shapley decomposition across all 12 paradigm × attention-check cases",
col.names = c("Case", "d (pre)", "d (post)", "Δd total",
"Δd means (Shapley)", "Δd SD (Shapley)",
"% from means", "% from SD"))| Case | d (pre) | d (post) | Δd total | Δd means (Shapley) | Δd SD (Shapley) | % from means | % from SD |
|---|---|---|---|---|---|---|---|
| Minread × Memory | 1.043 | 0.880 | -0.164 | -0.127 | -0.036 | 77.803 | 22.197 |
| Minread × Obvious | 0.965 | 0.709 | -0.256 | -0.228 | -0.028 | 89.179 | 10.821 |
| Transaction × Memory | 0.017 | 0.042 | 0.026 | 0.022 | 0.003 | 86.814 | 13.186 |
| Transaction × Obvious | 0.139 | 0.217 | 0.078 | 0.053 | 0.025 | 68.202 | 31.798 |
| Longread × Memory | 0.791 | 1.816 | 1.025 | 0.864 | 0.161 | 84.327 | 15.673 |
| Longread × Obvious | 0.093 | 0.102 | 0.009 | 0.018 | -0.009 | 198.661 | -98.661 |
| Metaphor × Memory | -0.020 | 0.027 | 0.047 | 0.047 | 0.000 | 99.928 | 0.072 |
| Metaphor × Obvious | 0.032 | 0.052 | 0.020 | 0.020 | 0.000 | 98.198 | 1.802 |
| Social × Memory | 0.759 | 1.038 | 0.280 | 0.280 | 0.000 | 100.019 | -0.019 |
| Social × Obvious | 0.636 | 0.826 | 0.189 | 0.190 | -0.001 | 100.544 | -0.544 |
| Numeric × Memory | 0.391 | 0.442 | 0.051 | 0.083 | -0.032 | 162.122 | -62.122 |
| Numeric × Obvious | 0.443 | 0.506 | 0.063 | 0.087 | -0.024 | 138.742 | -38.742 |
library(patchwork)
# Build long-form data for stacked bar
plot_df <- summary_df |>
mutate(
paradigm = sub(" × .*", "", case),
ac_type = sub(".* × ", "", case),
means_contrib = delta_d_means_shapley,
sd_contrib = delta_d_sd_shapley
) |>
select(case, paradigm, ac_type, d_pre, d_post, delta_d_total,
means_contrib, sd_contrib) |>
pivot_longer(
cols = c(means_contrib, sd_contrib),
names_to = "component",
values_to = "contribution"
) |>
mutate(
component = recode(component,
"means_contrib" = "Mean shift",
"sd_contrib" = "SD change"
),
case = factor(case, levels = unique(summary_df$case))
)
# Shared layer builder so both sub-plots stay visually identical
make_bars <- function(data) {
# Anchor labels to the outer edge of the bar stack (not delta_d_total).
# When components have opposite signs, delta_d_total falls *inside* the
# taller bar, so placing text there buries it. Instead we use the top of
# the positive stack (for net-positive bars) or the bottom of the negative
# stack (for net-negative bars).
label_df <- data |>
group_by(case, paradigm, ac_type, delta_d_total) |>
summarise(
y_top = sum(contribution[contribution > 0], na.rm = TRUE),
y_bot = sum(contribution[contribution < 0], na.rm = TRUE),
.groups = "drop"
) |>
mutate(
y_label = ifelse(delta_d_total >= 0, y_top, y_bot),
vjust_val = ifelse(delta_d_total >= 0, -0.4, 1.2)
)
ggplot(data, aes(x = ac_type, y = contribution, fill = component)) +
geom_col(position = "stack", width = 0.65, color = "white", linewidth = 0.3) +
geom_hline(yintercept = 0, linewidth = 0.4, color = "grey30") +
geom_text(
data = label_df,
aes(x = ac_type, y = y_label,
label = sprintf("Δd=%.2f", delta_d_total),
vjust = vjust_val),
inherit.aes = FALSE,
size = 2.8, fontface = "bold"
) +
scale_fill_manual(
values = c("Mean shift" = "#2166ac", "SD change" = "#d6604d"),
name = "Contribution"
) +
labs(x = "Attention check type",
y = expression(Delta * italic(d) ~ "(Shapley contribution)")) +
theme_bw(base_size = 12) +
theme(
strip.background = element_rect(fill = "grey92"),
strip.text = element_text(face = "bold"),
legend.position = "bottom",
panel.grid.minor = element_blank()
)
}
# --- Sub-plot A: the 5 non-Longread paradigms, y from -0.32 to 0.4 ---
p_other <- make_bars(plot_df |> filter(paradigm != "Longread")) +
facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
coord_cartesian(ylim = c(-0.32, 0.4))
# --- Sub-plot B: Longread alone, free y scale ---
p_long <- make_bars(plot_df |> filter(paradigm == "Longread")) +
facet_wrap(~ paradigm, nrow = 2, scales = "free_x") +
theme(legend.position = "none") # legend carried by p_other
# Combine: Longread panel on the left (narrow), others on the right (wide)
p_long + p_other +
plot_layout(widths = c(1, 5), guides = "collect") +
plot_annotation(
title = "Shapley Decomposition of Change in Cohen's d",
subtitle = "Each bar = Δd (post − pre); decomposed into mean-shift and SD-change contributions.\nLongread y-axis is free; all other paradigms share y ≤ 0.4.",
theme = theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(size = 10, color = "grey30"),
legend.position = "bottom"
)
)A complementary way to ask whether attention-check exclusions reduce noise is to regress the DV on the condition assignment and examine how much residual variance remains before and after exclusions. The residual variance is the variance left over after accounting for the experimental manipulation — it is a direct measure of within-condition noise. If ACs remove inattentive respondents who add random noise, residual variance should fall post-exclusion.
Method: For each dataset, fit lm(dv ~ factor(condition)) on the full sample (pre) and on the retained subsample (att_correct == 1, post). Residual variance is extracted as \(\hat{\sigma}^2\) (the mean squared error from summary(lm)$sigma^2). We then compute the absolute and percentage reduction.
# ── Load the 24 additional AC-type files ─────────────────────────────────────
df_minread_imc <- read.csv("minread_imc.csv")
df_minread_english <- read.csv("minread_english.csv")
df_minread_selfreport <- read.csv("minread_selfreport.csv")
df_minread_straightline<- read.csv("minread_straightline.csv")
df_transaction_imc <- read.csv("transaction_imc.csv")
df_transaction_english <- read.csv("transaction_english.csv")
df_transaction_selfreport <- read.csv("transaction_selfreport.csv")
df_transaction_straightline <- read.csv("transaction_straightline.csv")
df_longread_imc <- read.csv("longread_imc.csv")
df_longread_english <- read.csv("longread_english.csv")
df_longread_selfreport <- read.csv("longread_selfreport.csv")
df_longread_straightline <- read.csv("longread_straightline.csv")
df_metaphor_imc <- read.csv("metaphor_imc_v2.csv")
df_metaphor_english <- read.csv("metaphor_english_v2.csv")
df_metaphor_selfreport <- read.csv("metaphor_selfreport_v2.csv")
df_metaphor_straightline <- read.csv("metaphor_straightline_v2.csv")
df_social_imc <- read.csv("social_imc.csv")
df_social_english <- read.csv("social_english.csv")
df_social_selfreport <- read.csv("social_selfreport.csv")
df_social_straightline <- read.csv("social_straightline.csv")
df_numeric_imc <- read.csv("numeric_imc.csv")
df_numeric_english <- read.csv("numeric_english.csv")
df_numeric_selfreport <- read.csv("numeric_selfreport.csv")
df_numeric_straightline <- read.csv("numeric_straightline.csv")
# ── Dataset config ─────────────────────────────────────────────────────────
# focal = TRUE → Memory / Obvious (plotted in colour)
# focal = FALSE → the 4 background AC types (plotted in grey)
datasets_cfg <- list(
# Minread
list(label="Minread × Memory", df=df_minread_memory,
dv="dv", cond="condition", focal=TRUE),
list(label="Minread × Obvious", df=df_minread_obvious,
dv="dv", cond="condition", focal=TRUE),
list(label="Minread × IMC", df=df_minread_imc,
dv="dv", cond="condition", focal=FALSE),
list(label="Minread × English", df=df_minread_english,
dv="dv", cond="condition", focal=FALSE),
list(label="Minread × Self-report", df=df_minread_selfreport,
dv="dv", cond="condition", focal=FALSE),
list(label="Minread × Straightline", df=df_minread_straightline,
dv="dv", cond="condition", focal=FALSE),
# Transaction
list(label="Transaction × Memory", df=df_transaction_memory,
dv="dv", cond="condition", focal=TRUE),
list(label="Transaction × Obvious", df=df_transaction_obvious,
dv="dv", cond="condition", focal=TRUE),
list(label="Transaction × IMC", df=df_transaction_imc,
dv="dv", cond="condition", focal=FALSE),
list(label="Transaction × English", df=df_transaction_english,
dv="dv", cond="condition", focal=FALSE),
list(label="Transaction × Self-report", df=df_transaction_selfreport,
dv="dv", cond="condition", focal=FALSE),
list(label="Transaction × Straightline", df=df_transaction_straightline,
dv="dv", cond="condition", focal=FALSE),
# Longread
list(label="Longread × Memory", df=df_longread_memory,
dv="dv_avg", cond="condition_opposing", focal=TRUE),
list(label="Longread × Obvious", df=df_longread_obvious,
dv="dv_avg", cond="condition", focal=TRUE),
list(label="Longread × IMC", df=df_longread_imc,
dv="dv_avg", cond="condition", focal=FALSE),
list(label="Longread × English", df=df_longread_english,
dv="dv_avg", cond="condition", focal=FALSE),
list(label="Longread × Self-report", df=df_longread_selfreport,
dv="dv_avg", cond="condition", focal=FALSE),
list(label="Longread × Straightline", df=df_longread_straightline,
dv="dv_avg", cond="condition", focal=FALSE),
# Metaphor
list(label="Metaphor × Memory", df=df_metaphor_memory,
dv="dv_friday", cond="condition", focal=TRUE),
list(label="Metaphor × Obvious", df=df_metaphor_obvious,
dv="dv_friday", cond="condition", focal=TRUE),
list(label="Metaphor × IMC", df=df_metaphor_imc,
dv="dv_friday", cond="condition", focal=FALSE),
list(label="Metaphor × English", df=df_metaphor_english,
dv="dv_friday", cond="condition", focal=FALSE),
list(label="Metaphor × Self-report", df=df_metaphor_selfreport,
dv="dv_friday", cond="condition", focal=FALSE),
list(label="Metaphor × Straightline", df=df_metaphor_straightline,
dv="dv_friday", cond="condition", focal=FALSE),
# Social
list(label="Social × Memory", df=df_social_memory,
dv="dv", cond="condition", focal=TRUE),
list(label="Social × Obvious", df=df_social_obvious,
dv="dv", cond="condition", focal=TRUE),
list(label="Social × IMC", df=df_social_imc,
dv="dv", cond="condition", focal=FALSE),
list(label="Social × English", df=df_social_english,
dv="dv", cond="condition", focal=FALSE),
list(label="Social × Self-report", df=df_social_selfreport,
dv="dv", cond="condition", focal=FALSE),
list(label="Social × Straightline", df=df_social_straightline,
dv="dv", cond="condition", focal=FALSE),
# Numeric
list(label="Numeric × Memory", df=df_numeric_memory,
dv="dv", cond="condition", focal=TRUE),
list(label="Numeric × Obvious", df=df_numeric_obvious,
dv="dv", cond="condition", focal=TRUE),
list(label="Numeric × IMC", df=df_numeric_imc,
dv="dv", cond="condition", focal=FALSE),
list(label="Numeric × English", df=df_numeric_english,
dv="dv", cond="condition", focal=FALSE),
list(label="Numeric × Self-report", df=df_numeric_selfreport,
dv="dv", cond="condition", focal=FALSE),
list(label="Numeric × Straightline", df=df_numeric_straightline,
dv="dv", cond="condition", focal=FALSE)
)
# ── Fit function ────────────────────────────────────────────────────────────
fit_resid_var <- function(cfg) {
df <- cfg$df
dv_col <- cfg$dv
cond_col <- cfg$cond
df_pre <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
df_post <- df_pre[df_pre$att_correct == 1, ]
fmla <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))
fit_pre <- lm(fmla, data = df_pre)
fit_post <- lm(fmla, data = df_post)
sigma2_pre <- summary(fit_pre)$sigma^2
sigma2_post <- summary(fit_post)$sigma^2
data.frame(
case = cfg$label,
paradigm = sub(" × .*", "", cfg$label),
ac_type = sub(".* × ", "", cfg$label),
focal = cfg$focal,
n_pre = nrow(df_pre),
n_post = nrow(df_post),
pct_retained = round(100 * nrow(df_post) / nrow(df_pre), 1),
resid_var_pre = sigma2_pre,
resid_var_post = sigma2_post,
resid_sd_pre = sqrt(sigma2_pre),
resid_sd_post = sqrt(sigma2_post),
abs_reduction = sigma2_pre - sigma2_post,
pct_reduction = 100 * (sigma2_pre - sigma2_post) / sigma2_pre
)
}
resid_var_df <- lapply(datasets_cfg, fit_resid_var) |> bind_rows()
resid_var_focal <- resid_var_df |> filter(focal)
resid_var_bg <- resid_var_df |> filter(!focal)# All 36 cases, sorted by paradigm then AC type
kable(
resid_var_df |>
arrange(paradigm, ac_type) |>
select(case, n_pre, n_post, pct_retained,
resid_var_pre, resid_var_post,
abs_reduction, pct_reduction),
digits = c(0, 0, 0, 1, 4, 4, 4, 2),
caption = "Residual variance (σ²) from lm(DV ~ condition) before and after attention-check exclusions, all 6 AC types × 6 paradigms. A positive % reduction indicates that exclusions removed noise.",
col.names = c("Case", "N (pre)", "N (post)", "% retained",
"σ² pre", "σ² post",
"Δσ² (abs)", "Δσ² (%)")
)| Case | N (pre) | N (post) | % retained | σ² pre | σ² post | Δσ² (abs) | Δσ² (%) |
|---|---|---|---|---|---|---|---|
| Longread × English | 582 | 408 | 70.1 | 18.2423 | 18.9348 | -0.6925 | -3.80 |
| Longread × IMC | 609 | 538 | 88.3 | 18.1459 | 19.0699 | -0.9240 | -5.09 |
| Longread × Memory | 614 | 377 | 61.4 | 16.9449 | 13.1693 | 3.7756 | 22.28 |
| Longread × Obvious | 597 | 397 | 66.5 | 19.2182 | 22.9640 | -3.7458 | -19.49 |
| Longread × Self-report | 599 | 593 | 99.0 | 19.4092 | 19.3443 | 0.0649 | 0.33 |
| Longread × Straightline | 595 | 537 | 90.3 | 19.7833 | 20.4448 | -0.6615 | -3.34 |
| Metaphor × English | 399 | 303 | 75.9 | 0.2424 | 0.2376 | 0.0048 | 1.98 |
| Metaphor × IMC | 406 | 355 | 87.4 | 0.2508 | 0.2500 | 0.0007 | 0.29 |
| Metaphor × Memory | 409 | 269 | 65.8 | 0.2496 | 0.2448 | 0.0048 | 1.92 |
| Metaphor × Obvious | 412 | 299 | 72.6 | 0.2507 | 0.2463 | 0.0044 | 1.74 |
| Metaphor × Self-report | 407 | 401 | 98.5 | 0.2456 | 0.2449 | 0.0007 | 0.27 |
| Metaphor × Straightline | 406 | 374 | 92.1 | 0.2502 | 0.2496 | 0.0006 | 0.24 |
| Minread × English | 407 | 300 | 73.7 | 0.1943 | 0.1953 | -0.0009 | -0.48 |
| Minread × IMC | 417 | 378 | 90.6 | 0.1978 | 0.2021 | -0.0043 | -2.20 |
| Minread × Memory | 405 | 152 | 37.5 | 0.1943 | 0.2096 | -0.0153 | -7.86 |
| Minread × Obvious | 420 | 332 | 79.0 | 0.1877 | 0.2006 | -0.0129 | -6.86 |
| Minread × Self-report | 408 | 406 | 99.5 | 0.2082 | 0.2087 | -0.0005 | -0.26 |
| Minread × Straightline | 405 | 373 | 92.1 | 0.1692 | 0.1708 | -0.0016 | -0.94 |
| Numeric × English | 393 | 307 | 78.1 | 1.3838 | 1.5281 | -0.1443 | -10.43 |
| Numeric × IMC | 402 | 361 | 89.8 | 1.4909 | 1.5428 | -0.0519 | -3.48 |
| Numeric × Memory | 414 | 347 | 83.8 | 9.2822 | 10.8033 | -1.5211 | -16.39 |
| Numeric × Obvious | 406 | 325 | 80.0 | 2.6372 | 2.9219 | -0.2847 | -10.80 |
| Numeric × Self-report | 405 | 396 | 97.8 | 3.7992 | 3.8668 | -0.0676 | -1.78 |
| Numeric × Straightline | 407 | 388 | 95.3 | 2.8046 | 2.7681 | 0.0365 | 1.30 |
| Social × English | 411 | 295 | 71.8 | 1.3756 | 1.4215 | -0.0459 | -3.33 |
| Social × IMC | 422 | 373 | 88.4 | 1.2809 | 1.3376 | -0.0567 | -4.43 |
| Social × Memory | 411 | 320 | 77.9 | 1.3283 | 1.3285 | -0.0002 | -0.01 |
| Social × Obvious | 420 | 323 | 76.9 | 1.5067 | 1.5110 | -0.0042 | -0.28 |
| Social × Self-report | 401 | 392 | 97.8 | 1.3443 | 1.3499 | -0.0056 | -0.42 |
| Social × Straightline | 414 | 396 | 95.7 | 1.4798 | 1.5066 | -0.0267 | -1.81 |
| Transaction × English | 409 | 308 | 75.3 | 335.6572 | 299.4908 | 36.1663 | 10.77 |
| Transaction × IMC | 409 | 366 | 89.5 | 349.7239 | 307.3752 | 42.3487 | 12.11 |
| Transaction × Memory | 401 | 265 | 66.1 | 372.7749 | 294.0758 | 78.6991 | 21.11 |
| Transaction × Obvious | 410 | 290 | 70.7 | 399.1257 | 300.6138 | 98.5120 | 24.68 |
| Transaction × Self-report | 410 | 403 | 98.3 | 397.8065 | 396.5318 | 1.2747 | 0.32 |
| Transaction × Straightline | 419 | 381 | 90.9 | 350.8063 | 332.3446 | 18.4616 | 5.26 |
Each line connects the pre- and post-exclusion residual variance for the same dataset. A downward-sloping line means exclusions reduced noise.
# Colour palette: Memory + Obvious in colour; 4 background ACs in
# descending shades of grey (darker = more "serious" check)
ac_colors <- c(
"Memory" = "#1f78b4", # blue
"Obvious" = "#9e3fa8", # purple
"IMC" = "#252525", # near-black grey
"English" = "#636363", # dark grey
"Self-report" = "#969696", # mid grey
"Straightline"= "#bdbdbd" # light grey
)
# Line thickness: focal lines heavier so they read over the grey
ac_linewidth <- c(
"Memory" = 1.2,
"Obvious" = 1.2,
"IMC" = 0.65,
"English" = 0.65,
"Self-report" = 0.65,
"Straightline"= 0.65
)
ac_pointsize <- c(
"Memory" = 3.0,
"Obvious" = 3.0,
"IMC" = 2.0,
"English" = 2.0,
"Self-report" = 2.0,
"Straightline"= 2.0
)
# Pivot all 36 cases to long form
resid_long_all <- resid_var_df |>
select(case, paradigm, ac_type, focal, resid_var_pre, resid_var_post) |>
pivot_longer(
cols = c(resid_var_pre, resid_var_post),
names_to = "sample",
values_to = "resid_var"
) |>
mutate(
sample = recode(sample,
"resid_var_pre" = "Pre-exclusion",
"resid_var_post" = "Post-exclusion"
),
sample = factor(sample, levels = c("Pre-exclusion", "Post-exclusion")),
ac_type = factor(ac_type, levels = names(ac_colors))
)
# ── Facet label mapping: title + citation on second line ──────────────────
# Uses plain \n so no extra packages are needed.
# If ggtext is available (requires litedown: install.packages("litedown")),
# swap element_text() for element_markdown() below and use the HTML version.
paradigm_labels <- c(
"Minread" = "Scale Response\n(Schwarz et al. 1985)",
"Transaction" = "Transaction Utility\n(Thaler 1999)",
"Longread" = "Correspondence Bias\n(Miyamoto & Kitayama 2002)",
"Metaphor" = "Metaphoric Structuring\n(Boroditsky 2000)",
"Social" = "Less-is-Better\n(Hsee 1998)",
"Numeric" = "Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)"
)
resid_long_all <- resid_long_all |>
mutate(
paradigm = recode(paradigm, !!!paradigm_labels),
paradigm = factor(paradigm, levels = c(
"Correspondence Bias\n(Miyamoto & Kitayama 2002)",
"Less-is-Better\n(Hsee 1998)",
"Metaphoric Structuring\n(Boroditsky 2000)",
"Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
"Transaction Utility\n(Thaler 1999)",
"Scale Response\n(Schwarz et al. 1985)"
))
)
# Draw background (grey) ACs first, then focal (coloured) ACs on top
resid_long_bg <- resid_long_all |> filter(!focal)
resid_long_focal <- resid_long_all |> filter(focal)
ggplot() +
# ── Grey background ACs ────────────────────────────────────────────────
geom_line(
data = resid_long_bg,
aes(x = sample, y = resid_var, color = ac_type,
linewidth = ac_type, group = case)
) +
geom_point(
data = resid_long_bg,
aes(x = sample, y = resid_var, color = ac_type, group = case,
size = ac_type)
) +
# ── Coloured focal ACs ─────────────────────────────────────────────────
geom_line(
data = resid_long_focal,
aes(x = sample, y = resid_var, color = ac_type,
linewidth = ac_type, group = case)
) +
geom_point(
data = resid_long_focal,
aes(x = sample, y = resid_var, color = ac_type,
size = ac_type, group = case)
) +
facet_wrap(~ paradigm, nrow = 2, scales = "free_y") +
scale_color_manual(values = ac_colors, name = "AC type") +
scale_linewidth_manual(values = ac_linewidth, guide = "none") +
scale_size_manual(values = ac_pointsize, guide = "none") +
# Force 0 into each panel's y range without clipping data.
expand_limits(y = 0) +
scale_y_continuous(expand = expansion(mult = c(0, 0.08))) +
scale_x_discrete(expand = expansion(add = c(0.3, 0.3))) +
labs(
title = "Residual Variance Before and After Attention-Check Exclusions",
subtitle = "Y-axis starts at 0 so slope steepness reflects true proportional change.\nMemory & Obvious shown in colour; IMC, English, Self-report, Straightline in grey.",
x = NULL,
y = expression(Residual ~ variance ~ (hat(sigma)^2))
) +
theme_bw(base_size = 12) +
theme(
strip.background = element_rect(fill = "grey92"),
strip.text = element_text(face = "bold", size = 9, lineheight = 1.2),
legend.position = "bottom",
panel.grid.minor = element_blank()
) +
guides(color = guide_legend(nrow = 1, override.aes = list(linewidth = 1.2)))Horizontal bar chart with one column per paradigm. Bars show % change in variance (negative = variance fell post-exclusion = noise reduced = green; positive = variance rose = orange). AC types run along the y-axis within each panel.
# Apply the same paradigm labels and factor order as Visualization 1
viz2_df <- resid_var_df |>
mutate(
# Flip sign: negative now means variance decreased (the "good" direction)
pct_change = -pct_reduction,
paradigm = recode(paradigm, !!!paradigm_labels),
paradigm = factor(paradigm, levels = c(
"Correspondence Bias\n(Miyamoto & Kitayama 2002)",
"Less-is-Better\n(Hsee 1998)",
"Retrospective Gambler's Fallacy\n(Oppenheimer & Monin 2009)",
"Transaction Utility\n(Thaler 1999)",
"Metaphoric Structuring\n(Boroditsky 2000)",
"Scale Response\n(Schwarz et al. 1985)"
)),
ac_type = factor(ac_type, levels = rev(c(
"Memory", "Obvious", "IMC", "English", "Self-report", "Straightline"
)))
)
ggplot(viz2_df,
aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
geom_col(width = 0.65, color = "white", linewidth = 0.25) +
geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
geom_text(
aes(
label = paste0(round(pct_change, 1), "%"),
hjust = ifelse(pct_change < 0, 1.08, -0.08)
),
size = 2.5, fontface = "bold"
) +
facet_wrap(~ paradigm, ncol = 6) +
scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
scale_fill_manual(
values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
name = NULL
) +
labs(
title = "% Change in Residual Variance After Attention-Check Exclusions",
subtitle = "Negative = variance fell post-exclusion (noise reduced); positive = variance rose.\nResidual variance from lm(DV ~ condition).",
x = "% change in σ²",
y = NULL
) +
theme_bw(base_size = 11) +
theme(
strip.background = element_rect(fill = "grey92"),
strip.text = element_text(face = "bold", size = 8, lineheight = 1.2),
legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank()
)The same % change in residual variance is shown for the Transaction Utility paradigm under three versions of the DV:
If all three columns show the same pattern, attention-check performance for Transaction Utility is robust to scale assumptions.
# ── Helper: rank inverse-normal (Blom) transformation ──────────────────────
rank_int <- function(x) {
n <- sum(!is.na(x))
qnorm((rank(x, ties.method = "average", na.last = "keep") - 0.5) / n)
}
# ── Add transformed DVs to every transaction dataframe ─────────────────────
trans_dfs <- list(
Memory = df_transaction_memory,
Obvious = df_transaction_obvious,
IMC = df_transaction_imc,
English = df_transaction_english,
`Self-report` = df_transaction_selfreport,
Straightline = df_transaction_straightline
)
trans_dfs <- lapply(trans_dfs, function(df) {
df$dv_z <- as.numeric(scale(df$dv))
df$dv_int <- rank_int(df$dv)
df
})
# ── Compute % change in residual variance for one DV column ────────────────
pct_change_resid <- function(df, dv_col, cond_col = "condition") {
df_pre <- df[!is.na(df[[dv_col]]) & !is.na(df[[cond_col]]), ]
df_post <- df_pre[df_pre$att_correct == 1, ]
fmla <- as.formula(paste0("`", dv_col, "` ~ factor(`", cond_col, "`)"))
s2_pre <- summary(lm(fmla, data = df_pre))$sigma^2
s2_post <- summary(lm(fmla, data = df_post))$sigma^2
-100 * (s2_pre - s2_post) / s2_pre # negative = variance decreased
}
# ── Build results across all AC types × 3 transformations ──────────────────
transform_results <- lapply(names(trans_dfs), function(ac) {
df <- trans_dfs[[ac]]
data.frame(
ac_type = ac,
focal = ac %in% c("Memory", "Obvious"),
Raw = pct_change_resid(df, "dv"),
`Z-scored` = pct_change_resid(df, "dv_z"),
`Rank-INT` = pct_change_resid(df, "dv_int"),
check.names = FALSE
)
}) |>
bind_rows() |>
pivot_longer(
cols = c("Raw", "Z-scored", "Rank-INT"),
names_to = "transformation",
values_to = "pct_change"
) |>
mutate(
transformation = factor(transformation,
levels = c("Raw", "Z-scored", "Rank-INT")),
ac_type = factor(ac_type,
levels = rev(c("Memory","Obvious","IMC",
"English","Self-report","Straightline")))
)
ggplot(transform_results,
aes(y = ac_type, x = pct_change, fill = pct_change < 0)) +
geom_col(width = 0.65, color = "white", linewidth = 0.25) +
geom_vline(xintercept = 0, linewidth = 0.5, color = "grey30") +
geom_text(
aes(
label = paste0(round(pct_change, 1), "%"),
hjust = ifelse(pct_change < 0, 1.08, -0.08)
),
size = 2.8, fontface = "bold"
) +
facet_wrap(~ transformation, ncol = 3) +
scale_fill_manual(
values = c("TRUE" = "#35978f", "FALSE" = "#bf812d"),
labels = c("TRUE" = "Variance decreased", "FALSE" = "Variance increased"),
name = NULL
) +
scale_x_continuous(expand = expansion(mult = c(0.35, 0.35))) +
labs(
title = "Transaction Utility: % Change in Residual Variance by DV Transformation",
subtitle = "Z-scored is identical to Raw by construction (linear transformation).\nDivergence in Rank-INT indicates the result depends on tail behaviour.",
x = "% change in σ² (negative = variance fell)",
y = NULL
) +
theme_bw(base_size = 11) +
theme(
strip.background = element_rect(fill = "grey92"),
strip.text = element_text(face = "bold", size = 10),
legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank()
)Does removing more people lead to more noise reduction? This scatter plots the percentage of respondents excluded (x-axis) against the percentage reduction in residual variance (y-axis), one point per paradigm × AC combination.
ggplot(resid_var_df,
aes(x = 100 - pct_retained, y = pct_reduction,
color = paradigm, shape = ac_type, label = case)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey60") +
geom_smooth(
aes(group = 1), method = "lm", se = TRUE,
color = "grey40", linewidth = 0.8, linetype = "solid", alpha = 0.15
) +
geom_point(size = 4) +
ggrepel::geom_text_repel(size = 2.6, max.overlaps = 20,
show.legend = FALSE) +
scale_color_brewer(palette = "Dark2", name = "Paradigm") +
scale_shape_manual(values = c("Memory" = 16, "Obvious" = 17),
name = "AC type") +
labs(
title = "Exclusion Rate vs Noise Reduction",
subtitle = "Each point = one paradigm × AC combination",
x = "% excluded (100 − % retained)",
y = "% reduction in residual σ²"
) +
theme_bw(base_size = 12) +
theme(
legend.position = "right",
panel.grid.minor = element_blank()
)
9. Social × Memory
Code
Code