Study Overview
This pilot analysis replicates key findings from:
Beard, C., & Amir, N. (2009). Interpretation in
Social Anxiety: When Meaning Precedes Ambiguity. Cognitive Therapy
and Research, 33(4), 406-415.
The Word Sentence Association Paradigm (WSAP) assesses interpretation
bias by having participants judge whether words (suggesting negative or
benign interpretations) are related to ambiguous sentences.
Note: This is a pilot study with N = 5
participants.
Setup
library(tidyverse)
library(psych)
library(knitr)
library(kableExtra)
library(ggpubr)
library(lsr)
theme_set(theme_bw(base_size = 12))
Configuration
PARTICIPANT_DATA_FOLDER <- "../data/"
CODING_FILE <- "interpretation_coding_reference.csv"
# RT cleaning thresholds
RT_MIN <- 0
RT_MAX <- 10000
OUTPUT_DIR <- getwd()
Data Loading
# Load interpretation coding
interpretation_coding <- read_csv(CODING_FILE, show_col_types = FALSE)
# Load participant data from folder
csv_files <- list.files(PARTICIPANT_DATA_FOLDER,
pattern = "\\.csv$",
full.names = TRUE,
ignore.case = TRUE)
cat("Loading", length(csv_files), "participant file(s)\n")
## Loading 5 participant file(s)
df_list <- lapply(csv_files, function(file) {
temp_df <- read_csv(file, show_col_types = FALSE)
if (!"subject" %in% names(temp_df)) {
temp_df$subject <- tools::file_path_sans_ext(basename(file))
}
return(temp_df)
})
df_raw <- bind_rows(df_list)
cat("Loaded data for", n_distinct(df_raw$subject), "subjects\n")
## Loaded data for 5 subjects
Data Preparation
# Clean and merge with interpretation coding
df <- df_raw %>%
mutate(
endorsed = case_when(
response == "1" | response == 1 ~ 1,
response == "3" | response == 3 ~ 0,
TRUE ~ NA_real_
),
rt = as.numeric(rt),
word = str_trim(word),
sentence = str_trim(sentence)
) %>%
left_join(
interpretation_coding %>%
select(word, sentence, interpretation_type, domain),
by = c("word", "sentence")
) %>%
filter(!is.na(interpretation_type), !is.na(endorsed))
# Apply RT filters
df <- df %>%
filter(rt >= RT_MIN & rt <= RT_MAX)
cat("Total trials after cleaning:", nrow(df), "\n")
## Total trials after cleaning: 549
cat("Subjects:", paste(unique(df$subject), collapse = ", "), "\n")
## Subjects: bv6dbfo3nsmdxqz, f4lw7jnrk05qb34, kqz5695jnknux81, n1468lc63a8mvyn, v4gjwejjlk3g4lh
Calculate Subject-Level Scores
# Calculate endorsement rates and RT by interpretation type
subject_scores <- df %>%
group_by(subject, interpretation_type) %>%
summarise(
endorsement_rate = mean(endorsed, na.rm = TRUE),
mean_rt = mean(rt[endorsed == 1], na.rm = TRUE),
n_trials = n(),
.groups = "drop"
) %>%
pivot_wider(
names_from = interpretation_type,
values_from = c(endorsement_rate, mean_rt, n_trials)
)
# Check the actual column names and rename appropriately
# The interpretation_type values might be lowercase "negative"/"benign"
col_names <- names(subject_scores)
# Find the endorsement and RT columns
endorse_cols <- grep("endorsement_rate_", col_names, value = TRUE)
rt_cols <- grep("mean_rt_", col_names, value = TRUE)
# Rename to standard names (handling both lowercase and capitalized)
subject_scores <- subject_scores %>%
rename_with(~"negative_endorsement", matches("endorsement_rate_(n|N)egative")) %>%
rename_with(~"benign_endorsement", matches("endorsement_rate_(b|B)enign")) %>%
rename_with(~"negative_rt", matches("mean_rt_(n|N)egative")) %>%
rename_with(~"benign_rt", matches("mean_rt_(b|B)enign")) %>%
mutate(
# Bias scores (Negative - Benign)
endorsement_bias = negative_endorsement - benign_endorsement,
rt_bias = negative_rt - benign_rt
)
# Display scores
kable(subject_scores, digits = 3, caption = "Subject-Level Scores") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Subject-Level Scores
|
subject
|
benign_endorsement
|
negative_endorsement
|
benign_rt
|
negative_rt
|
n_trials_benign
|
n_trials_negative
|
endorsement_bias
|
rt_bias
|
|
bv6dbfo3nsmdxqz
|
0.236
|
0.855
|
486.615
|
489.426
|
55
|
55
|
0.618
|
2.810
|
|
f4lw7jnrk05qb34
|
0.345
|
0.764
|
806.105
|
630.548
|
55
|
55
|
0.418
|
-175.558
|
|
kqz5695jnknux81
|
0.727
|
0.182
|
413.975
|
414.700
|
55
|
55
|
-0.545
|
0.725
|
|
n1468lc63a8mvyn
|
0.709
|
0.091
|
361.821
|
399.400
|
55
|
55
|
-0.618
|
37.579
|
|
v4gjwejjlk3g4lh
|
0.473
|
0.370
|
627.231
|
461.200
|
55
|
54
|
-0.102
|
-166.031
|
Group Assignment Based on SPIN Scores
cat("\n=== GROUP ASSIGNMENT (SPIN-Based) ===\n")
##
## === GROUP ASSIGNMENT (SPIN-Based) ===
# Extract SPIN scores from participant data
spin_scores <- df_raw %>%
filter(!is.na(spin_total)) %>%
group_by(subject) %>%
summarise(
SPIN = first(spin_total),
.groups = "drop"
)
# Merge with subject scores
subject_scores <- subject_scores %>%
left_join(spin_scores, by = "subject")
# Calculate 30th and 70th percentiles
percentile_30 <- quantile(subject_scores$SPIN, 0.30, na.rm = TRUE)
percentile_70 <- quantile(subject_scores$SPIN, 0.70, na.rm = TRUE)
cat("\nSPIN Percentile Cutoffs:\n")
##
## SPIN Percentile Cutoffs:
cat(" 30th percentile:", percentile_30, "\n")
## 30th percentile: 3
cat(" 70th percentile:", percentile_70, "\n\n")
## 70th percentile: 47
# Assign groups based on percentiles
subject_scores <- subject_scores %>%
mutate(
group = case_when(
SPIN <= percentile_30 ~ "LSA",
SPIN >= percentile_70 ~ "HSA",
TRUE ~ "Moderate"
)
)
cat("Group Assignment:\n")
## Group Assignment:
cat(" LSA (Low Social Anxiety): SPIN ≤", percentile_30, "\n")
## LSA (Low Social Anxiety): SPIN ≤ 3
cat(" HSA (High Social Anxiety): SPIN ≥", percentile_70, "\n\n")
## HSA (High Social Anxiety): SPIN ≥ 47
# Show group distribution
group_table <- table(subject_scores$group)
print(group_table)
##
## HSA LSA Moderate
## 2 2 1
# Show individual assignments
cat("\n\nIndividual Participant Assignments:\n")
##
##
## Individual Participant Assignments:
kable(subject_scores %>%
select(subject, SPIN, group, endorsement_bias) %>%
arrange(SPIN),
digits = 3,
caption = "Participants by SPIN Score and Group") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
Participants by SPIN Score and Group
|
subject
|
SPIN
|
group
|
endorsement_bias
|
|
kqz5695jnknux81
|
0
|
LSA
|
-0.545
|
|
n1468lc63a8mvyn
|
0
|
LSA
|
-0.618
|
|
v4gjwejjlk3g4lh
|
15
|
Moderate
|
-0.102
|
|
bv6dbfo3nsmdxqz
|
55
|
HSA
|
0.618
|
|
f4lw7jnrk05qb34
|
68
|
HSA
|
0.418
|
# Create grouped dataset (excluding Moderate)
subject_scores_grouped <- subject_scores %>%
filter(group %in% c("HSA", "LSA"))
Main Analysis: Interpretation Bias
Test 1: Endorsement Rates (Negative vs Benign) - All
Participants
# Prepare data for paired t-test
endorsement_data <- subject_scores %>%
select(subject,
negative = negative_endorsement,
benign = benign_endorsement) %>%
filter(!is.na(negative) & !is.na(benign))
# Paired t-test
endorsement_test <- t.test(endorsement_data$negative,
endorsement_data$benign,
paired = TRUE)
# Effect size
cohens_d_endorse <- cohensD(endorsement_data$negative,
endorsement_data$benign,
method = "paired")
# Display results
cat("\n=== ENDORSEMENT BIAS TEST (All Participants) ===\n")
##
## === ENDORSEMENT BIAS TEST (All Participants) ===
cat("Negative M =", round(mean(endorsement_data$negative), 3), "\n")
## Negative M = 0.452
cat("Benign M =", round(mean(endorsement_data$benign), 3), "\n")
## Benign M = 0.498
cat("t(", endorsement_test$parameter, ") = ", round(endorsement_test$statistic, 3),
", p = ", format.pval(endorsement_test$p.value, digits = 3),
", Cohen's d = ", round(cohens_d_endorse, 3), "\n", sep = "")
## t(4) = -0.185, p = 0.862, Cohen's d = 0.083
Test 2: Reaction Time (Negative vs Benign) - All Participants
# Prepare RT data
rt_data <- subject_scores %>%
select(subject,
negative_rt, benign_rt) %>%
filter(!is.na(negative_rt) & !is.na(benign_rt))
if (nrow(rt_data) >= 2) {
# Paired t-test
rt_test <- t.test(rt_data$negative_rt,
rt_data$benign_rt,
paired = TRUE)
# Effect size
cohens_d_rt <- cohensD(rt_data$negative_rt,
rt_data$benign_rt,
method = "paired")
# Display results
cat("\n=== RT BIAS TEST (All Participants) ===\n")
cat("Negative RT M =", round(mean(rt_data$negative_rt), 2), "ms\n")
cat("Benign RT M =", round(mean(rt_data$benign_rt), 2), "ms\n")
cat("t(", rt_test$parameter, ") = ", round(rt_test$statistic, 3),
", p = ", format.pval(rt_test$p.value, digits = 3),
", Cohen's d = ", round(cohens_d_rt, 3), "\n", sep = "")
} else {
cat("\nInsufficient data for RT analysis\n")
}
##
## === RT BIAS TEST (All Participants) ===
## Negative RT M = 479.05 ms
## Benign RT M = 539.15 ms
## t(4) = -1.315, p = 0.259, Cohen's d = 0.588
Group Analysis: Mixed ANOVA
Mixed ANOVA: Group × Interpretation Type
if (nrow(subject_scores_grouped) >= 2) {
cat("\n=== MIXED ANOVA: GROUP × INTERPRETATION TYPE ===\n")
cat("Note: Small sample size - interpret with caution\n\n")
# Prepare data in long format for ANOVA
anova_data <- subject_scores_grouped %>%
select(subject, group, negative_endorsement, benign_endorsement) %>%
pivot_longer(cols = c(negative_endorsement, benign_endorsement),
names_to = "interpretation_type",
values_to = "endorsement_rate") %>%
mutate(
interpretation_type = factor(
interpretation_type,
levels = c("benign_endorsement", "negative_endorsement"),
labels = c("Benign", "Negative")
),
group = factor(group, levels = c("LSA", "HSA"))
)
# Run mixed ANOVA
anova_result <- aov(endorsement_rate ~ group * interpretation_type +
Error(subject/interpretation_type),
data = anova_data)
cat("ANOVA Results:\n")
print(summary(anova_result))
# Descriptive statistics by group and interpretation type
cat("\n\nDescriptive Statistics:\n")
desc_by_group <- anova_data %>%
group_by(group, interpretation_type) %>%
summarise(
n = n(),
mean = mean(endorsement_rate, na.rm = TRUE),
sd = sd(endorsement_rate, na.rm = TRUE),
se = sd / sqrt(n),
.groups = "drop"
)
kable(desc_by_group, digits = 3,
caption = "Mean Endorsement Rates by Group and Interpretation Type") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
# POST-HOC: Within-group comparisons
cat("\n\n=== POST-HOC: WITHIN-GROUP COMPARISONS ===\n\n")
for (grp in c("HSA", "LSA")) {
grp_data <- subject_scores_grouped %>% filter(group == grp)
if (nrow(grp_data) >= 2) {
t_result <- t.test(grp_data$negative_endorsement,
grp_data$benign_endorsement,
paired = TRUE)
diff <- grp_data$negative_endorsement - grp_data$benign_endorsement
d <- mean(diff) / sd(diff)
cat(grp, "group (n=", nrow(grp_data), "):\n", sep = "")
cat(" Negative: M =", round(mean(grp_data$negative_endorsement), 3), "\n")
cat(" Benign: M =", round(mean(grp_data$benign_endorsement), 3), "\n")
cat(" t(", t_result$parameter, ") = ", round(t_result$statistic, 3),
", p = ", format.pval(t_result$p.value, digits = 3),
", d = ", round(d, 3), "\n\n", sep = "")
}
}
# POST-HOC: Between-group comparisons
cat("\n=== POST-HOC: BETWEEN-GROUP COMPARISONS ===\n\n")
# Bias score comparison
hsa_bias <- subject_scores_grouped %>% filter(group == "HSA") %>% pull(endorsement_bias)
lsa_bias <- subject_scores_grouped %>% filter(group == "LSA") %>% pull(endorsement_bias)
if (length(hsa_bias) >= 2 && length(lsa_bias) >= 2) {
bias_test <- t.test(hsa_bias, lsa_bias)
cat("Endorsement Bias (HSA vs LSA):\n")
cat(" HSA: M =", round(mean(hsa_bias), 3), "\n")
cat(" LSA: M =", round(mean(lsa_bias), 3), "\n")
cat(" t(", round(bias_test$parameter, 1), ") = ", round(bias_test$statistic, 3),
", p = ", format.pval(bias_test$p.value, digits = 3), "\n", sep = "")
}
} else {
cat("\nInsufficient participants for group analysis\n")
}
##
## === MIXED ANOVA: GROUP × INTERPRETATION TYPE ===
## Note: Small sample size - interpret with caution
##
## ANOVA Results:
##
## Error: subject
## Df Sum Sq Mean Sq F value Pr(>F)
## group 1 0.030124 0.030124 19.7 0.0472 *
## Residuals 2 0.003058 0.001529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Error: subject:interpretation_type
## Df Sum Sq Mean Sq F value Pr(>F)
## interpretation_type 1 0.0020 0.0020 0.358 0.61051
## group:interpretation_type 1 0.6050 0.6050 106.869 0.00923 **
## Residuals 2 0.0113 0.0057
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## Descriptive Statistics:
##
##
## === POST-HOC: WITHIN-GROUP COMPARISONS ===
##
## HSAgroup (n=2):
## Negative: M = 0.809
## Benign: M = 0.291
## t(1) = 5.182, p = 0.121, d = 3.664
##
## LSAgroup (n=2):
## Negative: M = 0.136
## Benign: M = 0.718
## t(1) = -16, p = 0.0397, d = -11.314
##
##
## === POST-HOC: BETWEEN-GROUP COMPARISONS ===
##
## Endorsement Bias (HSA vs LSA):
## HSA: M = 0.518
## LSA: M = -0.582
## t(1.3) = 10.338, p = 0.0359
Visualizations
Figure 3: Group × Interpretation Interaction
if (nrow(subject_scores_grouped) >= 2) {
# Calculate means and SEs for plotting
interaction_summary <- anova_data %>%
group_by(group, interpretation_type) %>%
summarise(
mean = mean(endorsement_rate, na.rm = TRUE),
se = sd(endorsement_rate, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
# Line plot showing interaction
p_interaction <- ggplot(interaction_summary,
aes(x = interpretation_type, y = mean,
group = group, color = group, shape = group)) +
geom_line(size = 1.5) +
geom_point(size = 5) +
geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1, size = 1) +
scale_color_manual(values = c("LSA" = "#2ca02c", "HSA" = "#d62728"),
labels = c("LSA" = "Low Social Anxiety", "HSA" = "High Social Anxiety")) +
scale_shape_manual(values = c("LSA" = 16, "HSA" = 17),
labels = c("LSA" = "Low Social Anxiety", "HSA" = "High Social Anxiety")) +
labs(title = "Group × Interpretation Type Interaction",
subtitle = "Error bars show ±1 SE (Beard & Amir, 2009 key finding)",
x = "Interpretation Type",
y = "Endorsement Rate",
color = "Group",
shape = "Group") +
ylim(0, 1) +
theme(legend.position = "bottom",
plot.title = element_text(face = "bold", size = 16),
axis.title = element_text(face = "bold"))
# Bar plot showing bias by group
bias_by_group <- subject_scores_grouped %>%
group_by(group) %>%
summarise(
mean_bias = mean(endorsement_bias, na.rm = TRUE),
se_bias = sd(endorsement_bias, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
p_bias_group <- ggplot(bias_by_group, aes(x = group, y = mean_bias, fill = group)) +
geom_bar(stat = "identity", width = 0.6, color = "black", alpha = 0.7) +
geom_errorbar(aes(ymin = mean_bias - se_bias, ymax = mean_bias + se_bias),
width = 0.2, size = 1) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1) +
geom_text(aes(label = sprintf("%.3f", mean_bias)),
vjust = ifelse(bias_by_group$mean_bias > 0, -1.5, 2),
size = 5, fontface = "bold") +
scale_fill_manual(values = c("LSA" = "#2ca02c", "HSA" = "#d62728")) +
scale_x_discrete(labels = c("LSA" = "Low Social\nAnxiety", "HSA" = "High Social\nAnxiety")) +
labs(title = "Endorsement Bias by Group",
subtitle = "Negative - Benign",
x = "Group",
y = "Endorsement Bias Score") +
theme(legend.position = "none",
plot.title = element_text(face = "bold", size = 16),
axis.title = element_text(face = "bold"))
# Combine plots
ggarrange(p_interaction, p_bias_group, ncol = 2, widths = c(1.2, 1))
}

Summary
Sample Size: N = 5 participants
Groups: HSA (n = 2) | LSA (n = 2)
Key Findings:
- Overall Endorsement Bias:
- Mean difference = -0.046
- No significant bias (p = 0.862)
- Response Time:
- No significant RT difference (p = 0.259)
- Group × Interpretation Interaction:
- Mixed ANOVA conducted - see results above
Note: This is a pilot study with limited power.
Results should be interpreted cautiously.
Save Results
write_csv(subject_scores, file.path(OUTPUT_DIR, "pilot_subject_scores.csv"))
write_csv(df, file.path(OUTPUT_DIR, "pilot_cleaned_data.csv"))
cat("Results saved to:\n")
## Results saved to:
cat(" - pilot_subject_scores.csv\n")
## - pilot_subject_scores.csv
cat(" - pilot_cleaned_data.csv\n")
## - pilot_cleaned_data.csv