Study Overview

This pilot analysis replicates key findings from:

Beard, C., & Amir, N. (2009). Interpretation in Social Anxiety: When Meaning Precedes Ambiguity. Cognitive Therapy and Research, 33(4), 406-415.

The Word Sentence Association Paradigm (WSAP) assesses interpretation bias by having participants judge whether words (suggesting negative or benign interpretations) are related to ambiguous sentences.

Note: This is a pilot study with N = 5 participants.


Setup

library(tidyverse)
library(psych)
library(knitr)
library(kableExtra)
library(ggpubr)
library(lsr)

theme_set(theme_bw(base_size = 12))

Configuration

PARTICIPANT_DATA_FOLDER <- "../data/" 
CODING_FILE <- "interpretation_coding_reference.csv"

# RT cleaning thresholds
RT_MIN <- 0
RT_MAX <- 10000

OUTPUT_DIR <- getwd()

Data Loading

# Load interpretation coding
interpretation_coding <- read_csv(CODING_FILE, show_col_types = FALSE)

# Load participant data from folder
csv_files <- list.files(PARTICIPANT_DATA_FOLDER, 
                        pattern = "\\.csv$", 
                        full.names = TRUE,
                        ignore.case = TRUE)

cat("Loading", length(csv_files), "participant file(s)\n")
## Loading 5 participant file(s)
df_list <- lapply(csv_files, function(file) {
  temp_df <- read_csv(file, show_col_types = FALSE)
  if (!"subject" %in% names(temp_df)) {
    temp_df$subject <- tools::file_path_sans_ext(basename(file))
  }
  return(temp_df)
})

df_raw <- bind_rows(df_list)
cat("Loaded data for", n_distinct(df_raw$subject), "subjects\n")
## Loaded data for 5 subjects

Data Preparation

# Clean and merge with interpretation coding
df <- df_raw %>%
  mutate(
    endorsed = case_when(
      response == "1" | response == 1 ~ 1,
      response == "3" | response == 3 ~ 0,
      TRUE ~ NA_real_
    ),
    rt = as.numeric(rt),
    word = str_trim(word),
    sentence = str_trim(sentence)
  ) %>%
  left_join(
    interpretation_coding %>% 
      select(word, sentence, interpretation_type, domain),
    by = c("word", "sentence")
  ) %>%
  filter(!is.na(interpretation_type), !is.na(endorsed))

# Apply RT filters
df <- df %>%
  filter(rt >= RT_MIN & rt <= RT_MAX)

cat("Total trials after cleaning:", nrow(df), "\n")
## Total trials after cleaning: 549
cat("Subjects:", paste(unique(df$subject), collapse = ", "), "\n")
## Subjects: bv6dbfo3nsmdxqz, f4lw7jnrk05qb34, kqz5695jnknux81, n1468lc63a8mvyn, v4gjwejjlk3g4lh

Calculate Subject-Level Scores

# Calculate endorsement rates and RT by interpretation type
subject_scores <- df %>%
  group_by(subject, interpretation_type) %>%
  summarise(
    endorsement_rate = mean(endorsed, na.rm = TRUE),
    mean_rt = mean(rt[endorsed == 1], na.rm = TRUE),
    n_trials = n(),
    .groups = "drop"
  ) %>%
  pivot_wider(
    names_from = interpretation_type,
    values_from = c(endorsement_rate, mean_rt, n_trials)
  )

# Check the actual column names and rename appropriately
# The interpretation_type values might be lowercase "negative"/"benign"
col_names <- names(subject_scores)

# Find the endorsement and RT columns
endorse_cols <- grep("endorsement_rate_", col_names, value = TRUE)
rt_cols <- grep("mean_rt_", col_names, value = TRUE)

# Rename to standard names (handling both lowercase and capitalized)
subject_scores <- subject_scores %>%
  rename_with(~"negative_endorsement", matches("endorsement_rate_(n|N)egative")) %>%
  rename_with(~"benign_endorsement", matches("endorsement_rate_(b|B)enign")) %>%
  rename_with(~"negative_rt", matches("mean_rt_(n|N)egative")) %>%
  rename_with(~"benign_rt", matches("mean_rt_(b|B)enign")) %>%
  mutate(
    # Bias scores (Negative - Benign)
    endorsement_bias = negative_endorsement - benign_endorsement,
    rt_bias = negative_rt - benign_rt
  )

# Display scores
kable(subject_scores, digits = 3, caption = "Subject-Level Scores") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Subject-Level Scores
subject benign_endorsement negative_endorsement benign_rt negative_rt n_trials_benign n_trials_negative endorsement_bias rt_bias
bv6dbfo3nsmdxqz 0.236 0.855 486.615 489.426 55 55 0.618 2.810
f4lw7jnrk05qb34 0.345 0.764 806.105 630.548 55 55 0.418 -175.558
kqz5695jnknux81 0.727 0.182 413.975 414.700 55 55 -0.545 0.725
n1468lc63a8mvyn 0.709 0.091 361.821 399.400 55 55 -0.618 37.579
v4gjwejjlk3g4lh 0.473 0.370 627.231 461.200 55 54 -0.102 -166.031

Group Assignment Based on SPIN Scores

cat("\n=== GROUP ASSIGNMENT (SPIN-Based) ===\n")
## 
## === GROUP ASSIGNMENT (SPIN-Based) ===
# Extract SPIN scores from participant data
spin_scores <- df_raw %>%
  filter(!is.na(spin_total)) %>%
  group_by(subject) %>%
  summarise(
    SPIN = first(spin_total),
    .groups = "drop"
  )

# Merge with subject scores
subject_scores <- subject_scores %>%
  left_join(spin_scores, by = "subject")

# Calculate 30th and 70th percentiles
percentile_30 <- quantile(subject_scores$SPIN, 0.30, na.rm = TRUE)
percentile_70 <- quantile(subject_scores$SPIN, 0.70, na.rm = TRUE)

cat("\nSPIN Percentile Cutoffs:\n")
## 
## SPIN Percentile Cutoffs:
cat("  30th percentile:", percentile_30, "\n")
##   30th percentile: 3
cat("  70th percentile:", percentile_70, "\n\n")
##   70th percentile: 47
# Assign groups based on percentiles
subject_scores <- subject_scores %>%
  mutate(
    group = case_when(
      SPIN <= percentile_30 ~ "LSA",
      SPIN >= percentile_70 ~ "HSA",
      TRUE ~ "Moderate"
    )
  )

cat("Group Assignment:\n")
## Group Assignment:
cat("  LSA (Low Social Anxiety):  SPIN ≤", percentile_30, "\n")
##   LSA (Low Social Anxiety):  SPIN ≤ 3
cat("  HSA (High Social Anxiety): SPIN ≥", percentile_70, "\n\n")
##   HSA (High Social Anxiety): SPIN ≥ 47
# Show group distribution
group_table <- table(subject_scores$group)
print(group_table)
## 
##      HSA      LSA Moderate 
##        2        2        1
# Show individual assignments
cat("\n\nIndividual Participant Assignments:\n")
## 
## 
## Individual Participant Assignments:
kable(subject_scores %>% 
        select(subject, SPIN, group, endorsement_bias) %>%
        arrange(SPIN),
      digits = 3,
      caption = "Participants by SPIN Score and Group") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Participants by SPIN Score and Group
subject SPIN group endorsement_bias
kqz5695jnknux81 0 LSA -0.545
n1468lc63a8mvyn 0 LSA -0.618
v4gjwejjlk3g4lh 15 Moderate -0.102
bv6dbfo3nsmdxqz 55 HSA 0.618
f4lw7jnrk05qb34 68 HSA 0.418
# Create grouped dataset (excluding Moderate)
subject_scores_grouped <- subject_scores %>%
  filter(group %in% c("HSA", "LSA"))

Main Analysis: Interpretation Bias

Test 1: Endorsement Rates (Negative vs Benign) - All Participants

# Prepare data for paired t-test
endorsement_data <- subject_scores %>%
  select(subject, 
         negative = negative_endorsement,
         benign = benign_endorsement) %>%
  filter(!is.na(negative) & !is.na(benign))

# Paired t-test
endorsement_test <- t.test(endorsement_data$negative, 
                          endorsement_data$benign, 
                          paired = TRUE)

# Effect size
cohens_d_endorse <- cohensD(endorsement_data$negative, 
                            endorsement_data$benign, 
                            method = "paired")

# Display results
cat("\n=== ENDORSEMENT BIAS TEST (All Participants) ===\n")
## 
## === ENDORSEMENT BIAS TEST (All Participants) ===
cat("Negative M =", round(mean(endorsement_data$negative), 3), "\n")
## Negative M = 0.452
cat("Benign M =", round(mean(endorsement_data$benign), 3), "\n")
## Benign M = 0.498
cat("t(", endorsement_test$parameter, ") = ", round(endorsement_test$statistic, 3), 
    ", p = ", format.pval(endorsement_test$p.value, digits = 3), 
    ", Cohen's d = ", round(cohens_d_endorse, 3), "\n", sep = "")
## t(4) = -0.185, p = 0.862, Cohen's d = 0.083

Test 2: Reaction Time (Negative vs Benign) - All Participants

# Prepare RT data
rt_data <- subject_scores %>%
  select(subject,
         negative_rt, benign_rt) %>%
  filter(!is.na(negative_rt) & !is.na(benign_rt))

if (nrow(rt_data) >= 2) {
  # Paired t-test
  rt_test <- t.test(rt_data$negative_rt, 
                   rt_data$benign_rt, 
                   paired = TRUE)
  
  # Effect size
  cohens_d_rt <- cohensD(rt_data$negative_rt, 
                        rt_data$benign_rt, 
                        method = "paired")
  
  # Display results
  cat("\n=== RT BIAS TEST (All Participants) ===\n")
  cat("Negative RT M =", round(mean(rt_data$negative_rt), 2), "ms\n")
  cat("Benign RT M =", round(mean(rt_data$benign_rt), 2), "ms\n")
  cat("t(", rt_test$parameter, ") = ", round(rt_test$statistic, 3), 
      ", p = ", format.pval(rt_test$p.value, digits = 3), 
      ", Cohen's d = ", round(cohens_d_rt, 3), "\n", sep = "")
} else {
  cat("\nInsufficient data for RT analysis\n")
}
## 
## === RT BIAS TEST (All Participants) ===
## Negative RT M = 479.05 ms
## Benign RT M = 539.15 ms
## t(4) = -1.315, p = 0.259, Cohen's d = 0.588

Group Analysis: Mixed ANOVA

Mixed ANOVA: Group × Interpretation Type

if (nrow(subject_scores_grouped) >= 2) {
  
  cat("\n=== MIXED ANOVA: GROUP × INTERPRETATION TYPE ===\n")
  cat("Note: Small sample size - interpret with caution\n\n")
  
  # Prepare data in long format for ANOVA
  anova_data <- subject_scores_grouped %>%
    select(subject, group, negative_endorsement, benign_endorsement) %>%
    pivot_longer(cols = c(negative_endorsement, benign_endorsement),
                 names_to = "interpretation_type",
                 values_to = "endorsement_rate") %>%
    mutate(
      interpretation_type = factor(
        interpretation_type,
        levels = c("benign_endorsement", "negative_endorsement"),
        labels = c("Benign", "Negative")
      ),
      group = factor(group, levels = c("LSA", "HSA"))
    )
  
  # Run mixed ANOVA
  anova_result <- aov(endorsement_rate ~ group * interpretation_type + 
                      Error(subject/interpretation_type), 
                      data = anova_data)
  
  cat("ANOVA Results:\n")
  print(summary(anova_result))
  
  # Descriptive statistics by group and interpretation type
  cat("\n\nDescriptive Statistics:\n")
  desc_by_group <- anova_data %>%
    group_by(group, interpretation_type) %>%
    summarise(
      n = n(),
      mean = mean(endorsement_rate, na.rm = TRUE),
      sd = sd(endorsement_rate, na.rm = TRUE),
      se = sd / sqrt(n),
      .groups = "drop"
    )
  
  kable(desc_by_group, digits = 3,
        caption = "Mean Endorsement Rates by Group and Interpretation Type") %>%
    kable_styling(bootstrap_options = c("striped", "hover"))
  
  # POST-HOC: Within-group comparisons
  cat("\n\n=== POST-HOC: WITHIN-GROUP COMPARISONS ===\n\n")
  
  for (grp in c("HSA", "LSA")) {
    grp_data <- subject_scores_grouped %>% filter(group == grp)
    
    if (nrow(grp_data) >= 2) {
      t_result <- t.test(grp_data$negative_endorsement, 
                        grp_data$benign_endorsement, 
                        paired = TRUE)
      
      diff <- grp_data$negative_endorsement - grp_data$benign_endorsement
      d <- mean(diff) / sd(diff)
      
      cat(grp, "group (n=", nrow(grp_data), "):\n", sep = "")
      cat("  Negative: M =", round(mean(grp_data$negative_endorsement), 3), "\n")
      cat("  Benign:   M =", round(mean(grp_data$benign_endorsement), 3), "\n")
      cat("  t(", t_result$parameter, ") = ", round(t_result$statistic, 3),
          ", p = ", format.pval(t_result$p.value, digits = 3),
          ", d = ", round(d, 3), "\n\n", sep = "")
    }
  }
  
  # POST-HOC: Between-group comparisons
  cat("\n=== POST-HOC: BETWEEN-GROUP COMPARISONS ===\n\n")
  
  # Bias score comparison
  hsa_bias <- subject_scores_grouped %>% filter(group == "HSA") %>% pull(endorsement_bias)
  lsa_bias <- subject_scores_grouped %>% filter(group == "LSA") %>% pull(endorsement_bias)
  
  if (length(hsa_bias) >= 2 && length(lsa_bias) >= 2) {
    bias_test <- t.test(hsa_bias, lsa_bias)
    cat("Endorsement Bias (HSA vs LSA):\n")
    cat("  HSA: M =", round(mean(hsa_bias), 3), "\n")
    cat("  LSA: M =", round(mean(lsa_bias), 3), "\n")
    cat("  t(", round(bias_test$parameter, 1), ") = ", round(bias_test$statistic, 3),
        ", p = ", format.pval(bias_test$p.value, digits = 3), "\n", sep = "")
  }
  
} else {
  cat("\nInsufficient participants for group analysis\n")
}
## 
## === MIXED ANOVA: GROUP × INTERPRETATION TYPE ===
## Note: Small sample size - interpret with caution
## 
## ANOVA Results:
## 
## Error: subject
##           Df   Sum Sq  Mean Sq F value Pr(>F)  
## group      1 0.030124 0.030124    19.7 0.0472 *
## Residuals  2 0.003058 0.001529                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Error: subject:interpretation_type
##                           Df Sum Sq Mean Sq F value  Pr(>F)   
## interpretation_type        1 0.0020  0.0020   0.358 0.61051   
## group:interpretation_type  1 0.6050  0.6050 106.869 0.00923 **
## Residuals                  2 0.0113  0.0057                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Descriptive Statistics:
## 
## 
## === POST-HOC: WITHIN-GROUP COMPARISONS ===
## 
## HSAgroup (n=2):
##   Negative: M = 0.809 
##   Benign:   M = 0.291 
##   t(1) = 5.182, p = 0.121, d = 3.664
## 
## LSAgroup (n=2):
##   Negative: M = 0.136 
##   Benign:   M = 0.718 
##   t(1) = -16, p = 0.0397, d = -11.314
## 
## 
## === POST-HOC: BETWEEN-GROUP COMPARISONS ===
## 
## Endorsement Bias (HSA vs LSA):
##   HSA: M = 0.518 
##   LSA: M = -0.582 
##   t(1.3) = 10.338, p = 0.0359

Visualizations

Figure 1: Endorsement Rates by Interpretation Type (All Participants)

# Prepare data for plotting
plot_data <- subject_scores %>%
  select(subject, 
         Negative = negative_endorsement,
         Benign = benign_endorsement) %>%
  pivot_longer(cols = c(Negative, Benign),
               names_to = "Interpretation",
               values_to = "Endorsement_Rate")

# Calculate means and SEs
plot_summary <- plot_data %>%
  group_by(Interpretation) %>%
  summarise(
    mean = mean(Endorsement_Rate, na.rm = TRUE),
    se = sd(Endorsement_Rate, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

# Create plot
ggplot(plot_summary, aes(x = Interpretation, y = mean, fill = Interpretation)) +
  geom_bar(stat = "identity", width = 0.6, color = "black") +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2) +
  geom_jitter(data = plot_data, 
              aes(x = Interpretation, y = Endorsement_Rate, fill = Interpretation),
              width = 0.1, alpha = 0.5, size = 3, shape = 21) +
  scale_fill_manual(values = c("Negative" = "#d62728", "Benign" = "#2ca02c")) +
  labs(title = "Endorsement Rates by Interpretation Type",
       subtitle = paste0("N = ", nrow(endorsement_data), " | Error bars show ±1 SE"),
       x = "Interpretation Type",
       y = "Endorsement Rate") +
  ylim(0, 1) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold", size = 16),
        axis.title = element_text(face = "bold"))

Figure 2: Individual Bias Scores

# Plot individual bias scores with group coloring
ggplot(subject_scores, aes(x = reorder(subject, SPIN), y = endorsement_bias, fill = group)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40", size = 1) +
  geom_bar(stat = "identity", width = 0.6, color = "black") +
  geom_text(aes(label = sprintf("%.2f", endorsement_bias)),
            vjust = ifelse(subject_scores$endorsement_bias > 0, -0.5, 1.5),
            fontface = "bold", size = 3) +
  scale_fill_manual(values = c("HSA" = "#d62728", "LSA" = "#2ca02c", "Moderate" = "#ff7f0e"),
                   labels = c("HSA" = "High Social Anxiety", 
                             "LSA" = "Low Social Anxiety",
                             "Moderate" = "Moderate")) +
  labs(title = "Endorsement Bias Score by Subject",
       subtitle = "Bias = Negative Endorsement - Benign Endorsement",
       x = "Subject (ordered by SPIN score)",
       y = "Endorsement Bias Score",
       fill = "Group") +
  theme(legend.position = "bottom",
        plot.title = element_text(face = "bold", size = 16),
        axis.title = element_text(face = "bold"),
        axis.text.x = element_text(angle = 45, hjust = 1))

Figure 3: Group × Interpretation Interaction

if (nrow(subject_scores_grouped) >= 2) {
  
  # Calculate means and SEs for plotting
  interaction_summary <- anova_data %>%
    group_by(group, interpretation_type) %>%
    summarise(
      mean = mean(endorsement_rate, na.rm = TRUE),
      se = sd(endorsement_rate, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    )
  
  # Line plot showing interaction
  p_interaction <- ggplot(interaction_summary, 
                         aes(x = interpretation_type, y = mean, 
                             group = group, color = group, shape = group)) +
    geom_line(size = 1.5) +
    geom_point(size = 5) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.1, size = 1) +
    scale_color_manual(values = c("LSA" = "#2ca02c", "HSA" = "#d62728"),
                      labels = c("LSA" = "Low Social Anxiety", "HSA" = "High Social Anxiety")) +
    scale_shape_manual(values = c("LSA" = 16, "HSA" = 17),
                      labels = c("LSA" = "Low Social Anxiety", "HSA" = "High Social Anxiety")) +
    labs(title = "Group × Interpretation Type Interaction",
         subtitle = "Error bars show ±1 SE (Beard & Amir, 2009 key finding)",
         x = "Interpretation Type",
         y = "Endorsement Rate",
         color = "Group",
         shape = "Group") +
    ylim(0, 1) +
    theme(legend.position = "bottom",
          plot.title = element_text(face = "bold", size = 16),
          axis.title = element_text(face = "bold"))
  
  # Bar plot showing bias by group
  bias_by_group <- subject_scores_grouped %>%
    group_by(group) %>%
    summarise(
      mean_bias = mean(endorsement_bias, na.rm = TRUE),
      se_bias = sd(endorsement_bias, na.rm = TRUE) / sqrt(n()),
      .groups = "drop"
    )
  
  p_bias_group <- ggplot(bias_by_group, aes(x = group, y = mean_bias, fill = group)) +
    geom_bar(stat = "identity", width = 0.6, color = "black", alpha = 0.7) +
    geom_errorbar(aes(ymin = mean_bias - se_bias, ymax = mean_bias + se_bias), 
                  width = 0.2, size = 1) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "red", size = 1) +
    geom_text(aes(label = sprintf("%.3f", mean_bias)), 
              vjust = ifelse(bias_by_group$mean_bias > 0, -1.5, 2),
              size = 5, fontface = "bold") +
    scale_fill_manual(values = c("LSA" = "#2ca02c", "HSA" = "#d62728")) +
    scale_x_discrete(labels = c("LSA" = "Low Social\nAnxiety", "HSA" = "High Social\nAnxiety")) +
    labs(title = "Endorsement Bias by Group",
         subtitle = "Negative - Benign",
         x = "Group",
         y = "Endorsement Bias Score") +
    theme(legend.position = "none",
          plot.title = element_text(face = "bold", size = 16),
          axis.title = element_text(face = "bold"))
  
  # Combine plots
  ggarrange(p_interaction, p_bias_group, ncol = 2, widths = c(1.2, 1))
}


Summary

Sample Size: N = 5 participants
Groups: HSA (n = 2) | LSA (n = 2)

Key Findings:

  1. Overall Endorsement Bias:
    • Mean difference = -0.046
    • No significant bias (p = 0.862)
  2. Response Time:
    • No significant RT difference (p = 0.259)
  3. Group × Interpretation Interaction:
    • Mixed ANOVA conducted - see results above

Note: This is a pilot study with limited power. Results should be interpreted cautiously.


Save Results

write_csv(subject_scores, file.path(OUTPUT_DIR, "pilot_subject_scores.csv"))
write_csv(df, file.path(OUTPUT_DIR, "pilot_cleaned_data.csv"))

cat("Results saved to:\n")
## Results saved to:
cat("  - pilot_subject_scores.csv\n")
##   - pilot_subject_scores.csv
cat("  - pilot_cleaned_data.csv\n")
##   - pilot_cleaned_data.csv

References

Beard, C., & Amir, N. (2009). Interpretation in social anxiety: When meaning precedes ambiguity. Cognitive Therapy and Research, 33(4), 406-415. https://doi.org/10.1007/s10608-009-9235-0