Set up

# Clear the environment 
rm(list = ls())

Load packages

# Check for required packages and install them if missing
required_packages <- c("tidyverse", "readr", "rstatix", "ggpubr")
new_packages <- required_packages[!(required_packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)

# Library packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(ggpubr)

Read the data

raw_data <- read_csv("Data_2026.csv") 
## Rows: 61 Columns: 70
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (29): StartDate, EndDate, Status, IPAddress, RecordedDate, ResponseId, D...
## dbl (34): Progress, Duration (in seconds), LocationLatitude, LocationLongitu...
## lgl  (7): Finished, RecipientLastName, RecipientFirstName, RecipientEmail, E...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Please make sure the data file is in the working directory, also, I changed the data file's name and format from "Data - 2026.xlsx" to "Data_2026.csv"

# getwd()

Clean the data and check the data

# Participants Exclusion
data_1 <- raw_data %>%
  # Exclusion 1: Attention Check (Exclude inattentive participants)
  filter(passedattn == "yes") %>%
  
  # Exclusion 2: Exclude "Never" Initiators (Design choice)
  filter(initiator_type != "never") %>%
  
  # Exclusion 3: Manual Exclusion
  # Participant self-reported inattentiveness in the comments
  filter(ResponseId != "R_bBJ75JWs3W8KSgF") %>%
  
  # Select only variables of interest
  select(
    ResponseId,
    initiator_type,
    real_imaginary,
    feelings_bothyoufirst,  # Scenario 1: Reciprocated
    feelings_youalone, # Scenario 2: Unreciprocated
    sex,
    age
  )

# Sanity check

# Check 1: Missing Data
# Calculates the number of NAs in each column of the valid participants
print("--- Missing Data Count ---")
## [1] "--- Missing Data Count ---"
print(colSums(is.na(data_1)))
##            ResponseId        initiator_type        real_imaginary 
##                     0                     0                     0 
## feelings_bothyoufirst     feelings_youalone                   sex 
##                     0                     0                     0 
##                   age 
##                     0
# Check 2: Range Check 
# Ensure all affect ratings are within -30 to +30
print("--- Range Check (Min/Max) ---")
## [1] "--- Range Check (Min/Max) ---"
data_1 %>%
  select(feelings_bothyoufirst, feelings_youalone) %>%
  summarise(
    Reciprocated_Min = min(feelings_bothyoufirst, na.rm = TRUE),
    Reciprocated_Max = max(feelings_bothyoufirst, na.rm = TRUE),
    Unreciprocated_Min = min(feelings_youalone, na.rm = TRUE),
    Unreciprocated_Max = max(feelings_youalone, na.rm = TRUE)
  ) %>%
  print()
## # A tibble: 1 × 4
##   Reciprocated_Min Reciprocated_Max Unreciprocated_Min Unreciprocated_Max
##              <dbl>            <dbl>              <dbl>              <dbl>
## 1              -30               30                -30                 10

Demographics for main sample (N=36)

print("--- Main Sample Sex Distribution ---")
## [1] "--- Main Sample Sex Distribution ---"
# Calculate counts and percentages for Sex
data_1 %>%
  group_by(sex) %>%
  summarise(
    n = n(),
    percent = n() / nrow(data_1) * 100
  ) %>%
  print()
## # A tibble: 2 × 3
##   sex        n percent
##   <chr>  <int>   <dbl>
## 1 Female    20    55.6
## 2 Male      16    44.4
print("--- Main Sample Age Statistics ---")
## [1] "--- Main Sample Age Statistics ---"
# Calculate Mean and SD for Age
data_1 %>%
  mutate(age = as.numeric(age)) %>%
  get_summary_stats(age, type = "mean_sd") %>%
  print()
## # A tibble: 1 × 4
##   variable     n  mean    sd
##   <fct>    <dbl> <dbl> <dbl>
## 1 age         36  31.2  9.81

Reformat the data for analysis

clean_data <- data_1 %>%
  # Create a new variable " imaginary_conflict" to indicate if participants using imaginary or real experience
  # 1 = Imaginary, 0 = Real
  mutate(
    imaginary_conflict = if_else(
      str_detect(real_imaginary, "imagining myself in a fictional situation"), 
      1, 
      0
    )
  ) %>%

  # Reshape to Long Format 
  pivot_longer(
    cols = starts_with("feelings"),
    names_to = "condition",
    values_to = "affect_rating"
  ) %>%

  # Formatting Factors
  mutate(
    # Rename conditions for clarity
    condition = if_else(condition == "feelings_bothyoufirst", "Reciprocated", "Unreciprocated"),
    
    # Convert to factors with specific reference levels
    initiator_type = factor(initiator_type, levels = c("always", "conditional")),
    condition = factor(condition, levels = c("Reciprocated", "Unreciprocated"))
  )

# Final sample size
print(paste("Final Sample Size (Unique Participants):", n_distinct(clean_data$ResponseId)))
## [1] "Final Sample Size (Unique Participants): 36"
head(clean_data)
## # A tibble: 6 × 8
##   ResponseId        initiator_type real_imaginary sex     age imaginary_conflict
##   <chr>             <fct>          <chr>          <chr> <dbl>              <dbl>
## 1 R_UWk2HHPXa0vxJEB always         I have though… Male     26                  0
## 2 R_UWk2HHPXa0vxJEB always         I have though… Male     26                  0
## 3 R_3dNe0dpi9xqXJE4 always         I have though… Male     34                  0
## 4 R_3dNe0dpi9xqXJE4 always         I have though… Male     34                  0
## 5 R_uthr22c7jsHMSUp always         I have though… Male     20                  0
## 6 R_uthr22c7jsHMSUp always         I have though… Male     20                  0
## # ℹ 2 more variables: condition <fct>, affect_rating <dbl>

Assumption Checks For Main Sample Mixted ANOVA

# Step 1: Normality Check 
# Test normality for each combination of groups
normality_check <- clean_data %>%
  group_by(initiator_type, condition) %>%
  shapiro_test(affect_rating)

print("--- Normality Check (Shapiro-Wilk) ---")
## [1] "--- Normality Check (Shapiro-Wilk) ---"
# If p > .05, data is normal. If p < .05, data deviates from normality.
print(normality_check)
## # A tibble: 4 × 5
##   initiator_type condition      variable      statistic         p
##   <fct>          <fct>          <chr>             <dbl>     <dbl>
## 1 always         Reciprocated   affect_rating     0.936 0.178    
## 2 always         Unreciprocated affect_rating     0.926 0.113    
## 3 conditional    Reciprocated   affect_rating     0.927 0.244    
## 4 conditional    Unreciprocated affect_rating     0.652 0.0000793
# Step 1-2: Normality Visualization (Q-Q Plots)
qq_plot <- ggqqplot(clean_data, "affect_rating", ggtheme = theme_classic()) +
  facet_grid(initiator_type ~ condition, labeller = "label_both") +
  labs(title = "Q-Q Plot for Normality Check")

print(qq_plot)

# Step 2: Homogeneity of Variance 
# Check if variance is equal between "Always" and "Conditional" groups
# We check this separately for Reciprocated and Unreciprocated conditions
levene_check <- clean_data %>%
  group_by(condition) %>%
  levene_test(affect_rating ~ initiator_type)

print("--- Homogeneity of Variance (Levene's Test) ---")
## [1] "--- Homogeneity of Variance (Levene's Test) ---"
# If p > .05, variances are equal.
print(levene_check)
## # A tibble: 2 × 5
##   condition        df1   df2 statistic      p
##   <fct>          <int> <int>     <dbl>  <dbl>
## 1 Reciprocated       1    34     2.96  0.0947
## 2 Unreciprocated     1    34     0.874 0.357

Data analysis and visualization

# Step 1: Descriptives Statistics
# Calculate the Mean and SD for each group
summary_stats <- clean_data %>%
  group_by(initiator_type, condition) %>%
  get_summary_stats(affect_rating, type = "mean_sd")

print("--- Descriptive Statistics ---")
## [1] "--- Descriptive Statistics ---"
print(summary_stats)
## # A tibble: 4 × 6
##   initiator_type condition      variable          n   mean    sd
##   <fct>          <fct>          <fct>         <dbl>  <dbl> <dbl>
## 1 always         Reciprocated   affect_rating    21  12.8   11.1
## 2 always         Unreciprocated affect_rating    21 -13.5   11.9
## 3 conditional    Reciprocated   affect_rating    15   4.13  16.6
## 4 conditional    Unreciprocated affect_rating    15 -23.6   11.3
# Step 2: Fit the Model (Mixed ANOVA)
# Formula: dv ~ between * within + Error(id/within)
anova_results <- clean_data %>%
  anova_test(
    dv = affect_rating,          # Dependent Variable (affect ratings of within-condition)
    wid = ResponseId,            # Subject ID (handles the repeated measure correction)
    between = initiator_type,    # Between-Subjects Factor
    within = condition,          # Within-Subjects Factor
    effect.size = "pes"          # Partial Eta Squared (effect size)
  )

print("--- ANOVA Table ---")
## [1] "--- ANOVA Table ---"
print(get_anova_table(anova_results))
## ANOVA Table (type III tests)
## 
##                     Effect DFn DFd       F        p p<.05   pes
## 1           initiator_type   1  34   7.119 1.20e-02     * 0.173
## 2                condition   1  34 120.750 9.88e-13     * 0.780
## 3 initiator_type:condition   1  34   0.093 7.63e-01       0.003
# Step 3: Visualization

interaction_plot <- ggline(
  clean_data,
  x = "condition",
  y = "affect_rating",
  
  # Grouping variables
  color = "initiator_type",
  shape = "initiator_type",    
  linetype = "initiator_type", 
  
  # Error bars (Standard Error)
  add = "mean_se",             
  error.plot = "errorbar",     
  
  # Formatting 
  palette = c("black", "gray40"), 
  position = position_dodge(0.2),
  size = 0.8,                     
  point.size = 3, 
  
  # Labels
  # The Figure Title belongs in LaTeX, not inside the image.
  ylab = "Affect Rating",
  xlab = "Apology Outcome",
  legend.title = "Initiator Type",
  
  # Theme
  ggtheme = theme_classic()    
) +
  # Fine-tuning fonts to match LaTeX 
  theme(
    text = element_text(family = "serif"),            
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10, color = "black"),
    legend.position = "top",                          
    legend.text = element_text(size = 10),
    legend.title = element_text(face = "bold"),
    plot.margin = margin(10, 10, 10, 10)
  ) +
  # Scale and Reference Line
  scale_y_continuous(
    limits = c(-30, 30),        
    breaks = seq(-30, 30, 10)   
  ) +
  geom_hline(yintercept = 0, linetype = "dotted", color = "gray")

 # Save the plot
ggsave("interaction_plot_apa.png", plot = interaction_plot, width = 6, height = 4, dpi = 300)

print(interaction_plot)

Robustness Check (Excluding participants using imaginary conflict experience )

# Step 1: Filter Data
# Keep only participants who recalled a REAL conflict (imaginary_conflict == 0)
real_conflict_data <- clean_data %>%
  filter(imaginary_conflict == 0)

# Check how many are left
print(paste("Robustness Sample Size (Real Conflicts Only):", n_distinct(real_conflict_data$ResponseId)))
## [1] "Robustness Sample Size (Real Conflicts Only): 34"
# Demographics for robustness sample (N=34)
robust_participants <- real_conflict_data %>%
  distinct(ResponseId, .keep_all = TRUE)

print("--- Robustness Sample Sex Distribution ---")
## [1] "--- Robustness Sample Sex Distribution ---"
robust_participants %>%
  group_by(sex) %>%
  summarise(n = n()) %>%
  print()
## # A tibble: 2 × 2
##   sex        n
##   <chr>  <int>
## 1 Female    18
## 2 Male      16
print("--- Robustness Sample Age Statistics ---")
## [1] "--- Robustness Sample Age Statistics ---"
robust_participants %>%
  mutate(age = as.numeric(age)) %>%
  get_summary_stats(age, type = "mean_sd") %>%
  print()
## # A tibble: 1 × 4
##   variable     n  mean    sd
##   <fct>    <dbl> <dbl> <dbl>
## 1 age         34  31.8  9.84
# Step 2: Re-Run Descriptive Statistics
robust_summary_stats <- real_conflict_data %>%
  group_by(initiator_type, condition) %>%
  get_summary_stats(affect_rating, type = "mean_sd")

print("--- Robustness Check: Descriptive Statistics ---")
## [1] "--- Robustness Check: Descriptive Statistics ---"
print(robust_summary_stats)
## # A tibble: 4 × 6
##   initiator_type condition      variable          n   mean    sd
##   <fct>          <fct>          <fct>         <dbl>  <dbl> <dbl>
## 1 always         Reciprocated   affect_rating    20  12.4   11.2
## 2 always         Unreciprocated affect_rating    20 -13.8   12.1
## 3 conditional    Reciprocated   affect_rating    14   4.43  17.2
## 4 conditional    Unreciprocated affect_rating    14 -23.1   11.6
# Step 3: Robustness Check: Mixed ANOVA Assumption Check

# Step 3-1: Normality Check
# Test normality for each combination of groups within the REAL conflict sample
robust_normality_check <- real_conflict_data %>%
  group_by(initiator_type, condition) %>%
  shapiro_test(affect_rating)

print("--- Robustness Check: Normality (Shapiro-Wilk) ---")
## [1] "--- Robustness Check: Normality (Shapiro-Wilk) ---"
print(robust_normality_check)
## # A tibble: 4 × 5
##   initiator_type condition      variable      statistic        p
##   <fct>          <fct>          <chr>             <dbl>    <dbl>
## 1 always         Reciprocated   affect_rating     0.943 0.268   
## 2 always         Unreciprocated affect_rating     0.912 0.0689  
## 3 conditional    Reciprocated   affect_rating     0.909 0.152   
## 4 conditional    Unreciprocated affect_rating     0.674 0.000203
# Step 3-1-2. Normality Visualization (Q-Q Plots)
# Visual check for the robust sample
robust_qq_plot <- ggqqplot(real_conflict_data, "affect_rating", ggtheme = theme_classic()) +
  facet_grid(initiator_type ~ condition, labeller = "label_both") +
  labs(title = "Robustness Check: Q-Q Plot for Normality")

print(robust_qq_plot)

# Step 3-2. Homogeneity of Variance 
# Check if variance is equal between "Always" and "Conditional" groups
robust_levene_check <- real_conflict_data %>%
  group_by(condition) %>%
  levene_test(affect_rating ~ initiator_type)

print("--- Robustness Check: Homogeneity of Variance (Levene's Test) ---")
## [1] "--- Robustness Check: Homogeneity of Variance (Levene's Test) ---"
print(robust_levene_check)
## # A tibble: 2 × 5
##   condition        df1   df2 statistic     p
##   <fct>          <int> <int>     <dbl> <dbl>
## 1 Reciprocated       1    32     2.79  0.105
## 2 Unreciprocated     1    32     0.576 0.454
# Step 4: Re-Run Mixed ANOVA Model
robust_anova_results <- real_conflict_data %>%
  anova_test(
    dv = affect_rating,          # Dependent Variable (affect ratings of within-condition)
    wid = ResponseId,            # Subject ID
    between = initiator_type,    # Between-Subjects Factor
    within = condition,          # Within-Subjects Factor
    effect.size = "pes"          # Partial Eta Squared
  )

print("--- Robustness Check: ANOVA Table ---")
## [1] "--- Robustness Check: ANOVA Table ---"
print(get_anova_table(robust_anova_results))
## ANOVA Table (type III tests)
## 
##                     Effect DFn DFd       F        p p<.05   pes
## 1           initiator_type   1  32   5.474 2.60e-02     * 0.146
## 2                condition   1  32 106.451 1.05e-11     * 0.769
## 3 initiator_type:condition   1  32   0.064 8.02e-01       0.002