Similarity Ratings Analyses

Libraries and Data Files

#load libraries
library(dplyr)
library(ggplot2)
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.5.2
library(ez)
library(sciplot)
## Warning: package 'sciplot' was built under R version 4.5.2
library(gplots)
## Warning: package 'gplots' was built under R version 4.5.2
library(car)
library(afex)
## Warning: package 'afex' was built under R version 4.5.2
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.2
# Load the similarity-ratings data set (the CSV has a header row).
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
data <- read.csv("AllData_SimilarityRatings.csv", header = TRUE)

Data Pre-processing

# Short aliases for the columns used throughout the analysis.
data$sbj <- data$participant           # participant identifier
data$resp <- data$slider_exp.response  # slider rating
data$block <- data$blocks.thisN + 1    # block counter shifted by +1

# Turn every character column into a factor. across() + where() is the current
# dplyr idiom; mutate_if() is superseded.
data <- mutate(data, across(where(is.character), as.factor))
# participant is read as an integer, so it has to be converted separately.
data$sbj <- as.factor(data$sbj)
str(data)
## 'data.frame':    9648 obs. of  38 variables:
##  $ trl                       : int  20 12 17 14 21 3 10 13 6 23 ...
##  $ cnd                       : Factor w/ 2 levels "between","within": 2 2 2 2 1 2 2 2 2 1 ...
##  $ dim                       : Factor w/ 2 levels "irrel","rel": 2 1 2 2 2 1 1 2 1 2 ...
##  $ size1                     : num  3.4 2.6 2.2 2.6 2.2 2.2 2.6 2.2 3 3 ...
##  $ hue1                      : num  0.1 0.3 0.1 -0.3 -0.1 0.1 -0.1 -0.3 -0.1 -0.1 ...
##  $ size2                     : num  3.4 3 2.2 2.6 2.2 2.6 3 2.2 3.4 3 ...
##  $ hue2                      : num  0.3 0.3 0.3 -0.1 0.1 0.1 -0.1 -0.1 -0.1 0.1 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ blocks.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisTrialN         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisIndex          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  19 11 16 13 20 2 9 12 5 22 ...
##  $ break_loop.thisRepN       : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN     : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN          : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex      : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                 : num  68.9 73.1 75.9 79.7 82.8 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ slider_exp.response       : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ slider_exp.rt             : num  3.89 2.6 3.45 2.87 4.87 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ session                   : Factor w/ 2 levels "Post","Pre": 1 1 1 1 1 1 1 1 1 1 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 134 levels "2025-02-25_11h40.18.454",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ expName                   : Factor w/ 1 level "SimilarityRatings": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 134 levels "2025-02-25 11h48.02.736634 +0200",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ sbj                       : Factor w/ 67 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ resp                      : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...
# Re-order the levels of the session factor so that "Pre" comes before "Post".
data$session <- factor(data$session, levels = c("Pre", "Post"))

# Exclude one participant (20335) for not following instructions.
# %in% never returns NA, so a row with a missing sbj is kept unchanged instead
# of being turned into an all-NA row, which is what a `!=` mask would do.
data <- droplevels(data[!(data$sbj %in% "20335"), ])
str(data)
## 'data.frame':    9504 obs. of  38 variables:
##  $ trl                       : int  20 12 17 14 21 3 10 13 6 23 ...
##  $ cnd                       : Factor w/ 2 levels "between","within": 2 2 2 2 1 2 2 2 2 1 ...
##  $ dim                       : Factor w/ 2 levels "irrel","rel": 2 1 2 2 2 1 1 2 1 2 ...
##  $ size1                     : num  3.4 2.6 2.2 2.6 2.2 2.2 2.6 2.2 3 3 ...
##  $ hue1                      : num  0.1 0.3 0.1 -0.3 -0.1 0.1 -0.1 -0.3 -0.1 -0.1 ...
##  $ size2                     : num  3.4 3 2.2 2.6 2.2 2.6 3 2.2 3.4 3 ...
##  $ hue2                      : num  0.3 0.3 0.3 -0.1 0.1 0.1 -0.1 -0.1 -0.1 0.1 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ blocks.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisTrialN         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisIndex          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  19 11 16 13 20 2 9 12 5 22 ...
##  $ break_loop.thisRepN       : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN     : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN          : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex      : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                 : num  68.9 73.1 75.9 79.7 82.8 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ slider_exp.response       : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ slider_exp.rt             : num  3.89 2.6 3.45 2.87 4.87 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ session                   : Factor w/ 2 levels "Pre","Post": 2 2 2 2 2 2 2 2 2 2 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 132 levels "2025-02-25_11h40.18.454",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ expName                   : Factor w/ 1 level "SimilarityRatings": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 132 levels "2025-02-25 11h48.02.736634 +0200",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ sbj                       : Factor w/ 66 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ resp                      : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...
# Remove the five participants who did not meet the learning criterion and
# drop the factor levels that become empty as a result.
data <- droplevels(
  data[
    !is.element(data$sbj, c("77623", "77337", "48888", "43423", "63018")),
  ]
)
str(data)
## 'data.frame':    8784 obs. of  38 variables:
##  $ trl                       : int  20 12 17 14 21 3 10 13 6 23 ...
##  $ cnd                       : Factor w/ 2 levels "between","within": 2 2 2 2 1 2 2 2 2 1 ...
##  $ dim                       : Factor w/ 2 levels "irrel","rel": 2 1 2 2 2 1 1 2 1 2 ...
##  $ size1                     : num  3.4 2.6 2.2 2.6 2.2 2.2 2.6 2.2 3 3 ...
##  $ hue1                      : num  0.1 0.3 0.1 -0.3 -0.1 0.1 -0.1 -0.3 -0.1 -0.1 ...
##  $ size2                     : num  3.4 3 2.2 2.6 2.2 2.6 3 2.2 3.4 3 ...
##  $ hue2                      : num  0.3 0.3 0.3 -0.1 0.1 0.1 -0.1 -0.1 -0.1 0.1 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ blocks.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisTrialN         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisIndex          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  19 11 16 13 20 2 9 12 5 22 ...
##  $ break_loop.thisRepN       : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN     : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN          : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex      : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                 : num  68.9 73.1 75.9 79.7 82.8 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ slider_exp.response       : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ slider_exp.rt             : num  3.89 2.6 3.45 2.87 4.87 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ session                   : Factor w/ 2 levels "Pre","Post": 2 2 2 2 2 2 2 2 2 2 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 122 levels "2025-02-25_13h08.44.108",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ expName                   : Factor w/ 1 level "SimilarityRatings": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 122 levels "2025-02-25 13h09.40.421447 +0200",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ sbj                       : Factor w/ 61 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ resp                      : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...

Comparison of Responses from “Exclusion-Instructions” Participants

# Check if we can keep data from the first 5 sbjs, whose instructions were
# different (instructions: we will delete your data).
# These 5 participants are: "17202" "40242" "57393" "89443" "94372"
excl_instr_sbj <- c("17202", "40242", "57393", "89443", "94372")

# Flag every row in one vectorised step. Unlike assigning into a row subset,
# this does not fail when no row matches or when sbj contains NA, and the
# explicit levels keep both categories ("no", "yes") even if one is empty.
data$excl_instr <- factor(
  ifelse(data$sbj %in% excl_instr_sbj, "yes", "no"),
  levels = c("no", "yes")
)
str(data)
## 'data.frame':    8784 obs. of  39 variables:
##  $ trl                       : int  20 12 17 14 21 3 10 13 6 23 ...
##  $ cnd                       : Factor w/ 2 levels "between","within": 2 2 2 2 1 2 2 2 2 1 ...
##  $ dim                       : Factor w/ 2 levels "irrel","rel": 2 1 2 2 2 1 1 2 1 2 ...
##  $ size1                     : num  3.4 2.6 2.2 2.6 2.2 2.2 2.6 2.2 3 3 ...
##  $ hue1                      : num  0.1 0.3 0.1 -0.3 -0.1 0.1 -0.1 -0.3 -0.1 -0.1 ...
##  $ size2                     : num  3.4 3 2.2 2.6 2.2 2.6 3 2.2 3.4 3 ...
##  $ hue2                      : num  0.3 0.3 0.3 -0.1 0.1 0.1 -0.1 -0.1 -0.1 0.1 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ blocks.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisTrialN         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blocks.thisIndex          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  19 11 16 13 20 2 9 12 5 22 ...
##  $ break_loop.thisRepN       : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN     : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN          : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex      : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                 : num  68.9 73.1 75.9 79.7 82.8 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ slider_exp.response       : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ slider_exp.rt             : num  3.89 2.6 3.45 2.87 4.87 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ session                   : Factor w/ 2 levels "Pre","Post": 2 2 2 2 2 2 2 2 2 2 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 122 levels "2025-02-25_13h08.44.108",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ expName                   : Factor w/ 1 level "SimilarityRatings": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 122 levels "2025-02-25 13h09.40.421447 +0200",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ sbj                       : Factor w/ 61 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ resp                      : int  5 7 5 4 3 4 3 2 7 5 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ excl_instr                : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...
# Chi-squared test of whether the distribution of responses differs between
# participants with excl_instr = "yes" and those with excl_instr = "no".
# The contingency table has one row per response value and one column per
# instruction group.
# NOTE(review): each trial is counted as a separate observation, so several
# ratings from the same participant enter the table - confirm this is intended.
table_sim <- table(data[["resp"]], data[["excl_instr"]])
chi_sim <- chisq.test(table_sim)
## Warning in chisq.test(table_sim): Chi-squared approximation may be incorrect
print(chi_sim)
## 
##  Pearson's Chi-squared test
## 
## data:  table_sim
## X-squared = 18.417, df = 8, p-value = 0.01831
# Percentage of cells in a chi-squared test whose expected count is >= 5.
#
# chisq_test_result: an object with an `expected` component, e.g. the value
#   returned by chisq.test().
# Returns the share of cells with an expected count of at least 5, as a
#   percentage.
check_expected_frequencies <- function(chisq_test_result) {
  expected_counts <- chisq_test_result$expected
  n_adequate <- sum(expected_counts >= 5)
  n_cells <- length(expected_counts)
  (n_adequate / n_cells) * 100
}

# Percentage of cells of chi_sim with an expected count of at least 5.
check_expected_frequencies(chi_sim)
## [1] 94.44444
# it should be >80% (common rule of thumb for relying on the chi-squared
# approximation)

# Data for the distribution plot: for each instruction group (excl_instr),
# the relative frequency of every response value.
plot_sim <- data %>%
  group_by(resp, excl_instr) %>%
  summarise(n = n()) %>%          # number of rows per response x group
  group_by(excl_instr) %>%
  mutate(freq = n / sum(n)) %>%   # proportion within each instruction group
  ungroup()                       # do not leave a grouped tibble behind
## `summarise()` has grouped output by 'resp'. You can override using the
## `.groups` argument.
# Dodged bar chart of the relative response frequencies, one bar colour per
# instruction group.
# NOTE(review): the title says "Pre Session", but plot_sim is built from `data`
# without any filter on session - confirm which one is intended.
ggplot(plot_sim, aes(x = as.factor(resp), y = freq, fill = excl_instr)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Response Distribution - Pre Session",
    # The range dash had been mangled into "???"; assumed to be an en dash and
    # written as the escape \u2013 so that it survives re-encoding of the file.
    x = "Response (1\u20139)",
    y = "Relative Frequency",
    fill = "Exclude\nInstructions"
  ) +
  theme_minimal()

# Calculate the standardized score for the resp variable: each response is
# centred on the participant's own mean and divided by the participant's own SD.
data <- data %>%
  group_by(sbj) %>%
  mutate(z_resp = (resp - mean(resp, na.rm = TRUE)) / sd(resp, na.rm = TRUE)) %>%
  ungroup()  # otherwise `data` stays grouped by sbj for everything that follows

Similarity Ratings Plot

# Per-participant mean standardized ratings, split by group (600, 1100, 1600,
# RD) and pair type. For every group <G> this creates:
#   data_btw<G>, data_btw_av<G>   between-category pairs
#   data_w_r<G>, data_w_r_av<G>   within-category pairs, relevant dimension
#   data_w_i<G>, data_w_i_av<G>   within-category pairs, irrelevant dimension
# The `_av` objects hold one mean z_resp per participant and session.
# The same subset/aggregate steps were previously written out twelve times;
# they now live in two small helpers.

# Rows of `df` for one group and pair type, with unused factor levels dropped.
# `dim_level = NULL` means the `dim` column is not used for filtering.
subset_pairs <- function(df, group_level, cnd_level, dim_level = NULL) {
  keep <- df$cnd == cnd_level
  if (!is.null(dim_level)) {
    keep <- keep & df$dim == dim_level
  }
  keep <- keep & df$group == group_level
  droplevels(df[keep, ])
}

# Mean z_resp for every participant x session combination present in `df`.
mean_by_sbj_session <- function(df) {
  out <- aggregate(df$z_resp, list(df$sbj, df$session), mean)
  colnames(out) <- c("sbj", "session", "z_resp")
  out
}

# 600 ms
###########

#Between-category differences
data_btw600 <- subset_pairs(data, "600", "between")
data_btw_av600 <- mean_by_sbj_session(data_btw600)

#Within-category differences, Relevant Dimension
data_w_r600 <- subset_pairs(data, "600", "within", "rel")
data_w_r_av600 <- mean_by_sbj_session(data_w_r600)

#Within-category differences, Irrelevant Dimension
data_w_i600 <- subset_pairs(data, "600", "within", "irrel")
data_w_i_av600 <- mean_by_sbj_session(data_w_i600)


# 1100 ms
###########

#Between-category differences
data_btw1100 <- subset_pairs(data, "1100", "between")
data_btw_av1100 <- mean_by_sbj_session(data_btw1100)

#Within-category differences, Relevant Dimension
data_w_r1100 <- subset_pairs(data, "1100", "within", "rel")
data_w_r_av1100 <- mean_by_sbj_session(data_w_r1100)

#Within-category differences, Irrelevant Dimension
data_w_i1100 <- subset_pairs(data, "1100", "within", "irrel")
data_w_i_av1100 <- mean_by_sbj_session(data_w_i1100)


# 1600 ms
###########

#Between-category differences
data_btw1600 <- subset_pairs(data, "1600", "between")
data_btw_av1600 <- mean_by_sbj_session(data_btw1600)

#Within-category differences, Relevant Dimension
data_w_r1600 <- subset_pairs(data, "1600", "within", "rel")
data_w_r_av1600 <- mean_by_sbj_session(data_w_r1600)

#Within-category differences, Irrelevant Dimension
data_w_i1600 <- subset_pairs(data, "1600", "within", "irrel")
data_w_i_av1600 <- mean_by_sbj_session(data_w_i1600)


# RD
###########

#Between-category differences
data_btwRD <- subset_pairs(data, "RD", "between")
data_btw_avRD <- mean_by_sbj_session(data_btwRD)

#Within-category differences, Relevant Dimension
data_w_rRD <- subset_pairs(data, "RD", "within", "rel")
data_w_r_avRD <- mean_by_sbj_session(data_w_rRD)

#Within-category differences, Irrelevant Dimension
data_w_iRD <- subset_pairs(data, "RD", "within", "irrel")
data_w_i_avRD <- mean_by_sbj_session(data_w_iRD)


##########################################################
# Similarity Ratings, by Group, Session, and Pair Type
# Violin plots + within-subject trajectories (ggplot2)
##########################################################
# Pair type as one factor with three levels:
# "between", "within_rel" and "within_irrel".
data$condition <- as.factor(
  ifelse(
    data$cnd == "between",
    "between",
    ifelse(data$dim == "rel", "within_rel", "within_irrel")
  )
)

# Axis titles
xlb <- "Session"
ylb <- "Standardized Similarity Ratings"

# Factor levels (in display order) and the facet labels shown for them
group_levels <- c("600", "1100", "1600", "RD")
group_labels <- c(
  "Group: 600 ms",
  "Group: 1100 ms",
  "Group: 1600 ms",
  "Group: Response Defined"
)

cond_levels <- c("between", "within_rel", "within_irrel")
cond_labels <- c(
  "Between Category Pairs\nRelevant Dimension",
  "Within Category Pairs\nRelevant Dimension",
  "Within Category Pairs\nIrrelevant Dimension"
)

# One mean z_resp per participant, group, pair type and session;
# cells whose mean is missing are removed.
plot_df <- data %>%
  mutate(group = factor(group, levels = group_levels, labels = group_labels)) %>%
  mutate(
    condition = factor(condition, levels = cond_levels, labels = cond_labels)
  ) %>%
  group_by(sbj, group, condition, session) %>%
  summarise(z_resp = mean(z_resp, na.rm = TRUE), .groups = "drop") %>%
  filter(!is.na(z_resp))

# Violins show the distribution per session; the grey lines and points connect
# each participant's own cell means across sessions.
p_sim <- ggplot(plot_df, aes(x = session, y = z_resp)) +
  geom_violin(
    aes(fill = session),
    color = NA,
    alpha = 0.55,
    width = 0.8,
    trim = FALSE
  ) +
  geom_line(
    aes(group = sbj),
    color = "grey40",
    alpha = 0.70,
    linewidth = 0.30
  ) +
  geom_point(
    aes(group = sbj),
    color = "grey40",
    alpha = 0.70,
    size = 0.80
  ) +
  coord_cartesian(ylim = c(-1.5, 1.5)) +
  facet_grid(group ~ condition) +   # rows: groups, columns: pair types
  scale_fill_manual(values = c("grey25", "grey75")) +
  labs(
    x = xlb,
    y = ylb,
    title = "Similarity Ratings by Group, Session, and Pair Type"
  ) +
  theme_classic(base_size = 11) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "bold"),   # axis titles bold
    axis.title.y = element_text(face = "bold"),
    axis.text.x = element_text(face = "bold"),    # Pre/Post bold
    strip.text = element_text(face = "bold"),     # facet titles bold
    strip.background = element_blank(),
    legend.position = "none"
  )

print(p_sim)

#graph for paper 600 x 750 

Inferential Statistics - ANOVA

# Pair type as one factor with three levels:
# "between", "within_rel" and "within_irrel".
data$condition <- as.factor(
  ifelse(
    data$cnd == "between",
    "between",
    ifelse(data$dim == "rel", "within_rel", "within_irrel")
  )
)

# Mixed ANOVA on z_resp: session and condition vary within participants (sbj),
# group varies between participants; Type 3 sums of squares.
ezANOVA(
  data = data,
  dv = z_resp,
  wid = sbj,
  within = .(session, condition),
  between = group,
  type = 3
)
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Warning: Collapsing data to cell means. *IF* the requested effects are a subset
## of the full design, you must use the "within_full" argument, else results may
## be inaccurate.
## $ANOVA
##                    Effect DFn DFd          F            p p<.05         ges
## 2                   group   3  57  2.9180733 4.179359e-02     * 0.009906707
## 3                 session   1  57 13.1225571 6.220974e-04     * 0.048932436
## 5               condition   2 114 42.3330979 1.780369e-14     * 0.288107750
## 4           group:session   3  57  0.8059441 4.957776e-01       0.009390649
## 6         group:condition   6 114  2.3418151 3.602935e-02     * 0.062936624
## 7       session:condition   2 114 10.0766925 9.338633e-05     * 0.028583838
## 8 group:session:condition   6 114  0.8783103 5.132696e-01       0.007635503
## 
## $`Mauchly's Test for Sphericity`
##                    Effect         W            p p<.05
## 5               condition 0.1464522 4.360026e-24     *
## 6         group:condition 0.1464522 4.360026e-24     *
## 7       session:condition 0.4552354 2.695317e-10     *
## 8 group:session:condition 0.4552354 2.695317e-10     *
## 
## $`Sphericity Corrections`
##                    Effect       GGe        p[GG] p[GG]<.05       HFe
## 5               condition 0.5395059 6.936719e-09         * 0.5416811
## 6         group:condition 0.5395059 7.736925e-02           0.5416811
## 7       session:condition 0.6473478 9.223859e-04         * 0.6560627
## 8 group:session:condition 0.6473478 4.786980e-01           0.6560627
##          p[HF] p[HF]<.05
## 5 6.525744e-09         *
## 6 7.708394e-02          
## 7 8.713289e-04         *
## 8 4.797683e-01
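#report GG corrections (df x GGe, using the values printed in the tables above)
#condition
#2*0.5395059
#114*0.5395059

#session:condition
#2*0.6473478
#114*0.6473478

#group:condition
#6*0.5395059
#114*0.5395059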


#post-hoc comparisons
# Use Type 3 sums of squares as the afex default.
afex_options(type = 3)

# Same mixed design as the ezANOVA call, fitted with afex so that emmeans can
# be applied to the model object. There is more than one observation per
# design cell, so they are averaged; fun_aggregate = mean states this
# explicitly, as the afex warning recommends.
m <- aov_ez(
  id = "sbj",
  dv = "z_resp",
  data = data,
  within = c("session", "condition"),
  between = "group",
  type = 3,
  factorize = FALSE,
  fun_aggregate = mean
)
## Contrasts set to contr.sum for the following variables: group
anova(m)
## Anova Table (Type 3 tests)
## 
## Response: z_resp
##                         num Df den Df     MSE       F      ges    Pr(>F)    
## group                   3.0000 57.000 0.11685  2.9181 0.009907 0.0417936 *  
## session                 1.0000 57.000 0.40085 13.1226 0.048932 0.0006221 ***
## group:session           3.0000 57.000 0.40085  0.8059 0.009391 0.4957776    
## condition               1.0790 61.504 0.90583 42.3331 0.288108 6.937e-09 ***
## group:condition         3.2370 61.504 0.90583  2.3418 0.062937 0.0773693 .  
## session:condition       1.2947 73.798 0.23059 10.0767 0.028584 0.0009224 ***
## group:session:condition 3.8841 73.798 0.23059  0.8783 0.007636 0.4786980    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Estimated marginal means of session within each condition
# (averaged over group).
emm_sc <- emmeans(m, ~ session | condition)

########################################################
# Post–Pre within each condition
########################################################

post_pre_by_condition <- contrast(
  emm_sc,
  method = list("Post-Pre" = c(-1, 1)),  # Pre, Post -> Post-Pre
  adjust = "holm"
)

# emmeans applies a multiplicity adjustment separately inside each `by` group.
# Every condition holds a single Post-Pre contrast, so "holm" adjusted nothing.
# by = NULL puts the three contrasts into one family, so the Holm correction
# is applied across conditions; the result is printed as one table.
summary(post_pre_by_condition, infer = TRUE, by = NULL, adjust = "holm")
## condition = between:
##  contrast estimate     SE df lower.CL upper.CL t.ratio p.value
##  Post-Pre  -0.4098 0.0931 57   -0.596   -0.223  -4.402  <.0001
## 
## condition = within_irrel:
##  contrast estimate     SE df lower.CL upper.CL t.ratio p.value
##  Post-Pre   0.0115 0.0798 57   -0.148    0.171   0.144  0.8858
## 
## condition = within_rel:
##  contrast estimate     SE df lower.CL upper.CL t.ratio p.value
##  Post-Pre  -0.3212 0.0890 57   -0.499   -0.143  -3.611  0.0006
## 
## Results are averaged over the levels of: group 
## Confidence level used: 0.95
########################################################
# Compare Post–Pre changes across conditions
########################################################
# What the code below computes: all pairwise comparisons among the
# condition x session cell means, Holm-adjusted as a single family
# (by = NULL).
# NOTE(review): these are comparisons of cell means, not differences between
# the Post–Pre changes themselves; if the latter is what the heading refers
# to, an interaction contrast would be needed - confirm the intent.

# EMMs for condition × session
emm_cs <- emmeans(m, ~ condition * session)  # averaged over group

chg_pairs <- contrast(emm_cs, method = "pairwise", by = NULL, adjust = "holm")
# infer = TRUE requests both confidence intervals and tests.
summary(chg_pairs, infer = TRUE)
##  contrast                             estimate     SE df lower.CL upper.CL
##  between Pre - within_irrel Pre         0.8770 0.0947 57  0.58688   1.1672
##  between Pre - within_rel Pre          -0.0422 0.0354 57 -0.15077   0.0664
##  between Pre - between Post             0.4098 0.0931 57  0.12460   0.6951
##  between Pre - within_irrel Post        0.8655 0.1270 57  0.47509   1.2559
##  between Pre - within_rel Post          0.2791 0.0900 57  0.00323   0.5549
##  within_irrel Pre - within_rel Pre     -0.9192 0.0967 57 -1.21558  -0.6228
##  within_irrel Pre - between Post       -0.4672 0.1250 57 -0.85013  -0.0842
##  within_irrel Pre - within_irrel Post  -0.0115 0.0798 57 -0.25605   0.2330
##  within_irrel Pre - within_rel Post    -0.5980 0.1260 57 -0.98319  -0.2127
##  within_rel Pre - between Post          0.4520 0.0915 57  0.17164   0.7324
##  within_rel Pre - within_irrel Post     0.9077 0.1230 57  0.53039   1.2850
##  within_rel Pre - within_rel Post       0.3212 0.0890 57  0.04871   0.5938
##  between Post - within_irrel Post       0.4557 0.1420 57  0.01995   0.8914
##  between Post - within_rel Post        -0.1308 0.0361 57 -0.24137  -0.0202
##  within_irrel Post - within_rel Post   -0.5864 0.1470 57 -1.03776  -0.1351
##  t.ratio p.value
##    9.261  <.0001
##   -1.191  0.4774
##    4.402  0.0004
##    6.792  <.0001
##    3.100  0.0090
##   -9.503  <.0001
##   -3.738  0.0030
##   -0.144  0.8858
##   -4.756  0.0001
##    4.939  0.0001
##    7.371  <.0001
##    3.611  0.0037
##    3.204  0.0089
##   -3.624  0.0037
##   -3.981  0.0016
## 
## Results are averaged over the levels of: group 
## Confidence level used: 0.95 
## Conf-level adjustment: bonferroni method for 15 estimates 
## P value adjustment: holm method for 15 tests

Session Information

# Print the R version, platform and package versions used for this analysis.
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Athens
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] emmeans_2.0.0 afex_1.5-0    lme4_1.1-37   Matrix_1.7-3  car_3.1-3    
##  [6] carData_3.0-5 gplots_3.2.0  sciplot_1.2-0 ez_4.4-0      rstatix_0.7.3
## [11] ggplot2_4.0.0 dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.53           bslib_0.9.0        
##  [4] caTools_1.18.3      lattice_0.22-7      numDeriv_2016.8-1.1
##  [7] vctrs_0.6.5         tools_4.5.1         Rdpack_2.6.4       
## [10] bitops_1.0-9        generics_0.1.4      sandwich_3.1-1     
## [13] parallel_4.5.1      tibble_3.3.0        pkgconfig_2.0.3    
## [16] KernSmooth_2.23-26  RColorBrewer_1.1-3  S7_0.2.0           
## [19] lifecycle_1.0.4     compiler_4.5.1      farver_2.1.2       
## [22] stringr_1.5.2       codetools_0.2-20    lmerTest_3.1-3     
## [25] htmltools_0.5.8.1   sass_0.4.10         yaml_2.3.10        
## [28] Formula_1.2-5       pillar_1.11.1       nloptr_2.2.1       
## [31] jquerylib_0.1.4     tidyr_1.3.1         MASS_7.3-65        
## [34] cachem_1.1.0        reformulas_0.4.1    multcomp_1.4-29    
## [37] boot_1.3-31         abind_1.4-8         nlme_3.1-168       
## [40] gtools_3.9.5        tidyselect_1.2.1    digest_0.6.37      
## [43] mvtnorm_1.3-3       stringi_1.8.7       reshape2_1.4.4     
## [46] purrr_1.1.0         labeling_0.4.3      splines_4.5.1      
## [49] fastmap_1.2.0       grid_4.5.1          cli_3.6.5          
## [52] magrittr_2.0.4      survival_3.8-3      TH.data_1.1-5      
## [55] broom_1.0.10        withr_3.0.2         scales_1.4.0       
## [58] backports_1.5.0     estimability_1.5.1  rmarkdown_2.30     
## [61] zoo_1.8-14          evaluate_1.0.5      knitr_1.50         
## [64] rbibutils_2.3       mgcv_1.9-3          rlang_1.1.6        
## [67] Rcpp_1.1.0          xtable_1.8-4        glue_1.8.0         
## [70] rstudioapi_0.17.1   minqa_1.2.8         jsonlite_2.0.0     
## [73] R6_2.6.1            plyr_1.8.9