Accuracy Analyses for Categorization Task

Libraries and Data Files

#load libraries
library(dplyr)
library(gplots)
## Warning: package 'gplots' was built under R version 4.5.2
library(sciplot)
## Warning: package 'sciplot' was built under R version 4.5.2
library(ez)
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.2
# Clear the global environment so the analysis starts from a clean state.
# NOTE(review): this removes every object in the calling session; running the
# script in a fresh R session would make this line unnecessary.
rm(list = ls())
# Load the data file. The path is relative to the current working directory;
# uncomment the setwd() line below (RStudio only) to use the script's folder.
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Use TRUE rather than T: T is an ordinary variable and can be reassigned.
data <- read.csv("AllData_Categorization.csv", header = TRUE)

Data Preprocessing

# Add short aliases for the columns used throughout the analysis:
#   acc   - copy of keyboard_response_Exp.corr (response correctness)
#   rt    - copy of keyboard_response_Exp.rt (response time)
#   sbj   - copy of participant (participant identifier)
#   block - block.thisN + 1, i.e. block number starting at 1 instead of 0
data <- transform(
  data,
  acc = keyboard_response_Exp.corr,
  rt = keyboard_response_Exp.rt,
  sbj = participant,
  block = block.thisN + 1
)
# Inspect the resulting structure
str(data)
## 'data.frame':    8960 obs. of  42 variables:
##  $ index                         : int  51 41 16 13 33 38 56 9 37 33 ...
##  $ st_exp                        : int  49 19 46 38 1 11 59 24 10 1 ...
##  $ p                             : int  36 14 33 27 0 8 44 17 7 0 ...
##  $ hue                           : num  -0.35 -0.15 0.15 0.15 -0.35 -0.15 -0.15 0.35 -0.25 -0.35 ...
##  $ critical                      : chr  "no" "no" "no" "no" ...
##  $ size                          : num  3.3 2.5 3.1 2.9 2.1 2.3 3.5 2.5 2.3 2.1 ...
##  $ correctAns                    : chr  "b" "b" "a" "a" ...
##  $ group                         : chr  "control" "control" "control" "control" ...
##  $ dist_from_border              : int  4 2 2 2 4 2 2 4 3 4 ...
##  $ practice_trials.thisRepN      : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN    : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN         : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex     : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN             : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN                  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex              : int  11 26 70 14 74 37 25 42 13 3 ...
##  $ break_loop.thisRepN           : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN         : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN              : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex          : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                     : num  120 124 126 129 131 ...
##  $ notes                         : logi  NA NA NA NA NA NA ...
##  $ keyboard_response_Exp.keys    : chr  "a" "b" "b" "a" ...
##  $ keyboard_response_Exp.corr    : int  0 1 0 1 1 1 1 1 1 1 ...
##  $ keyboard_response_Exp.rt      : num  2.193 0.772 0.64 1.249 1.987 ...
##  $ keyboard_response_Exp.duration: chr  "None" "None" "None" "None" ...
##  $ participant                   : int  48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
##  $ categories                    : chr  "AB" "AB" "AB" "AB" ...
##  $ date                          : chr  "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" ...
##  $ expName                       : chr  "Categorization_CON" "Categorization_CON" "Categorization_CON" "Categorization_CON" ...
##  $ psychopyVersion               : chr  "2023.2.3" "2023.2.3" "2023.2.3" "2023.2.3" ...
##  $ frameRate                     : num  60 60 60 60 60 ...
##  $ expStart                      : chr  "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" ...
##  $ acc                           : int  0 1 0 1 1 1 1 1 1 1 ...
##  $ rt                            : num  2.193 0.772 0.64 1.249 1.987 ...
##  $ sbj                           : int  48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
##  $ block                         : num  1 1 1 1 1 1 1 1 1 1 ...
# Convert every character column to a factor. across(where(...)) is the
# current dplyr idiom; mutate_if() is superseded.
data <- mutate(data, across(where(is.character), as.factor))
# The participant id is read as an integer but is used as a grouping
# variable, so it is converted to a factor as well.
data$sbj <- as.factor(data$sbj)
# Inspect the structure after conversion
str(data)
## 'data.frame':    8960 obs. of  42 variables:
##  $ index                         : int  51 41 16 13 33 38 56 9 37 33 ...
##  $ st_exp                        : int  49 19 46 38 1 11 59 24 10 1 ...
##  $ p                             : int  36 14 33 27 0 8 44 17 7 0 ...
##  $ hue                           : num  -0.35 -0.15 0.15 0.15 -0.35 -0.15 -0.15 0.35 -0.25 -0.35 ...
##  $ critical                      : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ size                          : num  3.3 2.5 3.1 2.9 2.1 2.3 3.5 2.5 2.3 2.1 ...
##  $ correctAns                    : Factor w/ 2 levels "a","b": 2 2 1 1 2 2 2 1 2 2 ...
##  $ group                         : Factor w/ 2 levels "control","oversampling": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dist_from_border              : int  4 2 2 2 4 2 2 4 3 4 ...
##  $ practice_trials.thisRepN      : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN    : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN         : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex     : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN             : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN                  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex              : int  11 26 70 14 74 37 25 42 13 3 ...
##  $ break_loop.thisRepN           : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisTrialN         : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisN              : logi  NA NA NA NA NA NA ...
##  $ break_loop.thisIndex          : logi  NA NA NA NA NA NA ...
##  $ thisRow.t                     : num  120 124 126 129 131 ...
##  $ notes                         : logi  NA NA NA NA NA NA ...
##  $ keyboard_response_Exp.keys    : Factor w/ 2 levels "a","b": 1 2 2 1 2 2 2 1 2 2 ...
##  $ keyboard_response_Exp.corr    : int  0 1 0 1 1 1 1 1 1 1 ...
##  $ keyboard_response_Exp.rt      : num  2.193 0.772 0.64 1.249 1.987 ...
##  $ keyboard_response_Exp.duration: Factor w/ 3 levels "0.0080403","0.0161796",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ participant                   : int  48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
##  $ categories                    : Factor w/ 2 levels "AB","BA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ date                          : Factor w/ 28 levels "2025-02-27_12h31.18.274",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ expName                       : Factor w/ 2 levels "Categorization_CON",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion               : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                     : num  60 60 60 60 60 ...
##  $ expStart                      : Factor w/ 28 levels "2025-02-27 12h31.28.138889 +0200",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ acc                           : int  0 1 0 1 1 1 1 1 1 1 ...
##  $ rt                            : num  2.193 0.772 0.64 1.249 1.987 ...
##  $ sbj                           : Factor w/ 28 levels "3375","5076",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ block                         : num  1 1 1 1 1 1 1 1 1 1 ...

Descriptive Statistics

# Mean accuracy per participant: one row per sbj, together with its group.
# Naming the grouping list gives the output columns sbj / group / acc directly.
data_av <- aggregate(
  data["acc"],
  by = list(sbj = data$sbj, group = data$group),
  FUN = mean
)

# Write the per-participant means to disk for the correlation analysis
write.csv(data_av, "cat_av_acc.csv", row.names = FALSE)

# Boundary stimuli only (rows with critical == "yes"):
# mean accuracy per participant, together with its group
data_av_b <- aggregate(
  acc ~ sbj + group,
  data = subset(data, critical == "yes"),
  FUN = mean
)

# Write the boundary-stimulus means to disk for the correlation analysis
write.csv(data_av_b, "cat_av_acc_b.csv", row.names = FALSE)


# Control group: mean and SD of the per-participant accuracies
with(data_av, mean(acc[group == "control"]))
## [1] 0.9075893
with(data_av, sd(acc[group == "control"]))
## [1] 0.02775435
# Oversampling group: mean and SD of the per-participant accuracies
with(data_av, mean(acc[group == "oversampling"]))
## [1] 0.8649554
with(data_av, sd(acc[group == "oversampling"]))
## [1] 0.03397226

Learning Curves

# Mean accuracy per participant within each block (group is carried along).
# Naming the grouping list gives the columns sbj / group / block / acc directly.
data_bl <- aggregate(
  data["acc"],
  by = list(sbj = data$sbj, group = data$group, block = data$block),
  FUN = mean
)
# Sort rows by participant, then by block
data_bl <- data_bl[with(data_bl, order(sbj, block)), ]

#calculate average accuracy and SE per block
# First split the per-block means by group; droplevels() discards factor
# levels that no longer occur after subsetting.
data_bl_c <- droplevels(subset(data_bl, group == "control"))
data_bl_o <- droplevels(subset(data_bl, group == "oversampling"))

#control
# Mean accuracy per block across control participants (columns: block, acc)
data_gr_c <- aggregate(data_bl_c["acc"], by = data_bl_c["block"], FUN = mean)
# Error-bar half-width per block: 1.96 * standard error of the mean, i.e. a
# normal-approximation 95% CI. It is stored under the column name `se`, which
# is what the plotting code reads.
data_gr_c$se <- 1.96 * aggregate(data_bl_c["acc"], by = data_bl_c["block"], FUN = se)$acc
str(data_gr_c)
## 'data.frame':    4 obs. of  3 variables:
##  $ block: num  1 2 3 4
##  $ acc  : num  0.884 0.905 0.921 0.92
##  $ se   : num  0.0251 0.02 0.0169 0.0177
#oversampling
# Mean accuracy per block across oversampling participants (columns: block, acc)
data_gr_o <- aggregate(data_bl_o["acc"], by = data_bl_o["block"], FUN = mean)
# Error-bar half-width per block: 1.96 * standard error of the mean, i.e. a
# normal-approximation 95% CI. It is stored under the column name `se`, which
# is what the plotting code reads.
data_gr_o$se <- 1.96 * aggregate(data_bl_o["acc"], by = data_bl_o["block"], FUN = se)$acc
str(data_gr_o)
## 'data.frame':    4 obs. of  3 variables:
##  $ block: num  1 2 3 4
##  $ acc  : num  0.825 0.863 0.895 0.878
##  $ se   : num  0.0374 0.0187 0.013 0.0284
#learning curves graph
# Shared plotting parameters. In the two-element vectors, element 1 is used
# for the oversampling group and element 2 for the control group.
col <- c("black", "grey")
pch <- c(15, 16)
lwd <- 2
lty <- c(1, 1)
xlb <- "Block"
ylb <- "Proportion Correct"
mn <- ""
#mn="Categorization Accuracy"
# Horizontal shift applied to the control series so the two groups' error
# bars are not drawn on top of each other
offset <- 0.02
#oversampling
# Draw the oversampling series first; this call sets up the axes. The x axis
# is suppressed (xaxt = "n") and drawn manually so ticks fall on blocks 1-4.
# Error bars use the `se` column of the per-block summary.
plotCI(
  x = 1:4, y = data_gr_o$acc, uiw = data_gr_o$se,
  bty = "n", ylim = c(0, 1), xlab = xlb, ylab = ylb, main = mn,
  las = 1, xaxt = "n", pch = pch[1], col = col[1], gap = 0
)
lines(x = 1:4, y = data_gr_o$acc, lty = lty[1], col = col[1], lwd = lwd)
axis(side = 1, at = c(1, 2, 3, 4))
#control
# Overlay the control series on the existing plot (add = TRUE, not T: T is an
# ordinary variable and can be reassigned)
plotCI(
  x = 1:4 + offset, y = data_gr_c$acc, uiw = data_gr_c$se,
  bty = "n", pch = pch[2], col = col[2], add = TRUE, gap = 0
)
lines(x = 1:4 + offset, y = data_gr_c$acc, lty = lty[2], col = col[2], lwd = lwd)
# Legend order matches the order of the parameter vectors above
legend(
  x = 2.5, y = 0.4, legend = c("Oversampling", "Control"),
  col = col, lty = lty, lwd = lwd, pch = pch, bty = "n", seg.len = 4
)

Inferential Statistics

# Treat block as a categorical (factor) variable for the ANOVA
data_bl$block <- as.factor(data_bl$block)

# Mixed-design ANOVA on the per-participant block means:
# block as within-subject factor, group as between-subject factor, type 3 SS
ezANOVA(
  data = data_bl,
  dv = acc,
  wid = sbj,
  within = block,
  between = group,
  type = 3
)
## $ANOVA
##        Effect DFn DFd          F            p p<.05        ges
## 2       group   1  26 13.2232818 1.197646e-03     * 0.19787808
## 3       block   3  78 11.2682600 3.207398e-06     * 0.18245496
## 4 group:block   3  78  0.8860144 4.521891e-01       0.01724539
## 
## $`Mauchly's Test for Sphericity`
##        Effect         W          p p<.05
## 3       block 0.6043731 0.02924817     *
## 4 group:block 0.6043731 0.02924817     *
## 
## $`Sphericity Corrections`
##        Effect       GGe        p[GG] p[GG]<.05       HFe        p[HF] p[HF]<.05
## 3       block 0.7415276 4.093039e-05         * 0.8140584 1.998234e-05         *
## 4 group:block 0.7415276 4.274920e-01           0.8140584 4.353149e-01
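# Greenhouse-Geisser corrected degrees of freedom for the block effect
# (uncorrected df multiplied by the GG epsilon, GGe = 0.7415276):
#   numerator df:   0.7415276 * 3  = 2.224583
#   denominator df: 0.7415276 * 78 = 57.83915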

# Step-wise successive comparisons between consecutive blocks
# Repeated-measures model with block within participant as the error stratum
mod <- aov(acc ~ group * block + Error(sbj / block), data = data_bl)
# Estimated marginal means for each block
emm_blocks <- emmeans(mod, specs = "block")
## Note: re-fitting model with sum-to-zero contrasts
## NOTE: Results may be misleading due to involvement in interactions
# Contrasts between consecutive levels (2 vs 1, 3 vs 2, 4 vs 3), with Holm
# adjustment of the p-values
stepwise <- contrast(emm_blocks, method = "consec", adjust = "holm")
print(stepwise)
##  contrast        estimate      SE df t.ratio p.value
##  block2 - block1  0.02946 0.00986 78   2.987  0.0113
##  block3 - block2  0.02411 0.00986 78   2.444  0.0336
##  block4 - block3 -0.00937 0.00986 78  -0.950  0.3448
## 
## Results are averaged over the levels of: group 
## P value adjustment: holm method for 3 tests

Session Information

# Print the R version, platform, locale, and attached/loaded package versions
# so the environment that produced the results above is recorded
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Athens
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] emmeans_2.0.0 ez_4.4-0      sciplot_1.2-0 gplots_3.2.0  dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       xfun_0.53          bslib_0.9.0        ggplot2_4.0.0     
##  [5] caTools_1.18.3     lattice_0.22-7     vctrs_0.6.5        tools_4.5.1       
##  [9] Rdpack_2.6.4       bitops_1.0-9       generics_0.1.4     sandwich_3.1-1    
## [13] tibble_3.3.0       pkgconfig_2.0.3    Matrix_1.7-3       KernSmooth_2.23-26
## [17] RColorBrewer_1.1-3 S7_0.2.0           lifecycle_1.0.4    compiler_4.5.1    
## [21] farver_2.1.2       stringr_1.5.2      codetools_0.2-20   carData_3.0-5     
## [25] htmltools_0.5.8.1  sass_0.4.10        yaml_2.3.10        Formula_1.2-5     
## [29] pillar_1.11.1      car_3.1-3          nloptr_2.2.1       jquerylib_0.1.4   
## [33] MASS_7.3-65        cachem_1.1.0       reformulas_0.4.1   boot_1.3-31       
## [37] abind_1.4-8        multcomp_1.4-29    nlme_3.1-168       gtools_3.9.5      
## [41] tidyselect_1.2.1   digest_0.6.37      mvtnorm_1.3-3      stringi_1.8.7     
## [45] reshape2_1.4.4     splines_4.5.1      fastmap_1.2.0      grid_4.5.1        
## [49] cli_3.6.5          magrittr_2.0.4     survival_3.8-3     TH.data_1.1-5     
## [53] withr_3.0.2        scales_1.4.0       estimability_1.5.1 rmarkdown_2.30    
## [57] lme4_1.1-37        zoo_1.8-14         evaluate_1.0.5     knitr_1.50        
## [61] rbibutils_2.3      mgcv_1.9-3         rlang_1.1.6        Rcpp_1.1.0        
## [65] xtable_1.8-4       glue_1.8.0         rstudioapi_0.17.1  minqa_1.2.8       
## [69] jsonlite_2.0.0     R6_2.6.1           plyr_1.8.9