Accuracy Analyses for Categorization Task
Libraries and Data Files
#load libraries
library(dplyr)
library(gplots)
## Warning: package 'gplots' was built under R version 4.5.2
library(sciplot)
## Warning: package 'sciplot' was built under R version 4.5.2
library(ez)
library(emmeans)
## Warning: package 'emmeans' was built under R version 4.5.2
#clear environment
rm(list=ls())
#set working directory & load data file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
data<-read.csv("AllData_Categorization.csv", header = T)
Data Preprocessing
#rename useful variables
data$acc<-data$keyboard_response_Exp.corr
data$rt<-data$keyboard_response_Exp.rt
data$sbj<-data$participant
data$block<-data$block.thisN+1
str(data)
## 'data.frame': 8960 obs. of 42 variables:
## $ index : int 51 41 16 13 33 38 56 9 37 33 ...
## $ st_exp : int 49 19 46 38 1 11 59 24 10 1 ...
## $ p : int 36 14 33 27 0 8 44 17 7 0 ...
## $ hue : num -0.35 -0.15 0.15 0.15 -0.35 -0.15 -0.15 0.35 -0.25 -0.35 ...
## $ critical : chr "no" "no" "no" "no" ...
## $ size : num 3.3 2.5 3.1 2.9 2.1 2.3 3.5 2.5 2.3 2.1 ...
## $ correctAns : chr "b" "b" "a" "a" ...
## $ group : chr "control" "control" "control" "control" ...
## $ dist_from_border : int 4 2 2 2 4 2 2 4 3 4 ...
## $ practice_trials.thisRepN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisTrialN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisIndex : logi NA NA NA NA NA NA ...
## $ block.thisRepN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisTrialN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisIndex : int 0 0 0 0 0 0 0 0 0 0 ...
## $ trials.thisRepN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ trials.thisTrialN : int 0 1 2 3 4 5 6 7 8 9 ...
## $ trials.thisN : int 0 1 2 3 4 5 6 7 8 9 ...
## $ trials.thisIndex : int 11 26 70 14 74 37 25 42 13 3 ...
## $ break_loop.thisRepN : logi NA NA NA NA NA NA ...
## $ break_loop.thisTrialN : logi NA NA NA NA NA NA ...
## $ break_loop.thisN : logi NA NA NA NA NA NA ...
## $ break_loop.thisIndex : logi NA NA NA NA NA NA ...
## $ thisRow.t : num 120 124 126 129 131 ...
## $ notes : logi NA NA NA NA NA NA ...
## $ keyboard_response_Exp.keys : chr "a" "b" "b" "a" ...
## $ keyboard_response_Exp.corr : int 0 1 0 1 1 1 1 1 1 1 ...
## $ keyboard_response_Exp.rt : num 2.193 0.772 0.64 1.249 1.987 ...
## $ keyboard_response_Exp.duration: chr "None" "None" "None" "None" ...
## $ participant : int 48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
## $ categories : chr "AB" "AB" "AB" "AB" ...
## $ date : chr "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" "2025-02-27_12h31.18.274" ...
## $ expName : chr "Categorization_CON" "Categorization_CON" "Categorization_CON" "Categorization_CON" ...
## $ psychopyVersion : chr "2023.2.3" "2023.2.3" "2023.2.3" "2023.2.3" ...
## $ frameRate : num 60 60 60 60 60 ...
## $ expStart : chr "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" "2025-02-27 12h31.28.138889 +0200" ...
## $ acc : int 0 1 0 1 1 1 1 1 1 1 ...
## $ rt : num 2.193 0.772 0.64 1.249 1.987 ...
## $ sbj : int 48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
## $ block : num 1 1 1 1 1 1 1 1 1 1 ...
#convert to factors
data <- mutate_if(data, is.character, as.factor)
data$sbj<-as.factor(data$sbj)
str(data)
## 'data.frame': 8960 obs. of 42 variables:
## $ index : int 51 41 16 13 33 38 56 9 37 33 ...
## $ st_exp : int 49 19 46 38 1 11 59 24 10 1 ...
## $ p : int 36 14 33 27 0 8 44 17 7 0 ...
## $ hue : num -0.35 -0.15 0.15 0.15 -0.35 -0.15 -0.15 0.35 -0.25 -0.35 ...
## $ critical : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ size : num 3.3 2.5 3.1 2.9 2.1 2.3 3.5 2.5 2.3 2.1 ...
## $ correctAns : Factor w/ 2 levels "a","b": 2 2 1 1 2 2 2 1 2 2 ...
## $ group : Factor w/ 2 levels "control","oversampling": 1 1 1 1 1 1 1 1 1 1 ...
## $ dist_from_border : int 4 2 2 2 4 2 2 4 3 4 ...
## $ practice_trials.thisRepN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisTrialN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisN : logi NA NA NA NA NA NA ...
## $ practice_trials.thisIndex : logi NA NA NA NA NA NA ...
## $ block.thisRepN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisTrialN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ block.thisIndex : int 0 0 0 0 0 0 0 0 0 0 ...
## $ trials.thisRepN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ trials.thisTrialN : int 0 1 2 3 4 5 6 7 8 9 ...
## $ trials.thisN : int 0 1 2 3 4 5 6 7 8 9 ...
## $ trials.thisIndex : int 11 26 70 14 74 37 25 42 13 3 ...
## $ break_loop.thisRepN : logi NA NA NA NA NA NA ...
## $ break_loop.thisTrialN : logi NA NA NA NA NA NA ...
## $ break_loop.thisN : logi NA NA NA NA NA NA ...
## $ break_loop.thisIndex : logi NA NA NA NA NA NA ...
## $ thisRow.t : num 120 124 126 129 131 ...
## $ notes : logi NA NA NA NA NA NA ...
## $ keyboard_response_Exp.keys : Factor w/ 2 levels "a","b": 1 2 2 1 2 2 2 1 2 2 ...
## $ keyboard_response_Exp.corr : int 0 1 0 1 1 1 1 1 1 1 ...
## $ keyboard_response_Exp.rt : num 2.193 0.772 0.64 1.249 1.987 ...
## $ keyboard_response_Exp.duration: Factor w/ 3 levels "0.0080403","0.0161796",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ participant : int 48370 48370 48370 48370 48370 48370 48370 48370 48370 48370 ...
## $ categories : Factor w/ 2 levels "AB","BA": 1 1 1 1 1 1 1 1 1 1 ...
## $ date : Factor w/ 28 levels "2025-02-27_12h31.18.274",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ expName : Factor w/ 2 levels "Categorization_CON",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ psychopyVersion : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
## $ frameRate : num 60 60 60 60 60 ...
## $ expStart : Factor w/ 28 levels "2025-02-27 12h31.28.138889 +0200",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ acc : int 0 1 0 1 1 1 1 1 1 1 ...
## $ rt : num 2.193 0.772 0.64 1.249 1.987 ...
## $ sbj : Factor w/ 28 levels "3375","5076",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ block : num 1 1 1 1 1 1 1 1 1 1 ...
Descriptive Statistics
#calculate participant's average accuracy
data_av<-aggregate(data$acc, list(data$sbj, data$group), mean)
colnames(data_av) <-c("sbj","group", "acc")
#Control group
mean(data_av[data_av$group=="control",]$acc)
## [1] 0.9075893
sd(data_av[data_av$group=="control",]$acc)
## [1] 0.02775435
#OverSampling group
mean(data_av[data_av$group=="oversampling",]$acc)
## [1] 0.8649554
sd(data_av[data_av$group=="oversampling",]$acc)
## [1] 0.03397226
Learning Curves
#calculate average accuracy per sbj and block
data_bl<-aggregate(data$acc, list(data$sbj,data$group, data$block), mean)
colnames(data_bl) <-c("sbj", "group","block", "acc")
data_bl<-data_bl[order(data_bl$sbj, data_bl$block),]
#calculate average accuracy and SE per block
#split data per group
data_bl_c<-droplevels(data_bl[data_bl$group=="control",])
data_bl_o<-droplevels(data_bl[data_bl$group=="oversampling",])
#control
data_gr_c<-aggregate(data_bl_c$acc, list(data_bl_c$block), mean)
colnames(data_gr_c)<-c("block", "acc")
temp<- aggregate(data_bl_c$acc, list(data_bl_c$block), se)
data_gr_c$se<-1.96*temp$x #95% CIs
colnames(data_gr_c)<-c("block", "acc", "se")
rm(temp)
str(data_gr_c)
## 'data.frame': 4 obs. of 3 variables:
## $ block: num 1 2 3 4
## $ acc : num 0.884 0.905 0.921 0.92
## $ se : num 0.0251 0.02 0.0169 0.0177
#oversampling
data_gr_o<-aggregate(data_bl_o$acc, list(data_bl_o$block), mean)
colnames(data_gr_o)<-c("block", "acc")
temp<- aggregate(data_bl_o$acc, list(data_bl_o$block), se)
data_gr_o$se<-1.96*temp$x
colnames(data_gr_o)<-c("block", "acc", "se")
rm(temp)
str(data_gr_o)
## 'data.frame': 4 obs. of 3 variables:
## $ block: num 1 2 3 4
## $ acc : num 0.825 0.863 0.895 0.878
## $ se : num 0.0374 0.0187 0.013 0.0284
#learning curves graph
col=c( "black", "grey")
pch=c(15,16)
lwd=2
lty=c(1,1)
xlb="Block"
ylb="Proportion Correct"
mn=""
#mn="Categorization Accuracy"
offset=0.02
#oversampling
plotCI(x=1:4, y=data_gr_o$acc, uiw=data_gr_o$se, bty="n", ylim=c(0,1), xlab=xlb, ylab=ylb, main=mn, las=1, xaxt="n", pch=pch[1], col=col[1],gap=0)
lines(x=1:4, y=data_gr_o$acc, lty=lty[1], col=col[1], lwd=lwd)
axis(side=1, at=c(1,2,3,4))
#control
plotCI(x=1:4+offset, y=data_gr_c$acc, uiw=data_gr_c$se, bty="n", pch=pch[2], col=col[2], add=T, gap=0)
lines(x=1:4+offset, y=data_gr_c$acc, lty=lty[2], col=col[2], lwd=lwd)
legend(x=2.5, y=0.4, legend=c("Oversampling", "Control"), col=col, lty=lty, lwd=lwd, pch=pch, bty="n", seg.len=4)

Inferential Statistics
data_bl$block<-as.factor(data_bl$block)
ezANOVA(data=data_bl, dv=acc, wid=sbj, within=block, between=group, type=3)
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 group 1 26 13.2232818 1.197646e-03 * 0.19787808
## 3 block 3 78 11.2682600 3.207398e-06 * 0.18245496
## 4 group:block 3 78 0.8860144 4.521891e-01 0.01724539
##
## $`Mauchly's Test for Sphericity`
## Effect W p p<.05
## 3 block 0.6043731 0.02924817 *
## 4 group:block 0.6043731 0.02924817 *
##
## $`Sphericity Corrections`
## Effect GGe p[GG] p[GG]<.05 HFe p[HF] p[HF]<.05
## 3 block 0.7415276 4.093039e-05 * 0.8140584 1.998234e-05 *
## 4 group:block 0.7415276 4.274920e-01 0.8140584 4.353149e-01
#0.7415276*3 = 2.224583, GG correction
#0.7415276*78 = 57.83915, GG correction
#step-wise Successive comparisons
mod <- aov(acc ~ group * block + Error(sbj / block), data = data_bl)
emm_blocks <- emmeans(mod, ~ block ) # EMMs
## Note: re-fitting model with sum-to-zero contrasts
## NOTE: Results may be misleading due to involvement in interactions
stepwise<- contrast(emm_blocks, method = "consec", adjust = "holm")
print(stepwise)
## contrast estimate SE df t.ratio p.value
## block2 - block1 0.02946 0.00986 78 2.987 0.0113
## block3 - block2 0.02411 0.00986 78 2.444 0.0336
## block4 - block3 -0.00937 0.00986 78 -0.950 0.3448
##
## Results are averaged over the levels of: group
## P value adjustment: holm method for 3 tests
Session Information
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Europe/Athens
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] emmeans_2.0.0 ez_4.4-0 sciplot_1.2-0 gplots_3.2.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 bitops_1.0-9 KernSmooth_2.23-26
## [5] gtools_3.9.5 stringi_1.8.7 lattice_0.22-7 lme4_1.1-37
## [9] digest_0.6.37 magrittr_2.0.4 caTools_1.18.3 estimability_1.5.1
## [13] evaluate_1.0.5 grid_4.5.1 RColorBrewer_1.1-3 mvtnorm_1.3-3
## [17] fastmap_1.2.0 plyr_1.8.9 jsonlite_2.0.0 Matrix_1.7-3
## [21] Formula_1.2-5 mgcv_1.9-3 scales_1.4.0 jquerylib_0.1.4
## [25] Rdpack_2.6.4 reformulas_0.4.1 abind_1.4-8 cli_3.6.5
## [29] rlang_1.1.6 rbibutils_2.3 splines_4.5.1 withr_3.0.2
## [33] cachem_1.1.0 yaml_2.3.10 tools_4.5.1 reshape2_1.4.4
## [37] nloptr_2.2.1 minqa_1.2.8 ggplot2_4.0.0 boot_1.3-31
## [41] vctrs_0.6.5 R6_2.6.1 lifecycle_1.0.4 stringr_1.5.2
## [45] car_3.1-3 MASS_7.3-65 pkgconfig_2.0.3 pillar_1.11.1
## [49] bslib_0.9.0 gtable_0.3.6 Rcpp_1.1.0 glue_1.8.0
## [53] xfun_0.53 tibble_3.3.0 tidyselect_1.2.1 rstudioapi_0.17.1
## [57] knitr_1.50 xtable_1.8-4 farver_2.1.2 htmltools_0.5.8.1
## [61] nlme_3.1-168 rmarkdown_2.30 carData_3.0-5 compiler_4.5.1
## [65] S7_0.2.0