Accuracy Analyses for Categorization Task

Libraries and Data Files

#load libraries
library(dplyr)
library(ez)
library(sciplot)
library(gplots)
library(plotrix)
## Warning: package 'plotrix' was built under R version 4.3.2
library (afex)
## Warning: package 'afex' was built under R version 4.3.3
library(emmeans)
library(mgcv)
library(itsadug)
## Warning: package 'itsadug' was built under R version 4.3.2
## Warning: package 'plotfunctions' was built under R version 4.3.2
library(boot)
library(effsize)
## Warning: package 'effsize' was built under R version 4.3.3
#load data
data<-read.csv("AllData_Categorization.csv", header = T)

Data Preprocessing

#rename useful variables
data$acc<-data$Accuracy
data$sbj<-data$participant
data$block<-data$block.thisN+1

#covert to factors
data <- mutate_if(data, is.character, as.factor)
data$sbj<-as.factor(data$sbj)

#exclude one participant (20335) for not following instructions
data<-droplevels(data[data$sbj!="20335",])
str(data)
## 'data.frame':    21120 obs. of  37 variables:
##  $ st_exp                    : int  4 30 48 18 9 55 24 62 12 34 ...
##  $ p                         : int  48 21 35 13 6 40 17 45 50 25 ...
##  $ hue                       : num  -0.05 0.15 0.35 -0.25 -0.35 0.25 0.35 0.15 -0.05 -0.25 ...
##  $ border                    : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 2 1 ...
##  $ size                      : num  2.1 2.7 3.1 2.5 2.3 3.3 2.5 3.5 2.3 2.9 ...
##  $ correctAns                : Factor w/ 2 levels "a","b": 2 1 1 2 2 1 1 1 2 2 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  60 36 29 5 34 9 23 33 50 19 ...
##  $ thisRow.t                 : num  111 115 117 119 121 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ Accuracy                  : int  0 0 1 1 1 1 1 1 1 1 ...
##  $ RT                        : num  2.2247 0.3665 0.3849 0.4082 0.0632 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ categories                : Factor w/ 2 levels "AB","BA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 66 levels "2025-02-25_12h00.07.372",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ expName                   : Factor w/ 1 level "Categorization": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60.1 60.1 60.1 60.1 60.1 ...
##  $ expStart                  : Factor w/ 66 levels "2025-02-25 12h00.22.380300 +0200",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ X                         : logi  NA NA NA NA NA NA ...
##  $ X.1                       : logi  NA NA NA NA NA NA ...
##  $ X.2                       : logi  NA NA NA NA NA NA ...
##  $ X.3                       : logi  NA NA NA NA NA NA ...
##  $ acc                       : int  0 0 1 1 1 1 1 1 1 1 ...
##  $ sbj                       : Factor w/ 66 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...
#calculate participant's average accuracy
data_av<-aggregate(data$acc, list(data$sbj, data$group), mean)
colnames(data_av) <-c("sbj","group" ,"acc")

#Check how many sbjs per group
data %>% 
  distinct(sbj, group) %>% 
  count(group)
##   group  n
## 1  1100 15
## 2  1600 17
## 3   600 18
## 4    RD 16
#calculate participant's average accuracy
data_av<-aggregate(data$acc, list(data$sbj, data$group), mean)
colnames(data_av) <-c("sbj","group" ,"acc")

Descriptive Statistics

#by-group accuracy
#"600"
mean(data_av[data_av$group=="600",]$acc)
## [1] 0.8303819
sd(data_av[data_av$group=="600",]$acc)
## [1] 0.08761026
#"1100"
mean(data_av[data_av$group=="1100",]$acc)
## [1] 0.8670833
sd(data_av[data_av$group=="1100",]$acc)
## [1] 0.03201882
"#1600"
## [1] "#1600"
mean(data_av[data_av$group=="1600",]$acc)
## [1] 0.8689338
sd(data_av[data_av$group=="1600",]$acc)
## [1] 0.04216537
#"RD"
mean(data_av[data_av$group=="RD",]$acc)
## [1] 0.8714844
sd(data_av[data_av$group=="RD",]$acc)
## [1] 0.03289177

Inferential Statistics - ANOVA & gam

#one-way anova
ezANOVA(data=data_av, dv=acc, wid=sbj, between=group, type=3)
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Coefficient covariances computed by hccm()
## $ANOVA
##   Effect DFn DFd        F        p p<.05        ges
## 2  group   3  62 2.168351 0.100717       0.09495728
## 
## $`Levene's Test for Homogeneity of Variance`
##   DFn DFd         SSn      SSd         F         p p<.05
## 1   3  62 0.004357066 0.131365 0.6854644 0.5643131
# No effect of group!

# Learning Curves
##################

# calculate average performance per sbj  and block
data_bl<-aggregate(data$acc, list(data$sbj, data$group, data$block), mean)
colnames(data_bl) <-c("sbj","group", "block", "acc")

#calculate average accuracy and SE per block
data_gr<-aggregate(data_bl$acc, list(data_bl$group, data_bl$block), mean)
colnames(data_gr)<-c("group","block", "acc")

temp<- aggregate(data_bl$acc, list(data_bl$group, data_bl$block), se)
data_gr$se<-temp$x
rm(temp)
data_gr<-data_gr[order(data_gr$group, data_gr$block),]

#xlb="Block\n Error Bars: +/- 1 SE"
xlb="Block"
ylb="Proportion Correct"
mn="Categorization Accuracy"
off=0.05
pch=c(15,16,17,18)
lty=c(2,3,4,5)
col=c("grey70","grey60","grey50", "grey40" )
#"600"
plotCI(x=1:4, y=data_gr[data_gr$group=="600",]$acc, uiw=data_gr[data_gr$group=="600",]$se, bty="n", ylim=c(0.6,1),xlim=c(1, 4.5), xlab=xlb, ylab=ylb, main=mn, las=1, xaxt="n", yaxt="n", pch=pch[1], col=col[1], gap=0)
lines(x=1:4, y=data_gr[data_gr$group=="600",]$acc, lty=lty[1], col=col[1], lwd=2)
#"1100"
plotCI(x=1:4+off, y=data_gr[data_gr$group=="1100",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[2], col=col[2], gap=0)
lines(x=1:4+off, y=data_gr[data_gr$group=="1100",]$acc, lty=lty[2], col=col[2], lwd=2)
#"1600"
plotCI(x=1:4+2*off, y=data_gr[data_gr$group=="1600",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[3], col=col[3], gap=0)
lines(x=1:4+2*off, y=data_gr[data_gr$group=="1600",]$acc, lty=lty[3], col=col[3], lwd=2)
#"RD"
plotCI(x=1:4+3*off, y=data_gr[data_gr$group=="RD",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[4], col=col[4], gap=0)
lines(x=1:4+3*off, y=data_gr[data_gr$group=="RD",]$acc, lty=lty[4], col=col[4], lwd=2)

axis(side=1, at=c(1,2,3,4))
axis(side=2, at=c(0.6, 0.7, 0.8, 0.9, 1.0),labels=c("0.0", "0.7", "0.8", "0.9", "1.0") , las=2)
axis.break(2, 0.65, style="slash")

legend(x=2.5, y=0.8, legend=c("600 ms", "1100 ms", "1600 ms", "RD"), col=col, lty=lty, pch=pch, bty="n", seg.len=4, lwd=2)

#########################
####gam analysis
#########################


#first create the trial variable
tr<-1:320
data$trial<-tr

#Null model
m0<-bam(acc~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-7.582071e-07,8.719987e-09]
## (score 29783.12 & scale 1).
## Hessian positive definite, eigenvalue range [2.565925,23.59241].
## Model rank =  604 / 604 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(trial)       9.00   7.99    0.96    0.86
## s(trial,sbj) 594.00 128.65    0.96    0.86
#edf is not close to k', so we are ok. We do not need to increase k

#add a fixed effect of group
m1<- bam(acc~ 1 + group + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-8.069979e-07,8.009941e-09]
## (score 29792.38 & scale 1).
## Hessian positive definite, eigenvalue range [2.566158,21.92624].
## Model rank =  607 / 607 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(trial)       9.00   7.99    0.94    0.18
## s(trial,sbj) 594.00 125.42    0.94    0.12
#edf is not close to k', so we are ok. We do not need to increase k

compareML(m0,m1)
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## m1: acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Model m0 preferred: lower fREML score (9.259), and lower df (3.000).
## -----
##   Model    Score Edf Difference     Df
## 1    m1 29792.38   8                  
## 2    m0 29783.12   5      9.259 -3.000
## 
## AIC difference: -0.03, model m0 has lower AIC.
#m0 preferred

#add a term modeling four smooth terms of trial, one for each group level
m2<-bam(acc~ 1 + group + s(trial) + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m2)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 11 iterations.
## Gradient range [-0.0001191143,2.647236e-05]
## (score 29800.47 & scale 1).
## Hessian positive definite, eigenvalue range [0.000113609,21.9125].
## Model rank =  642 / 643 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                         k'     edf k-index p-value  
## s(trial)             9.000   7.979    0.94   0.060 .
## s(trial):group1100   9.000   0.381    0.94   0.090 .
## s(trial):group1600   9.000   1.000    0.94   0.080 .
## s(trial):group600    9.000   2.680    0.94   0.070 .
## s(trial):groupRD     9.000   1.000    0.94   0.065 .
## s(trial,sbj)       594.000 123.003    0.94   0.070 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#edf is not close to k', so we are ok. We do not need to increase k

compareML(m2, m0)
## m2: acc ~ 1 + group + s(trial) + s(trial, by = group) + s(trial, 
##     sbj, bs = "fs", m = 1)
## 
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Model m0 preferred: lower fREML score (17.351), and lower df (11.000).
## -----
##   Model    Score Edf Difference     Df
## 1    m2 29800.47  16                  
## 2    m0 29783.12   5    -17.351 11.000
## 
## AIC difference: 5.01, model m0 has lower AIC.
#m0 preferred

m3<-bam(acc~ 1 + group  + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
gam.check(m3)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 8 iterations.
## Gradient range [-1.038658e-05,1.117415e-07]
## (score 29797.04 & scale 1).
## Hessian positive definite, eigenvalue range [0.8519929,21.88718].
## Model rank =  634 / 634 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                        k'    edf k-index p-value
## s(trial):group1100   9.00   6.09    0.96    0.60
## s(trial):group1600   9.00   4.58    0.96    0.60
## s(trial):group600    9.00   5.37    0.96    0.61
## s(trial):groupRD     9.00   7.45    0.96    0.58
## s(trial,sbj)       594.00 121.95    0.96    0.56
compareML(m3, m0)
## m3: acc ~ 1 + group + s(trial, by = group) + s(trial, sbj, bs = "fs", 
##     m = 1)
## 
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Model m0 preferred: lower fREML score (13.920), and lower df (9.000).
## -----
##   Model    Score Edf Difference    Df
## 1    m3 29797.04  14                 
## 2    m0 29783.12   5    -13.920 9.000
## 
## AIC difference: 20.46, model m0 has lower AIC.
#m0 preferred

m4<-bam(acc~ 1 + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
gam.check(m4)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 8 iterations.
## Gradient range [-8.50605e-06,1.010756e-07]
## (score 29785.85 & scale 1).
## Hessian positive definite, eigenvalue range [0.848284,23.58432].
## Model rank =  631 / 631 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                        k'    edf k-index p-value
## s(trial):group1100   9.00   6.08    0.95    0.34
## s(trial):group1600   9.00   4.56    0.95    0.22
## s(trial):group600    9.00   5.42    0.95    0.28
## s(trial):groupRD     9.00   7.43    0.95    0.27
## s(trial,sbj)       594.00 125.31    0.95    0.26
compareML(m4, m0)
## m4: acc ~ 1 + s(trial, by = group) + s(trial, sbj, bs = "fs", m = 1)
## 
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Model m0 preferred: lower fREML score (2.734), and lower df (6.000).
## -----
##   Model    Score Edf Difference    Df
## 1    m4 29785.85  11                 
## 2    m0 29783.12   5     -2.734 6.000
## 
## AIC difference: 20.52, model m0 has lower AIC.
## Warning in compareML(m4, m0): Only small difference in fREML...
#m0 preferred

#best-fit model
fm<-m1
summary(fm)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.92062    0.09898  19.405   <2e-16 ***
## group1600    0.02232    0.13587   0.164   0.8695    
## group600    -0.25259    0.13316  -1.897   0.0578 .  
## groupRD      0.03363    0.13786   0.244   0.8073    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                  edf  Ref.df Chi.sq p-value    
## s(trial)       7.986   8.714  150.8  <2e-16 ***
## s(trial,sbj) 125.422 590.000  489.1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0412   Deviance explained = 4.78%
## fREML =  29792  Scale est. = 1         n = 21120
fm<-m0
summary(fm)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.86494    0.04847   38.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                  edf  Ref.df Chi.sq p-value    
## s(trial)       7.986   8.714  150.6  <2e-16 ***
## s(trial,sbj) 128.649 593.000  547.4  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0412   Deviance explained = 4.79%
## fREML =  29783  Scale est. = 1         n = 21120

Session Information

sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Athens
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] effsize_0.8.1     boot_1.3-28.1     itsadug_2.4.1     plotfunctions_1.4
##  [5] mgcv_1.8-42       nlme_3.1-162      emmeans_1.8.8     afex_1.4-1       
##  [9] lme4_1.1-34       Matrix_1.6-1      plotrix_3.8-4     gplots_3.1.3     
## [13] sciplot_1.2-0     ez_4.4-0          dplyr_1.1.3      
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.4        xfun_0.40           bslib_0.5.1        
##  [4] ggplot2_3.4.3       caTools_1.18.2      lattice_0.21-8     
##  [7] numDeriv_2016.8-1.1 vctrs_0.6.3         tools_4.3.1        
## [10] bitops_1.0-7        generics_0.1.3      parallel_4.3.1     
## [13] sandwich_3.0-2      tibble_3.2.1        fansi_1.0.4        
## [16] pkgconfig_2.0.3     KernSmooth_2.23-21  lifecycle_1.0.3    
## [19] compiler_4.3.1      stringr_1.5.0       munsell_0.5.0      
## [22] codetools_0.2-19    lmerTest_3.1-3      carData_3.0-5      
## [25] htmltools_0.5.6     sass_0.4.7          yaml_2.3.7         
## [28] pillar_1.9.0        car_3.1-2           nloptr_2.0.3       
## [31] jquerylib_0.1.4     MASS_7.3-60         cachem_1.0.8       
## [34] abind_1.4-5         multcomp_1.4-25     gtools_3.9.4       
## [37] tidyselect_1.2.0    digest_0.6.33       mvtnorm_1.2-3      
## [40] stringi_1.7.12      reshape2_1.4.4      splines_4.3.1      
## [43] fastmap_1.1.1       grid_4.3.1          colorspace_2.1-0   
## [46] cli_3.6.1           magrittr_2.0.3      survival_3.5-5     
## [49] utf8_1.2.3          TH.data_1.1-2       withr_2.5.0        
## [52] scales_1.2.1        estimability_1.4.1  rmarkdown_2.24     
## [55] zoo_1.8-12          evaluate_0.21       knitr_1.44         
## [58] rlang_1.1.1         Rcpp_1.0.11         xtable_1.8-4       
## [61] glue_1.6.2          rstudioapi_0.15.0   minqa_1.2.5        
## [64] jsonlite_1.8.7      R6_2.5.1            plyr_1.8.8