Categorization

Accuracy Analyses for Categorization Task

Libraries and Data Files

#load libraries
library(dplyr)
library(ez)
library(sciplot)

## Warning: package 'sciplot' was built under R version 4.5.2

library(gplots)

## Warning: package 'gplots' was built under R version 4.5.2

library(plotrix)

## Warning: package 'plotrix' was built under R version 4.5.2

library (afex)

## Warning: package 'afex' was built under R version 4.5.2

library(emmeans)

## Warning: package 'emmeans' was built under R version 4.5.2

library(mgcv)
library(itsadug)

## Warning: package 'itsadug' was built under R version 4.5.2

## Warning: package 'plotfunctions' was built under R version 4.5.2

library(boot)
library(effsize)

## Warning: package 'effsize' was built under R version 4.5.2

library(parallel)
library(stats)

#load data
data<-read.csv("AllData_Categorization.csv", header = T)

Data Preprocessing

#rename useful variables
data$acc<-data$Accuracy
data$sbj<-data$participant
data$block<-data$block.thisN+1

#covert to factors
data <- mutate_if(data, is.character, as.factor)
data$sbj<-as.factor(data$sbj)
str(data)

## 'data.frame':    21440 obs. of  37 variables:
##  $ st_exp                    : int  4 30 48 18 9 55 24 62 12 34 ...
##  $ p                         : int  48 21 35 13 6 40 17 45 50 25 ...
##  $ hue                       : num  -0.05 0.15 0.35 -0.25 -0.35 0.25 0.35 0.15 -0.05 -0.25 ...
##  $ border                    : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 2 1 ...
##  $ size                      : num  2.1 2.7 3.1 2.5 2.3 3.3 2.5 3.5 2.3 2.9 ...
##  $ correctAns                : Factor w/ 2 levels "a","b": 2 1 1 2 2 1 1 1 2 2 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  60 36 29 5 34 9 23 33 50 19 ...
##  $ thisRow.t                 : num  111 115 117 119 121 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ Accuracy                  : int  0 0 1 1 1 1 1 1 1 1 ...
##  $ RT                        : num  2.2247 0.3665 0.3849 0.4082 0.0632 ...
##  $ participant               : int  17202 17202 17202 17202 17202 17202 17202 17202 17202 17202 ...
##  $ categories                : Factor w/ 2 levels "AB","BA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 67 levels "2025-02-25_12h00.07.372",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ expName                   : Factor w/ 1 level "Categorization": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60.1 60.1 60.1 60.1 60.1 ...
##  $ expStart                  : Factor w/ 67 levels "2025-02-25 12h00.22.380300 +0200",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ X                         : logi  NA NA NA NA NA NA ...
##  $ X.1                       : logi  NA NA NA NA NA NA ...
##  $ X.2                       : logi  NA NA NA NA NA NA ...
##  $ X.3                       : logi  NA NA NA NA NA NA ...
##  $ acc                       : int  0 0 1 1 1 1 1 1 1 1 ...
##  $ sbj                       : Factor w/ 67 levels "341","1003","1206",..: 10 10 10 10 10 10 10 10 10 10 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...

#exclude one participant (20335) for not following instructions
data<-droplevels(data[data$sbj!="20335",])
#also exclude five early "exclusion instructions" participants.
data<-droplevels(data[!(data$sbj %in% c("17202","57393", "94372", "40242", "89443")), ])
str(data)

## 'data.frame':    19520 obs. of  37 variables:
##  $ st_exp                    : int  56 39 2 10 4 5 44 21 4 52 ...
##  $ p                         : int  41 28 1 7 48 49 58 53 48 60 ...
##  $ hue                       : num  0.35 0.25 -0.25 -0.25 -0.05 0.05 -0.05 0.05 -0.05 -0.05 ...
##  $ border                    : Factor w/ 2 levels "no","yes": 1 1 1 1 2 2 2 2 2 2 ...
##  $ size                      : num  3.3 2.9 2.1 2.3 2.1 2.1 3.1 2.5 2.1 3.3 ...
##  $ correctAns                : Factor w/ 2 levels "a","b": 2 2 1 1 1 2 1 2 1 1 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  14 36 13 33 79 57 71 78 67 58 ...
##  $ thisRow.t                 : num  73.7 77.6 81.2 84.3 87.7 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ Accuracy                  : int  0 1 0 1 1 0 1 0 1 1 ...
##  $ RT                        : num  1.31 0.983 0.505 0.796 0.764 ...
##  $ participant               : int  77337 77337 77337 77337 77337 77337 77337 77337 77337 77337 ...
##  $ categories                : Factor w/ 2 levels "AB","BA": 2 2 2 2 2 2 2 2 2 2 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ date                      : Factor w/ 61 levels "2025-02-25_12h00.07.372",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ expName                   : Factor w/ 1 level "Categorization": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 61 levels "2025-02-25 12h00.22.380300 +0200",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ X                         : logi  NA NA NA NA NA NA ...
##  $ X.1                       : logi  NA NA NA NA NA NA ...
##  $ X.2                       : logi  NA NA NA NA NA NA ...
##  $ X.3                       : logi  NA NA NA NA NA NA ...
##  $ acc                       : int  0 1 0 1 1 0 1 0 1 1 ...
##  $ sbj                       : Factor w/ 61 levels "341","1003","1206",..: 42 42 42 42 42 42 42 42 42 42 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...

#calculate participant's average accuracy
data_av<-aggregate(data$acc, list(data$sbj, data$group), mean)
colnames(data_av) <-c("sbj","group" ,"acc")

# check learning criterion
# criterion was set to >=22/32 for one or more blocks of the task
cr=22/32
dat_b<-droplevels(data[data$border=="yes",]) 
dat_b_av<- aggregate(dat_b$acc, list(dat_b$sbj, dat_b$group, dat_b$block), mean)
colnames(dat_b_av)<-c("sbj","group", "block", "acc")
dat_b_av<-dat_b_av[order(dat_b_av$sbj, dat_b_av$block),]
#new column, "yes" if acc>=cr
dat_b_av$cr<-ifelse(dat_b_av$acc>=cr,"yes", "no")

#Create a list containing sbj ids that do not fulfill the learning criterion (4 "no"s)
only_no<- dat_b_av[dat_b_av$cr == "no", ]
no_counts<- table(only_no$sbj)
total_blocks <- table(dat_b_av$sbj)
only_no_sbj<- names(no_counts[no_counts == 4 & total_blocks[names(no_counts)] == 4])
only_no_sbj

## [1] "43423" "48888" "63018" "77337" "77623"

rm(only_no,no_counts, total_blocks, cr)


#Exclude data from non-learner participants
data<-droplevels(data[!(data$sbj %in% only_no_sbj), ])
str(data)

## 'data.frame':    17920 obs. of  37 variables:
##  $ st_exp                    : int  36 36 2 55 11 59 52 25 43 35 ...
##  $ p                         : int  56 56 1 40 8 44 60 18 32 26 ...
##  $ hue                       : num  -0.05 -0.05 -0.25 0.25 -0.15 -0.15 -0.05 -0.35 -0.15 -0.15 ...
##  $ border                    : Factor w/ 2 levels "no","yes": 2 2 1 1 1 1 2 1 1 1 ...
##  $ size                      : num  2.9 2.9 2.1 3.3 2.3 3.5 3.3 2.7 3.1 2.9 ...
##  $ correctAns                : Factor w/ 2 levels "a","b": 2 2 2 1 2 2 2 2 2 2 ...
##  $ practice_trials.thisRepN  : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisTrialN: logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisN     : logi  NA NA NA NA NA NA ...
##  $ practice_trials.thisIndex : logi  NA NA NA NA NA NA ...
##  $ block.thisRepN            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisTrialN          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisN               : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ block.thisIndex           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisRepN           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ trials.thisTrialN         : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisN              : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ trials.thisIndex          : int  66 60 35 31 28 22 57 41 16 33 ...
##  $ thisRow.t                 : num  71.1 77.2 79.2 81 83.7 ...
##  $ notes                     : logi  NA NA NA NA NA NA ...
##  $ Accuracy                  : int  0 1 1 0 0 1 1 0 1 1 ...
##  $ RT                        : num  4.52 0.378 0.259 1.064 1.437 ...
##  $ participant               : int  81139 81139 81139 81139 81139 81139 81139 81139 81139 81139 ...
##  $ categories                : Factor w/ 2 levels "AB","BA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ group                     : Factor w/ 4 levels "1100","1600",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ date                      : Factor w/ 56 levels "2025-03-18_11h21.33.043",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ expName                   : Factor w/ 1 level "Categorization": 1 1 1 1 1 1 1 1 1 1 ...
##  $ psychopyVersion           : Factor w/ 1 level "2023.2.3": 1 1 1 1 1 1 1 1 1 1 ...
##  $ frameRate                 : num  60 60 60 60 60 ...
##  $ expStart                  : Factor w/ 56 levels "2025-03-18 11h21.51.634029 +0200",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ X                         : logi  NA NA NA NA NA NA ...
##  $ X.1                       : logi  NA NA NA NA NA NA ...
##  $ X.2                       : logi  NA NA NA NA NA NA ...
##  $ X.3                       : logi  NA NA NA NA NA NA ...
##  $ acc                       : int  0 1 1 0 0 1 1 0 1 1 ...
##  $ sbj                       : Factor w/ 56 levels "341","1003","1206",..: 42 42 42 42 42 42 42 42 42 42 ...
##  $ block                     : num  1 1 1 1 1 1 1 1 1 1 ...

#Check that there are 14 sbjs per group
data %>% 
  distinct(sbj, group) %>% 
  count(group)

##   group  n
## 1  1100 14
## 2  1600 14
## 3   600 14
## 4    RD 14

#calculate participant's average accuracy
data_av<-aggregate(data$acc, list(data$sbj, data$group), mean)
colnames(data_av) <-c("sbj","group" ,"acc")

#write data frame, to use it in analysis examining correlation between learning accuracy and perceptual change
write.csv(data_av, "d_cat.csv")

Descriptive Statistics

#by-group accuracy
#"600"
mean(data_av[data_av$group=="600",]$acc)

## [1] 0.8455357

sd(data_av[data_av$group=="600",]$acc)

## [1] 0.03759432

#"1100"
mean(data_av[data_av$group=="1100",]$acc)

## [1] 0.8654018

sd(data_av[data_av$group=="1100",]$acc)

## [1] 0.0325329

"#1600"

## [1] "#1600"

mean(data_av[data_av$group=="1600",]$acc)

## [1] 0.8814732

sd(data_av[data_av$group=="1600",]$acc)

## [1] 0.02297915

#"RD"
mean(data_av[data_av$group=="RD",]$acc)

## [1] 0.8763393

sd(data_av[data_av$group=="RD",]$acc)

## [1] 0.0308073

Inferential Statistics - ANOVA

######################################################
# one-way ANOVA (factor: group) on learning accuracy
######################################################

#one-way anova
ezANOVA(data=data_av, dv=acc, wid=sbj, between=group, type=3)

## Coefficient covariances computed by hccm()

## $ANOVA
##   Effect DFn DFd        F          p p<.05       ges
## 2  group   3  52 3.591778 0.01957389     * 0.1716492
## 
## $`Levene's Test for Homogeneity of Variance`
##   DFn DFd          SSn        SSd         F         p p<.05
## 1   3  52 0.0002664621 0.02787528 0.1656907 0.9190173

#same analysis, in format suitable for post-hoc comparisons
model <- aov(acc ~ group, data = data_av)

#calculate estimated marginal means, for post-hoc comparisons.
emms <- emmeans::emmeans(model, ~ group)

#post-hoc comparisons
pairwise_comparisons <- contrast(emms, method = "pairwise", adjust = "bonferroni")
summary(pairwise_comparisons)

##  contrast    estimate     SE df t.ratio p.value
##  1100 - 1600 -0.01607 0.0119 52  -1.353  1.0000
##  1100 - 600   0.01987 0.0119 52   1.673  0.6022
##  1100 - RD   -0.01094 0.0119 52  -0.921  1.0000
##  1600 - 600   0.03594 0.0119 52   3.026  0.0231
##  1600 - RD    0.00513 0.0119 52   0.432  1.0000
##  600 - RD    -0.03080 0.0119 52  -2.594  0.0738
## 
## P value adjustment: bonferroni method for 6 tests

# Calculate Cohen's d for significant differences.
cohen.d(data_av$acc[data_av$group == "1600"],
        data_av$acc[data_av$group == "600"],
        pooled = TRUE)

## 
## Cohen's d
## 
## d estimate: 1.153475 (large)
## 95 percent confidence interval:
##     lower     upper 
## 0.3144359 1.9925144

Learning-Curves Plot

data$trial <- data$block.thisN * 80 + data$trials.thisN + 1

bins <- 16 #thus, 16 bins of 20 trials (320 trials overall)
data <- data %>%
  group_by(sbj) %>%
  mutate(trial_bin = ntile(trial, bins)) %>%
  ungroup()


data_tb<-aggregate(data$acc, list(data$sbj, data$group, data$trial_bin), mean)
colnames(data_tb) <-c("sbj","group","trial_bin","acc")

data_gr<-aggregate(data_tb$acc, list(data_tb$group, data_tb$trial_bin), mean)
colnames(data_gr)<-c("group","trial_bin", "acc")

temp<- aggregate(data_tb$acc, list(data_tb$group, data_tb$trial_bin), se)
data_gr$se<-temp$x
rm(temp)
data_gr<-data_gr[order(data_gr$group, data_gr$trial_bin),]

# Labels
xlb="Bins of 20 Trials"
ylb="Proportion Correct"
mn="Categorization Accuracy"
off=0.10
pch=c(15,16,17,18)
lty=c(2,3,4,5)
col=c("grey70","grey60","grey50", "grey40" )

x0 <- 1:bins

#"600"
plotCI(x=x0, y=data_gr[data_gr$group=="600",]$acc,
       uiw=data_gr[data_gr$group=="600",]$se,
       bty="n", ylim=c(0.6,1),xlim=c(1, bins+0.6),
       xlab=xlb, ylab=ylb, main=mn, las=1, xaxt="n", yaxt="n",
       pch=pch[1], col=col[1], gap=0, cex=1.5)
lines(x=x0, y=data_gr[data_gr$group=="600",]$acc, lty=lty[1], col=col[1], lwd=2)

#"1100"
plotCI(x=x0+off, y=data_gr[data_gr$group=="1100",]$acc,
       uiw=data_gr[data_gr$group=="1100",]$se,
       add=T, pch=pch[2], col=col[2], gap=0, cex=1.5)
lines(x=x0+off, y=data_gr[data_gr$group=="1100",]$acc, lty=lty[2], col=col[2], lwd=2)

#"1600"
plotCI(x=x0+2*off, y=data_gr[data_gr$group=="1600",]$acc,
       uiw=data_gr[data_gr$group=="1600",]$se,
       add=T, pch=pch[3], col=col[3], gap=0, cex=1.5)
lines(x=x0+2*off, y=data_gr[data_gr$group=="1600",]$acc, lty=lty[3], col=col[3], lwd=2)

#"RD"
plotCI(x=x0+3*off, y=data_gr[data_gr$group=="RD",]$acc,
       uiw=data_gr[data_gr$group=="RD",]$se,
       add=T, pch=pch[4], col=col[4], gap=0, cex=1.5)
lines(x=x0+3*off, y=data_gr[data_gr$group=="RD",]$acc, lty=lty[4], col=col[4], lwd=2)

axis(side=1, at=1:bins, labels=1:bins)
axis(side=2, at=c(0.6, 0.7, 0.8, 0.9, 1.0),labels=c("0.0", "0.7", "0.8", "0.9", "1.0") , las=2)
axis.break(2, 0.65, style="slash")

legend(x=round(bins*0.60), y=0.8, legend=c("600 ms", "1100 ms", "1600 ms", "RD"),
       col=col, lty=lty, pch=pch, bty="n", seg.len=6, lwd=2, pt.cex=1.5)

### pixels: 1000 x 650

Inferential Statistics - gam

nc <- detectCores()
cl <- makeCluster(nc-1)

#make sure factors are ok
data$sbj   <- as.factor(data$sbj)
data$group <- as.factor(data$group)
levels(data$group)

## [1] "1100" "1600" "600"  "RD"

#set reference level
data$group <- relevel(data$group, ref="600")

#scale trial, to facilitate convergence
data$trial_s <- as.numeric(scale(data$trial))

# Test for fixed effects

#Test if k=10 is a justified
m0i_fREML <- bam(acc ~ 1 + s(trial_s, k=10) +
                s(sbj, bs="re") +
                s(trial_s, sbj, bs="fs", m=1, k=10),
              data=data, family=binomial, method="fREML", discrete = T)

## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.

gam.check(m0i_fREML)

## 
## Method: fREML   Optimizer: perf chol
## $grad
## [1] -1.638139e-07 -3.281919e-06 -1.908798e-05 -3.281919e-06
## 
## $hess
##             [,1]       [,2]        [,3]       [,4]
## [1,]  2.46590905 -0.0111960 -0.05207555 -0.0111960
## [2,] -0.01119600  2.8944569  0.24355471  2.8944536
## [3,] -0.05207555  0.2435547 11.58292252  0.2435547
## [4,] -0.01119600  2.8944536  0.24355471  2.8944569
## 
## Model rank =  626 / 626 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                    k'    edf k-index p-value
## s(trial_s)       9.00   7.94    0.96    0.38
## s(sbj)          56.00  17.84      NA      NA
## s(trial_s,sbj) 560.00  89.51    0.96    0.40

# Models are fit with method ML
m0i_ML <- bam(acc ~ 1 + s(trial_s, k=10) +
                s(sbj, bs="re") +
                s(trial_s, sbj, bs="fs", m=1, k=10),
              data=data, family=binomial, method="ML", discrete = T)

## Warning in bam(acc ~ 1 + s(trial_s, k = 10) + s(sbj, bs = "re") + s(trial_s, :
## discretization only available with fREML or NCV
## Warning in bam(acc ~ 1 + s(trial_s, k = 10) + s(sbj, bs = "re") + s(trial_s, :
## model has repeated 1-d smooths of same variable.

m1i_ML <- bam(acc ~ group + s(trial_s, k=10) +
                s(sbj, bs="re") +
                s(trial_s, sbj, bs="fs", m=1, k=10),
              data=data, family=binomial, method="ML", discrete = T)

## Warning in bam(acc ~ group + s(trial_s, k = 10) + s(sbj, bs = "re") +
## s(trial_s, : discretization only available with fREML or NCV
## Warning in bam(acc ~ group + s(trial_s, k = 10) + s(sbj, bs = "re") +
## s(trial_s, : model has repeated 1-d smooths of same variable.

AIC(m0i_ML, m1i_ML)

##              df      AIC
## m0i_ML 153.2876 13837.62
## m1i_ML 147.8566 13830.84

#the "group" fixed effects reduces AIC, therefore preferred. Models fit with fREML,appropriate when smooth terms differ

#Test for group-specific smooths of trial, i.e., s(trial_s, by=group, k=10)
m1i_fREML <- bam(acc ~ group + s(trial_s, k=10) +
                  s(sbj, bs="re") +
                  s(trial_s, sbj, bs="fs", m=1, k=10),
                data=data, family=binomial, method="fREML", discrete = T)

## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.

m2i_fREML <- bam(acc ~ group + s(trial_s, k=10) + s(trial_s, by=group, k=10) +
                  s(sbj, bs="re") +
                  s(trial_s, sbj, bs="fs", m=1, k=10),
                data=data, family=binomial, method="fREML", discrete = T)

## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.

compareML(m1i_fREML, m2i_fREML)

## m1i_fREML: acc ~ group + s(trial_s, k = 10) + s(sbj, bs = "re") + s(trial_s, 
##     sbj, bs = "fs", m = 1, k = 10)
## 
## m2i_fREML: acc ~ group + s(trial_s, k = 10) + s(trial_s, by = group, k = 10) + 
##     s(sbj, bs = "re") + s(trial_s, sbj, bs = "fs", m = 1, k = 10)
## 
## Model m1i_fREML preferred: lower fREML score (2.516), and lower df (8.000).
## -----
##       Model    Score Edf Difference     Df
## 1 m2i_fREML 23386.45  17                  
## 2 m1i_fREML 23383.93   9      2.516 -8.000
## 
## AIC difference: -3.39, model m1i_fREML has lower AIC.

## Warning in compareML(m1i_fREML, m2i_fREML): Only small difference in fREML...

fm <-m1i_fREML

summary(fm)

## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## acc ~ group + s(trial_s, k = 10) + s(sbj, bs = "re") + s(trial_s, 
##     sbj, bs = "fs", m = 1, k = 10)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.74073    0.06940  25.082  < 2e-16 ***
## group1100    0.16044    0.09915   1.618  0.10561    
## group1600    0.30010    0.10020   2.995  0.00274 ** 
## groupRD      0.25104    0.09979   2.516  0.01188 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                   edf  Ref.df Chi.sq p-value    
## s(trial_s)      7.945   8.685 127.22  <2e-16 ***
## s(sbj)         15.592  52.000  22.31  <2e-16 ***
## s(trial_s,sbj) 87.164 556.000 179.48  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0277   Deviance explained = 3.61%
## fREML =  23384  Scale est. = 1         n = 17920

Model-Predictions Plot

labx="Trial"
#labx="Trial\n Error Bands: 95% CI"
main="Categorization Accuracy - Model Predictions"
col=c("grey70","grey60","grey50", "grey40" )
lty=c(2,3,4,5)
#"600"
plot_smooth(fm, view="trial_s", cond=list(group="600"), rm.ranef=c("s(trial,sbj)"), shade=T,se=1.96, print.summary=F, ylim=c(0.5,1), transform=inv.logit, xlab=labx, ylab="Proportion Correct", lwd=4, las=2, rug=F, col=col[1], lty=lty[1], hide.label=T, main=main, yaxs="i",xaxt="n", yaxt="n")
#"1100"
plot_smooth(fm, view="trial_s", cond=list(group="1100"), rm.ranef=c("s(trial,sbj)"), shade=T, se=1.96, print.summary=F,transform=inv.logit,lwd=4, rug=FALSE, col=col[2], lty=lty[2], add=T)
#"1600"
plot_smooth(fm, view="trial_s", cond=list(group="1600"), rm.ranef=c("s(trial,sbj)"), shade=T, se=1.96, print.summary=F,transform=inv.logit,lwd=4, rug=FALSE, col=col[3], lty=lty[3], add=T)
#"RD"
plot_smooth(fm, view="trial_s", cond=list(group="RD"), rm.ranef=c("s(trial,sbj)"), shade=T, se=1.96, print.summary=F,transform=inv.logit,lwd=4, rug=FALSE, col=col[4], lty=lty[4], add=T)

mu <- attr(scale(data$trial), "scaled:center")
sd <- attr(scale(data$trial), "scaled:scale")
ticks_raw <- c(0,50,100,150,200,250,300,320)
ticks_s   <- (ticks_raw - mu) / sd
axis(side=1, at=ticks_s, labels=ticks_raw)
axis(side=2, at=c(0.5, 0.6, 0.7, 0.8, 0.9, 1.0),labels=c("0.0", "0.6", "0.7", "0.8", "0.9", "1.0") , las=2)
axis.break(2, 0.55, style="slash")

legend(x=(200-mu)/sd, y=0.75, legend=c("600 ms", "1100 ms", "1600 ms", "RD"), col=col, lty=lty, bty="n", seg.len=6, lwd=4, pt.cex=1.5)

# graph for paper 1000 x 650

Reaction Times Analysis

rt_by_sbj <- aggregate(RT ~ sbj + group, data = data, FUN = function(x) mean(x, na.rm = TRUE))

mean_rt_by_group <- aggregate(RT ~ group, data = rt_by_sbj, mean)
sd_rt_by_group <- aggregate(RT ~ group, data = rt_by_sbj, FUN = stats::sd)
n_by_group <- aggregate(RT ~ group, data = rt_by_sbj, function(x) length(x))

#
rt_summary <- data.frame(
  group = mean_rt_by_group$group,
  mean_rt = mean_rt_by_group$RT,
  sd_rt = sd_rt_by_group$RT,
  n = n_by_group$RT
)
rt_summary$se <- rt_summary$sd_rt / sqrt(rt_summary$n)
rt_summary$ci95 <- 1.96 * rt_summary$se

#one-sample t-test for the RD group, comparison value = 0.6 sec
rt_by_sbj_RD<-droplevels(rt_by_sbj[rt_by_sbj$group=="RD",])
t.test(rt_by_sbj_RD$RT , mu = 0.600)

## 
##  One Sample t-test
## 
## data:  rt_by_sbj_RD$RT
## t = 3.9435, df = 13, p-value = 0.001682
## alternative hypothesis: true mean is not equal to 0.6
## 95 percent confidence interval:
##  0.6965478 0.9304929
## sample estimates:
## mean of x 
## 0.8135203

Session Information

sessionInfo()

## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Europe/Athens
## tzcode source: internal
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] effsize_0.8.1     boot_1.3-31       itsadug_2.4.1     plotfunctions_1.4
##  [5] mgcv_1.9-3        nlme_3.1-168      emmeans_2.0.0     afex_1.5-0       
##  [9] lme4_1.1-37       Matrix_1.7-3      plotrix_3.8-4     gplots_3.2.0     
## [13] sciplot_1.2-0     ez_4.4-0          dplyr_1.1.4      
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        xfun_0.53           bslib_0.9.0        
##  [4] ggplot2_4.0.0       caTools_1.18.3      lattice_0.22-7     
##  [7] numDeriv_2016.8-1.1 vctrs_0.6.5         tools_4.5.1        
## [10] Rdpack_2.6.4        bitops_1.0-9        generics_0.1.4     
## [13] sandwich_3.1-1      tibble_3.3.0        pkgconfig_2.0.3    
## [16] KernSmooth_2.23-26  RColorBrewer_1.1-3  S7_0.2.0           
## [19] lifecycle_1.0.4     compiler_4.5.1      farver_2.1.2       
## [22] stringr_1.5.2       codetools_0.2-20    lmerTest_3.1-3     
## [25] carData_3.0-5       htmltools_0.5.8.1   sass_0.4.10        
## [28] yaml_2.3.10         Formula_1.2-5       pillar_1.11.1      
## [31] car_3.1-3           nloptr_2.2.1        jquerylib_0.1.4    
## [34] MASS_7.3-65         cachem_1.1.0        reformulas_0.4.1   
## [37] multcomp_1.4-29     abind_1.4-8         gtools_3.9.5       
## [40] tidyselect_1.2.1    digest_0.6.37       mvtnorm_1.3-3      
## [43] stringi_1.8.7       reshape2_1.4.4      splines_4.5.1      
## [46] fastmap_1.2.0       grid_4.5.1          cli_3.6.5          
## [49] magrittr_2.0.4      survival_3.8-3      TH.data_1.1-5      
## [52] withr_3.0.2         scales_1.4.0        estimability_1.5.1 
## [55] rmarkdown_2.30      zoo_1.8-14          evaluate_1.0.5     
## [58] knitr_1.50          rbibutils_2.3       rlang_1.1.6        
## [61] Rcpp_1.1.0          xtable_1.8-4        glue_1.8.0         
## [64] rstudioapi_0.17.1   minqa_1.2.8         jsonlite_2.0.0     
## [67] R6_2.6.1            plyr_1.8.9

Categorization

Fotis Fotiadis

2025-07-24

Accuracy Analyses for Categorization Task

Libraries and Data Files

Data Preprocessing

Descriptive Statistics

Inferential Statistics - ANOVA

Learning-Curves Plot

Inferential Statistics - gam

Model-Predictions Plot

Reaction Times Analysis

Session Information