Libraries, Data Files, Preprocessing
#Libraries
library(dplyr)    #data manipulation (mutate_if)
library(sciplot)  #se()
library(gplots)   #plotCI()
library(parallel) #detectCores(), makeCluster()
library(itsadug)  #plot_smooth(), plot_diff(), compareML()
library(mgcv)     #bam()
library(boot)     #inv.logit()
#read data file
data<-read.table("initial.txt", header=T)
#convert chr to factor
data <- mutate_if(data, is.character, as.factor)
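#equivalent, optional form with dplyr's newer across() idiom (not run here;
#mutate_if still works but is superseded):
#data <- mutate(data, across(where(is.character), as.factor))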
names(data)[names(data) == "stim"] <- "shape"
#re-order by participant and trial
data<-data[order(data$sbj,data$trial),]
#remove non-learner participants; cleaned data in "dat"
non_learners<-c("agggko96","dimdim96","dimdou96","thaath96","zoigeo96","efhgia88","thesak96")
dat<-droplevels(data[!(data$sbj %in% non_learners),])
#a missing response is denoted by rt = -10000
dat$rt<-ifelse(dat$rt==-10000,NA, dat$rt)
sum(is.na(dat$rt))
## [1] 4
str(dat)
## 'data.frame': 9216 obs. of 10 variables:
## $ sbj : Factor w/ 32 levels "aggsem96","aggska92",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 11052134 12064213 22064321 12052143 12012314 11021423 21033412 22041243 21013421 12021324 ...
## $ rt : num 871 976 -1460 2486 1254 ...
## $ sub : Factor w/ 32 levels "s1","s10","s11",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ acc : int 1 1 0 1 1 0 0 0 1 0 ...
## $ shape : Factor w/ 4 levels "rand21","rand22",..: 2 3 4 3 3 2 1 4 1 3 ...
## $ stim_ : Factor w/ 128 levels "s10rand21","s10rand22",..: 30 31 32 31 31 30 29 32 29 31 ...
## $ corr_resp: Factor w/ 4 levels "ideo1","ideo2",..: 4 3 2 3 3 4 1 2 1 3 ...
## $ cnd : Factor w/ 2 levels "ideo","pseudo": 2 2 1 2 2 2 1 1 1 2 ...
#proportion of trials with missing RT
sum(is.na(dat$rt))/length(dat$rt)
## [1] 0.0004340278
#check whether any RTs are below 250 ms in absolute value
sum(ifelse((dat$rt>-250) & (dat$rt<250),1,0), na.rm=T)
## [1] 0
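#equivalently: sum(abs(dat$rt) < 250, na.rm=TRUE)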
#set accuracy to NA on trials with a missing response
dat$acc<-ifelse(is.na(dat$rt),NA,dat$acc)
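#sanity check (not run here): after this recoding,
#sum(is.na(dat$acc)) == sum(is.na(dat$rt)) should be TRUE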
Descriptive Statistics
#per-participant mean accuracy by condition
dat_av<-with(dat,aggregate(acc,list(sbj,cnd),mean, na.rm=T))
names(dat_av)<-c("sbj", "cnd", "acc")
#label (pseudo) condition
mean(dat_av[dat_av$cnd=="pseudo",]$acc, na.rm=T)
## [1] 0.8922412
sd(dat_av[dat_av$cnd=="pseudo",]$acc, na.rm=T)
## [1] 0.09274273
#ideogram (ideo) condition
mean(dat_av[dat_av$cnd=="ideo",]$acc, na.rm=T)
## [1] 0.8723306
sd(dat_av[dat_av$cnd=="ideo",]$acc, na.rm=T)
## [1] 0.09856028
# Calculate accuracy per block of 24 trials
n_blocks<-12
n_parts<-length(levels(dat$sbj))
n_cnds<-2
n_trials_in_block<-24
n_trials_in_block_per_cnd<-n_trials_in_block/n_cnds
block<-rep(1:n_blocks,n_parts)
#the first n_blocks trials of each participant yield one sbj label per block
sbjs<-dat[dat$trial<(n_blocks+1),]$sbj
#pre-allocate per-block accuracy vectors for each condition
perf_ps<-numeric(n_parts*n_blocks)
perf_id<-numeric(n_parts*n_blocks)
#average each successive run of 12 same-condition trials (one block);
#this relies on dat being ordered by sbj and trial (see above)
for (i in 1:(n_parts*n_blocks)){
perf_ps[i]<-mean(dat[dat$cnd=="pseudo",]$acc[((i-1)*n_trials_in_block_per_cnd+1):(i*n_trials_in_block_per_cnd)], na.rm=TRUE)
perf_id[i]<-mean(dat[dat$cnd=="ideo",]$acc[((i-1)*n_trials_in_block_per_cnd+1):(i*n_trials_in_block_per_cnd)], na.rm=TRUE)
}
data_bl<-data.frame(sbj=sbjs, block=block, perf_ps= perf_ps, perf_id= perf_id)
#per-block group means and standard errors (se() from sciplot)
data_gr_ps<-with(data_bl,aggregate(perf_ps,list(block),mean))
data_gr_id<-with(data_bl,aggregate(perf_id,list(block),mean))
names(data_gr_ps)<-c("block", "perf_ps"); names(data_gr_id)<-c("block", "perf_id")
data_gr_ps_se<-with(data_bl,aggregate(perf_ps,list(block),se))
data_gr_id_se<-with(data_bl,aggregate(perf_id,list(block),se))
names(data_gr_ps_se)<-c("block", "perf_ps_se"); names(data_gr_id_se)<-c("block", "perf_id_se")
#data frame for graph
data_gr<-data.frame(block=data_gr_ps$block, perf_ps=data_gr_ps$perf_ps, perf_id=data_gr_id$perf_id, perf_ps_se=data_gr_ps_se$perf_ps_se, perf_id_se=data_gr_id_se$perf_id_se )
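#the same block-level aggregation as an optional dplyr sketch (not run here;
#it assumes 24 trials per block as defined above and should match data_bl up
#to layout):
#dat %>%
#  mutate(block = ceiling(trial/n_trials_in_block)) %>%
#  group_by(sbj, cnd, block) %>%
#  summarise(acc = mean(acc, na.rm=TRUE), .groups="drop")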
Inferential Statistics
nc <- detectCores()
cl <- makeCluster(nc-1)
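#note: the cluster is not passed to the bam() calls below; bam() accepts it
#via its cluster argument (e.g. bam(..., cluster=cl)), which can speed up
#fREML estimation without changing the fits; stopCluster(cl) releases it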
#m0: population-level smooth of trial plus per-participant factor smooths
#(bs="fs") as random curves; binomial family for the 0/1 accuracy
m0<-bam(acc~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.976066e-08,2.886486e-07]
## (score 11856.91 & scale 1).
## Hessian positive definite, eigenvalue range [1.419103,22.76145].
## Model rank = 298 / 298
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 4.87 0.96 0.22
## s(trial,sbj) 288.00 115.44 0.96 0.25
#edf is not close to k'
#m1: m0 plus a constant (intercept) offset for condition
m1<-bam(acc~1 + cnd +s(trial) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-2.324611e-08,2.398126e-07]
## (score 11872.61 & scale 1).
## Hessian positive definite, eigenvalue range [1.526318,22.86986].
## Model rank = 299 / 299
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 4.83 0.97 0.54
## s(trial,sbj) 288.00 115.79 0.97 0.47
#edf is not close to k'
compareML(m0, m1, suggest.report = T)
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: acc ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (15.697), and lower df (1.000).
## -----
## Model Score Edf Difference Df
## 1 m1 11872.61 6
## 2 m0 11856.91 5 15.697 -1.000
##
## AIC difference: 14.94, model m1 has lower AIC.
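#note: fREML and AIC disagree here (m0 wins on fREML, m1 on AIC); a direct
#cross-check is AIC(m0, m1) (not run here)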
#m2: a separate trial smooth for each condition
m2<-bam(acc~1 + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
gam.check(m2)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-8.09246e-09,8.478705e-08]
## (score 11835.03 & scale 1).
## Hessian positive definite, eigenvalue range [1.26909,22.91401].
## Model rank = 307 / 307
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial):cndideo 9.00 3.82 0.96 0.18
## s(trial):cndpseudo 9.00 5.09 0.96 0.21
## s(trial,sbj) 288.00 116.00 0.96 0.15
#edf is not close to k'
compareML(m2, m1, suggest.report = T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: acc ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m1 (X2(1.00)=37.573, p<2e-16).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m1 11872.61 6
## 2 m2 11835.03 7 37.573 1.000 < 2e-16 ***
##
## AIC difference: 5.45, model m1 has lower AIC.
compareML(m2, m0, suggest.report = T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m0 (X2(2.00)=21.876, p<.001).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m0 11856.91 5
## 2 m2 11835.03 7 21.876 2.000 3.159e-10 ***
##
## AIC difference: -9.49, model m2 has lower AIC.
#m3: a shared trial smooth plus condition-specific smooths (the overlap
#triggers the repeated-smooths warning and the rank deficiency below)
m3<-bam(acc~1 + s(trial) + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m3)

##
## Method: fREML Optimizer: perf newton
## full convergence after 9 iterations.
## Gradient range [-8.431337e-05,3.453751e-05]
## (score 11842.41 & scale 1).
## Hessian positive definite, eigenvalue range [8.430391e-05,22.87839].
## Model rank = 315 / 316
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00e+00 4.40e+00 0.95 0.050 *
## s(trial):cndideo 9.00e+00 2.15e-04 0.95 0.075 .
## s(trial):cndpseudo 9.00e+00 2.94e+00 0.95 0.055 .
## s(trial,sbj) 2.88e+02 1.16e+02 0.95 0.095 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m2, m3, suggest.report=T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m3: acc ~ 1 + s(trial) + s(trial, by = cnd) + s(trial, sbj, bs = "fs",
## m = 1)
##
## Model m2 preferred: lower fREML score (7.380), and lower df (2.000).
## -----
## Model Score Edf Difference Df
## 1 m3 11842.41 9
## 2 m2 11835.03 7 7.380 -2.000
##
## AIC difference: -0.85, model m2 has lower AIC.
#best-fit model
fm<-m2
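#the selected model's parametric and smooth terms can be inspected with
#summary(fm) (not shown here)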
Graph
par(mfrow=c(1,2), oma = c(0, 0, 2, 0))
color<-c("red" ,"blue")
clrsmooth <- c("#D0A0A0","#A0A0D0")
lintyp<-c("solid", "solid")
linwid=c(2,2)
pchar=c(15,16)
leg<-c("Label Categories", "Ideogram Categories")
offset=0.1
plot_smooth(fm, view="trial", cond=list(cnd="pseudo"), rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, print.summary=F, ylim=c(0,1),transform=inv.logit, xlab="Trial", ylab="Proportion Correct", lwd=2, las=2, rug=FALSE, col=clrsmooth[1], lty=lintyp[1], add=F, hide.label=T, main="Accuracy", yaxs="i", xaxt="n")
plot_smooth(fm, view="trial", cond=list(cnd="ideo"), rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, print.summary=F, ylim=c(0,1),transform=inv.logit, xlab="Trial", ylab="Proportion Correct", lwd=2, las=2, rug=FALSE, col=clrsmooth[2], lty=lintyp[1], add=T, hide.label=T, yaxs="i", xaxt="n")
#overlay observed block means (plotted at block midpoints on the trial axis) with SEs
plotCI(data_gr$block*24-12, data_gr$perf_ps, uiw=data_gr$perf_ps_se, gap=0, col=color[1], pch=pchar[1], add=T)
plotCI(data_gr$block*24-12+offset*10, data_gr$perf_id, uiw=data_gr$perf_id_se, gap=0, col=color[2], pch=pchar[2], add=T)
axis(side = 1, at= c(0, 50,100,150,200,250, 288), cex.axis=1)
#two overlaid legends: line entries for the smooths, point entries for the observed block means
legend(x=100, y=0.65, legend=leg, lty=lintyp, bty="n", col=clrsmooth, lwd=2, seg.len=2.0)
legend(x=120, y=0.65, legend=c("", ""), bty="n", col=color, pch=pchar)
plot_diff(fm, view="trial", comp=list(cnd=c('pseudo', 'ideo')),rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, xlab="Trial", ylab="Estimated Difference in Accuracy (Log Odds)", lwd=2, main="Difference in Accuracy\n Between Label and Ideogram Categories", hide.label=T)
## Summary:
## * trial : numeric predictor; with 100 values ranging from 1.000000 to 288.000000.
## * sbj : factor; set to the value(s): aggsem96. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(trial,sbj)
##
##
## trial window(s) of significant difference(s):
## 53.181818 - 111.161616
## 253.212121 - 288.000000
mtext('Initial Category Learning', outer = TRUE, cex = 1.2, font =2)

Session Information
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] boot_1.3-28 itsadug_2.4.1 plotfunctions_1.4 mgcv_1.8-41
## [5] nlme_3.1-160 gplots_3.1.3 sciplot_1.2-0 dplyr_1.0.10
##
## loaded via a namespace (and not attached):
## [1] highr_0.10 pillar_1.8.1 bslib_0.4.2 compiler_4.2.2
## [5] jquerylib_0.1.4 bitops_1.0-7 tools_4.2.2 digest_0.6.31
## [9] jsonlite_1.8.4 evaluate_0.19 lifecycle_1.0.3 tibble_3.1.8
## [13] lattice_0.20-45 pkgconfig_2.0.3 rlang_1.0.6 Matrix_1.5-1
## [17] cli_3.5.0 rstudioapi_0.14 yaml_2.3.6 xfun_0.36
## [21] fastmap_1.1.0 withr_2.5.0 stringr_1.5.0 knitr_1.41
## [25] generics_0.1.3 vctrs_0.5.1 sass_0.4.4 gtools_3.9.4
## [29] caTools_1.18.2 grid_4.2.2 tidyselect_1.2.0 glue_1.6.2
## [33] R6_2.5.1 fansi_1.0.3 rmarkdown_2.19 magrittr_2.0.3
## [37] splines_4.2.2 htmltools_0.5.4 utf8_1.2.2 KernSmooth_2.23-20
## [41] stringi_1.7.8 cachem_1.0.6