Libraries, Data Files, Preprocessing

#Libraries 
library(dplyr)
library(sciplot)
library(gplots)
library(parallel)
library(itsadug)
library(mgcv)
library(boot)
library(plotrix)
#read data file
data<-read.table("assessment.txt", header=T)
str(data)
## 'data.frame':    29952 obs. of  13 variables:
##  $ sbj       : chr  "elppap96" "elppap96" "elppap96" "elppap96" ...
##  $ trial     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item      : int  105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
##  $ rt        : num  -2278 -1693 1583 -1640 1084 ...
##  $ sub       : chr  "s19" "s19" "s19" "s19" ...
##  $ order     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ cnd       : chr  "control" "control" "control" "control" ...
##  $ acc       : int  0 0 1 0 1 0 0 0 1 1 ...
##  $ color     : chr  "lblue_brown" "lblue_brown" "lblue_brown" "lblue_brown" ...
##  $ pair      : chr  "rand17_rand26" "rand17_rand26" "rand17_rand26" "rand17_rand26" ...
##  $ stim_shape: chr  "rand17" "rand26" "rand26" "rand17" ...
##  $ stim_color: chr  "brown" "brown" "brown" "lblue" ...
##  $ stim_size : chr  "small" "big" "small" "big" ...
#convert chr to factor
data <- mutate_if(data, is.character, as.factor)
str(data)
## 'data.frame':    29952 obs. of  13 variables:
##  $ sbj       : Factor w/ 39 levels "agggko96","aggsem96",..: 16 16 16 16 16 16 16 16 16 16 ...
##  $ trial     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item      : int  105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
##  $ rt        : num  -2278 -1693 1583 -1640 1084 ...
##  $ sub       : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
##  $ order     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ cnd       : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ acc       : int  0 0 1 0 1 0 0 0 1 1 ...
##  $ color     : Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pair      : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stim_shape: Factor w/ 6 levels "rand17","rand21",..: 1 5 5 1 1 1 5 5 1 5 ...
##  $ stim_color: Factor w/ 6 levels "_blue","brown",..: 2 2 2 4 4 2 4 4 4 4 ...
##  $ stim_size : Factor w/ 2 levels "big","small": 2 1 2 1 2 1 2 1 1 1 ...
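#note: mutate_if() is superseded in dplyr >= 1.0; an equivalent call (not evaluated here) is
#data <- mutate(data, across(where(is.character), as.factor))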
#remove non-learner participants; filtered data in "dat"
non_learners<-c("agggko96", "dimdim96", "dimdou96", "thaath96", "zoigeo96", "efhgia88", "thesak96")
dat<-droplevels(data[!(data$sbj %in% non_learners),])
str(dat)
## 'data.frame':    24576 obs. of  13 variables:
##  $ sbj       : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ trial     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item      : int  105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
##  $ rt        : num  -2278 -1693 1583 -1640 1084 ...
##  $ sub       : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
##  $ order     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ cnd       : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ acc       : int  0 0 1 0 1 0 0 0 1 1 ...
##  $ color     : Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pair      : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ stim_shape: Factor w/ 6 levels "rand17","rand21",..: 1 5 5 1 1 1 5 5 1 5 ...
##  $ stim_color: Factor w/ 6 levels "_blue","brown",..: 2 2 2 4 4 2 4 4 4 4 ...
##  $ stim_size : Factor w/ 2 levels "big","small": 2 1 2 1 2 1 2 1 1 1 ...
#no response is denoted by rt = -20000
sum(ifelse(dat$rt==-20000,1, 0), na.rm=T)
## [1] 0
#there are no no-response trials.

#check for RTs with an absolute value less than 250 ms
sum(ifelse((dat$rt>-250) & (dat$rt<250),1,0), na.rm=T)
## [1] 1
#correct accuracy measure accordingly
dat$acc<-ifelse((dat$rt>-250) & (dat$rt<250), NA, dat$acc)

#new variable perf: trials after the learning criterion (rt is NA) are scored 1; otherwise accuracy is kept
dat$perf<-ifelse(is.na(dat$rt), 1, dat$acc)
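#optional sanity check on the recoding (not evaluated in this report):
#with(dat, table(missing_rt=is.na(rt), perf, useNA="ifany"))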

Descriptive Statistics

dat_av<-with(dat,aggregate(perf,list(sbj,cnd),mean, na.rm=T))
names(dat_av)<-c("sbj", "cnd", "perf")

#rule-discovery task
mean(dat_av[dat_av$cnd=="control",]$perf, na.rm=T)
## [1] 0.8884277
sd(dat_av[dat_av$cnd=="control",]$perf, na.rm=T)
## [1] 0.07206826
#label task
mean(dat_av[dat_av$cnd=="label",]$perf, na.rm=T)
## [1] 0.9737525
sd(dat_av[dat_av$cnd=="label",]$perf, na.rm=T)
## [1] 0.02574344
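#ideogram task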
mean(dat_av[dat_av$cnd=="ideo",]$perf, na.rm=T)
## [1] 0.979126
sd(dat_av[dat_av$cnd=="ideo",]$perf, na.rm=T)
## [1] 0.0235341
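#the same per-task descriptives in a single call (not evaluated in this report):
#dat_av %>% group_by(cnd) %>% summarise(m=mean(perf, na.rm=TRUE), sd=sd(perf, na.rm=TRUE))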
# Calculate accuracy per block of 8 trials (32 blocks per task)
n_blocks<-32
n_parts<-length(unique(dat$sbj))
n_trials_in_block<-8
n_tasks<-3
block<-rep(1:n_blocks,n_parts*n_tasks)
sbjs<-dat[dat$trial<(n_blocks*n_tasks+1)& dat$cnd=="control",]$sbj
pair<-dat[dat$trial<(n_blocks+1),]$pair
color<-dat[dat$trial<(n_blocks+1),]$color
cnd<-dat[dat$trial<(n_blocks+1),]$cnd
order<-dat[dat$trial<(n_blocks+1),]$order

perf<-numeric(n_parts*n_blocks*n_tasks)
for (i in 1:(n_parts*n_blocks*n_tasks)){perf[i]<-mean(dat$perf[((i-1)*n_trials_in_block+1):(i*n_trials_in_block)], na.rm=TRUE) }

data_bl<-data.frame(sbj=sbjs, cnd=cnd, block=block, order=order, pair=pair, color=color, perf= perf)
str(data_bl)
## 'data.frame':    3072 obs. of  7 variables:
##  $ sbj  : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ cnd  : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ block: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ order: int  1 1 1 1 1 1 1 1 1 1 ...
##  $ pair : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ color: Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ perf : num  0.25 0.625 0.5 0.375 0.625 0.875 1 0.875 1 1 ...
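#a loop-free alternative for the block means (not evaluated here); it assumes trial numbering
#restarts at 1 within each task (the loop above instead relies on dat being ordered by subject
#and task); the names dat$block and data_bl2 are illustrative only:
#dat$block <- ceiling(dat$trial/n_trials_in_block)
#data_bl2 <- aggregate(perf ~ sbj + cnd + block + order + pair + color, data=dat, FUN=mean, na.rm=TRUE)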
#data frame for graph
with(data_bl,aggregate(perf,list(block, cnd),mean, na.rm=TRUE, na.action=NULL))->data_gr
names(data_gr)<-c("blck", "cnd", "perf")
with(data_bl,aggregate(perf,list(block, cnd),se))->data_gr_se
names(data_gr_se)<-c("blck", "cnd","perf_se")
data_gr$perf_se<-data_gr_se$perf_se

Inferential Statistics

data<-droplevels(dat[dat$cnd!="control",])
str(data)
## 'data.frame':    16384 obs. of  14 variables:
##  $ sbj       : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
##  $ trial     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item      : int  118110 118111 118101 118001 118100 118000 118010 118011 108010 108111 ...
##  $ rt        : num  -3749 -2351 -885 -751 2313 ...
##  $ sub       : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
##  $ order     : int  2 2 2 2 2 2 2 2 2 2 ...
##  $ cnd       : Factor w/ 2 levels "ideo","label": 2 2 2 2 2 2 2 2 2 2 ...
##  $ acc       : int  0 0 0 0 1 1 1 0 1 1 ...
##  $ color     : Factor w/ 2 levels "red___green",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ pair      : Factor w/ 2 levels "rand21_rand30",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ stim_shape: Factor w/ 4 levels "rand21","rand22",..: 2 2 2 3 2 3 3 3 3 2 ...
##  $ stim_color: Factor w/ 4 levels "_blue","green",..: 3 3 2 2 2 2 3 3 3 3 ...
##  $ stim_size : Factor w/ 2 levels "big","small": 2 1 1 1 2 2 2 1 2 1 ...
##  $ perf      : num  0 0 0 0 1 1 1 0 1 1 ...
nc <- detectCores()
cl <- makeCluster(nc-1)
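#the cluster is not used by the bam() calls below as written; it could be passed to bam()
#via its cluster argument for parallel fREML estimation, e.g. (not evaluated here):
#m0 <- bam(perf ~ 1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial, cluster=cl)
#stopCluster(cl) #release the workers when done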



m0<-bam(perf~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)       

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.708706e-09,-7.322227e-10]
## (score 16713.74 & scale 1).
## Hessian positive definite, eigenvalue range [0.6372298,8.870311].
## Model rank =  298 / 298 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(trial)       9.00   3.23    0.99    0.82
## s(trial,sbj) 288.00  53.29    0.99    0.86
#edf is not close to k'

m1<-bam(perf~1 + cnd +s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial) 
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.613998e-09,-6.315783e-10]
## (score 16674.04 & scale 1).
## Hessian positive definite, eigenvalue range [0.6408052,8.880032].
## Model rank =  299 / 299 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                  k'    edf k-index p-value
## s(trial)       9.00   3.24    0.97    0.20
## s(trial,sbj) 288.00  53.31    0.97    0.22
#edf is not close to k' 

compareML(m0, m1, suggest.report = T)
## m0: perf ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## m1: perf ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m1 is [significantly] better than model m0 (X2(1.00)=39.702, p<2e-16).
## -----
##   Model    Score Edf Difference    Df  p.value Sig.
## 1    m0 16713.74   5                               
## 2    m1 16674.04   6     39.702 1.000  < 2e-16  ***
## 
## AIC difference: 4.84, model m1 has lower AIC.
#m1 is the better model
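#aside: m0 and m1 differ in a fixed effect (cnd); a stricter comparison would refit both
#models with method="ML" before calling compareML, e.g. (not evaluated here):
#m0.ml <- bam(perf ~ 1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial, method="ML")
#m1.ml <- bam(perf ~ 1 + cnd + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial, method="ML")
#compareML(m0.ml, m1.ml)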
m2<-bam(perf~1 + cnd + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial) 
gam.check(m2)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 9 iterations.
## Gradient range [-7.099975e-05,8.825753e-06]
## (score 16610.36 & scale 1).
## Hessian positive definite, eigenvalue range [7.099011e-05,9.188301].
## Model rank =  308 / 308 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                      k'   edf k-index p-value  
## s(trial):cndideo    9.0   1.0    0.97    0.10 .
## s(trial):cndlabel   9.0   3.7    0.97    0.12  
## s(trial,sbj)      288.0  54.0    0.97    0.06 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m2,m1, suggest.report = T)
## m2: perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs", 
##     m = 1)
## 
## m1: perf ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m1 (X2(2.00)=63.677, p<2e-16).
## -----
##   Model    Score Edf Difference    Df  p.value Sig.
## 1    m1 16674.04   6                               
## 2    m2 16610.36   8     63.677 2.000  < 2e-16  ***
## 
## AIC difference: -19.98, model m2 has lower AIC.
m3<-bam(perf~1 + cnd + s(trial) + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial) 
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m3)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-6.635565e-05,5.396031e-06]
## (score 16610.36 & scale 1).
## Hessian positive definite, eigenvalue range [5.894638e-05,9.188281].
## Model rank =  316 / 317 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                      k'   edf k-index p-value   
## s(trial)            9.0   1.0    0.95   0.015 * 
## s(trial):cndideo    9.0   1.0    0.95   0.015 * 
## s(trial):cndlabel   9.0   2.7    0.95   0.010 **
## s(trial,sbj)      288.0  54.0    0.95   0.010 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m3,m2, suggest.report = T)
## m3: perf ~ 1 + cnd + s(trial) + s(trial, by = cnd) + s(trial, sbj, 
##     bs = "fs", m = 1)
## 
## m2: perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs", 
##     m = 1)
## 
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m3 is [not significantly?] better than model m2 (X2(2.00)=0.001, p>.1).
## -----
##   Model    Score Edf Difference    Df p.value Sig.
## 1    m2 16610.36   8                              
## 2    m3 16610.36  10      0.001 2.000   0.999     
## 
## AIC difference: 0.00, model m2 has lower AIC.
## Warning in compareML(m3, m2, suggest.report = T): Only small difference in fREML...
#final model
fm<-m2
summary(fm)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs", 
##     m = 1)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  12.4339     0.8751  14.209  < 2e-16 ***
## cndlabel     -4.7321     1.7702  -2.673  0.00751 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                      edf Ref.df Chi.sq p-value    
## s(trial):cndideo   1.000   1.00  130.0  <2e-16 ***
## s(trial):cndlabel  3.696   4.38  146.4  <2e-16 ***
## s(trial,sbj)      54.019 287.00  299.4  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.237   Deviance explained = 47.5%
## fREML =  16610  Scale est. = 1         n = 16383
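#the fitted learning curves per task can also be inspected directly, e.g. (not evaluated here):
#plot_smooth(fm, view="trial", plot_all="cnd", rm.ranef=TRUE)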

Graphs

par(mfrow=c(1,2), oma = c(0, 0, 2, 0))

color<-c("red" ,"blue", "grey50")
lintyp<-c("solid", "solid", "solid")
pchar=c(15,16,17)
leg<-c("Label Task", "Ideogram Task", "Rule-Discovery Task")
offset=0.1


#draw an empty plot
plot(1, type="n", xaxt="n", yaxt="n", xlim=c(0, 32), ylim=c(0.3, 1.0), main = "Accuracy", xlab="Blocks of 8 trials", ylab="Percent Correct", las=2, frame.plot=FALSE, cex.main=1, cex.axis=1)
##label
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf_se)), data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf_se)),]$perf, pch=pchar[1], uiw=data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf_se)),]$perf_se, gap=0, add=TRUE, col=color[1])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf)),data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf)),]$perf,col=color[1], lty=lintyp[1])
##ideo
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se))+offset, data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se)),]$perf, pch=pchar[2], uiw=data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se)),]$perf_se, gap=0, add = TRUE, col=color[2])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf))+offset,data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf)),]$perf,col=color[2], lty=lintyp[2])
##rule-discovery
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se))+2*offset, data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se)),]$perf, pch=pchar[3], uiw=data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se)),]$perf_se, gap=0, add = TRUE, col=color[3])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf))+2*offset,data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf)),]$perf,col=color[3], lty=lintyp[3])

axis(side = 1, at = 1:n_blocks, labels = 1:n_blocks, cex.axis = 0.8)
axis(side = 2, at = 3:10/10, labels = c("0.0", 4:9/10.0, "1.0"), las=2, cex.axis = 0.8)
axis.break(axis=2,breakpos=.35,bgcol="white",breakcol="black", style="slash",brw=0.02)


legend(x=10, y=0.7, legend=leg, pch=pchar, lty=lintyp, bty="n", col=color, seg.len=4.0)
#legend(x=20, y=0.50, legend="chance level", bty ="n" ,cex=0.8)
#savePlot("LC_CAT", type="png",device = dev.cur())


plot_diff(fm, view="trial", comp=list(cnd=c('label', 'ideo')),rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, xlab="Trial", ylab="Estimated Difference in Accuracy (Log Odds)", lwd=2, main="Difference in Accuracy\n Between Label and Ideogram Assessment Tasks", hide.label=T, cex.main=1, xaxt="n")
## Summary:
##  * trial : numeric predictor; with 100 values ranging from 1.000000 to 256.000000. 
##  * sbj : factor; set to the value(s): aggsem96. (Might be canceled as random effect, check below.) 
##  * NOTE : The following random effects columns are canceled: s(trial,sbj)
## 
## 
## trial window(s) of significant difference(s):
##  37.060606 - 209.636364
axis(side = 1, at = c(0, 50, 100, 150, 200, 256), labels = c("0", "50", "100", "150", "200", "256"))

mtext('Assessment Type II Tasks', outer = TRUE, cex = 1.2, font =2)

par(mfrow=c(1,1), oma = c(0, 0, 0, 0))

Session Information

sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
## [1] plotrix_3.8-2     boot_1.3-28       itsadug_2.4.1     plotfunctions_1.4
## [5] mgcv_1.8-41       nlme_3.1-160      gplots_3.1.3      sciplot_1.2-0    
## [9] dplyr_1.0.10     
## 
## loaded via a namespace (and not attached):
##  [1] highr_0.10         pillar_1.8.1       bslib_0.4.2        compiler_4.2.2    
##  [5] jquerylib_0.1.4    bitops_1.0-7       tools_4.2.2        digest_0.6.31     
##  [9] jsonlite_1.8.4     evaluate_0.19      lifecycle_1.0.3    tibble_3.1.8      
## [13] lattice_0.20-45    pkgconfig_2.0.3    rlang_1.0.6        Matrix_1.5-1      
## [17] cli_3.5.0          rstudioapi_0.14    yaml_2.3.6         xfun_0.36         
## [21] fastmap_1.1.0      withr_2.5.0        stringr_1.5.0      knitr_1.41        
## [25] generics_0.1.3     vctrs_0.5.1        sass_0.4.4         gtools_3.9.4      
## [29] caTools_1.18.2     grid_4.2.2         tidyselect_1.2.0   glue_1.6.2        
## [33] R6_2.5.1           fansi_1.0.3        rmarkdown_2.19     magrittr_2.0.3    
## [37] splines_4.2.2      htmltools_0.5.4    utf8_1.2.2         KernSmooth_2.23-20
## [41] stringi_1.7.8      cachem_1.0.6