Libraries, Data Files, Preprocessing
#Libraries
library(dplyr)    #data manipulation (mutate_if)
library(sciplot)  #se()
library(gplots)   #plotCI()
library(parallel) #detectCores(), makeCluster()
library(itsadug)  #plot_smooth(), plot_diff(), compareML()
library(mgcv)     #bam()
library(boot)     #inv.logit()
#read data file
data<-read.table("initial.txt", header=T)
#convert chr to factor
data <- mutate_if(data, is.character, as.factor)
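#equivalent, optional form with dplyr's newer across() idiom (not run here;
#mutate_if still works but is superseded):
#data <- mutate(data, across(where(is.character), as.factor))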
names(data)[names(data) == "stim"] <- "shape"
#re-order by participant and trial
data<-data[order(data$sbj,data$trial),]
#remove non-learner participants; cleaned data in "dat"
non_learners<-c("agggko96","dimdim96","dimdou96","thaath96","zoigeo96","efhgia88","thesak96")
dat<-droplevels(data[!(data$sbj %in% non_learners),])
#a missing response is denoted by rt = -10000
dat$rt<-ifelse(dat$rt==-10000,NA, dat$rt)
sum(is.na(dat$rt))
## [1] 4
str(dat)
## 'data.frame': 9216 obs. of 10 variables:
## $ sbj : Factor w/ 32 levels "aggsem96","aggska92",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 11052134 12064213 22064321 12052143 12012314 11021423 21033412 22041243 21013421 12021324 ...
## $ rt : num 871 976 -1460 2486 1254 ...
## $ sub : Factor w/ 32 levels "s1","s10","s11",..: 9 9 9 9 9 9 9 9 9 9 ...
## $ acc : int 1 1 0 1 1 0 0 0 1 0 ...
## $ shape : Factor w/ 4 levels "rand21","rand22",..: 2 3 4 3 3 2 1 4 1 3 ...
## $ stim_ : Factor w/ 128 levels "s10rand21","s10rand22",..: 30 31 32 31 31 30 29 32 29 31 ...
## $ corr_resp: Factor w/ 4 levels "ideo1","ideo2",..: 4 3 2 3 3 4 1 2 1 3 ...
## $ cnd : Factor w/ 2 levels "ideo","pseudo": 2 2 1 2 2 2 1 1 1 2 ...
#proportion of trials with missing RT
sum(is.na(dat$rt))/length(dat$rt)
## [1] 0.0004340278
#check whether any RTs are below 250 ms in absolute value
sum(ifelse((dat$rt>-250) & (dat$rt<250),1,0), na.rm=T)
## [1] 0
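#equivalently: sum(abs(dat$rt) < 250, na.rm=TRUE)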
#set accuracy to NA on trials with a missing response
dat$acc<-ifelse(is.na(dat$rt),NA,dat$acc)
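#sanity check (not run here): after this recoding,
#sum(is.na(dat$acc)) == sum(is.na(dat$rt)) should be TRUE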
Descriptive Statistics
#per-participant mean accuracy by condition
dat_av<-with(dat,aggregate(acc,list(sbj,cnd),mean, na.rm=T))
names(dat_av)<-c("sbj", "cnd", "acc")
#label (pseudo) condition
mean(dat_av[dat_av$cnd=="pseudo",]$acc, na.rm=T)
## [1] 0.8922412
sd(dat_av[dat_av$cnd=="pseudo",]$acc, na.rm=T)
## [1] 0.09274273
#ideogram (ideo) condition
mean(dat_av[dat_av$cnd=="ideo",]$acc, na.rm=T)
## [1] 0.8723306
sd(dat_av[dat_av$cnd=="ideo",]$acc, na.rm=T)
## [1] 0.09856028
# Calculate accuracy per block of 24 trials
n_blocks<-12
n_parts<-length(levels(dat$sbj))
n_cnds<-2
n_trials_in_block<-24
n_trials_in_block_per_cnd<-n_trials_in_block/n_cnds
block<-rep(1:n_blocks,n_parts)
#the first n_blocks trials of each participant yield one sbj label per block
sbjs<-dat[dat$trial<(n_blocks+1),]$sbj
#pre-allocate per-block accuracy vectors for each condition
perf_ps<-numeric(n_parts*n_blocks)
perf_id<-numeric(n_parts*n_blocks)
#average each successive run of 12 same-condition trials (one block);
#this relies on dat being ordered by sbj and trial (see above)
for (i in 1:(n_parts*n_blocks)){
perf_ps[i]<-mean(dat[dat$cnd=="pseudo",]$acc[((i-1)*n_trials_in_block_per_cnd+1):(i*n_trials_in_block_per_cnd)], na.rm=TRUE)
perf_id[i]<-mean(dat[dat$cnd=="ideo",]$acc[((i-1)*n_trials_in_block_per_cnd+1):(i*n_trials_in_block_per_cnd)], na.rm=TRUE)
}
data_bl<-data.frame(sbj=sbjs, block=block, perf_ps= perf_ps, perf_id= perf_id)
#per-block group means and standard errors (se() from sciplot)
data_gr_ps<-with(data_bl,aggregate(perf_ps,list(block),mean))
data_gr_id<-with(data_bl,aggregate(perf_id,list(block),mean))
names(data_gr_ps)<-c("block", "perf_ps"); names(data_gr_id)<-c("block", "perf_id")
data_gr_ps_se<-with(data_bl,aggregate(perf_ps,list(block),se))
data_gr_id_se<-with(data_bl,aggregate(perf_id,list(block),se))
names(data_gr_ps_se)<-c("block", "perf_ps_se"); names(data_gr_id_se)<-c("block", "perf_id_se")
#data frame for graph
data_gr<-data.frame(block=data_gr_ps$block, perf_ps=data_gr_ps$perf_ps, perf_id=data_gr_id$perf_id, perf_ps_se=data_gr_ps_se$perf_ps_se, perf_id_se=data_gr_id_se$perf_id_se )
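#the same block-level aggregation as an optional dplyr sketch (not run here;
#it assumes 24 trials per block as defined above and should match data_bl up
#to layout):
#dat %>%
#  mutate(block = ceiling(trial/n_trials_in_block)) %>%
#  group_by(sbj, cnd, block) %>%
#  summarise(acc = mean(acc, na.rm=TRUE), .groups="drop")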
Inferential Statistics
nc <- detectCores()
cl <- makeCluster(nc-1)
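#note: the cluster is not passed to the bam() calls below; bam() accepts it
#via its cluster argument (e.g. bam(..., cluster=cl)), which can speed up
#fREML estimation without changing the fits; stopCluster(cl) releases it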
#m0: population-level smooth of trial plus per-participant factor smooths
#(bs="fs") as random curves; binomial family for the 0/1 accuracy
m0<-bam(acc~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.976066e-08,2.886486e-07]
## (score 11856.91 & scale 1).
## Hessian positive definite, eigenvalue range [1.419103,22.76145].
## Model rank = 298 / 298
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 4.87 0.96 0.22
## s(trial,sbj) 288.00 115.44 0.96 0.25
#edf is not close to k'
#m1: m0 plus a constant (intercept) offset for condition
m1<-bam(acc~1 + cnd +s(trial) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-2.324611e-08,2.398126e-07]
## (score 11872.61 & scale 1).
## Hessian positive definite, eigenvalue range [1.526318,22.86986].
## Model rank = 299 / 299
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 4.83 0.97 0.54
## s(trial,sbj) 288.00 115.79 0.97 0.47
#edf is not close to k'
compareML(m0, m1, suggest.report = T)
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: acc ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (15.697), and lower df (1.000).
## -----
## Model Score Edf Difference Df
## 1 m1 11872.61 6
## 2 m0 11856.91 5 15.697 -1.000
##
## AIC difference: 14.94, model m1 has lower AIC.
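#note: fREML and AIC disagree here (m0 wins on fREML, m1 on AIC); a direct
#cross-check is AIC(m0, m1) (not run here)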
#m2: a separate trial smooth for each condition
m2<-bam(acc~1 + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
gam.check(m2)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-8.09246e-09,8.478705e-08]
## (score 11835.03 & scale 1).
## Hessian positive definite, eigenvalue range [1.26909,22.91401].
## Model rank = 307 / 307
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial):cndideo 9.00 3.82 0.96 0.18
## s(trial):cndpseudo 9.00 5.09 0.96 0.21
## s(trial,sbj) 288.00 116.00 0.96 0.15
#edf is not close to k'
compareML(m2, m1, suggest.report = T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: acc ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m1 (X2(1.00)=37.573, p<2e-16).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m1 11872.61 6
## 2 m2 11835.03 7 37.573 1.000 < 2e-16 ***
##
## AIC difference: 5.45, model m1 has lower AIC.
compareML(m2, m0, suggest.report = T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m0 (X2(2.00)=21.876, p<.001).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m0 11856.91 5
## 2 m2 11835.03 7 21.876 2.000 3.159e-10 ***
##
## AIC difference: -9.49, model m2 has lower AIC.
#m3: a shared trial smooth plus condition-specific smooths (the overlap
#triggers the repeated-smooths warning and the rank deficiency below)
m3<-bam(acc~1 + s(trial) + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m3)

##
## Method: fREML Optimizer: perf newton
## full convergence after 9 iterations.
## Gradient range [-8.431337e-05,3.453751e-05]
## (score 11842.41 & scale 1).
## Hessian positive definite, eigenvalue range [8.430391e-05,22.87839].
## Model rank = 315 / 316
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00e+00 4.40e+00 0.95 0.050 *
## s(trial):cndideo 9.00e+00 2.15e-04 0.95 0.075 .
## s(trial):cndpseudo 9.00e+00 2.94e+00 0.95 0.055 .
## s(trial,sbj) 2.88e+02 1.16e+02 0.95 0.095 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m2, m3, suggest.report=T)
## m2: acc ~ 1 + s(trial, by = cnd) + s(trial, sbj, bs = "fs", m = 1)
##
## m3: acc ~ 1 + s(trial) + s(trial, by = cnd) + s(trial, sbj, bs = "fs",
## m = 1)
##
## Model m2 preferred: lower fREML score (7.380), and lower df (2.000).
## -----
## Model Score Edf Difference Df
## 1 m3 11842.41 9
## 2 m2 11835.03 7 7.380 -2.000
##
## AIC difference: -0.85, model m2 has lower AIC.
#best-fit model
fm<-m2
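#the selected model's parametric and smooth terms can be inspected with
#summary(fm) (not shown here)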
Graph
par(mfrow=c(1,2), oma = c(0, 0, 2, 0))
color<-c("red" ,"blue")
clrsmooth <- c("#D0A0A0","#A0A0D0")
lintyp<-c("solid", "solid")
linwid=c(2,2)
pchar=c(15,16)
leg<-c("Label Categories", "Ideogram Categories")
offset=0.1
plot_smooth(fm, view="trial", cond=list(cnd="pseudo"), rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, print.summary=F, ylim=c(0,1),transform=inv.logit, xlab="Trial", ylab="Proportion Correct", lwd=2, las=2, rug=FALSE, col=clrsmooth[1], lty=lintyp[1], add=F, hide.label=T, main="Accuracy", yaxs="i", xaxt="n")
plot_smooth(fm, view="trial", cond=list(cnd="ideo"), rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, print.summary=F, ylim=c(0,1),transform=inv.logit, xlab="Trial", ylab="Proportion Correct", lwd=2, las=2, rug=FALSE, col=clrsmooth[2], lty=lintyp[1], add=T, hide.label=T, yaxs="i", xaxt="n")
#overlay observed block means (plotted at block midpoints on the trial axis) with SEs
plotCI(data_gr$block*24-12, data_gr$perf_ps, uiw=data_gr$perf_ps_se, gap=0, col=color[1], pch=pchar[1], add=T)
plotCI(data_gr$block*24-12+offset*10, data_gr$perf_id, uiw=data_gr$perf_id_se, gap=0, col=color[2], pch=pchar[2], add=T)
axis(side = 1, at= c(0, 50,100,150,200,250, 288), cex.axis=1)
#two overlaid legends: line entries for the smooths, point entries for the observed block means
legend(x=100, y=0.65, legend=leg, lty=lintyp, bty="n", col=clrsmooth, lwd=2, seg.len=2.0)
legend(x=120, y=0.65, legend=c("", ""), bty="n", col=color, pch=pchar)
plot_diff(fm, view="trial", comp=list(cnd=c('pseudo', 'ideo')),rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, xlab="Trial", ylab="Estimated Difference in Accuracy (Log Odds)", lwd=2, main="Difference in Accuracy\n Between Label and Ideogram Categories", hide.label=T)
## Summary:
## * trial : numeric predictor; with 100 values ranging from 1.000000 to 288.000000.
## * sbj : factor; set to the value(s): aggsem96. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(trial,sbj)
##
##
## trial window(s) of significant difference(s):
## 53.181818 - 111.161616
## 253.212121 - 288.000000
mtext('Initial Category Learning', outer = TRUE, cex = 1.2, font =2)

Session Information
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] boot_1.3-28 itsadug_2.4.1 plotfunctions_1.4 mgcv_1.8-41
## [5] nlme_3.1-160 gplots_3.1.3 sciplot_1.2-0 dplyr_1.0.10
##
## loaded via a namespace (and not attached):
## [1] highr_0.10 pillar_1.8.1 bslib_0.4.2 compiler_4.2.2
## [5] jquerylib_0.1.4 bitops_1.0-7 tools_4.2.2 digest_0.6.31
## [9] jsonlite_1.8.4 evaluate_0.19 lifecycle_1.0.3 tibble_3.1.8
## [13] lattice_0.20-45 pkgconfig_2.0.3 rlang_1.0.6 Matrix_1.5-1
## [17] cli_3.5.0 rstudioapi_0.14 yaml_2.3.6 xfun_0.36
## [21] fastmap_1.1.0 withr_2.5.0 stringr_1.5.0 knitr_1.41
## [25] generics_0.1.3 vctrs_0.5.1 sass_0.4.4 gtools_3.9.4
## [29] caTools_1.18.2 grid_4.2.2 tidyselect_1.2.0 glue_1.6.2
## [33] R6_2.5.1 fansi_1.0.3 rmarkdown_2.19 magrittr_2.0.3
## [37] splines_4.2.2 htmltools_0.5.4 utf8_1.2.2 KernSmooth_2.23-20
## [41] stringi_1.7.8 cachem_1.0.6