Libraries, Data Files, Preprocessing
#Libraries
library(dplyr)
library(sciplot)
library(gplots)
library(parallel)
library(itsadug)
library(mgcv)
library(boot)
library(plotrix)
#read data file
data<-read.table("assessment.txt", header=T)
str(data)
## 'data.frame': 29952 obs. of 13 variables:
## $ sbj : chr "elppap96" "elppap96" "elppap96" "elppap96" ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
## $ rt : num -2278 -1693 1583 -1640 1084 ...
## $ sub : chr "s19" "s19" "s19" "s19" ...
## $ order : int 1 1 1 1 1 1 1 1 1 1 ...
## $ cnd : chr "control" "control" "control" "control" ...
## $ acc : int 0 0 1 0 1 0 0 0 1 1 ...
## $ color : chr "lblue_brown" "lblue_brown" "lblue_brown" "lblue_brown" ...
## $ pair : chr "rand17_rand26" "rand17_rand26" "rand17_rand26" "rand17_rand26" ...
## $ stim_shape: chr "rand17" "rand26" "rand26" "rand17" ...
## $ stim_color: chr "brown" "brown" "brown" "lblue" ...
## $ stim_size : chr "small" "big" "small" "big" ...
#convert chr to factor
data <- mutate_if(data, is.character, as.factor)
str(data)
## 'data.frame': 29952 obs. of 13 variables:
## $ sbj : Factor w/ 39 levels "agggko96","aggsem96",..: 16 16 16 16 16 16 16 16 16 16 ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
## $ rt : num -2278 -1693 1583 -1640 1084 ...
## $ sub : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
## $ order : int 1 1 1 1 1 1 1 1 1 1 ...
## $ cnd : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ acc : int 0 0 1 0 1 0 0 0 1 1 ...
## $ color : Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pair : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ stim_shape: Factor w/ 6 levels "rand17","rand21",..: 1 5 5 1 1 1 5 5 1 5 ...
## $ stim_color: Factor w/ 6 levels "_blue","brown",..: 2 2 2 4 4 2 4 4 4 4 ...
## $ stim_size : Factor w/ 2 levels "big","small": 2 1 2 1 2 1 2 1 1 1 ...
#remove non-learner participants
#data in "dat"
dat<-droplevels(data[data$sbj!="agggko96" & data$sbj!="dimdim96" & data$sbj!="dimdou96"& data$sbj!="thaath96" & data$sbj!="zoigeo96" & data$sbj!="efhgia88" & data$sbj!="thesak96",])
str(dat)
## 'data.frame': 24576 obs. of 13 variables:
## $ sbj : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 105100 105001 105000 105111 105110 105101 105010 105011 119111 119011 ...
## $ rt : num -2278 -1693 1583 -1640 1084 ...
## $ sub : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
## $ order : int 1 1 1 1 1 1 1 1 1 1 ...
## $ cnd : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ acc : int 0 0 1 0 1 0 0 0 1 1 ...
## $ color : Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pair : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ stim_shape: Factor w/ 6 levels "rand17","rand21",..: 1 5 5 1 1 1 5 5 1 5 ...
## $ stim_color: Factor w/ 6 levels "_blue","brown",..: 2 2 2 4 4 2 4 4 4 4 ...
## $ stim_size : Factor w/ 2 levels "big","small": 2 1 2 1 2 1 2 1 1 1 ...
#no response is denoted by rt = -20000
sum(ifelse(dat$rt==-20000,1, 0), na.rm=T)
## [1] 0
#there are no no-reponse trials.
#check if there RTs less than 250
sum(ifelse((dat$rt>-250) & (dat$rt<250),1,0), na.rm=T)
## [1] 1
#correct accuracy measure accordingly
dat$acc<-ifelse((dat$rt>-250) & (dat$rt<250), NA, dat$acc)
#new variable perf, denoting 1 after learning criterion
dat$perf<-ifelse(is.na(dat$rt)==T, 1, dat$acc)
Descriptive Statistics
dat_av<-with(dat,aggregate(perf,list(sbj,cnd),mean, na.rm=T))
names(dat_av)<-c("sbj", "cnd", "perf")
#rule-discovery task
mean(dat_av[dat_av$cnd=="control",]$perf, na.rm=T)
## [1] 0.8884277
sd(dat_av[dat_av$cnd=="control",]$perf, na.rm=T)
## [1] 0.07206826
#label task
mean(dat_av[dat_av$cnd=="label",]$perf, na.rm=T)
## [1] 0.9737525
sd(dat_av[dat_av$cnd=="label",]$perf, na.rm=T)
## [1] 0.02574344
mean(dat_av[dat_av$cnd=="ideo",]$perf, na.rm=T)
## [1] 0.979126
sd(dat_av[dat_av$cnd=="ideo",]$perf, na.rm=T)
## [1] 0.0235341
# Calculate accuracy per 32 blocks of 8 trials
n_blocks<-32
n_parts<-length(unique(dat$sbj))
n_trials_in_block<-8
n_tasks<-3
block<-rep(1:n_blocks,n_parts*n_tasks)
sbjs<-dat[dat$trial<(n_blocks*n_tasks+1)& dat$cnd=="control",]$sbj
group<-dat[dat$trial<(n_blocks+1),]$group
pair<-dat[dat$trial<(n_blocks+1),]$pair
color<-dat[dat$trial<(n_blocks+1),]$color
cnd<-dat[dat$trial<(n_blocks+1),]$cnd
order<-dat[dat$trial<(n_blocks+1),]$order
perf<-0; perf[1:(n_parts*n_blocks*n_tasks)]<-0
for (i in 1:(n_parts*n_blocks*n_tasks)){perf[i]<-mean(dat$perf[((i-1)*n_trials_in_block+1):(i*n_trials_in_block)], na.rm=TRUE) }
data_bl<-data.frame(sbj=sbjs, cnd=cnd, block=block, order=order, pair=pair, color=color, perf= perf)
str(data_bl)
## 'data.frame': 3072 obs. of 7 variables:
## $ sbj : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ cnd : Factor w/ 3 levels "control","ideo",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ block: int 1 2 3 4 5 6 7 8 9 10 ...
## $ order: int 1 1 1 1 1 1 1 1 1 1 ...
## $ pair : Factor w/ 3 levels "rand17_rand26",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ color: Factor w/ 3 levels "lblue_brown",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ perf : num 0.25 0.625 0.5 0.375 0.625 0.875 1 0.875 1 1 ...
#data frame for graph
with(data_bl,aggregate(perf,list(block, cnd),mean, na.rm=TRUE, na.action=NULL))->data_gr
names(data_gr)<-c("blck", "cnd", "perf")
with(data_bl,aggregate(perf,list(block, cnd),se))->data_gr_se
names(data_gr_se)<-c("blck", "cnd","perf_se")
data_gr$perf_se<-data_gr_se$perf_se
Inferential Statistics
data<-droplevels(dat[dat$cnd!="control",])
str(data)
## 'data.frame': 16384 obs. of 14 variables:
## $ sbj : Factor w/ 32 levels "aggsem96","aggska92",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ trial : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item : int 118110 118111 118101 118001 118100 118000 118010 118011 108010 108111 ...
## $ rt : num -3749 -2351 -885 -751 2313 ...
## $ sub : Factor w/ 32 levels "s1","s10","s11",..: 11 11 11 11 11 11 11 11 11 11 ...
## $ order : int 2 2 2 2 2 2 2 2 2 2 ...
## $ cnd : Factor w/ 2 levels "ideo","label": 2 2 2 2 2 2 2 2 2 2 ...
## $ acc : int 0 0 0 0 1 1 1 0 1 1 ...
## $ color : Factor w/ 2 levels "red___green",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ pair : Factor w/ 2 levels "rand21_rand30",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ stim_shape: Factor w/ 4 levels "rand21","rand22",..: 2 2 2 3 2 3 3 3 3 2 ...
## $ stim_color: Factor w/ 4 levels "_blue","green",..: 3 3 2 2 2 2 3 3 3 3 ...
## $ stim_size : Factor w/ 2 levels "big","small": 2 1 1 1 2 2 2 1 2 1 ...
## $ perf : num 0 0 0 0 1 1 1 0 1 1 ...
nc <- detectCores()
cl <- makeCluster(nc-1)
m0<-bam(perf~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.708706e-09,-7.322227e-10]
## (score 16713.74 & scale 1).
## Hessian positive definite, eigenvalue range [0.6372298,8.870311].
## Model rank = 298 / 298
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 3.23 0.99 0.82
## s(trial,sbj) 288.00 53.29 0.99 0.86
#edf is not close to k'
m1<-bam(perf~1 + cnd +s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

##
## Method: fREML Optimizer: perf newton
## full convergence after 4 iterations.
## Gradient range [-1.613998e-09,-6.315783e-10]
## (score 16674.04 & scale 1).
## Hessian positive definite, eigenvalue range [0.6408052,8.880032].
## Model rank = 299 / 299
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 3.24 0.97 0.20
## s(trial,sbj) 288.00 53.31 0.97 0.22
#edf is not close to k'
compareML(m0, m1, suggest.report = T)
## m0: perf ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: perf ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m1 is [significantly] better than model m0 (X2(1.00)=39.702, p<2e-16).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m0 16713.74 5
## 2 m1 16674.04 6 39.702 1.000 < 2e-16 ***
##
## AIC difference: 4.84, model m1 has lower AIC.
#m1
m2<-bam(perf~1 + cnd + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
gam.check(m2)

##
## Method: fREML Optimizer: perf newton
## full convergence after 9 iterations.
## Gradient range [-7.099975e-05,8.825753e-06]
## (score 16610.36 & scale 1).
## Hessian positive definite, eigenvalue range [7.099011e-05,9.188301].
## Model rank = 308 / 308
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial):cndideo 9.0 1.0 0.97 0.10 .
## s(trial):cndlabel 9.0 3.7 0.97 0.12
## s(trial,sbj) 288.0 54.0 0.97 0.06 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m2,m1, suggest.report = T)
## m2: perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs",
## m = 1)
##
## m1: perf ~ 1 + cnd + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m2 is [significantly?] better than model m1 (X2(2.00)=63.677, p<2e-16).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m1 16674.04 6
## 2 m2 16610.36 8 63.677 2.000 < 2e-16 ***
##
## AIC difference: -19.98, model m2 has lower AIC.
m3<-bam(perf~1 + cnd + s(trial) + s(trial, by=cnd) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m3)

##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-6.635565e-05,5.396031e-06]
## (score 16610.36 & scale 1).
## Hessian positive definite, eigenvalue range [5.894638e-05,9.188281].
## Model rank = 316 / 317
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.0 1.0 0.95 0.015 *
## s(trial):cndideo 9.0 1.0 0.95 0.015 *
## s(trial):cndlabel 9.0 2.7 0.95 0.010 **
## s(trial,sbj) 288.0 54.0 0.95 0.010 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
compareML(m3,m2, suggest.report = T)
## m3: perf ~ 1 + cnd + s(trial) + s(trial, by = cnd) + s(trial, sbj,
## bs = "fs", m = 1)
##
## m2: perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs",
## m = 1)
##
## Report suggestion: The Chi-Square test on the fREML scores indicates that model m3 is [not significantly?] better than model m2 (X2(2.00)=0.001, p>.1).
## -----
## Model Score Edf Difference Df p.value Sig.
## 1 m2 16610.36 8
## 2 m3 16610.36 10 0.001 2.000 0.999
##
## AIC difference: 0.00, model m2 has lower AIC.
## Warning in compareML(m3, m2, suggest.report = T): Only small difference in fREML...
fm<-m2
summary(fm)
##
## Family: binomial
## Link function: logit
##
## Formula:
## perf ~ 1 + cnd + s(trial, by = cnd) + s(trial, sbj, bs = "fs",
## m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 12.4339 0.8751 14.209 < 2e-16 ***
## cndlabel -4.7321 1.7702 -2.673 0.00751 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(trial):cndideo 1.000 1.00 130.0 <2e-16 ***
## s(trial):cndlabel 3.696 4.38 146.4 <2e-16 ***
## s(trial,sbj) 54.019 287.00 299.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.237 Deviance explained = 47.5%
## fREML = 16610 Scale est. = 1 n = 16383
Graphs
par(mfrow=c(1,2), oma = c(0, 0, 2, 0))
color<-c("red" ,"blue", "grey50")
lintyp<-c("solid", "solid", "solid")
pchar=c(15,16,17)
leg<-c("Label Task", "Ideogram Task", "Rule-Discovery Task")
offset=0.1
#draw an emply plot
plot(1, type="n", xaxt="n", yaxt="n", xlim=c(0, 32), ylim=c(0.3, 1.0), main = "Accuracy", xlab="Blocks of 8 trials", ylab="Percent Correct", xaxt ="n", , yaxt="n", las=2, frame.plot="F", cex.main=1, cex.axis=1)
##label
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf_se)), data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_bl$cnd=="label",]$perf_se)),]$perf, pch=pchar[1], uiw=data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_bl$cnd=="label",]$perf_se)),]$perf_se, gap=0, add=TRUE, col=color[1])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf)),data_gr[n_blocks*2+1:sum(!is.na(data_gr[data_gr$cnd=="label",]$perf)),]$perf,col=color[1], lty=lintyp[1])
##ideo
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se))+offset, data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se)),]$perf, pch=pchar[2], uiw=data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf_se)),]$perf_se, gap=0, add = TRUE, col=color[2])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf))+offset,data_gr[n_blocks+1:sum(!is.na(data_gr[data_gr$cnd=="ideo",]$perf)),]$perf,col=color[2], lty=lintyp[2])
##rule-discovery
plotCI(x=1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se))+2*offset, data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se)),]$perf, pch=pchar[3], uiw=data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf_se)),]$perf_se, gap=0, add = TRUE, col=color[3])
lines(1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf))+2*offset,data_gr[1:sum(!is.na(data_gr[data_gr$cnd=="control",]$perf)),]$perf,col=color[3], lty=lintyp[3])
axis(side = 1, at = 1:n_blocks, labels = 1:n_blocks, cex.axis = 0.8)
axis(side = 2, at = 3:10/10, labels = c("0.0", 4:9/10.0, "1.0"), las=2, cex.axis = 0.8)
axis.break(axis=2,breakpos=.35,bgcol="white",breakcol="black", style="slash",brw=0.02)
legend(x=10, y=0.7, legend=leg, pch=pchar, lty=lintyp, bty="n", col=color, seg.len=4.0)
#legend(x=20, y=0.50, legend="chance level", bty ="n" ,cex=0.8)
#savePlot("LC_CAT", type="png",device = dev.cur())
plot_diff(fm, view="trial", comp=list(cnd=c('label', 'ideo')),rm.ranef=c("s(trial,sbj)"),shade=T,se=1.96, xlab="Trial", ylab="Estimated Difference in Accuracy (Log Odds)", lwd=2, main="Difference in Accuracy\n Between Label and Ideogram Assessment Tasks", hide.label=T, cex.main=1, xaxt="n")
## Summary:
## * trial : numeric predictor; with 100 values ranging from 1.000000 to 256.000000.
## * sbj : factor; set to the value(s): aggsem96. (Might be canceled as random effect, check below.)
## * NOTE : The following random effects columns are canceled: s(trial,sbj)
##
##
## trial window(s) of significant difference(s):
## 37.060606 - 209.636364
axis(side = 1, at = c(0, 50, 100, 150, 200, 256), labels = c("0", "50", "100", "150", "200", "256"))
mtext('Assessment Type II Tasks', outer = TRUE, cex = 1.2, font =2)

par(mfrow=c(1,1), oma = c(0, 0, 0, 0))
Session Information
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] parallel stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] plotrix_3.8-2 boot_1.3-28 itsadug_2.4.1 plotfunctions_1.4
## [5] mgcv_1.8-41 nlme_3.1-160 gplots_3.1.3 sciplot_1.2-0
## [9] dplyr_1.0.10
##
## loaded via a namespace (and not attached):
## [1] highr_0.10 pillar_1.8.1 bslib_0.4.2 compiler_4.2.2
## [5] jquerylib_0.1.4 bitops_1.0-7 tools_4.2.2 digest_0.6.31
## [9] jsonlite_1.8.4 evaluate_0.19 lifecycle_1.0.3 tibble_3.1.8
## [13] lattice_0.20-45 pkgconfig_2.0.3 rlang_1.0.6 Matrix_1.5-1
## [17] cli_3.5.0 rstudioapi_0.14 yaml_2.3.6 xfun_0.36
## [21] fastmap_1.1.0 withr_2.5.0 stringr_1.5.0 knitr_1.41
## [25] generics_0.1.3 vctrs_0.5.1 sass_0.4.4 gtools_3.9.4
## [29] caTools_1.18.2 grid_4.2.2 tidyselect_1.2.0 glue_1.6.2
## [33] R6_2.5.1 fansi_1.0.3 rmarkdown_2.19 magrittr_2.0.3
## [37] splines_4.2.2 htmltools_0.5.4 utf8_1.2.2 KernSmooth_2.23-20
## [41] stringi_1.7.8 cachem_1.0.6