rm(list=ls())
library(itsadug)
library(parallel)
library(dplyr)

########################################
#dat1 for CATegory training
########################################
dat1<-read.table("train_cat.txt", header=T)
dat1 <- mutate_if(dat1, is.character, as.factor); dat1$item<-as.factor(dat1$item)

names(dat1)[names(dat1) == "stim"] <- "shape"
dat1$rt<-ifelse(dat1$rt==-10000,NA, dat1$rt)

#re-order based on trial
dat1<-dat1[order(dat1$sbj,dat1$trial),]

########################################
#dat2 for PA training
########################################
dat2<-read.table("train_pa.txt", header=T)
dat2 <- mutate_if(dat2, is.character, as.factor); dat2$item<-as.factor(dat2$item)

names(dat2)[names(dat2) == "stim"] <- "shape"
dat2$rt<-ifelse(dat2$rt==-10000,NA, dat2$rt)

#re-order based on trial
dat2<-dat2[order(dat2$sbj,dat2$trial),]

#### Combine data
dat<-rbind(dat1, dat2)

#Remove data from trials with RT<250
dat$rt<-ifelse(((dat$rt<250)&(dat$rt>-250)),NA, dat$rt)

#Accordingly, adjust acc variable
dat$acc<-NA
dat$acc<-ifelse(dat$rt>0, 1,0)
###################################
# descriptive statistics
###################################

temp2<-aggregate(acc~sbj+group, data=dat, mean, na.rm=T)
temp2<-temp2[order(temp2$sbj, temp2$group),]

#CAT
mean(temp2[temp2$group=='CAT',]$acc)
## [1] 0.8882482
sd(temp2[temp2$group=='CAT',]$acc)
## [1] 0.0574847
#PA
mean(temp2[temp2$group=='PA',]$acc)
## [1] 0.8826085
sd(temp2[temp2$group=='PA',]$acc)
## [1] 0.0556735
##########################
# Group comparison
##########################
nc <- detectCores()
cl <- makeCluster(nc-1) # change to nc to use all the cores


m0<- bam(acc~ 1 + group + s(trial) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial )
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated 1-
## d smooths of same variable.
m1<- bam(acc~ 1 + group  + s(trial, by=group) + s(trial, sbj, bs="fs", m=1), data=dat, family=binomial )
compareML(m0,m1)
## m0: acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## m1: acc ~ 1 + group + s(trial, by = group) + s(trial, sbj, bs = "fs", 
##     m = 1)
## 
## Model m0 preferred: lower fREML score (9.640), and lower df (2.000).
## -----
##   Model    Score Edf Difference     Df
## 1    m1 18013.85   8                  
## 2    m0 18004.21   6      9.640 -2.000
## 
## AIC difference: -2.76, model m0 has lower AIC.
#m0 preferred 
summary(m0)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.4438     0.1897  18.156   <2e-16 ***
## groupPA      -0.1559     0.2444  -0.638    0.523    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                  edf  Ref.df Chi.sq p-value    
## s(trial)       5.063   5.958  341.3  <2e-16 ***
## s(trial,sbj) 148.906 430.000  753.2  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.372   Deviance explained = 41.9%
## fREML =  18004  Scale est. = 1         n = 13812