Inferential Statistics - ANOVA & gam
#one-way anova
ezANOVA(data=data_av, dv=acc, wid=sbj, between=group, type=3)
## Warning: Data is unbalanced (unequal N per group). Make sure you specified a
## well-considered value for the type argument to ezANOVA().
## Coefficient covariances computed by hccm()
## $ANOVA
## Effect DFn DFd F p p<.05 ges
## 2 group 3 62 2.168351 0.100717 0.09495728
##
## $`Levene's Test for Homogeneity of Variance`
## DFn DFd SSn SSd F p p<.05
## 1 3 62 0.004357066 0.131365 0.6854644 0.5643131
# No effect of group!
# Learning Curves
##################
# calculate average performance per sbj and block
data_bl<-aggregate(data$acc, list(data$sbj, data$group, data$block), mean)
colnames(data_bl) <-c("sbj","group", "block", "acc")
#calculate average accuracy and SE per block
data_gr<-aggregate(data_bl$acc, list(data_bl$group, data_bl$block), mean)
colnames(data_gr)<-c("group","block", "acc")
temp<- aggregate(data_bl$acc, list(data_bl$group, data_bl$block), se)
data_gr$se<-temp$x
rm(temp)
data_gr<-data_gr[order(data_gr$group, data_gr$block),]
#xlb="Block\n Error Bars: +/- 1 SE"
xlb="Block"
ylb="Proportion Correct"
mn="Categorization Accuracy"
off=0.05
pch=c(15,16,17,18)
lty=c(2,3,4,5)
col=c("grey70","grey60","grey50", "grey40" )
#"600"
plotCI(x=1:4, y=data_gr[data_gr$group=="600",]$acc, uiw=data_gr[data_gr$group=="600",]$se, bty="n", ylim=c(0.6,1),xlim=c(1, 4.5), xlab=xlb, ylab=ylb, main=mn, las=1, xaxt="n", yaxt="n", pch=pch[1], col=col[1], gap=0)
lines(x=1:4, y=data_gr[data_gr$group=="600",]$acc, lty=lty[1], col=col[1], lwd=2)
#"1100"
plotCI(x=1:4+off, y=data_gr[data_gr$group=="1100",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[2], col=col[2], gap=0)
lines(x=1:4+off, y=data_gr[data_gr$group=="1100",]$acc, lty=lty[2], col=col[2], lwd=2)
#"1600"
plotCI(x=1:4+2*off, y=data_gr[data_gr$group=="1600",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[3], col=col[3], gap=0)
lines(x=1:4+2*off, y=data_gr[data_gr$group=="1600",]$acc, lty=lty[3], col=col[3], lwd=2)
#"RD"
plotCI(x=1:4+3*off, y=data_gr[data_gr$group=="RD",]$acc, uiw=data_gr[data_gr$group=="600",]$se, add=T, pch=pch[4], col=col[4], gap=0)
lines(x=1:4+3*off, y=data_gr[data_gr$group=="RD",]$acc, lty=lty[4], col=col[4], lwd=2)
axis(side=1, at=c(1,2,3,4))
axis(side=2, at=c(0.6, 0.7, 0.8, 0.9, 1.0),labels=c("0.0", "0.7", "0.8", "0.9", "1.0") , las=2)
axis.break(2, 0.65, style="slash")
legend(x=2.5, y=0.8, legend=c("600 ms", "1100 ms", "1600 ms", "RD"), col=col, lty=lty, pch=pch, bty="n", seg.len=4, lwd=2)

#########################
####gam analysis
#########################
#first create the trial variable
tr<-1:320
data$trial<-tr
#Null model
m0<-bam(acc~1 + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial)
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m0)

##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-7.582071e-07,8.719987e-09]
## (score 29783.12 & scale 1).
## Hessian positive definite, eigenvalue range [2.565925,23.59241].
## Model rank = 604 / 604
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 7.99 0.96 0.86
## s(trial,sbj) 594.00 128.65 0.96 0.86
#edf is not close to k', so we are ok. We do not need to increase k
#add a fixed effect of group
m1<- bam(acc~ 1 + group + s(trial) + s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m1)

##
## Method: fREML Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-8.069979e-07,8.009941e-09]
## (score 29792.38 & scale 1).
## Hessian positive definite, eigenvalue range [2.566158,21.92624].
## Model rank = 607 / 607
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.00 7.99 0.94 0.18
## s(trial,sbj) 594.00 125.42 0.94 0.12
#edf is not close to k', so we are ok. We do not need to increase k
compareML(m0,m1)
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## m1: acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (9.259), and lower df (3.000).
## -----
## Model Score Edf Difference Df
## 1 m1 29792.38 8
## 2 m0 29783.12 5 9.259 -3.000
##
## AIC difference: -0.03, model m0 has lower AIC.
#m0 preferred
#add a term modeling four smooth terms of trial, one for each group level
m2<-bam(acc~ 1 + group + s(trial) + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
## Warning in gam.side(sm, X, tol = .Machine$double.eps^0.5): model has repeated
## 1-d smooths of same variable.
gam.check(m2)

##
## Method: fREML Optimizer: perf newton
## full convergence after 11 iterations.
## Gradient range [-0.0001191143,2.647236e-05]
## (score 29800.47 & scale 1).
## Hessian positive definite, eigenvalue range [0.000113609,21.9125].
## Model rank = 642 / 643
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial) 9.000 7.979 0.94 0.060 .
## s(trial):group1100 9.000 0.381 0.94 0.090 .
## s(trial):group1600 9.000 1.000 0.94 0.080 .
## s(trial):group600 9.000 2.680 0.94 0.070 .
## s(trial):groupRD 9.000 1.000 0.94 0.065 .
## s(trial,sbj) 594.000 123.003 0.94 0.070 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#edf is not close to k', so we are ok. We do not need to increase k
compareML(m2, m0)
## m2: acc ~ 1 + group + s(trial) + s(trial, by = group) + s(trial,
## sbj, bs = "fs", m = 1)
##
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (17.351), and lower df (11.000).
## -----
## Model Score Edf Difference Df
## 1 m2 29800.47 16
## 2 m0 29783.12 5 -17.351 11.000
##
## AIC difference: 5.01, model m0 has lower AIC.
#m0 preferred
m3<-bam(acc~ 1 + group + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
gam.check(m3)

##
## Method: fREML Optimizer: perf newton
## full convergence after 8 iterations.
## Gradient range [-1.038658e-05,1.117415e-07]
## (score 29797.04 & scale 1).
## Hessian positive definite, eigenvalue range [0.8519929,21.88718].
## Model rank = 634 / 634
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial):group1100 9.00 6.09 0.96 0.60
## s(trial):group1600 9.00 4.58 0.96 0.60
## s(trial):group600 9.00 5.37 0.96 0.61
## s(trial):groupRD 9.00 7.45 0.96 0.58
## s(trial,sbj) 594.00 121.95 0.96 0.56
compareML(m3, m0)
## m3: acc ~ 1 + group + s(trial, by = group) + s(trial, sbj, bs = "fs",
## m = 1)
##
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (13.920), and lower df (9.000).
## -----
## Model Score Edf Difference Df
## 1 m3 29797.04 14
## 2 m0 29783.12 5 -13.920 9.000
##
## AIC difference: 20.46, model m0 has lower AIC.
#m0 preferred
m4<-bam(acc~ 1 + s(trial, by=group) +s(trial, sbj, bs="fs", m=1), data=data, family=binomial )
gam.check(m4)

##
## Method: fREML Optimizer: perf newton
## full convergence after 8 iterations.
## Gradient range [-8.50605e-06,1.010756e-07]
## (score 29785.85 & scale 1).
## Hessian positive definite, eigenvalue range [0.848284,23.58432].
## Model rank = 631 / 631
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(trial):group1100 9.00 6.08 0.95 0.34
## s(trial):group1600 9.00 4.56 0.95 0.22
## s(trial):group600 9.00 5.42 0.95 0.28
## s(trial):groupRD 9.00 7.43 0.95 0.27
## s(trial,sbj) 594.00 125.31 0.95 0.26
compareML(m4, m0)
## m4: acc ~ 1 + s(trial, by = group) + s(trial, sbj, bs = "fs", m = 1)
##
## m0: acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Model m0 preferred: lower fREML score (2.734), and lower df (6.000).
## -----
## Model Score Edf Difference Df
## 1 m4 29785.85 11
## 2 m0 29783.12 5 -2.734 6.000
##
## AIC difference: 20.52, model m0 has lower AIC.
## Warning in compareML(m4, m0): Only small difference in fREML...
#m0 preferred
#best-fit model
fm<-m1
summary(fm)
##
## Family: binomial
## Link function: logit
##
## Formula:
## acc ~ 1 + group + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.92062 0.09898 19.405 <2e-16 ***
## group1600 0.02232 0.13587 0.164 0.8695
## group600 -0.25259 0.13316 -1.897 0.0578 .
## groupRD 0.03363 0.13786 0.244 0.8073
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(trial) 7.986 8.714 150.8 <2e-16 ***
## s(trial,sbj) 125.422 590.000 489.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0412 Deviance explained = 4.78%
## fREML = 29792 Scale est. = 1 n = 21120
fm<-m0
summary(fm)
##
## Family: binomial
## Link function: logit
##
## Formula:
## acc ~ 1 + s(trial) + s(trial, sbj, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.86494 0.04847 38.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df Chi.sq p-value
## s(trial) 7.986 8.714 150.6 <2e-16 ***
## s(trial,sbj) 128.649 593.000 547.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0412 Deviance explained = 4.79%
## fREML = 29783 Scale est. = 1 n = 21120