Load data

Whole dataset [S2,4]

For simplicity lets see the best model we can get by modelling rating as a normally distributed interval scale outcome variable

# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline)
# here by introducing cueing as a random slope, we formally allow cueing to have differing effects in each participants
r3 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)  , data=arousal.extended.wbaseline)
## boundary (singular) fit: see help('isSingular')
# here we allow item and participants specific cueing effects...
r4 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant)  + (1 + cueing | item) , data=arousal.extended.wbaseline)
## boundary (singular) fit: see help('isSingular')
# based on this LRT test results below we can argue that the 3rd random effect structure is justified that allows random intercept for PARTICIPANT (trivial for within-subject design) and ITEM. Additionally we allow a random slope for the experimental manipulation CUEING meaning that expect that there will be individual differences in how big the cueing effect will be for each sub.

anova(r1,r2,r3,r4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## r3: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)
## r4: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 + cueing | item)
##    npar    AIC    BIC  logLik deviance    Chisq Df Pr(>Chisq)    
## r1    3 4619.3 4635.3 -2306.7   4613.3                           
## r2    4 4205.5 4226.8 -2098.7   4197.5 415.8723  1     <2e-16 ***
## r3    6 4203.1 4235.1 -2095.6   4191.1   6.3495  2     0.0418 *  
## r4    8 4205.6 4248.3 -2094.8   4189.6   1.4613  2     0.4816    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no need to worry about the R2 missing for the winning model, once we add in the fixed effect terms it will be present
tab_model(r1,r2,r3,r4)
  as.numeric(rating) as.numeric(rating) as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 2.97 2.67 – 3.26 <0.001 2.96 2.62 – 3.31 <0.001 2.98 2.68 – 3.29 <0.001 2.98 2.66 – 3.30 <0.001
Random Effects
σ2 1.17 0.81 0.81 0.81
τ00 0.38 participant 0.36 item 0.35 item 0.33 item
  0.39 participant 0.46 participant 0.46 participant
τ11     0.01 participant.cueingUncued 0.02 item.cueingUncued
      0.01 participant.cueingUncued
ρ01     -1.00 participant 0.36 item
      -1.00 participant
ICC 0.25 0.48    
N 17 participant 17 participant 17 participant 17 participant
  48 item 48 item 48 item
Observations 1523 1523 1523 1523
Marginal R2 / Conditional R2 0.000 / 0.246 0.000 / 0.477 0.000 / NA 0.000 / NA
# lets find the maximal random effect structure
m1 <- lmer(rating ~ baseline + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
m2 <- lmer(rating ~ baseline + session + (1 + cueing | participant) + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
m3 <- lmer(rating ~ cueing*baseline + session + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
m4 <- lmer(rating ~ cueing*baseline + baseline*session + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
# again we see the 3rd model seems to be explain the most variance out of the ones we constructed

anova(m1,m2,m3,m4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating),  ...
## Models:
## m1: rating ~ baseline + (1 + cueing | participant) + (1 | item)
## m2: rating ~ baseline + session + (1 + cueing | participant) + (1 | item)
## m3: rating ~ cueing * baseline + session + (1 + cueing | participant) + (1 | item)
## m4: rating ~ cueing * baseline + baseline * session + (1 + cueing | participant) + (1 | item)
##    npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## m1    7 3917.6 3954.8 -1951.8   3903.6                          
## m2    8 3881.7 3924.3 -1932.9   3865.7 37.8394  1  7.681e-10 ***
## m3   10 3877.0 3930.2 -1928.5   3857.0  8.7258  2    0.01274 *  
## m4   11 3877.6 3936.2 -1927.8   3855.6  1.3628  1    0.24306    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(m1,m2,m3)
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 1.78 1.52 – 2.04 <0.001 2.18 1.89 – 2.46 <0.001 2.31 1.94 – 2.69 <0.001
baseline 0.39 0.34 – 0.43 <0.001 0.39 0.34 – 0.43 <0.001 0.33 0.27 – 0.39 <0.001
session -0.13 -0.18 – -0.09 <0.001 -0.13 -0.18 – -0.09 <0.001
cueing [Uncued] -0.32 -0.58 – -0.07 0.012
cueing [Uncued] ×
baseline
0.11 0.04 – 0.18 0.003
Random Effects
σ2 0.70 0.68 0.67
τ00 0.14 item 0.14 item 0.14 item
0.30 participant 0.31 participant 0.34 participant
τ11 0.02 participant.cueingUncued 0.02 participant.cueingUncued 0.03 participant.cueingUncued
ρ01 -1.00 participant -1.00 participant -1.00 participant
ICC 0.39   0.37
N 17 participant 17 participant 17 participant
48 item 48 item 48 item
Observations 1517 1517 1517
Marginal R2 / Conditional R2 0.163 / 0.487 0.259 / NA 0.183 / 0.484

What is up with baseline and cueing interacting?

It seems that depending in peoples initial rating cueing seems to induce a kind of “regression toward the mean” pattern. Without this interaction term cueing shows no overall effect wrt. rating

plot_model(m3, type = "pred", terms = c("cueing","baseline"))

library(jtools)
plot_model(m3, type = "pred", terms = c("cueing","baseline[4,5]")) + ylim(1,5) + scale_x_discrete(limits = c("Cued","Uncued")) +coord_fixed(0.8) 
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

library(ggeffects)

tmp.pred <- ggpredict(m3, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.68)
#tmp.pred <- ggpredict(m3, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.95)

tmp.dat <- arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% filter(baseline>=4) %>% group_by(cueing, baseline) %>%
summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot()  + geom_point(data = tmp.pred, aes(x = group, y = predicted, color=x),position = position_dodge(width = 0.3), size=4) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high,color=x),position= position_dodge(width = 0.3), width=0.2, size=2) + ylim(0,5)  + scale_color_brewer(palette="Dark2") +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot()  + geom_bar(data = tmp.pred, aes(x = group, y = predicted, fill=x), stat = "identity", position = position_dodge()) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high, group=x),position= position_dodge(width = 0.85), width=0.1) + ylim(0,5) +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot() + 
    geom_bar(data=tmp.dat %>% rename(predicted=mean,x=baseline,group=cueing), aes(x=x, y=predicted,color=group),fill="grey",stat="identity", position=position_dodge()) + geom_point(data = tmp.pred, aes(x = group, y = predicted, color=x),position = position_dodge(width = 0.9)) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high,color=x),position= position_dodge(width = 0.9), width=0.1) + ylim(0,5) +coord_fixed(0.8) + scale_color_brewer(palette="Dark2") +  xlab("Baseline rating") + ylab("Rating") + labs(color='Condition') + theme_apa(legend.pos="topleft",  x.font.size = 20, y.font.size = 20) + theme(axis.title = element_text(size = 40), axis.text = element_text(size = 20)) +  coord_cartesian(ylim = c(2.5, 4.5)) 
## Coordinate system already present. Adding new coordinate system, which will
## replace the existing one.

tmp.pred <- ggpredict(m3, terms = c("cueing","baseline"))
tmp.dat <- arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% filter(!is.na(baseline)) %>% group_by(cueing, baseline) %>%
summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot() + 
    geom_bar(data=tmp.dat %>% rename(predicted=mean,x=cueing,group=baseline), aes(x=x, y=predicted,color=group),fill="grey",stat="identity", position=position_dodge()) + geom_point(data = tmp.pred, aes(x = x, y = predicted, color=group),position = position_dodge(width = 0.9)) + geom_errorbar(data = tmp.pred,aes(x = x, y = predicted,ymin = conf.low, ymax = conf.high,color=group),position= position_dodge(width = 0.9), width=0.1) + ylim(0,5) + scale_x_discrete(limits = c("Cued","Uncued")) +coord_fixed(0.8) + scale_color_brewer(palette="Dark2") +  xlab("Cueing") + ylab("Predicted rating") + labs(color='Baseline rating')

#with Standard Error
plot_model(m3, type = "est", SE=TRUE)

#with Confidence Interval
plot_model(m3, type = "est", ci.lvl=0.95)

What is a singular fit exactly?

The model that is able to show the strongest cueing effect has the unfortunate issue that it exhibits a so called “singular fit”.

The official definitions is: “While singular models are statistically well defined (it is theoretically sensible for the true maximum likelihood estimate to correspond to a singular fit), there are real concerns that (1) singular fits correspond to overfitted models that may have poor power; (2) chances of numerical problems and mis-convergence are higher for singular models (e.g. it may be computationally difficult to compute profile confidence intervals for such models); (3) standard inferential procedures such as Wald statistics and likelihood ratio tests may be inappropriate.” (lme4 Reference Manual)

Do we really need the random slope term in the random effects structure

The general wisdom is that when 1) the design warrants it (in this case it does) 2) when the model converges the best practice is to ADD random slope into the model!

# However in our case unfortunately in the Random Effects structre we see a -1 for the *ρ01 participant* term. This is not ideal, its a sign of a singular fit and you usually get it when you have too few cells in regards to the model you are trying... basically its a sign of overfitting/unstable fit. I have yet to find literature that says that we are forbidden in interpreting it..its just that there are caveats... 
# given that we used a Likelihood ratio test earlier (>> anova(r1,r2,r3,r4) to show the random slope is warranted we have grounds to argue that its worth sticking with this model even if its singular (for support see  (Bates et al 2015, Matuschek et al 2017))

tab_model(m3)
  rating
Predictors Estimates CI p
(Intercept) 2.31 1.94 – 2.69 <0.001
cueing [Uncued] -0.32 -0.58 – -0.07 0.012
baseline 0.33 0.27 – 0.39 <0.001
session -0.13 -0.18 – -0.09 <0.001
cueing [Uncued] ×
baseline
0.11 0.04 – 0.18 0.003
Random Effects
σ2 0.67
τ00 item 0.14
τ00 participant 0.34
τ11 participant.cueingUncued 0.03
ρ01 participant -1.00
ICC 0.37
N participant 17
N item 48
Observations 1517
Marginal R2 / Conditional R2 0.183 / 0.484
# if we don't want to get dirty with singular models the best we had is the model Viviana showed during her presentation assuming

m3.2 <- lmer(rating ~ cueing*baseline + session + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

tab_model(m3,m3.2)
  rating rating
Predictors Estimates CI p Estimates CI p
(Intercept) 2.31 1.94 – 2.69 <0.001 2.27 1.93 – 2.61 <0.001
cueing [Uncued] -0.32 -0.58 – -0.07 0.012 -0.23 -0.47 – 0.01 0.060
baseline 0.33 0.27 – 0.39 <0.001 0.34 0.29 – 0.40 <0.001
session -0.13 -0.18 – -0.09 <0.001 -0.13 -0.18 – -0.09 <0.001
cueing [Uncued] ×
baseline
0.11 0.04 – 0.18 0.003 0.08 0.01 – 0.15 0.032
Random Effects
σ2 0.67 0.68
τ00 0.14 item 0.14 item
0.34 participant 0.25 participant
τ11 0.03 participant.cueingUncued  
ρ01 -1.00 participant  
ICC 0.37 0.36
N 17 participant 17 participant
48 item 48 item
Observations 1517 1517
Marginal R2 / Conditional R2 0.183 / 0.484 0.181 / 0.478
# to better understand what the *ρ01 participant = -1* means lets plot it

# based on the plot .. looking at the 2nd column called "participant (Intercept)" we can see that some people some participants gave higher than average [BLUE] (meaning grand average), while others gave lower than average ratings [RED]. The values on in the 1st column quantify how big the cueing effect (difference between mean Cued-Uncued ratings) is in each participant. RED here means that cueing has overall reduced ratings, BLUE means that its has overall increased.

# its normal that these two things would be highly correlated, the problem is that it shouldn't be this high > it shouldn't be -1 or 1
plot_model(m3, type = "re", sort.est = 2)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.5.3
## Current Matrix version is 1.5.1
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## [[1]]

## 
## [[2]]

Assuming we are happy with this model, how would we report is?

For some reason the text is repeated, but the relevant part starts and ends with “We fitted a linear mixed model”

report(m3)
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
## to predict rating with cueing (formula: rating ~ cueing * baseline + session).
## The model included cueing as random effects (formula: list(~1 + cueing |
## participant, ~1 | item)). The model's total explanatory power is substantial
## (conditional R2 = 0.48) and the part related to the fixed effects alone
## (marginal R2) is of 0.18. The model's intercept, corresponding to cueing =
## Cued, is at 2.31 (95% CI [1.94, 2.69], t(1507) = 12.26, p < .001). Within this
## model:
## 
##   - The effect of cueing [Uncued] is statistically significant and negative (beta
## = -0.32, 95% CI [-0.58, -0.07], t(1507) = -2.53, p = 0.012; Std. beta = 0.01,
## 95% CI [-0.08, 0.11])
##   - The effect of baseline is statistically significant and positive (beta =
## 0.33, 95% CI [0.27, 0.39], t(1507) = 11.50, p < .001; Std. beta = 0.33, 95% CI
## [0.27, 0.38])
##   - The effect of session is statistically significant and negative (beta =
## -0.13, 95% CI [-0.18, -0.09], t(1507) = -6.20, p < .001; Std. beta = -0.11, 95%
## CI [-0.14, -0.07])
##   - The effect of cueing [Uncued] × baseline is statistically significant and
## positive (beta = 0.11, 95% CI [0.04, 0.18], t(1507) = 3.02, p = 0.003; Std.
## beta = 0.11, 95% CI [0.04, 0.18])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation., We fitted a linear mixed
## model (estimated using REML and nloptwrap optimizer) to predict rating with
## baseline (formula: rating ~ cueing * baseline + session). The model included
## cueing as random effects (formula: list(~1 + cueing | participant, ~1 | item)).
## The model's total explanatory power is substantial (conditional R2 = 0.48) and
## the part related to the fixed effects alone (marginal R2) is of 0.18. The
## model's intercept, corresponding to baseline = 0, is at 2.31 (95% CI [1.94,
## 2.69], t(1507) = 12.26, p < .001). Within this model:
## 
##   - The effect of cueing [Uncued] is statistically significant and negative (beta
## = -0.32, 95% CI [-0.58, -0.07], t(1507) = -2.53, p = 0.012; Std. beta = 0.01,
## 95% CI [-0.08, 0.11])
##   - The effect of baseline is statistically significant and positive (beta =
## 0.33, 95% CI [0.27, 0.39], t(1507) = 11.50, p < .001; Std. beta = 0.33, 95% CI
## [0.27, 0.38])
##   - The effect of session is statistically significant and negative (beta =
## -0.13, 95% CI [-0.18, -0.09], t(1507) = -6.20, p < .001; Std. beta = -0.11, 95%
## CI [-0.14, -0.07])
##   - The effect of cueing [Uncued] × baseline is statistically significant and
## positive (beta = 0.11, 95% CI [0.04, 0.18], t(1507) = 3.02, p = 0.003; Std.
## beta = 0.11, 95% CI [0.04, 0.18])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation. and We fitted a linear
## mixed model (estimated using REML and nloptwrap optimizer) to predict rating
## with session (formula: rating ~ cueing * baseline + session). The model
## included cueing as random effects (formula: list(~1 + cueing | participant, ~1
## | item)). The model's total explanatory power is substantial (conditional R2 =
## 0.48) and the part related to the fixed effects alone (marginal R2) is of 0.18.
## The model's intercept, corresponding to session = 0, is at 2.31 (95% CI [1.94,
## 2.69], t(1507) = 12.26, p < .001). Within this model:
## 
##   - The effect of cueing [Uncued] is statistically significant and negative (beta
## = -0.32, 95% CI [-0.58, -0.07], t(1507) = -2.53, p = 0.012; Std. beta = 0.01,
## 95% CI [-0.08, 0.11])
##   - The effect of baseline is statistically significant and positive (beta =
## 0.33, 95% CI [0.27, 0.39], t(1507) = 11.50, p < .001; Std. beta = 0.33, 95% CI
## [0.27, 0.38])
##   - The effect of session is statistically significant and negative (beta =
## -0.13, 95% CI [-0.18, -0.09], t(1507) = -6.20, p < .001; Std. beta = -0.11, 95%
## CI [-0.14, -0.07])
##   - The effect of cueing [Uncued] × baseline is statistically significant and
## positive (beta = 0.11, 95% CI [0.04, 0.18], t(1507) = 3.02, p = 0.003; Std.
## beta = 0.11, 95% CI [0.04, 0.18])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.

Filtered dataset [baseline>=4]

Result: No singular fit (yay), but cueing not significant at <0.05:( p=0.078 in [m3]

arousal.extended.wbaseline.high <- arousal.extended.wbaseline %>% filter(baseline >= 4)

arousal.extended.wbaseline.high %>% group_by(participant) %>% count() -> tmp
hist(tmp$n)

# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline.high)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline.high)
# here by introducing cueing as a random slope, we formally allow cueing to have differing effects in each participants
r3 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)  , data=arousal.extended.wbaseline.high)
# here we allow item and participants specific cueing effects...
r4 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant)  + (1 + cueing | item) , data=arousal.extended.wbaseline.high)

# based on this LRT test results below we can argue that the 3rd random effect structure is justified that allows random intercept for PARTICIPANT (trivial for within-subject design) and ITEM. Additionally we allow a random slope for the experimental manipulation CUEING meaning that expect that there will be individual differences in how big the cueing effect will be for each sub.

anova(r1,r2,r3,r4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## r3: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)
## r4: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 + cueing | item)
##    npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## r1    3 1671.4 1684.6 -832.70   1665.4                          
## r2    4 1610.8 1628.5 -801.40   1602.8 62.5902  1  2.545e-15 ***
## r3    6 1602.9 1629.4 -795.44   1590.9 11.9257  2   0.002573 ** 
## r4    8 1602.7 1638.0 -793.34   1586.7  4.2048  2   0.122165    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no need to worry about the R2 missing for the winning model, once we add in the fixed effect terms it will be present
tab_model(r1,r2,r3,r4)
  as.numeric(rating) as.numeric(rating) as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.64 3.35 – 3.93 <0.001 3.49 3.18 – 3.81 <0.001 3.58 3.30 – 3.86 <0.001 3.58 3.30 – 3.87 <0.001
Random Effects
σ2 0.83 0.68 0.65 0.62
τ00 0.34 participant 0.18 item 0.18 item 0.27 item
  0.34 participant 0.51 participant 0.49 participant
τ11     0.12 participant.cueingUncued 0.15 item.cueingUncued
      0.09 participant.cueingUncued
ρ01     -0.78 participant -0.60 item
      -0.79 participant
ICC 0.29 0.44 0.52 0.55
N 17 participant 17 participant 17 participant 17 participant
  48 item 48 item 48 item
Observations 610 610 610 610
Marginal R2 / Conditional R2 0.000 / 0.291 0.000 / 0.436 0.000 / 0.516 0.000 / 0.549
# lets find the maximal random effect structure
m1 <- lmer(rating ~ 1 + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m2 <- lmer(rating ~ baseline + session + (1 + cueing | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m3 <- lmer(rating ~ cueing + baseline + session + (1| participant)  + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m4 <- lmer(rating ~ cueing * baseline + session + (1+ cueing| participant)  + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m5 <- lmer(rating ~ cueing*baseline + baseline*session + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))


# again we see the 3rd model seems to be explain the most variance out of the ones we constructed

anova(m1,m2,m3,m4,m5)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating),  ...
## Models:
## m1: rating ~ 1 + (1 + cueing | participant) + (1 | item)
## m3: rating ~ cueing + baseline + session + (1 | participant) + (1 | item)
## m2: rating ~ baseline + session + (1 + cueing | participant) + (1 | item)
## m4: rating ~ cueing * baseline + session + (1 + cueing | participant) + (1 | item)
## m5: rating ~ cueing * baseline + baseline * session + (1 + cueing | participant) + (1 | item)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## m1    6 1602.9 1629.4 -795.44   1590.9                         
## m3    7 1555.5 1586.3 -770.73   1541.5 49.432  1  2.054e-12 ***
## m2    8 1544.5 1579.8 -764.25   1528.5 12.958  1  0.0003186 ***
## m4   10 1545.4 1589.5 -762.69   1525.4  3.119  2  0.2102428    
## m5   11 1546.6 1595.1 -762.29   1524.6  0.794  1  0.3728925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(m1,m2,m3,m4,m5)
  rating rating rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.58 3.30 – 3.86 <0.001 1.89 1.17 – 2.62 <0.001 1.81 1.06 – 2.56 <0.001 1.38 0.39 – 2.37 0.006 0.65 -1.25 – 2.55 0.503
baseline 0.50 0.35 – 0.65 <0.001 0.49 0.33 – 0.64 <0.001 0.58 0.37 – 0.79 <0.001 0.75 0.32 – 1.17 0.001
session -0.16 -0.22 – -0.09 <0.001 -0.16 -0.22 – -0.09 <0.001 -0.16 -0.22 – -0.09 <0.001 0.10 -0.46 – 0.66 0.734
cueing [Uncued] 0.12 -0.01 – 0.25 0.078 0.83 -0.44 – 2.09 0.202 0.82 -0.44 – 2.09 0.202
cueing [Uncued] ×
baseline
-0.15 -0.44 – 0.13 0.293 -0.15 -0.44 – 0.13 0.294
baseline × session -0.06 -0.19 – 0.07 0.375
Random Effects
σ2 0.65 0.59 0.62 0.59 0.59
τ00 0.18 item 0.14 item 0.14 item 0.14 item 0.14 item
0.51 participant 0.50 participant 0.32 participant 0.49 participant 0.49 participant
τ11 0.12 participant.cueingUncued 0.14 participant.cueingUncued   0.12 participant.cueingUncued 0.12 participant.cueingUncued
ρ01 -0.78 participant -0.79 participant   -0.80 participant -0.80 participant
ICC 0.52 0.52 0.43 0.45 0.45
N 17 participant 17 participant 17 participant 17 participant 17 participant
48 item 48 item 48 item 48 item 48 item
Observations 610 610 610 610 610
Marginal R2 / Conditional R2 0.000 / 0.516 0.063 / 0.548 0.071 / 0.469 0.076 / 0.494 0.077 / 0.494

Plotting chosen model

plot_model(m3, type = "pred", terms = c("cueing","baseline"))

plot_model(m3, type = "pred", terms = c("cueing"))

Filtered dataset [S2 only]

Cueing not sig at p=0.32

Summary:

arousal.extended.wbaseline.s2 <- arousal.extended.wbaseline %>% filter(session == 2)

arousal.extended.wbaseline.s2 %>% group_by(participant) %>% count() -> tmp
# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline.s2)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline.s2)
# here by introducing cueing as a random slope, we formally allow cueing to have differing effects in each participants
r3 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)  , data=arousal.extended.wbaseline.s2)
## boundary (singular) fit: see help('isSingular')
# here we allow item and participants specific cueing effects...
r4 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant)  + (1 + cueing | item) , data=arousal.extended.wbaseline.s2)
## boundary (singular) fit: see help('isSingular')
# based on this LRT test results below we can argue that the 3rd random effect structure is justified that allows random intercept for PARTICIPANT (trivial for within-subject design) and ITEM. Additionally we allow a random slope for the experimental manipulation CUEING meaning that expect that there will be individual differences in how big the cueing effect will be for each sub.

anova(r1,r2,r3,r4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.s2
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## r3: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)
## r4: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 + cueing | item)
##    npar    AIC    BIC  logLik deviance    Chisq Df Pr(>Chisq)    
## r1    3 2464.3 2478.4 -1229.2   2458.3                           
## r2    4 2260.2 2279.0 -1126.1   2252.2 206.1712  1     <2e-16 ***
## r3    6 2262.6 2290.8 -1125.3   2250.6   1.5409  2     0.4628    
## r4    8 2266.5 2304.1 -1125.2   2250.5   0.1700  2     0.9185    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no need to worry about the R2 missing for the winning model, once we add in the fixed effect terms it will be present
tab_model(r1,r2,r3,r4)
  as.numeric(rating) as.numeric(rating) as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.08 2.78 – 3.39 <0.001 3.08 2.73 – 3.43 <0.001 3.11 2.77 – 3.45 <0.001 3.11 2.76 – 3.45 <0.001
Random Effects
σ2 1.14 0.77 0.77 0.77
τ00 0.39 participant 0.37 item 0.37 item 0.35 item
  0.40 participant 0.45 participant 0.45 participant
τ11     0.01 participant.cueingUncued 0.00 item.cueingUncued
      0.01 participant.cueingUncued
ρ01     -1.00 participant 1.00 item
      -1.00 participant
ICC 0.25 0.50    
N 17 participant 17 participant 17 participant 17 participant
  48 item 48 item 48 item
Observations 812 812 812 812
Marginal R2 / Conditional R2 0.000 / 0.255 0.000 / 0.497 0.000 / NA 0.000 / NA
# lets find the maximal random effect structure
m1 <- lmer(rating ~ 1 + (1  | participant)  + (1 | item) , data=arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m2 <- lmer(rating ~ baseline  + (1  | participant) + (1 | item) , data=arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m3 <- lmer(rating ~ cueing + baseline  + (1| participant)  + (1 | item) , data=arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m4 <- lmer(rating ~ cueing * baseline  + (1| participant)  + (1 | item) , data=arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m4.cr <- lmer(rating ~ cueing * baseline  + (1+cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
anova(m2,m3,m4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.s2 %>% mutate(rating = as.numeric(rating),  ...
## Models:
## m2: rating ~ baseline + (1 | participant) + (1 | item)
## m3: rating ~ cueing + baseline + (1 | participant) + (1 | item)
## m4: rating ~ cueing * baseline + (1 | participant) + (1 | item)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
## m2    5 2059.6 2083.1 -1024.8   2049.6                     
## m3    6 2061.2 2089.4 -1024.6   2049.2 0.3695  1     0.5433
## m4    7 2061.6 2094.4 -1023.8   2047.6 1.6348  1     0.2010
tab_model(m2,m3,m4)
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 1.74 1.42 – 2.05 <0.001 1.72 1.40 – 2.04 <0.001 1.81 1.46 – 2.16 <0.001
baseline 0.43 0.38 – 0.49 <0.001 0.43 0.38 – 0.49 <0.001 0.40 0.33 – 0.48 <0.001
cueing [Uncued] 0.03 -0.08 – 0.15 0.544 -0.16 -0.47 – 0.16 0.329
cueing [Uncued] ×
baseline
0.06 -0.03 – 0.15 0.201
Random Effects
σ2 0.64 0.64 0.64
τ00 0.13 item 0.13 item 0.13 item
0.24 participant 0.24 participant 0.24 participant
ICC 0.36 0.36 0.37
N 17 participant 17 participant 17 participant
48 item 48 item 48 item
Observations 809 809 809
Marginal R2 / Conditional R2 0.216 / 0.502 0.216 / 0.502 0.216 / 0.503
tmp.pred <- ggpredict(m4.cr, terms = c("cueing","baseline[4,5]"))
tmp.dat <- arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% filter(baseline>=4) %>% group_by(cueing, baseline) %>%
summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot()  + geom_point(data = tmp.pred, aes(x = group, y = predicted, color=x),position = position_dodge(width = 0.3)) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high,color=x),position= position_dodge(width = 0.3), width=0.1) + ylim(0,5)  + scale_color_brewer(palette="Dark2") +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Filtered dataset [S4 only]

Result: model4 looks interesting here, cueing p=0.029 and beta is -0.42 (vs -0.32 in the model encompassing the whole data) BUT - we still have the singular fit and conditionalR2 couldn’t be estimated - when we do anova(m2,m3,m4) m4 for doesn’t quite win over m3 p=0.057 - it seems that we are missing 2 participants from the S4 dataset? weird

arousal.extended.wbaseline.s4 <- arousal.extended.wbaseline %>% filter(session == 4)

arousal.extended.wbaseline.s4 %>% group_by(participant) %>% count() -> tmp
# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline.s4)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline.s4)
# here by introducing cueing as a random slope, we formally allow cueing to have differing effects in each participants
r3 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)  , data=arousal.extended.wbaseline.s4)
# here we allow item and participants specific cueing effects...
r4 <- lmer(as.numeric(rating) ~ 1 + (1 + cueing | participant)  + (1 + cueing | item) , data=arousal.extended.wbaseline.s4)
## boundary (singular) fit: see help('isSingular')
# based on this LRT test results below we can argue that the 3rd random effect structure is justified that allows random intercept for PARTICIPANT (trivial for within-subject design) and ITEM. Additionally we allow a random slope for the experimental manipulation CUEING meaning that expect that there will be individual differences in how big the cueing effect will be for each sub.

anova(r1,r2,r3,r4)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.s4
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## r3: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 | item)
## r4: as.numeric(rating) ~ 1 + (1 + cueing | participant) + (1 + cueing | item)
##    npar    AIC    BIC   logLik deviance    Chisq Df Pr(>Chisq)    
## r1    3 2154.9 2168.6 -1074.45   2148.9                           
## r2    4 2013.6 2031.9 -1002.81   2005.6 143.2759  1    < 2e-16 ***
## r3    6 2011.0 2038.4  -999.50   1999.0   6.6177  2    0.03656 *  
## r4    8 2014.2 2050.8  -999.12   1998.2   0.7586  2    0.68435    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no need to worry about the R2 missing for the winning model, once we add in the fixed effect terms it will be present
tab_model(r1,r2,r3,r4)
  as.numeric(rating) as.numeric(rating) as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 2.84 2.52 – 3.16 <0.001 2.84 2.48 – 3.19 <0.001 2.83 2.52 – 3.14 <0.001 2.83 2.51 – 3.15 <0.001
Random Effects
σ2 1.14 0.81 0.80 0.80
τ00 0.37 participant 0.32 item 0.32 item 0.29 item
  0.38 participant 0.48 participant 0.49 participant
τ11     0.03 participant.cueingUncued 0.00 item.cueingUncued
      0.03 participant.cueingUncued
ρ01     -1.00 participant 1.00 item
      -1.00 participant
ICC 0.24 0.46 0.50  
N 15 participant 15 participant 15 participant 15 participant
  48 item 48 item 48 item
Observations 711 711 711 711
Marginal R2 / Conditional R2 0.000 / 0.245 0.000 / 0.463 0.000 / 0.501 0.000 / NA
# lets find the maximal random effect structure
m1 <- lmer(rating ~ 1 + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m2 <- lmer(rating ~ baseline  + (1 + cueing | participant) + (1 | item) , data=arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
m3 <- lmer(rating ~ cueing + baseline  + (1| participant)  + (1 | item) , data=arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

m4 <- lmer(rating ~ cueing * baseline  + (1+ cueing| participant)  + (1 | item) , data=arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
m4.s <- lmer(rating ~ cueing * baseline  + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))


anova(m2,m3,m4,m4.s)
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.s4 %>% mutate(rating = as.numeric(rating),  ...
## Models:
## m3: rating ~ cueing + baseline + (1 | participant) + (1 | item)
## m2: rating ~ baseline + (1 + cueing | participant) + (1 | item)
## m4.s: rating ~ cueing * baseline + (1 | participant) + (1 | item)
## m4: rating ~ cueing * baseline + (1 + cueing | participant) + (1 | item)
##      npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)   
## m3      6 1883.6 1911.0 -935.80   1871.6                         
## m2      7 1876.0 1908.0 -931.01   1862.0  9.5694  1   0.001978 **
## m4.s    7 1882.9 1914.8 -934.44   1868.9  0.0000  0              
## m4      9 1874.3 1915.4 -928.15   1856.3 12.5682  2   0.001866 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(m2,m3,m4,m4.s)
  rating rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p Estimates CI p
(Intercept) 1.62 1.32 – 1.92 <0.001 1.64 1.29 – 1.99 <0.001 1.84 1.41 – 2.27 <0.001 1.78 1.40 – 2.16 <0.001
baseline 0.39 0.33 – 0.45 <0.001 0.39 0.32 – 0.45 <0.001 0.32 0.24 – 0.41 <0.001 0.34 0.26 – 0.42 <0.001
cueing [Uncued] -0.00 -0.13 – 0.12 0.947 -0.42 -0.80 – -0.04 0.029 -0.28 -0.64 – 0.07 0.118
cueing [Uncued] ×
baseline
0.13 0.03 – 0.24 0.014 0.09 -0.02 – 0.20 0.100
Random Effects
σ2 0.71 0.72 0.70 0.71
τ00 0.12 item 0.12 item 0.12 item 0.12 item
0.35 participant 0.25 participant 0.38 participant 0.25 participant
τ11 0.04 participant.cueingUncued   0.06 participant.cueingUncued  
ρ01 -1.00 participant   -1.00 participant  
ICC   0.34   0.34
N 15 participant 15 participant 15 participant 15 participant
48 item 48 item 48 item 48 item
Observations 708 708 708 708
Marginal R2 / Conditional R2 0.242 / NA 0.169 / 0.449 0.244 / NA 0.169 / 0.451
plot_model(m4, type = "pred", terms = c("cueing","baseline"))

plot_model(m4, type = "pred", terms = c("cueing","baseline[4,5]"))

tmp.pred <- ggpredict(m4, terms = c("cueing","baseline[4,5]"))
tmp.dat <- arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% filter(baseline>=4) %>% group_by(cueing, baseline) %>%
summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot()  + geom_point(data = tmp.pred, aes(x = group, y = predicted, color=x),position = position_dodge(width = 0.3)) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high,color=x),position= position_dodge(width = 0.3), width=0.1) + ylim(0,5)  + scale_color_brewer(palette="Dark2") +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot()  + geom_point(data = tmp.pred, aes(x = group, y = predicted, color=x),position = position_dodge(width = 0.3), size=4) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high,color=x),position= position_dodge(width = 0.3), width=0.2, size=2) + ylim(0,5)  + scale_color_brewer(palette="Dark2") +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

ggplot()  + geom_bar(data = tmp.pred, aes(x = group, y = predicted, fill=x), stat = "identity", position = position_dodge()) + geom_errorbar(data = tmp.pred,aes(x = group, y = predicted,ymin = conf.low, ymax = conf.high, group=x),position= position_dodge(width = 0.85), width=0.1) + ylim(0,5) +  xlab("Baseline rating") + ylab("Predicted rating") + labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), legend.title=element_blank(),axis.line = element_line(colour = "black"),panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank(), legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), legend.position = c(0.2,0.85)) + scale_color_manual(values=c(hue_pal()(2))) + scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Lets try Bayesian

library(report)

library(blme)

bm3 <- blmer(rating ~ cueing*baseline + session + (1 + cueing | participant)  + (1 | item), arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating)))

library(rstanarm)

options(mc.cores = parallel::detectCores())
bm3_stanlmer <- stan_lmer(formula = rating ~ cueing*baseline + session + (1 + cueing | participant)  + (1 | item), 
                         data = arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline =  as.numeric(baseline)),
                         seed = 349)

bayes_rep <- report(bm3_stanlmer)

Checking datapoints

arousal.extended.wbaseline %>% group_by(participant,session) %>% count() %>% pivot_wider(names_from = session, values_from = n)

arousal.extended.wbaseline %>%
  group_by(participant,session) %>%
  summarise(baseline_notna = sum(!is.na(baseline))) %>% pivot_wider(names_from = session, values_from = baseline_notna)

arousal.extended.wbaseline %>%
  group_by(participant,session) %>%
  summarise(rating_notna = sum(!is.na(rating))) %>% pivot_wider(names_from = session, values_from =rating_notna)

arousal.extended.wbaseline %>%
  group_by(participant) %>%
  summarise(rating_notna = sum(!is.na(rating))) %>% filter(rating_notna==96) -> subs.fulldata

Effect size calculations

effect size of overall change due to cueing

m3 <- lmer(rating ~ cueing*baseline + session + (1 + cueing | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
## boundary (singular) fit: see help('isSingular')
# Options for Cohen D http://jakewestfall.org/blog/index.php/2016/03/25/five-different-cohens-d-statistics-for-within-subject-designs/

# Classical Cohen'D (originally designed with between-subject in mind)

means <- with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating), tapply(rating, cueing, mean))
md <- diff(means)
d <- md / with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating), sqrt(mean(tapply(rating, cueing, var))))

print(d)
##     Uncued 
## 0.03196972
# Cohen’s d after averaging over replicates

sub_means <- with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating), tapply(rating, list(participant , cueing), mean))
d_a <- md / sqrt(mean(diag(var(sub_means))))

print(d_a)
##     Uncued 
## 0.06098943
library(emmeans)
library(effectsize)
## 
## Attaching package: 'effectsize'
## The following object is masked from 'package:jtools':
## 
##     standardize
em_<- emmeans(m3, ~cueing)
## NOTE: Results may be misleading due to involvement in interactions
em_p <- pairs(em_)

# Approx. of Classical Cohen's D based on the models estimated marginal means
t_to_d(
  t = -0.277,
  df_error = 15.6
)
## d     |        95% CI
## ---------------------
## -0.14 | [-1.13, 0.86]
# Approx. of Classical Cohen's D (within-subject) based on the models estimated marginal means
t_to_d(
  t = -0.277,
  df_error = 15.6,
  paired = TRUE
)
## d     |        95% CI
## ---------------------
## -0.07 | [-0.57, 0.43]
# Mixed Models example:
# basedon Brysbaert, M., & Stevens, M. (2018). Power analysis and effect size in mixed effects models: A tutorial. Journal of cognition, 1(1).

library(insight)
## 
## Attaching package: 'insight'
## The following objects are masked from 'package:jtools':
## 
##     get_data, get_weights
totSD<- sqrt(get_variance(m3)$var.intercept[['item']]+get_variance(m3)$var.intercept[['participant']]+get_variance(m3)$var.slope[['participant.cueingUncued']]+get_variance(m3)$var.residual)

eff_size(em_, sigma = totSD, edf = Inf)
##  contrast      effect.size     SE   df lower.CL upper.CL
##  Cued - Uncued     -0.0154 0.0557 20.5   -0.131    0.101
## 
## Results are averaged over the levels of: session 
## sigma used for effect sizes: 1.087 
## Degrees-of-freedom method: inherited from kenward-roger when re-gridding 
## Confidence level used: 0.95

effect size of overall change in items rated 4or5 at baseline

# Classical Cohen'D (originally designed with between-subject in mind)

# means <- with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating) %>% filter(baseline>=4), tapply(rating, cueing, mean))
means = c(3.70297,3.75570)
md <- diff(means)

d <- md / with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating) %>% filter(baseline>=4), sqrt(mean(tapply(rating, cueing, var))))

print(d)
## [1] 0.04870834
# Cohen’s d after averaging over replicates

sub_means <- with(arousal.extended.wbaseline %>% mutate(rating=as.numeric(rating)) %>% drop_na(rating) %>% filter(baseline>=4), tapply(rating, list(participant , cueing), mean))
d_a <- md / sqrt(mean(diag(var(sub_means))))

print(d_a)
## [1] 0.08014736
library(emmeans)
library(effectsize)

em_<- emmeans(m3, ~cueing, at=list(baseline=c(4,5)))
## NOTE: Results may be misleading due to involvement in interactions
em_p <- pairs(em_)
# Approx. of Classical Cohen's D based on the models estimated marginal means
t_to_d(
  t = -2.119,
  df_error = 39.1
)
## d     |         95% CI
## ----------------------
## -0.68 | [-1.32, -0.03]
# Approx. of Classical Cohen's D (within-subject) based on the models estimated marginal means
t_to_d(
  t = -2.119,
  df_error = 39.1,
  paired = TRUE
)
## d     |         95% CI
## ----------------------
## -0.34 | [-0.66, -0.01]
# Mixed Models example:
# basedon Brysbaert, M., & Stevens, M. (2018). Power analysis and effect size in mixed effects models: A tutorial. Journal of cognition, 1(1).

library(insight)

totSD<- sqrt(get_variance(m3)$var.intercept[['item']]+get_variance(m3)$var.intercept[['participant']]+get_variance(m3)$var.slope[['participant.cueingUncued']]+get_variance(m3)$var.residual)

eff_size(em_, sigma = totSD, edf = Inf)
##  contrast      effect.size     SE   df lower.CL upper.CL
##  Cued - Uncued      -0.154 0.0725 23.3   -0.303 -0.00374
## 
## Results are averaged over the levels of: baseline, session 
## sigma used for effect sizes: 1.087 
## Degrees-of-freedom method: inherited from kenward-roger when re-gridding 
## Confidence level used: 0.95