load data

arousal.data <- read_csv("data/ItemByItemLME.csv")
## Rows: 2448 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): participant, ID, sessions, cueing, items, gender
## dbl (2): rating, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arousal.nomissingdata =read_csv("data/ItemByItemLME_noMissing.csv")
## Rows: 2448 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): participant, ID, sessions, cueing, items, gender
## dbl (2): rating, age
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
population =read_csv("data/PopulationRatings.csv")
## Rows: 48 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): itemID, valence, PopArousal
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arousal.extended <- arousal.data %>% 
  left_join(.,arousal.nomissingdata %>% 
              select(ID,sessions,items,cueing), by=c("ID"="ID","sessions"="sessions","items"="items")) %>% 
  separate(items, into = c("itemID","sound"), sep = "_") %>% 
  mutate(itemID = as.numeric(itemID)) %>% left_join(.,population) %>% 
  rename(cueing_wna = cueing.x, cueing_nona= cueing.y) %>% 
  mutate(itemID= as.factor(itemID), sessionsNum = readr::parse_number(sessions)) %>% 
  mutate(rating = factor(rating,ordered = TRUE), participant=factor(participant), sessionsOrd = factor(sessions,ordered = TRUE))
## Joining with `by = join_by(itemID)`
str(arousal.extended)
## tibble [2,448 × 14] (S3: tbl_df/tbl/data.frame)
##  $ participant: Factor w/ 17 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID         : chr [1:2448] "0D9RN5" "0D9RN5" "0D9RN5" "0D9RN5" ...
##  $ sessions   : chr [1:2448] "Session1" "Session1" "Session1" "Session1" ...
##  $ cueing_wna : chr [1:2448] NA NA NA NA ...
##  $ rating     : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 1 3 2 4 5 4 5 5 5 ...
##  $ itemID     : Factor w/ 48 levels "1112","1300",..: 2 3 4 5 7 8 9 11 16 19 ...
##  $ sound      : chr [1:2448] "400ms.wav" "400ms.wav" "400ms.wav" "400ms.wav" ...
##  $ gender     : chr [1:2448] "F" "F" "F" "F" ...
##  $ age        : num [1:2448] 25 25 25 25 25 25 25 25 25 25 ...
##  $ cueing_nona: chr [1:2448] "Cued" "Cued" "Cued" "Cued" ...
##  $ valence    : num [1:2448] 3.55 3.79 1.79 2.58 2.45 1.9 2.3 1.46 1.82 2.21 ...
##  $ PopArousal : num [1:2448] 6.79 6.42 5.25 5.7 5.09 5.82 5.62 7.21 5.75 6.06 ...
##  $ sessionsNum: num [1:2448] 1 1 1 1 1 1 1 1 1 1 ...
##  $ sessionsOrd: Ord.factor w/ 3 levels "Session1"<"Session2"<..: 1 1 1 1 1 1 1 1 1 1 ...
# include baseline in the model as a covariate
# https://stats.stackexchange.com/questions/331791/baseline-adjustment-in-mixed-models

arousal.baseline <- arousal.extended %>% 
  filter(sessionsNum == 1) %>% 
  select(participant,itemID,rating) %>% 
  rename(baseline_rating = rating)

#make names pretty
arousal.extended.wbaseline <- arousal.extended %>% 
  filter(sessionsNum %in% c(2,4)) %>% 
  left_join(.,arousal.baseline) %>% 
  mutate(cueing = cueing_nona) %>% 
  rename(sessionsFactor=sessions, session=sessionsNum, item=itemID, baseline=baseline_rating)
## Joining with `by = join_by(participant, itemID)`
arousal.extended.wbaseline$rating = as.numeric(arousal.extended.wbaseline$rating)
arousal.extended.wbaseline$baseline = as.numeric(arousal.extended.wbaseline$baseline)
arousal.extended.wbaseline$cueing = as.factor(arousal.extended.wbaseline$cueing)
arousal.extended.wbaseline$session = as.factor(arousal.extended.wbaseline$session)

#### Whole dataset [S2,4]

STEP 1 –> lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline)

# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline)

anova(r1,r2) ## --> r2 is the best random structure
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
##    npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## r1    3 4619.3 4635.3 -2306.7   4613.3                         
## r2    4 4205.5 4226.8 -2098.7   4197.5 415.87  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
lrtest(r1,r2)
## Likelihood ratio test
## 
## Model 1: as.numeric(rating) ~ 1 + (1 | participant)
## Model 2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1   3 -2307.7                         
## 2   4 -2099.6  1 416.16  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(r1,r2)
  as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p
(Intercept) 2.97 2.67 – 3.26 <0.001 2.96 2.62 – 3.31 <0.001
Random Effects
σ2 1.17 0.81
τ00 0.38 participant 0.36 item
  0.39 participant
ICC 0.25 0.48
N 17 participant 17 participant
  48 item
Observations 1523 1523
Marginal R2 / Conditional R2 0.000 / 0.246 0.000 / 0.477

Demonstrate how we got to our model

#remove NAs from baseline
dat2 <- arousal.extended.wbaseline[which(complete.cases(arousal.extended.wbaseline[('baseline')])),]

Full <- lmer(rating ~ cueing  + session + cueing*session +  (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

Full2 <-lmer(rating ~ cueing + session + baseline + cueing*session  + (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

Full3 <-lmer(rating ~ cueing + session + baseline + cueing*session  + cueing*baseline + (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

anova(Full, Full2, Full3) ## -- 3rd is our model!
## refitting model(s) with ML (instead of REML)
## Data: dat2 %>% mutate(rating = as.numeric(rating), baseline = as.numeric(baseline))
## Models:
## Full: rating ~ cueing + session + cueing * session + (1 | participant) + (1 | item)
## Full2: rating ~ cueing + session + baseline + cueing * session + (1 | participant) + (1 | item)
## Full3: rating ~ cueing + session + baseline + cueing * session + cueing * baseline + (1 | participant) + (1 | item)
##       npar    AIC    BIC  logLik deviance    Chisq Df Pr(>Chisq)    
## Full     7 4148.1 4185.4 -2067.1   4134.1                           
## Full2    8 3890.9 3933.5 -1937.5   3874.9 259.2326  1    < 2e-16 ***
## Full3    9 3888.3 3936.3 -1935.2   3870.3   4.5858  1    0.03224 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(Full, Full2, Full3) 
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.07 2.72 – 3.43 <0.001 1.88 1.57 – 2.18 <0.001 2.00 1.67 – 2.32 <0.001
cueing [Uncued] 0.03 -0.09 – 0.16 0.625 0.03 -0.09 – 0.14 0.617 -0.21 -0.46 – 0.04 0.095
session [4] -0.25 -0.38 – -0.12 <0.001 -0.25 -0.37 – -0.13 <0.001 -0.25 -0.37 – -0.13 <0.001
cueing [Uncued] × session
[4]
-0.04 -0.22 – 0.14 0.691 -0.03 -0.20 – 0.13 0.704 -0.03 -0.20 – 0.13 0.705
baseline 0.38 0.34 – 0.43 <0.001 0.34 0.29 – 0.40 <0.001
cueing [Uncued] ×
baseline
0.08 0.01 – 0.15 0.032
Random Effects
σ2 0.79 0.68 0.68
τ00 0.35 item 0.14 item 0.14 item
0.40 participant 0.25 participant 0.25 participant
ICC 0.49 0.36 0.36
N 17 participant 17 participant 17 participant
48 item 48 item 48 item
Observations 1517 1517 1517
Marginal R2 / Conditional R2 0.012 / 0.494 0.180 / 0.476 0.181 / 0.478
#plot predicted data
plot_model(Full3, type = "pred", terms = c("cueing","baseline"))

#plotting observed and predicted data
#m3.x <- lmer(rating ~ cueing* baseline + session*cueing + (1 | participant)  + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

data.aggr <- dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)) %>% group_by(session,participant,cueing,baseline) %>% summarise(predicted=mean(rating, na.rm=TRUE)) %>% na.omit() %>% mutate(x = baseline, group_col=cueing)
## `summarise()` has grouped output by 'session', 'participant', 'cueing'. You can
## override using the `.groups` argument.
tmp <- tibble(get_model_data(Full3, type = "pred", terms = c("baseline","cueing")))

#aggregated across session
ggplot(tmp, aes(x = x, y = predicted, color = group_col,fill=group_col)) + 
  geom_line() + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3) +
  xlab("Baseline") +
  ylab("Rating") +   stat_summary(data=data.aggr, fun = "mean", geom = "point", size = 1.5) +
  stat_summary(data=data.aggr,fun.data = mean_se, geom = "errorbar", width = 0.1)   + theme_apa(base_size = 14)  + labs(color=NULL, fill=NULL)

#model is still the same but we split the behavior by session
ggplot(tmp, aes(x = x, y = predicted, color = group_col,fill=group_col)) + 
  geom_line() + 
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  xlab("Baseline") +
  ylab("Rating") +   stat_summary(data=data.aggr, fun = "mean", geom = "point", size = 2) +
  stat_summary(data=data.aggr,fun.data = mean_se, geom = "errorbar", width = 0.1)   + 
  theme_apa(base_size = 14) + facet_wrap(~session) + labs(color=NULL, fill=NULL) +
  scale_color_manual(values = c("red", "darkgreen")) +
  theme(legend.position = c(0.85,0.2))

#### Filtered dataset [baseline>=4]

arousal.extended.wbaseline.high <- arousal.extended.wbaseline %>% filter(baseline >= 4)

# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
high.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline.high)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
high.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | item) , data= arousal.extended.wbaseline.high)

anova(high.r1,high.r2) ## high.r2 is the best 
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high
## Models:
## high.r1: as.numeric(rating) ~ 1 + (1 | participant)
## high.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
##         npar    AIC    BIC logLik deviance Chisq Df Pr(>Chisq)    
## high.r1    3 1671.4 1684.6 -832.7   1665.4                        
## high.r2    4 1610.8 1628.5 -801.4   1602.8 62.59  1  2.545e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
high.m1 <- lmer(rating ~ cueing + session + cueing*session + (1  | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

high.m2 <- lmer(rating ~ cueing +  session + baseline + cueing*session + (1  | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

high.m3 <- lmer(rating ~ cueing + session + baseline + cueing*session + cueing*baseline + (1  | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))


anova(high.m1, high.m2, high.m3) # --> high.m2 is the best model
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating),  ...
## Models:
## high.m1: rating ~ cueing + session + cueing * session + (1 | participant) + (1 | item)
## high.m2: rating ~ cueing + session + baseline + cueing * session + (1 | participant) + (1 | item)
## high.m3: rating ~ cueing + session + baseline + cueing * session + cueing * baseline + (1 | participant) + (1 | item)
##         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## high.m1    7 1593.0 1623.9 -789.49   1579.0                         
## high.m2    8 1557.4 1592.7 -770.70   1541.4 37.582  1  8.763e-10 ***
## high.m3    9 1557.4 1597.1 -769.68   1539.4  2.045  1     0.1527    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(high.m1, high.m2, high.m3)
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.58 3.25 – 3.92 <0.001 1.50 0.77 – 2.23 <0.001 1.07 0.12 – 2.01 0.027
cueing [Uncued] 0.09 -0.09 – 0.27 0.323 0.11 -0.07 – 0.28 0.240 1.01 -0.24 – 2.26 0.114
session [4] -0.33 -0.52 – -0.14 0.001 -0.33 -0.51 – -0.15 <0.001 -0.33 -0.51 – -0.15 <0.001
cueing [Uncued] × session
[4]
0.03 -0.23 – 0.29 0.821 0.03 -0.22 – 0.28 0.824 0.03 -0.22 – 0.28 0.823
baseline 0.49 0.33 – 0.64 <0.001 0.59 0.38 – 0.79 <0.001
cueing [Uncued] ×
baseline
-0.21 -0.49 – 0.08 0.153
Random Effects
σ2 0.65 0.62 0.62
τ00 0.19 item 0.14 item 0.14 item
0.34 participant 0.32 participant 0.33 participant
ICC 0.45 0.43 0.43
N 17 participant 17 participant 17 participant
48 item 48 item 48 item
Observations 610 610 610
Marginal R2 / Conditional R2 0.023 / 0.459 0.071 / 0.468 0.072 / 0.472

#### PLOT Filtered dataset [baseline>=4]

#predicted data
plot_model(high.m2, type = "pred", terms = c("cueing","baseline"))

library(ggeffects)
tmp.pred2 <- ggpredict(high.m2, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.68)
#tmp.pred <- ggpredict(m3, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.95)

tmp.dat2 <- arousal.extended.wbaseline.high %>% mutate(rating=as.numeric(rating)) %>% group_by(cueing, baseline) %>%
  summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot()  + geom_bar(data = tmp.pred2, aes(x = group, y = predicted, fill=x), 
                     stat = "identity", position = position_dodge()) + 
  geom_errorbar(data = tmp.pred2,aes(x = group, y = predicted,ymin = conf.low, 
  ymax = conf.high, group=x),position= position_dodge(width = 0.85), width=0.1) + ylim(0,5) +  
  xlab("Baseline rating") + ylab("Predicted rating") + 
  labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) + 
  theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20), 
        plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(), 
        legend.title=element_blank(),axis.line = element_line(colour = "black"),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(), 
        panel.border = element_blank(), panel.background = element_blank(), 
        legend.key = element_rect(fill = NA), legend.text = element_text(size = 14), 
        legend.position = c(0.2,0.85)) + 
  scale_color_manual(values=c(hue_pal()(2))) + 
  scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

#### PLOT RAW DATA [baseline>=4]

arousal.extended.wbaseline.high$rating = as.numeric(arousal.extended.wbaseline.high$rating)
arousal.extended.wbaseline.high$cueing = as.factor(arousal.extended.wbaseline.high$cueing)
arousal.extended.wbaseline.high$session = as.factor(arousal.extended.wbaseline.high$session)

#real data
BaselineFiltered<- apa_beeplot(
  data = arousal.extended.wbaseline.high
  , id = "participant"
  , dv = "rating"
  , factors = c("cueing", "session")
  , dispersion = se
)

#### Participant 23 ADDED TO Whole dataset [S2,4]

We only have S1 and S2 for PT23

full.data <- read_csv("data/ItemByItemLME_p23IN.csv")
## Rows: 1728 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): participant, ID, sessions, cueing, items
## dbl (2): rating, baseline
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
full.data <- full.data %>%
  mutate(across(c(participant, ID, sessions, cueing, items), as.factor))
str(full.data)
## tibble [1,728 × 7] (S3: tbl_df/tbl/data.frame)
##  $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID         : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sessions   : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cueing     : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rating     : num [1:1728] 1 2 3 3 4 4 4 4 5 5 ...
##  $ items      : Factor w/ 48 levels "1112_400ms.wav",..: 2 3 4 5 7 8 9 11 16 19 ...
##  $ baseline   : num [1:1728] 3 1 3 2 4 5 4 5 5 5 ...
# Calculate summary statistics by cueing
summary_stats <- full.data %>%
  group_by(cueing, sessions) %>%
  summarise(
    mean_rating = mean(rating, na.rm = TRUE),
    median_rating = median(rating, na.rm = TRUE),
    sd_rating = sd(rating, na.rm = TRUE),
    se_rating = sd(rating, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
print(summary_stats)
## # A tibble: 4 × 6
## # Groups:   cueing [2]
##   cueing sessions mean_rating median_rating sd_rating se_rating
##   <fct>  <fct>          <dbl>         <dbl>     <dbl>     <dbl>
## 1 Cued   Session2        2.97             3      1.29    0.0619
## 2 Cued   Session4        2.83             3      1.26    0.0606
## 3 Uncued Session2        3.04             3      1.22    0.0589
## 4 Uncued Session4        2.85             3      1.17    0.0565
# LME - find random effect strucure first
full.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=full.data)

# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
full.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | items) , data=full.data)

anova(full.r1, full.r2) ## --> full.r2 is the best random structure
## refitting model(s) with ML (instead of REML)
## Data: full.data
## Models:
## full.r1: as.numeric(rating) ~ 1 + (1 | participant)
## full.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | items)
##         npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## full.r1    3 4755.1 4771.2 -2374.6   4749.1                         
## full.r2    4 4318.3 4339.8 -2155.2   4310.3 438.77  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(full.r1, full.r2)
  as.numeric(rating) as.numeric(rating)
Predictors Estimates CI p Estimates CI p
(Intercept) 2.89 2.58 – 3.21 <0.001 2.89 2.53 – 3.25 <0.001
Random Effects
σ2 1.16 0.80
τ00 0.45 participant 0.36 items
  0.46 participant
ICC 0.28 0.50
N 18 participant 18 participant
  48 items
Observations 1571 1571
Marginal R2 / Conditional R2 0.000 / 0.281 0.000 / 0.503
# LME - add fixed effect - remove NAs to fit models
full.data2 <- full.data[which(complete.cases(full.data)),] #[('baseline')])),]
str(full.data2)
## tibble [1,565 × 7] (S3: tbl_df/tbl/data.frame)
##  $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID         : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sessions   : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cueing     : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rating     : num [1:1565] 1 2 3 3 4 4 4 4 5 5 ...
##  $ items      : Factor w/ 48 levels "1112_400ms.wav",..: 2 3 4 5 7 8 9 11 16 19 ...
##  $ baseline   : num [1:1565] 3 1 3 2 4 5 4 5 5 5 ...
has_na <- any(is.na(full.data2))

# Check if there are any missing values
if (has_na) {
  print("The dataset contains missing values (NA).")
} else {
  print("The dataset does not contain any missing values (NA).")
}
## [1] "The dataset does not contain any missing values (NA)."
Full.m1 <- lmer(rating ~ cueing  + sessions + cueing*sessions +  (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

Full.m2 <-lmer(rating ~ cueing + sessions + baseline + cueing*sessions  + (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

Full.m3 <-lmer(rating ~ cueing + sessions + baseline + cueing*sessions  + cueing*baseline + (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))

anova(Full.m1, Full.m2, Full.m3) 
## refitting model(s) with ML (instead of REML)
## Data: full.data2 %>% mutate(rating = as.numeric(rating), baseline = as.numeric(baseline))
## Models:
## Full.m1: rating ~ cueing + sessions + cueing * sessions + (1 | participant) + (1 | items)
## Full.m2: rating ~ cueing + sessions + baseline + cueing * sessions + (1 | participant) + (1 | items)
## Full.m3: rating ~ cueing + sessions + baseline + cueing * sessions + cueing * baseline + (1 | participant) + (1 | items)
##         npar    AIC    BIC  logLik deviance    Chisq Df Pr(>Chisq)    
## Full.m1    7 4260.8 4298.3 -2123.4   4246.8                           
## Full.m2    8 3991.7 4034.6 -1987.9   3975.7 271.0912  1    < 2e-16 ***
## Full.m3    9 3989.9 4038.1 -1986.0   3971.9   3.8387  1    0.05008 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## -- 3rd is SLIGHTLY better than the 2nd based on AIC - BIC - pvalues
tab_model(Full.m1, Full.m2, Full.m3) 
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 2.99 2.61 – 3.36 <0.001 1.80 1.49 – 2.11 <0.001 1.91 1.58 – 2.23 <0.001
cueing [Uncued] 0.04 -0.08 – 0.16 0.467 0.04 -0.07 – 0.16 0.442 -0.17 -0.41 – 0.07 0.169
sessions [Session4] -0.24 -0.37 – -0.12 <0.001 -0.24 -0.36 – -0.13 <0.001 -0.24 -0.36 – -0.12 <0.001
cueing [Uncued] ×
sessions [Session4]
-0.05 -0.23 – 0.12 0.567 -0.05 -0.21 – 0.12 0.569 -0.05 -0.21 – 0.11 0.537
baseline 0.39 0.34 – 0.43 <0.001 0.35 0.30 – 0.41 <0.001
cueing [Uncued] ×
baseline
0.07 -0.00 – 0.14 0.050
Random Effects
σ2 0.78 0.67 0.67
τ00 0.35 items 0.14 items 0.14 items
0.49 participant 0.29 participant 0.29 participant
ICC 0.52 0.39 0.39
N 18 participant 18 participant 18 participant
48 items 48 items 48 items
Observations 1565 1565 1565
Marginal R2 / Conditional R2 0.011 / 0.525 0.179 / 0.496 0.179 / 0.498
## note: including the interaction between baseline and cueing flips the estimate from Positive to Negative

#### Participant 23 ADDED - Run analysis with Filtered dataset [baseline>=4]

full.data.high <- full.data %>% filter(baseline >= 4)
full.data.high <- full.data.high %>%
  mutate(across(c(participant, ID, sessions, cueing, items), as.factor))
str(full.data.high)
## tibble [664 × 7] (S3: tbl_df/tbl/data.frame)
##  $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ID         : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ sessions   : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cueing     : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rating     : num [1:664] 4 4 4 4 5 5 3 3 4 3 ...
##  $ items      : Factor w/ 48 levels "1112_400ms.wav",..: 7 8 9 11 16 19 22 30 37 43 ...
##  $ baseline   : num [1:664] 4 5 4 5 5 5 5 4 5 4 ...
# Calculate summary statistics by cueing
summary_stats.high <- full.data.high %>%
  group_by(cueing, sessions) %>%
  summarise(
    mean_rating = mean(rating, na.rm = TRUE),
    median_rating = median(rating, na.rm = TRUE),
    sd_rating = sd(rating, na.rm = TRUE),
    se_rating = sd(rating, na.rm = TRUE) / sqrt(n())
  )
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
print(summary_stats.high)
## # A tibble: 4 × 6
## # Groups:   cueing [2]
##   cueing sessions mean_rating median_rating sd_rating se_rating
##   <fct>  <fct>          <dbl>         <dbl>     <dbl>     <dbl>
## 1 Cued   Session2        3.86             4     1.07     0.0834
## 2 Cued   Session4        3.52             4     1.22     0.0953
## 3 Uncued Session2        3.89             4     0.970    0.0746
## 4 Uncued Session4        3.58             4     1.02     0.0788
# lets find the maximal random effect structure

# this is the classical within-subject anova in glm form
full.high.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=full.data.high)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
full.high.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant)  + (1 | items) , data=full.data.high)

anova(full.high.r1, full.high.r2) ## full.high.r2 is the best 
## refitting model(s) with ML (instead of REML)
## Data: full.data.high
## Models:
## full.high.r1: as.numeric(rating) ~ 1 + (1 | participant)
## full.high.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | items)
##              npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## full.high.r1    3 1684.3 1697.6 -839.18   1678.3                         
## full.high.r2    4 1624.0 1641.6 -807.97   1616.0 62.408  1  2.792e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
full.high.m1 <- lmer(rating ~ cueing + sessions + cueing*sessions + (1  | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))

full.high.m2 <- lmer(rating ~ cueing +  sessions + baseline + cueing*sessions + (1  | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))

full.high.m3 <- lmer(rating ~ cueing + sessions + baseline + cueing*sessions + cueing*baseline + (1  | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))


anova(full.high.m1, full.high.m2, full.high.m3) # --> full.high.m2 is the best model
## refitting model(s) with ML (instead of REML)
## Data: full.data.high %>% mutate(rating = as.numeric(rating))
## Models:
## full.high.m1: rating ~ cueing + sessions + cueing * sessions + (1 | participant) + (1 | items)
## full.high.m2: rating ~ cueing + sessions + baseline + cueing * sessions + (1 | participant) + (1 | items)
## full.high.m3: rating ~ cueing + sessions + baseline + cueing * sessions + cueing * baseline + (1 | participant) + (1 | items)
##              npar    AIC    BIC  logLik deviance   Chisq Df Pr(>Chisq)    
## full.high.m1    7 1606.5 1637.5 -796.28   1592.5                          
## full.high.m2    8 1570.0 1605.4 -777.01   1554.0 38.5363  1  5.375e-10 ***
## full.high.m3    9 1570.2 1610.0 -776.09   1552.2  1.8317  1     0.1759    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(full.high.m1, full.high.m2, full.high.m3) 
  rating rating rating
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 3.56 3.24 – 3.89 <0.001 1.47 0.75 – 2.19 <0.001 1.06 0.13 – 2.00 0.026
cueing [Uncued] 0.08 -0.10 – 0.26 0.382 0.09 -0.08 – 0.27 0.286 0.94 -0.30 – 2.19 0.136
sessions [Session4] -0.33 -0.52 – -0.15 <0.001 -0.33 -0.51 – -0.15 <0.001 -0.33 -0.51 – -0.15 <0.001
cueing [Uncued] ×
sessions [Session4]
0.04 -0.22 – 0.30 0.756 0.04 -0.21 – 0.29 0.762 0.04 -0.21 – 0.29 0.757
baseline 0.49 0.34 – 0.64 <0.001 0.58 0.38 – 0.79 <0.001
cueing [Uncued] ×
baseline
-0.19 -0.48 – 0.09 0.176
Random Effects
σ2 0.65 0.62 0.62
τ00 0.19 items 0.14 items 0.14 items
0.33 participant 0.31 participant 0.32 participant
ICC 0.45 0.42 0.43
N 18 participant 18 participant 18 participant
48 items 48 items 48 items
Observations 616 616 616
Marginal R2 / Conditional R2 0.022 / 0.458 0.071 / 0.464 0.072 / 0.468

with or wirhout P23, data dont change.

“worse” once PT18 has been added (whole dataset from 0.09 to 0.16)

tab_model(Full.m3, full.high.m2)
  rating rating
Predictors Estimates CI p Estimates CI p
(Intercept) 1.91 1.58 – 2.23 <0.001 1.47 0.75 – 2.19 <0.001
cueing [Uncued] -0.17 -0.41 – 0.07 0.169 0.09 -0.08 – 0.27 0.286
sessions [Session4] -0.24 -0.36 – -0.12 <0.001 -0.33 -0.51 – -0.15 <0.001
baseline 0.35 0.30 – 0.41 <0.001 0.49 0.34 – 0.64 <0.001
cueing [Uncued] ×
sessions [Session4]
-0.05 -0.21 – 0.11 0.537 0.04 -0.21 – 0.29 0.762
cueing [Uncued] ×
baseline
0.07 -0.00 – 0.14 0.050
Random Effects
σ2 0.67 0.62
τ00 0.14 items 0.14 items
0.29 participant 0.31 participant
ICC 0.39 0.42
N 18 participant 18 participant
48 items 48 items
Observations 1565 616
Marginal R2 / Conditional R2 0.179 / 0.498 0.071 / 0.464