arousal.data <- read_csv("data/ItemByItemLME.csv")
## Rows: 2448 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): participant, ID, sessions, cueing, items, gender
## dbl (2): rating, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arousal.nomissingdata =read_csv("data/ItemByItemLME_noMissing.csv")
## Rows: 2448 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): participant, ID, sessions, cueing, items, gender
## dbl (2): rating, age
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
population =read_csv("data/PopulationRatings.csv")
## Rows: 48 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (3): itemID, valence, PopArousal
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
arousal.extended <- arousal.data %>%
left_join(.,arousal.nomissingdata %>%
select(ID,sessions,items,cueing), by=c("ID"="ID","sessions"="sessions","items"="items")) %>%
separate(items, into = c("itemID","sound"), sep = "_") %>%
mutate(itemID = as.numeric(itemID)) %>% left_join(.,population) %>%
rename(cueing_wna = cueing.x, cueing_nona= cueing.y) %>%
mutate(itemID= as.factor(itemID), sessionsNum = readr::parse_number(sessions)) %>%
mutate(rating = factor(rating,ordered = TRUE), participant=factor(participant), sessionsOrd = factor(sessions,ordered = TRUE))
## Joining with `by = join_by(itemID)`
str(arousal.extended)
## tibble [2,448 × 14] (S3: tbl_df/tbl/data.frame)
## $ participant: Factor w/ 17 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ID : chr [1:2448] "0D9RN5" "0D9RN5" "0D9RN5" "0D9RN5" ...
## $ sessions : chr [1:2448] "Session1" "Session1" "Session1" "Session1" ...
## $ cueing_wna : chr [1:2448] NA NA NA NA ...
## $ rating : Ord.factor w/ 5 levels "1"<"2"<"3"<"4"<..: 3 1 3 2 4 5 4 5 5 5 ...
## $ itemID : Factor w/ 48 levels "1112","1300",..: 2 3 4 5 7 8 9 11 16 19 ...
## $ sound : chr [1:2448] "400ms.wav" "400ms.wav" "400ms.wav" "400ms.wav" ...
## $ gender : chr [1:2448] "F" "F" "F" "F" ...
## $ age : num [1:2448] 25 25 25 25 25 25 25 25 25 25 ...
## $ cueing_nona: chr [1:2448] "Cued" "Cued" "Cued" "Cued" ...
## $ valence : num [1:2448] 3.55 3.79 1.79 2.58 2.45 1.9 2.3 1.46 1.82 2.21 ...
## $ PopArousal : num [1:2448] 6.79 6.42 5.25 5.7 5.09 5.82 5.62 7.21 5.75 6.06 ...
## $ sessionsNum: num [1:2448] 1 1 1 1 1 1 1 1 1 1 ...
## $ sessionsOrd: Ord.factor w/ 3 levels "Session1"<"Session2"<..: 1 1 1 1 1 1 1 1 1 1 ...
# include baseline in the model as a covariate
# https://stats.stackexchange.com/questions/331791/baseline-adjustment-in-mixed-models
arousal.baseline <- arousal.extended %>%
filter(sessionsNum == 1) %>%
select(participant,itemID,rating) %>%
rename(baseline_rating = rating)
#make names pretty
arousal.extended.wbaseline <- arousal.extended %>%
filter(sessionsNum %in% c(2,4)) %>%
left_join(.,arousal.baseline) %>%
mutate(cueing = cueing_nona) %>%
rename(sessionsFactor=sessions, session=sessionsNum, item=itemID, baseline=baseline_rating)
## Joining with `by = join_by(participant, itemID)`
arousal.extended.wbaseline$rating = as.numeric(arousal.extended.wbaseline$rating)
arousal.extended.wbaseline$baseline = as.numeric(arousal.extended.wbaseline$baseline)
arousal.extended.wbaseline$cueing = as.factor(arousal.extended.wbaseline$cueing)
arousal.extended.wbaseline$session = as.factor(arousal.extended.wbaseline$session)
STEP 1 –> lets find the maximal random effect structure
# this is the classical within-subject anova in glm form
r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant) + (1 | item) , data=arousal.extended.wbaseline)
anova(r1,r2) ## --> r2 is the best random structure
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline
## Models:
## r1: as.numeric(rating) ~ 1 + (1 | participant)
## r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## r1 3 4619.3 4635.3 -2306.7 4613.3
## r2 4 4205.5 4226.8 -2098.7 4197.5 415.87 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
lrtest(r1,r2)
## Likelihood ratio test
##
## Model 1: as.numeric(rating) ~ 1 + (1 | participant)
## Model 2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 3 -2307.7
## 2 4 -2099.6 1 416.16 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(r1,r2)
| as.numeric(rating) | as.numeric(rating) | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 2.97 | 2.67 – 3.26 | <0.001 | 2.96 | 2.62 – 3.31 | <0.001 |
| Random Effects | ||||||
| σ2 | 1.17 | 0.81 | ||||
| τ00 | 0.38 participant | 0.36 item | ||||
| 0.39 participant | ||||||
| ICC | 0.25 | 0.48 | ||||
| N | 17 participant | 17 participant | ||||
| 48 item | ||||||
| Observations | 1523 | 1523 | ||||
| Marginal R2 / Conditional R2 | 0.000 / 0.246 | 0.000 / 0.477 | ||||
#remove NAs from baseline
dat2 <- arousal.extended.wbaseline[which(complete.cases(arousal.extended.wbaseline[('baseline')])),]
Full <- lmer(rating ~ cueing + session + cueing*session + (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
Full2 <-lmer(rating ~ cueing + session + baseline + cueing*session + (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
Full3 <-lmer(rating ~ cueing + session + baseline + cueing*session + cueing*baseline + (1 | participant) + (1 | item) , data=dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
anova(Full, Full2, Full3) ## -- 3rd is our model!
## refitting model(s) with ML (instead of REML)
## Data: dat2 %>% mutate(rating = as.numeric(rating), baseline = as.numeric(baseline))
## Models:
## Full: rating ~ cueing + session + cueing * session + (1 | participant) + (1 | item)
## Full2: rating ~ cueing + session + baseline + cueing * session + (1 | participant) + (1 | item)
## Full3: rating ~ cueing + session + baseline + cueing * session + cueing * baseline + (1 | participant) + (1 | item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## Full 7 4148.1 4185.4 -2067.1 4134.1
## Full2 8 3890.9 3933.5 -1937.5 3874.9 259.2326 1 < 2e-16 ***
## Full3 9 3888.3 3936.3 -1935.2 3870.3 4.5858 1 0.03224 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(Full, Full2, Full3)
| rating | rating | rating | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 3.07 | 2.72 – 3.43 | <0.001 | 1.88 | 1.57 – 2.18 | <0.001 | 2.00 | 1.67 – 2.32 | <0.001 |
| cueing [Uncued] | 0.03 | -0.09 – 0.16 | 0.625 | 0.03 | -0.09 – 0.14 | 0.617 | -0.21 | -0.46 – 0.04 | 0.095 |
| session [4] | -0.25 | -0.38 – -0.12 | <0.001 | -0.25 | -0.37 – -0.13 | <0.001 | -0.25 | -0.37 – -0.13 | <0.001 |
|
cueing [Uncued] × session [4] |
-0.04 | -0.22 – 0.14 | 0.691 | -0.03 | -0.20 – 0.13 | 0.704 | -0.03 | -0.20 – 0.13 | 0.705 |
| baseline | 0.38 | 0.34 – 0.43 | <0.001 | 0.34 | 0.29 – 0.40 | <0.001 | |||
|
cueing [Uncued] × baseline |
0.08 | 0.01 – 0.15 | 0.032 | ||||||
| Random Effects | |||||||||
| σ2 | 0.79 | 0.68 | 0.68 | ||||||
| τ00 | 0.35 item | 0.14 item | 0.14 item | ||||||
| 0.40 participant | 0.25 participant | 0.25 participant | |||||||
| ICC | 0.49 | 0.36 | 0.36 | ||||||
| N | 17 participant | 17 participant | 17 participant | ||||||
| 48 item | 48 item | 48 item | |||||||
| Observations | 1517 | 1517 | 1517 | ||||||
| Marginal R2 / Conditional R2 | 0.012 / 0.494 | 0.180 / 0.476 | 0.181 / 0.478 | ||||||
#plot predicted data
plot_model(Full3, type = "pred", terms = c("cueing","baseline"))
#plotting observed and predicted data
#m3.x <- lmer(rating ~ cueing* baseline + session*cueing + (1 | participant) + (1 | item) , data=arousal.extended.wbaseline %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
data.aggr <- dat2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)) %>% group_by(session,participant,cueing,baseline) %>% summarise(predicted=mean(rating, na.rm=TRUE)) %>% na.omit() %>% mutate(x = baseline, group_col=cueing)
## `summarise()` has grouped output by 'session', 'participant', 'cueing'. You can
## override using the `.groups` argument.
tmp <- tibble(get_model_data(Full3, type = "pred", terms = c("baseline","cueing")))
#aggregated across session
ggplot(tmp, aes(x = x, y = predicted, color = group_col,fill=group_col)) +
geom_line() +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.3) +
xlab("Baseline") +
ylab("Rating") + stat_summary(data=data.aggr, fun = "mean", geom = "point", size = 1.5) +
stat_summary(data=data.aggr,fun.data = mean_se, geom = "errorbar", width = 0.1) + theme_apa(base_size = 14) + labs(color=NULL, fill=NULL)
#model is still the same but we split the behavior by session
ggplot(tmp, aes(x = x, y = predicted, color = group_col,fill=group_col)) +
geom_line() +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
xlab("Baseline") +
ylab("Rating") + stat_summary(data=data.aggr, fun = "mean", geom = "point", size = 2) +
stat_summary(data=data.aggr,fun.data = mean_se, geom = "errorbar", width = 0.1) +
theme_apa(base_size = 14) + facet_wrap(~session) + labs(color=NULL, fill=NULL) +
scale_color_manual(values = c("red", "darkgreen")) +
theme(legend.position = c(0.85,0.2))
arousal.extended.wbaseline.high <- arousal.extended.wbaseline %>% filter(baseline >= 4)
# lets find the maximal random effect structure
# this is the classical within-subject anova in glm form
high.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=arousal.extended.wbaseline.high)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
high.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant) + (1 | item) , data= arousal.extended.wbaseline.high)
anova(high.r1,high.r2) ## high.r2 is the best
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high
## Models:
## high.r1: as.numeric(rating) ~ 1 + (1 | participant)
## high.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## high.r1 3 1671.4 1684.6 -832.7 1665.4
## high.r2 4 1610.8 1628.5 -801.4 1602.8 62.59 1 2.545e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
high.m1 <- lmer(rating ~ cueing + session + cueing*session + (1 | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
high.m2 <- lmer(rating ~ cueing + session + baseline + cueing*session + (1 | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
high.m3 <- lmer(rating ~ cueing + session + baseline + cueing*session + cueing*baseline + (1 | participant) + (1 | item) , data=arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
anova(high.m1, high.m2, high.m3) # --> high.m2 is the best model
## refitting model(s) with ML (instead of REML)
## Data: arousal.extended.wbaseline.high %>% mutate(rating = as.numeric(rating), ...
## Models:
## high.m1: rating ~ cueing + session + cueing * session + (1 | participant) + (1 | item)
## high.m2: rating ~ cueing + session + baseline + cueing * session + (1 | participant) + (1 | item)
## high.m3: rating ~ cueing + session + baseline + cueing * session + cueing * baseline + (1 | participant) + (1 | item)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## high.m1 7 1593.0 1623.9 -789.49 1579.0
## high.m2 8 1557.4 1592.7 -770.70 1541.4 37.582 1 8.763e-10 ***
## high.m3 9 1557.4 1597.1 -769.68 1539.4 2.045 1 0.1527
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(high.m1, high.m2, high.m3)
| rating | rating | rating | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 3.58 | 3.25 – 3.92 | <0.001 | 1.50 | 0.77 – 2.23 | <0.001 | 1.07 | 0.12 – 2.01 | 0.027 |
| cueing [Uncued] | 0.09 | -0.09 – 0.27 | 0.323 | 0.11 | -0.07 – 0.28 | 0.240 | 1.01 | -0.24 – 2.26 | 0.114 |
| session [4] | -0.33 | -0.52 – -0.14 | 0.001 | -0.33 | -0.51 – -0.15 | <0.001 | -0.33 | -0.51 – -0.15 | <0.001 |
|
cueing [Uncued] × session [4] |
0.03 | -0.23 – 0.29 | 0.821 | 0.03 | -0.22 – 0.28 | 0.824 | 0.03 | -0.22 – 0.28 | 0.823 |
| baseline | 0.49 | 0.33 – 0.64 | <0.001 | 0.59 | 0.38 – 0.79 | <0.001 | |||
|
cueing [Uncued] × baseline |
-0.21 | -0.49 – 0.08 | 0.153 | ||||||
| Random Effects | |||||||||
| σ2 | 0.65 | 0.62 | 0.62 | ||||||
| τ00 | 0.19 item | 0.14 item | 0.14 item | ||||||
| 0.34 participant | 0.32 participant | 0.33 participant | |||||||
| ICC | 0.45 | 0.43 | 0.43 | ||||||
| N | 17 participant | 17 participant | 17 participant | ||||||
| 48 item | 48 item | 48 item | |||||||
| Observations | 610 | 610 | 610 | ||||||
| Marginal R2 / Conditional R2 | 0.023 / 0.459 | 0.071 / 0.468 | 0.072 / 0.472 | ||||||
#predicted data
plot_model(high.m2, type = "pred", terms = c("cueing","baseline"))
library(ggeffects)
tmp.pred2 <- ggpredict(high.m2, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.68)
#tmp.pred <- ggpredict(m3, terms = c("cueing","baseline[4,5]"), ci.lvl = 0.95)
tmp.dat2 <- arousal.extended.wbaseline.high %>% mutate(rating=as.numeric(rating)) %>% group_by(cueing, baseline) %>%
summarise(mean=mean(rating, na.rm=TRUE), sd=sd(rating,na.rm=TRUE))
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
ggplot() + geom_bar(data = tmp.pred2, aes(x = group, y = predicted, fill=x),
stat = "identity", position = position_dodge()) +
geom_errorbar(data = tmp.pred2,aes(x = group, y = predicted,ymin = conf.low,
ymax = conf.high, group=x),position= position_dodge(width = 0.85), width=0.1) + ylim(0,5) +
xlab("Baseline rating") + ylab("Predicted rating") +
labs(color='Condition') + coord_cartesian(ylim = c(2.5, 4.5)) +
theme(axis.text = element_text(size = 18), axis.title = element_text(size = 20),
plot.background = element_rect(fill = "white", color = NA), panel.grid = element_blank(),
legend.title=element_blank(),axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.border = element_blank(), panel.background = element_blank(),
legend.key = element_rect(fill = NA), legend.text = element_text(size = 14),
legend.position = c(0.2,0.85)) +
scale_color_manual(values=c(hue_pal()(2))) +
scale_y_continuous(breaks=c(2.5,3.5,4.5))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
arousal.extended.wbaseline.high$rating = as.numeric(arousal.extended.wbaseline.high$rating)
arousal.extended.wbaseline.high$cueing = as.factor(arousal.extended.wbaseline.high$cueing)
arousal.extended.wbaseline.high$session = as.factor(arousal.extended.wbaseline.high$session)
#real data
BaselineFiltered<- apa_beeplot(
data = arousal.extended.wbaseline.high
, id = "participant"
, dv = "rating"
, factors = c("cueing", "session")
, dispersion = se
)
We only have S1 and S2 for PT23
full.data <- read_csv("data/ItemByItemLME_p23IN.csv")
## Rows: 1728 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): participant, ID, sessions, cueing, items
## dbl (2): rating, baseline
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
full.data <- full.data %>%
mutate(across(c(participant, ID, sessions, cueing, items), as.factor))
str(full.data)
## tibble [1,728 × 7] (S3: tbl_df/tbl/data.frame)
## $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ID : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sessions : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
## $ cueing : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
## $ rating : num [1:1728] 1 2 3 3 4 4 4 4 5 5 ...
## $ items : Factor w/ 48 levels "1112_400ms.wav",..: 2 3 4 5 7 8 9 11 16 19 ...
## $ baseline : num [1:1728] 3 1 3 2 4 5 4 5 5 5 ...
# Calculate summary statistics by cueing
summary_stats <- full.data %>%
group_by(cueing, sessions) %>%
summarise(
mean_rating = mean(rating, na.rm = TRUE),
median_rating = median(rating, na.rm = TRUE),
sd_rating = sd(rating, na.rm = TRUE),
se_rating = sd(rating, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
print(summary_stats)
## # A tibble: 4 × 6
## # Groups: cueing [2]
## cueing sessions mean_rating median_rating sd_rating se_rating
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Cued Session2 2.97 3 1.29 0.0619
## 2 Cued Session4 2.83 3 1.26 0.0606
## 3 Uncued Session2 3.04 3 1.22 0.0589
## 4 Uncued Session4 2.85 3 1.17 0.0565
# LME - find random effect strucure first
full.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=full.data)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
full.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant) + (1 | items) , data=full.data)
anova(full.r1, full.r2) ## --> full.r2 is the best random structure
## refitting model(s) with ML (instead of REML)
## Data: full.data
## Models:
## full.r1: as.numeric(rating) ~ 1 + (1 | participant)
## full.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | items)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## full.r1 3 4755.1 4771.2 -2374.6 4749.1
## full.r2 4 4318.3 4339.8 -2155.2 4310.3 438.77 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(full.r1, full.r2)
| as.numeric(rating) | as.numeric(rating) | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 2.89 | 2.58 – 3.21 | <0.001 | 2.89 | 2.53 – 3.25 | <0.001 |
| Random Effects | ||||||
| σ2 | 1.16 | 0.80 | ||||
| τ00 | 0.45 participant | 0.36 items | ||||
| 0.46 participant | ||||||
| ICC | 0.28 | 0.50 | ||||
| N | 18 participant | 18 participant | ||||
| 48 items | ||||||
| Observations | 1571 | 1571 | ||||
| Marginal R2 / Conditional R2 | 0.000 / 0.281 | 0.000 / 0.503 | ||||
# LME - add fixed effect - remove NAs to fit models
full.data2 <- full.data[which(complete.cases(full.data)),] #[('baseline')])),]
str(full.data2)
## tibble [1,565 × 7] (S3: tbl_df/tbl/data.frame)
## $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ID : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sessions : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
## $ cueing : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
## $ rating : num [1:1565] 1 2 3 3 4 4 4 4 5 5 ...
## $ items : Factor w/ 48 levels "1112_400ms.wav",..: 2 3 4 5 7 8 9 11 16 19 ...
## $ baseline : num [1:1565] 3 1 3 2 4 5 4 5 5 5 ...
has_na <- any(is.na(full.data2))
# Check if there are any missing values
if (has_na) {
print("The dataset contains missing values (NA).")
} else {
print("The dataset does not contain any missing values (NA).")
}
## [1] "The dataset does not contain any missing values (NA)."
Full.m1 <- lmer(rating ~ cueing + sessions + cueing*sessions + (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
Full.m2 <-lmer(rating ~ cueing + sessions + baseline + cueing*sessions + (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
Full.m3 <-lmer(rating ~ cueing + sessions + baseline + cueing*sessions + cueing*baseline + (1 | participant) + (1 | items) , data=full.data2 %>% mutate(rating = as.numeric(rating), baseline=as.numeric(baseline)))
anova(Full.m1, Full.m2, Full.m3)
## refitting model(s) with ML (instead of REML)
## Data: full.data2 %>% mutate(rating = as.numeric(rating), baseline = as.numeric(baseline))
## Models:
## Full.m1: rating ~ cueing + sessions + cueing * sessions + (1 | participant) + (1 | items)
## Full.m2: rating ~ cueing + sessions + baseline + cueing * sessions + (1 | participant) + (1 | items)
## Full.m3: rating ~ cueing + sessions + baseline + cueing * sessions + cueing * baseline + (1 | participant) + (1 | items)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## Full.m1 7 4260.8 4298.3 -2123.4 4246.8
## Full.m2 8 3991.7 4034.6 -1987.9 3975.7 271.0912 1 < 2e-16 ***
## Full.m3 9 3989.9 4038.1 -1986.0 3971.9 3.8387 1 0.05008 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## -- 3rd is SLIGHTLY better than the 2nd based on AIC - BIC - pvalues
tab_model(Full.m1, Full.m2, Full.m3)
| rating | rating | rating | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 2.99 | 2.61 – 3.36 | <0.001 | 1.80 | 1.49 – 2.11 | <0.001 | 1.91 | 1.58 – 2.23 | <0.001 |
| cueing [Uncued] | 0.04 | -0.08 – 0.16 | 0.467 | 0.04 | -0.07 – 0.16 | 0.442 | -0.17 | -0.41 – 0.07 | 0.169 |
| sessions [Session4] | -0.24 | -0.37 – -0.12 | <0.001 | -0.24 | -0.36 – -0.13 | <0.001 | -0.24 | -0.36 – -0.12 | <0.001 |
|
cueing [Uncued] × sessions [Session4] |
-0.05 | -0.23 – 0.12 | 0.567 | -0.05 | -0.21 – 0.12 | 0.569 | -0.05 | -0.21 – 0.11 | 0.537 |
| baseline | 0.39 | 0.34 – 0.43 | <0.001 | 0.35 | 0.30 – 0.41 | <0.001 | |||
|
cueing [Uncued] × baseline |
0.07 | -0.00 – 0.14 | 0.050 | ||||||
| Random Effects | |||||||||
| σ2 | 0.78 | 0.67 | 0.67 | ||||||
| τ00 | 0.35 items | 0.14 items | 0.14 items | ||||||
| 0.49 participant | 0.29 participant | 0.29 participant | |||||||
| ICC | 0.52 | 0.39 | 0.39 | ||||||
| N | 18 participant | 18 participant | 18 participant | ||||||
| 48 items | 48 items | 48 items | |||||||
| Observations | 1565 | 1565 | 1565 | ||||||
| Marginal R2 / Conditional R2 | 0.011 / 0.525 | 0.179 / 0.496 | 0.179 / 0.498 | ||||||
## note: including the interaction between baseline and cueing flips the estimate from Positive to Negative
full.data.high <- full.data %>% filter(baseline >= 4)
full.data.high <- full.data.high %>%
mutate(across(c(participant, ID, sessions, cueing, items), as.factor))
str(full.data.high)
## tibble [664 × 7] (S3: tbl_df/tbl/data.frame)
## $ participant: Factor w/ 18 levels "PT01","PT02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ID : Factor w/ 18 levels "0D9RN5","1R2I1Y",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ sessions : Factor w/ 2 levels "Session2","Session4": 1 1 1 1 1 1 1 1 1 1 ...
## $ cueing : Factor w/ 2 levels "Cued","Uncued": 1 1 1 1 1 1 1 1 1 1 ...
## $ rating : num [1:664] 4 4 4 4 5 5 3 3 4 3 ...
## $ items : Factor w/ 48 levels "1112_400ms.wav",..: 7 8 9 11 16 19 22 30 37 43 ...
## $ baseline : num [1:664] 4 5 4 5 5 5 5 4 5 4 ...
# Calculate summary statistics by cueing
summary_stats.high <- full.data.high %>%
group_by(cueing, sessions) %>%
summarise(
mean_rating = mean(rating, na.rm = TRUE),
median_rating = median(rating, na.rm = TRUE),
sd_rating = sd(rating, na.rm = TRUE),
se_rating = sd(rating, na.rm = TRUE) / sqrt(n())
)
## `summarise()` has grouped output by 'cueing'. You can override using the
## `.groups` argument.
print(summary_stats.high)
## # A tibble: 4 × 6
## # Groups: cueing [2]
## cueing sessions mean_rating median_rating sd_rating se_rating
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Cued Session2 3.86 4 1.07 0.0834
## 2 Cued Session4 3.52 4 1.22 0.0953
## 3 Uncued Session2 3.89 4 0.970 0.0746
## 4 Uncued Session4 3.58 4 1.02 0.0788
# lets find the maximal random effect structure
# this is the classical within-subject anova in glm form
full.high.r1 <- lmer(as.numeric(rating) ~ 1 + (1 | participant), data=full.data.high)
# this is the classical crossed random effects model where items and subjects both are allowed to have their own mean
full.high.r2 <- lmer(as.numeric(rating) ~ 1 + (1 | participant) + (1 | items) , data=full.data.high)
anova(full.high.r1, full.high.r2) ## full.high.r2 is the best
## refitting model(s) with ML (instead of REML)
## Data: full.data.high
## Models:
## full.high.r1: as.numeric(rating) ~ 1 + (1 | participant)
## full.high.r2: as.numeric(rating) ~ 1 + (1 | participant) + (1 | items)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## full.high.r1 3 1684.3 1697.6 -839.18 1678.3
## full.high.r2 4 1624.0 1641.6 -807.97 1616.0 62.408 1 2.792e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
full.high.m1 <- lmer(rating ~ cueing + sessions + cueing*sessions + (1 | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))
full.high.m2 <- lmer(rating ~ cueing + sessions + baseline + cueing*sessions + (1 | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))
full.high.m3 <- lmer(rating ~ cueing + sessions + baseline + cueing*sessions + cueing*baseline + (1 | participant) + (1 | items) , data=full.data.high %>% mutate(rating = as.numeric(rating)))
anova(full.high.m1, full.high.m2, full.high.m3) # --> full.high.m2 is the best model
## refitting model(s) with ML (instead of REML)
## Data: full.data.high %>% mutate(rating = as.numeric(rating))
## Models:
## full.high.m1: rating ~ cueing + sessions + cueing * sessions + (1 | participant) + (1 | items)
## full.high.m2: rating ~ cueing + sessions + baseline + cueing * sessions + (1 | participant) + (1 | items)
## full.high.m3: rating ~ cueing + sessions + baseline + cueing * sessions + cueing * baseline + (1 | participant) + (1 | items)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## full.high.m1 7 1606.5 1637.5 -796.28 1592.5
## full.high.m2 8 1570.0 1605.4 -777.01 1554.0 38.5363 1 5.375e-10 ***
## full.high.m3 9 1570.2 1610.0 -776.09 1552.2 1.8317 1 0.1759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tab_model(full.high.m1, full.high.m2, full.high.m3)
| rating | rating | rating | |||||||
|---|---|---|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 3.56 | 3.24 – 3.89 | <0.001 | 1.47 | 0.75 – 2.19 | <0.001 | 1.06 | 0.13 – 2.00 | 0.026 |
| cueing [Uncued] | 0.08 | -0.10 – 0.26 | 0.382 | 0.09 | -0.08 – 0.27 | 0.286 | 0.94 | -0.30 – 2.19 | 0.136 |
| sessions [Session4] | -0.33 | -0.52 – -0.15 | <0.001 | -0.33 | -0.51 – -0.15 | <0.001 | -0.33 | -0.51 – -0.15 | <0.001 |
|
cueing [Uncued] × sessions [Session4] |
0.04 | -0.22 – 0.30 | 0.756 | 0.04 | -0.21 – 0.29 | 0.762 | 0.04 | -0.21 – 0.29 | 0.757 |
| baseline | 0.49 | 0.34 – 0.64 | <0.001 | 0.58 | 0.38 – 0.79 | <0.001 | |||
|
cueing [Uncued] × baseline |
-0.19 | -0.48 – 0.09 | 0.176 | ||||||
| Random Effects | |||||||||
| σ2 | 0.65 | 0.62 | 0.62 | ||||||
| τ00 | 0.19 items | 0.14 items | 0.14 items | ||||||
| 0.33 participant | 0.31 participant | 0.32 participant | |||||||
| ICC | 0.45 | 0.42 | 0.43 | ||||||
| N | 18 participant | 18 participant | 18 participant | ||||||
| 48 items | 48 items | 48 items | |||||||
| Observations | 616 | 616 | 616 | ||||||
| Marginal R2 / Conditional R2 | 0.022 / 0.458 | 0.071 / 0.464 | 0.072 / 0.468 | ||||||
tab_model(Full.m3, full.high.m2)
| rating | rating | |||||
|---|---|---|---|---|---|---|
| Predictors | Estimates | CI | p | Estimates | CI | p |
| (Intercept) | 1.91 | 1.58 – 2.23 | <0.001 | 1.47 | 0.75 – 2.19 | <0.001 |
| cueing [Uncued] | -0.17 | -0.41 – 0.07 | 0.169 | 0.09 | -0.08 – 0.27 | 0.286 |
| sessions [Session4] | -0.24 | -0.36 – -0.12 | <0.001 | -0.33 | -0.51 – -0.15 | <0.001 |
| baseline | 0.35 | 0.30 – 0.41 | <0.001 | 0.49 | 0.34 – 0.64 | <0.001 |
|
cueing [Uncued] × sessions [Session4] |
-0.05 | -0.21 – 0.11 | 0.537 | 0.04 | -0.21 – 0.29 | 0.762 |
|
cueing [Uncued] × baseline |
0.07 | -0.00 – 0.14 | 0.050 | |||
| Random Effects | ||||||
| σ2 | 0.67 | 0.62 | ||||
| τ00 | 0.14 items | 0.14 items | ||||
| 0.29 participant | 0.31 participant | |||||
| ICC | 0.39 | 0.42 | ||||
| N | 18 participant | 18 participant | ||||
| 48 items | 48 items | |||||
| Observations | 1565 | 616 | ||||
| Marginal R2 / Conditional R2 | 0.179 / 0.498 | 0.071 / 0.464 | ||||