R Code for VocabModality

2025-10-14

Data Preprocessing

Load packages

pacman::p_load(
  readxl, tidyverse, psych, BayesFactor, gt, emmeans,
  brms, rstan, bayesplot, ggeffects, performance, gridExtra, sjPlot)

Theme setting

theme_set(theme_bw())
set.seed(123)

Load data

SpokenRecall <- read_excel("SpokenRecall.xlsx")
WrittenRecall <- read_excel("WrittenRecall.xlsx")
Weekly <- read_excel("Weekly.xlsx")

Descriptive Statistics

Weekly Quizzes

Weekly.summary <- Weekly |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Quiz, Group) |>
  summarize(
    mean_score = mean(Score) * 10, # rescale proportion correct to the 0-10 quiz scale
    sd = sd(Score),                # SD stays on the original 0-1 scale
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10, # SE rescaled by 10 to match mean_score
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Quiz'. You can override using the
## `.groups` argument.
Weekly.summary |> 
  gt()
| Quiz | Group   | mean_score | sd        | n   | se         | ci_lower | ci_upper | ci_text   |
|------|---------|------------|-----------|-----|------------|----------|----------|-----------|
| 1    | Spoken  | 8.500000   | 0.3575684 | 360 | 0.01884551 | 8.130628 | 8.869372 | [8.1,8.9] |
| 1    | Written | 8.975610   | 0.3035952 | 410 | 0.01499350 | 8.681737 | 9.269482 | [8.7,9.3] |
| 2    | Spoken  | 8.921053   | 0.3106564 | 380 | 0.01593634 | 8.608700 | 9.233405 | [8.6,9.2] |
| 2    | Written | 9.538462   | 0.2100877 | 390 | 0.01063820 | 9.329953 | 9.746970 | [9.3,9.7] |
| 3    | Spoken  | 8.868421   | 0.3172034 | 380 | 0.01627219 | 8.549486 | 9.187356 | [8.5,9.2] |
| 3    | Written | 9.307692   | 0.2541722 | 390 | 0.01287051 | 9.055430 | 9.559954 | [9.1,9.6] |
| 4    | Spoken  | 9.228571   | 0.2672000 | 350 | 0.01428244 | 8.948636 | 9.508507 | [8.9,9.5] |
| 4    | Written | 9.538462   | 0.2100877 | 390 | 0.01063820 | 9.329953 | 9.746970 | [9.3,9.7] |
| 5    | Spoken  | 8.135135   | 0.3900262 | 370 | 0.02027650 | 7.737716 | 8.532554 | [7.7,8.5] |
| 5    | Written | 8.757576   | 0.3303588 | 330 | 0.01818566 | 8.401137 | 9.114015 | [8.4,9.1] |
| 6    | Spoken  | 8.589744   | 0.3484951 | 390 | 0.01764673 | 8.243868 | 8.935620 | [8.2,8.9] |
| 6    | Written | 7.846154   | 0.4116170 | 390 | 0.02084303 | 7.437630 | 8.254677 | [7.4,8.3] |
| 7    | Spoken  | 8.694444   | 0.3373826 | 360 | 0.01778162 | 8.345925 | 9.042964 | [8.3,9]   |
| 7    | Written | 8.410256   | 0.3661219 | 390 | 0.01853930 | 8.046886 | 8.773627 | [8,8.8]   |
| 8    | Spoken  | 8.514286   | 0.3561747 | 350 | 0.01903834 | 8.141134 | 8.887437 | [8.1,8.9] |
| 8    | Written | 8.945946   | 0.3074909 | 370 | 0.01598569 | 8.632626 | 9.259265 | [8.6,9.3] |
| 9    | Spoken  | 8.631579   | 0.3441337 | 380 | 0.01765369 | 8.285567 | 8.977591 | [8.3,9]   |
| 9    | Written | 8.944444   | 0.3076956 | 360 | 0.01621698 | 8.626592 | 9.262297 | [8.6,9.3] |
| 10   | Spoken  | 7.392857   | 0.4398104 | 280 | 0.02628370 | 6.877697 | 7.908018 | [6.9,7.9] |
| 10   | Written | 8.447368   | 0.3626328 | 380 | 0.01860267 | 8.082756 | 8.811981 | [8.1,8.8] |
ggplot(Weekly.summary, aes(x = Quiz, y = mean_score, color = Group, group = Group)) +
  geom_line(linewidth = 1, position = position_dodge(width = 1)) +
  geom_point(size = 3, position = position_dodge(width = 1)) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3,
                position = position_dodge(width = 1)) +
  geom_text(aes(label = round(mean_score, 2)),
            position = position_dodge(width = 1),
            vjust = -2.5,
            size = 3) +
  geom_text(aes(label = paste0("[", round(ci_lower, 2), ", ", round(ci_upper, 2), "]")),
            position = position_dodge(width = 1),
            vjust = 3.5,
            size = 2.75) +
  scale_x_continuous(breaks = 1:10) +
  scale_y_continuous(limits = c(0, 10.5), breaks = 0:10) +
  labs(
    title = "",
    x = "Quiz Week (1-10)",
    y = "Scores for Weekly Quiz (1-10)") +
  theme_bw() +
  theme(
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    panel.border = element_rect(fill = NA, color = "black"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11)
  )

t-test for Weekly Quiz (Total Score)

Weekly.sum <- Weekly |>
  group_by(ID, Group, Quiz) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group'. You can override using the
## `.groups` argument.
t.test.quiz <- ttestBF(formula = Score ~ Group, data = Weekly.sum)
## Warning: data coerced from tibble to data frame
post.quiz <- posterior(t.test.quiz, iterations = 10000)
sigma.quiz <- sqrt(post.quiz[, "sig2"])

data.frame(
  Parameter = c("theta (mean difference)", "sigma (standard deviation)"),
  Mean = c(mean(post.quiz[, 2]), mean(sigma.quiz)),
  `CrI_Lower` = c(
    quantile(post.quiz[, 2], probs = 0.025),
    quantile(sigma.quiz, probs = 0.025)),
  `CrI_Upper` = c(
    quantile(post.quiz[, 2], probs = 0.975),
    quantile(sigma.quiz, probs = 0.975)),
  `P_negative` = c(
    mean(post.quiz[, 2] < 0),
    NA)) |>
  gt() |>
  tab_header(
    title = "Bayesian Estimation Results") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    CrI_Lower = "95% CrI Lower",
    CrI_Upper = "95% CrI Upper",
    P_negative = "P(parameter < 0)") |>
  fmt_number(
    columns = c(Mean, CrI_Lower, CrI_Upper, P_negative),
    decimals = 3) |>
  sub_missing(
    columns = P_negative,
    missing_text = "—")
Bayesian Estimation Results

| Parameter                  | Mean   | 95% CrI Lower | 95% CrI Upper | P(parameter < 0) |
|----------------------------|--------|---------------|---------------|------------------|
| theta (mean difference)    | −0.291 | −0.543        | −0.037        | 0.987            |
| sigma (standard deviation) | 1.792  | 1.703         | 1.886         | —                |
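
The table reports posterior summaries only; the Bayes factor for the group difference itself can be read off the ttestBF object. A minimal sketch, using BayesFactor's extractBF():

extractBF(t.test.quiz)$bf # BF10: alternative (group difference) vs. point null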

Practice

Descriptive statistics for practice counts

Practice.summary <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
                        Practice_W, Practice_W_Study, Practice_W_Review),
               names_to = "Type",
               values_to = "Count") |>
  mutate(Type_label = case_when(
    Type == "Practice_S" ~ "Spoken [Practice]",
    Type == "Practice_S_Study" ~ "Spoken [Study]",
    Type == "Practice_S_Review" ~ "Spoken [Review]",
    Type == "Practice_W" ~ "Written [Practice]",
    Type == "Practice_W_Study" ~ "Written [Study]",
    Type == "Practice_W_Review" ~ "Written [Review]")) |>
  group_by(Group, Type, Type_label) |>
  summarize(
    mean_score = mean(Count), # practice counts stay on their original scale
    sd = sd(Count),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se,
    ci_upper = mean_score + 1.96 * se,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]"),
    .groups = "drop")

Practice.d <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(
    cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
             Practice_W, Practice_W_Study, Practice_W_Review),
    names_to = "Type",
    values_to = "Practice") |>
  mutate(
    Type = recode(
      Type,
      Practice_S = "Spoken",
      Practice_S_Study = "Spoken_Study",
      Practice_S_Review = "Spoken_Review",
      Practice_W = "Written",
      Practice_W_Study = "Written_Study",
      Practice_W_Review = "Written_Review"),
    Type = factor(
      Type,
      levels = c("Spoken", "Spoken_Study", "Spoken_Review",
                 "Written", "Written_Study", "Written_Review")))

Practice.sum <- Practice.d |>
  group_by(ID, Group, Type) |>
  summarize(Practice = mean(Practice), .groups = "drop")

describeBy(Practice.sum$Practice,
           group = list(Group = Practice.sum$Group,
                        Type = Practice.sum$Type),
           mat = TRUE) |>
  select(group1, group2, n, mean, sd, median, min, max, se) |>
  gt() |>
  tab_header(
    title = "Descriptive Statistics by Group and Type") |>
  cols_label(
    group1 = "Group",
    group2 = "Type",
    n = "N",
    mean = "Mean",
    sd = "SD",
    median = "Median",
    min = "Min",
    max = "Max",
    se = "SE") |>
  fmt_number(
    columns = c(mean, sd, median, min, max, se),
    decimals = 2)
Descriptive Statistics by Group and Type

| Group   | Type           | N  | Mean | SD   | Median | Min  | Max   | SE   |
|---------|----------------|----|------|------|--------|------|-------|------|
| Spoken  | Spoken         | 30 | 2.25 | 2.43 | 1.43   | 0.00 | 9.26  | 0.44 |
| Written | Spoken         | 34 | 0.54 | 0.55 | 0.26   | 0.00 | 1.80  | 0.10 |
| Spoken  | Spoken_Study   | 30 | 1.18 | 0.94 | 1.17   | 0.00 | 4.08  | 0.17 |
| Written | Spoken_Study   | 34 | 0.30 | 0.45 | 0.10   | 0.00 | 1.90  | 0.08 |
| Spoken  | Spoken_Review  | 30 | 1.31 | 2.36 | 0.39   | 0.00 | 10.48 | 0.43 |
| Written | Spoken_Review  | 34 | 0.31 | 0.40 | 0.11   | 0.00 | 1.52  | 0.07 |
| Spoken  | Written        | 30 | 8.14 | 4.71 | 6.27   | 1.56 | 19.06 | 0.86 |
| Written | Written        | 34 | 8.09 | 4.59 | 8.00   | 1.12 | 18.18 | 0.79 |
| Spoken  | Written_Study  | 30 | 4.45 | 2.01 | 3.66   | 1.26 | 8.58  | 0.37 |
| Written | Written_Study  | 34 | 1.29 | 1.44 | 0.77   | 0.00 | 7.20  | 0.25 |
| Spoken  | Written_Review | 30 | 3.95 | 3.75 | 2.99   | 0.20 | 17.08 | 0.68 |
| Written | Written_Review | 34 | 7.27 | 4.65 | 7.02   | 0.96 | 21.24 | 0.80 |
ggplot(Practice.sum, aes(x = Group, y = Practice, color = Group)) +
  geom_jitter(width = 0.4, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "black") + 
  xlab("Group") +
  ylab("Practice / Review Counts") +
  facet_wrap(~Type, scales = "free") + theme_bw() +
  theme(legend.position = "bottom") +
  stat_summary(fun = mean, geom = "text",  
               aes(label = round(after_stat(y), 2), group = Group),  
               vjust = -2, size = 4, color = "black",  
               position = position_dodge(width = 0.8)) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",  
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]"), 
                   group = Group),  
               vjust = -1, size = 3, color = "black",  
               position = position_dodge(width = 1))

ggplot(subset(Practice.sum, Type %in% c("Spoken", "Written")), 
       aes(x = Group, y = Practice, color = Group)) +
  geom_jitter(width = 0.4, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "black") + 
  xlab("Group") +
  ylab("Practice Counts (Spoken & Written)") +
  facet_wrap(~Type, scales = "free") +
  theme_bw() +
  theme(legend.position = "bottom") +
  stat_summary(fun = mean, geom = "text",  
               aes(label = round(after_stat(y), 2), group = Group),  
               vjust = -2, size = 4, color = "black",  
               position = position_dodge(width = 0.8)) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",  
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]"), 
                   group = Group),  
               vjust = -1, size = 3, color = "black",  
               position = position_dodge(width = 1))

anova.practice.d <- Practice.sum |>
  filter(Type %in% c("Spoken", "Written")) |>
  mutate(
    across(c(Group, Type), as.factor),
    PracticeLog = log(Practice + 0.001)) # small offset avoids log(0) for zero-practice cases

anova.practice <- anovaBF(
  PracticeLog ~ Group * Type,
  data = as.data.frame(anova.practice.d)) # Bayesian ANOVA on log practice counts

anova.practice |>
  BayesFactor::extractBF(logbf = FALSE) |>
  as_tibble(rownames = "Model") |>
  select(-time, -code) |> # drop run-metadata columns (timestamp, internal code)
  mutate(PosteriorProbability = bf / sum(bf)) |>
  rename(
    BF10 = bf,
    MCMC_Error = error) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Model Comparison",
    subtitle = "PracticeLog ~ Group × Type") |>
  cols_label(
    Model = "Model",
    BF10 = "BF10",
    PosteriorProbability = "Posterior Probability",
    MCMC_Error = "MCMC Error") |>
  fmt_scientific(
    columns = c(BF10, MCMC_Error),
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 3)
Bayesian ANOVA Model Comparison
PracticeLog ~ Group × Type

| Model                     | BF10         | MCMC Error    | Posterior Probability |
|---------------------------|--------------|---------------|-----------------------|
| Group                     | 1.38         | 1.23 × 10^−4  | 0.000                 |
| Type                      | 3.39 × 10^14 | 1.47 × 10^−22 | 0.022                 |
| Group + Type              | 2.19 × 10^15 | 1.74 × 10^−2  | 0.145                 |
| Group + Type + Group:Type | 1.26 × 10^16 | 3.90 × 10^−2  | 0.833                 |
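
Because every model above shares the intercept-only denominator, the specific evidence for the interaction can be isolated by dividing the full model's BF by that of the main-effects model. A minimal sketch using BayesFactor's object arithmetic:

# BF for adding Group:Type on top of Group + Type
anova.practice[4] / anova.practice[3]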

Recall Test: Pretest & Posttest

Spoken.summary <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Test, Group) |>
  summarize(
    mean_score = mean(Score) * 10,
    sd = sd(Score),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10,
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Test'. You can override using the
## `.groups` argument.
Written.summary <- WrittenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Test, Group) |>
  summarize(
    mean_score = mean(Score) * 10,
    sd = sd(Score),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10,
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Test'. You can override using the
## `.groups` argument.
Spoken.sum <- SpokenRecall |>
  group_by(ID, Group, Test, Mode) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
Written.sum <- WrittenRecall |>
  group_by(ID, Group, Test, Mode) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
Test.sum <- rbind(Spoken.sum, Written.sum)
Test.sum$Test <- factor(Test.sum$Test, levels = c("Pre", "Post"), labels = c("Pretest", "Posttest"))
Test.sum$Group <- factor(Test.sum$Group, levels = c("Spoken", "Written"), labels = c("Spoken [Group]", "Written [Group]"))
Test.sum$Mode <- factor(Test.sum$Mode, levels = c("Spoken", "Written"), labels = c("Spoken [Test]", "Written [Test]"))

Test.t <- ttestBF(formula = Score ~ Group, data = subset(Test.sum, Test=="Pretest"))
## Warning: data coerced from tibble to data frame
post.Test <- posterior(Test.t, iterations = 1000000L)

summary_combined <- rbind(
  Spoken.summary |> mutate(Recall_Type = "Spoken Recall"),
  Written.summary |> mutate(Recall_Type = "Written Recall"))

summary_combined |>
  select(Recall_Type, Test, Group, n, mean_score, sd, se, ci_text) |>
  gt() |>
  tab_header(
    title = "Descriptive Statistics by Test and Group") |>
  cols_label(
    Recall_Type = "Recall Type",
    Test = "Test",
    Group = "Group",
    n = "N",
    mean_score = "Mean",
    sd = "SD",
    se = "SE",
    ci_text = "95% CI") |>
  fmt_number(
    columns = c(mean_score, sd, se),
    decimals = 2) |>
  tab_options(
    table.font.size = 12)
Descriptive Statistics by Test and Group

| Recall Type    | Test | Group   | N    | Mean | SD   | SE   | 95% CI    |
|----------------|------|---------|------|------|------|------|-----------|
| Spoken Recall  | Post | Spoken  | 1500 | 5.69 | 0.50 | 0.01 | [5.4,5.9] |
| Spoken Recall  | Post | Written | 1700 | 5.01 | 0.50 | 0.01 | [4.8,5.2] |
| Spoken Recall  | Pre  | Spoken  | 1500 | 1.27 | 0.33 | 0.01 | [1.1,1.4] |
| Spoken Recall  | Pre  | Written | 1700 | 1.64 | 0.37 | 0.01 | [1.5,1.8] |
| Written Recall | Post | Spoken  | 1500 | 5.94 | 0.49 | 0.01 | [5.7,6.2] |
| Written Recall | Post | Written | 1700 | 5.68 | 0.50 | 0.01 | [5.4,5.9] |
| Written Recall | Pre  | Spoken  | 1500 | 2.21 | 0.42 | 0.01 | [2,2.4]   |
| Written Recall | Pre  | Written | 1700 | 2.22 | 0.42 | 0.01 | [2,2.4]   |
data.frame(
  Parameter = "theta (mean difference)",
  Mean = mean(post.Test[, 2]),
  SD = sd(post.Test[, 2]),
  CI_Lower = quantile(post.Test[, 2], probs = 0.025),
  CI_Upper = quantile(post.Test[, 2], probs = 0.975),
  P_negative = mean(post.Test[, 2] < 0),
  P_positive = mean(post.Test[, 2] > 0)) |>
  gt() |>
  tab_header(
    title = "Bayesian t-test Results",
    subtitle = "Pretest: Spoken vs Written Group") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    SD = "SD",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper",
    P_negative = "P(θ < 0)",
    P_positive = "P(θ > 0)") |>
  fmt_number(
    columns = c(Mean, SD, CI_Lower, CI_Upper, P_negative, P_positive),
    decimals = 3)
Bayesian t-test Results
Pretest: Spoken vs Written Group

| Parameter               | Mean   | SD    | 95% CrI Lower | 95% CrI Upper | P(θ < 0) | P(θ > 0) |
|-------------------------|--------|-------|---------------|---------------|----------|----------|
| theta (mean difference) | −0.855 | 1.001 | −2.835        | 1.106         | 0.805    | 0.195    |
summary_combined |>
  select(Recall_Type, Test, Group, mean_score, ci_text) |>
  tidyr::pivot_wider(
    names_from = Group,
    values_from = c(mean_score, ci_text),
    names_sep = "_") |>
  gt() |>
  tab_header(
    title = "Mean Scores and 95% Confidence Intervals") |> # normal-approximation CIs, not posterior intervals
  cols_label(
    Recall_Type = "Recall Type",
    Test = "Test",
    mean_score_Spoken = "Spoken Mean",
    ci_text_Spoken = "Spoken 95% CI",
    mean_score_Written = "Written Mean",
    ci_text_Written = "Written 95% CI") |>
  fmt_number(
    columns = starts_with("mean_score"),
    decimals = 2)
Mean Scores and 95% Confidence Intervals

| Recall Type    | Test | Spoken Mean | Written Mean | Spoken 95% CI | Written 95% CI |
|----------------|------|-------------|--------------|---------------|----------------|
| Spoken Recall  | Post | 5.69        | 5.01         | [5.4,5.9]     | [4.8,5.2]      |
| Spoken Recall  | Pre  | 1.27        | 1.64         | [1.1,1.4]     | [1.5,1.8]      |
| Written Recall | Post | 5.94        | 5.68         | [5.7,6.2]     | [5.4,5.9]      |
| Written Recall | Pre  | 2.21        | 2.22         | [2,2.4]       | [2,2.4]        |

Bayesian ANOVA

anova.test <- anovaBF(
  Score ~ Group * Mode,
  data = subset(Test.sum, Test == "Pretest"), 
  whichRandom = "ID")
## Warning: data coerced from tibble to data frame
# Calculate Bayes factors and posterior model probabilities
# (posterior probabilities assume equal prior odds across the four models)
BF <- as.vector(anova.test) # named vector of BFs against the intercept-only model
post_probs <- BF / sum(BF)

anova.results <- data.frame(
  Numerator = names(BF),
  Denominator = "Intercept only",
  BF = as.numeric(BF),
  PosteriorProbability = post_probs)

# Then create the table
anova.results |>
  arrange(desc(PosteriorProbability)) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Results",
    subtitle = "Model Comparison for Pretest Scores") |>
  cols_label(
    Numerator = "Model",
    Denominator = "Baseline",
    BF = "Bayes Factor",
    PosteriorProbability = "Posterior Probability") |>
  fmt_scientific(
    columns = BF,
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 4) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Models are sorted by posterior probability (highest to lowest)",
    locations = cells_title())
Bayesian ANOVA Results
Model Comparison for Pretest Scores

| Model                     | Baseline       | Bayes Factor | Posterior Probability |
|---------------------------|----------------|--------------|-----------------------|
| Mode                      | Intercept only | 1.00 × 10^2  | 0.7199                |
| Group + Mode              | Intercept only | 2.87 × 10^1  | 0.2066                |
| Group + Mode + Group:Mode | Intercept only | 9.93         | 0.0715                |
| Group                     | Intercept only | 2.74 × 10^−1 | 0.0020                |

Note: Models are sorted by posterior probability (highest to lowest).

Internal Consistency

PreA_S <- SpokenRecall[SpokenRecall$List == "A" & SpokenRecall$Test == "Pre", c(1, 4, 5)] # Extract Spoken Pretest A
PreB_S <- SpokenRecall[SpokenRecall$List == "B" & SpokenRecall$Test == "Pre", c(1, 4, 5)] # Extract Spoken Pretest B
PostA_S <- SpokenRecall[SpokenRecall$List == "A" & SpokenRecall$Test == "Post", c(1, 4, 5)] # Extract Spoken Posttest A
PostB_S <- SpokenRecall[SpokenRecall$List == "B" & SpokenRecall$Test == "Post", c(1, 4, 5)] # Extract Spoken Posttest B
PreA_W <- WrittenRecall[WrittenRecall$List == "A" & WrittenRecall$Test == "Pre", c(1, 4, 5)] # Extract Written Pretest A
PreB_W <- WrittenRecall[WrittenRecall$List == "B" & WrittenRecall$Test == "Pre", c(1, 4, 5)] # Extract Written Pretest B
PostA_W <- WrittenRecall[WrittenRecall$List == "A" & WrittenRecall$Test == "Post", c(1, 4, 5)] # Extract Written Posttest A
PostB_W <- WrittenRecall[WrittenRecall$List == "B" & WrittenRecall$Test == "Post", c(1, 4, 5)] # Extract Written Posttest B
PreA_S <- PreA_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PreA_S into the wide format
PreB_S <- PreB_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PreB_S into the wide format
PostA_S <- PostA_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PostA_S into the wide format
PostB_S <- PostB_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PostB_S into the wide format
PreA_W <- PreA_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PreA_W into the wide format
PreB_W <- PreB_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PreB_W into the wide format
PostA_W <- PostA_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PostA_W into the wide format
PostB_W <- PostB_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PostB_W into the wide format

# Calculate all alpha values (suppress warnings for cleaner output)
alpha_PreA_S <- suppressWarnings(psych::alpha(PreA_S[2:34]))
## Some items ( revive assembly contempt incline simplicity savage undo brutal ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PreB_S <- suppressWarnings(psych::alpha(PreB_S[2:30]))
## Some items ( prominent fatigue strain acquisition resign ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## In smc, smcs < 0 were set to .0
## (the line above was printed 15 times; repeats omitted)
alpha_PreA_W <- suppressWarnings(psych::alpha(PreA_W[2:34]))
## Some items ( limestone moderate shiver convey tolerate elastic plunge bankruptcy ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PreB_W <- suppressWarnings(psych::alpha(PreB_W[2:30]))
alpha_PostA_S <- suppressWarnings(psych::alpha(PostA_S[2:34]))
## Some items ( vacant ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PostB_S <- suppressWarnings(psych::alpha(PostB_S[2:30]))
## Some items ( elderly ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PostA_W <- suppressWarnings(psych::alpha(PostA_W[2:34]))
alpha_PostB_W <- suppressWarnings(psych::alpha(PostB_W[2:30]))

data.frame(
  Test = c("Pretest", "Pretest", "Pretest", "Pretest",
           "Posttest", "Posttest", "Posttest", "Posttest"),
  Mode = c("Spoken", "Spoken", "Written", "Written",
           "Spoken", "Spoken", "Written", "Written"),
  List = c("A", "B", "A", "B", "A", "B", "A", "B"),
  N_Items = c(33, 29, 33, 29, 33, 29, 33, 29),
  Raw_Alpha = c(
    alpha_PreA_S$total$raw_alpha,
    alpha_PreB_S$total$raw_alpha,
    alpha_PreA_W$total$raw_alpha,
    alpha_PreB_W$total$raw_alpha,
    alpha_PostA_S$total$raw_alpha,
    alpha_PostB_S$total$raw_alpha,
    alpha_PostA_W$total$raw_alpha,
    alpha_PostB_W$total$raw_alpha),
  Std_Alpha = c(
    alpha_PreA_S$total$std.alpha,
    alpha_PreB_S$total$std.alpha,
    alpha_PreA_W$total$std.alpha,
    alpha_PreB_W$total$std.alpha,
    alpha_PostA_S$total$std.alpha,
    alpha_PostB_S$total$std.alpha,
    alpha_PostA_W$total$std.alpha,
    alpha_PostB_W$total$std.alpha),
  Average_r = c(
    alpha_PreA_S$total$average_r,
    alpha_PreB_S$total$average_r,
    alpha_PreA_W$total$average_r,
    alpha_PreB_W$total$average_r,
    alpha_PostA_S$total$average_r,
    alpha_PostB_S$total$average_r,
    alpha_PostA_W$total$average_r,
    alpha_PostB_W$total$average_r)) |>
  gt() |>
  tab_header(
    title = "Internal Consistency Reliability (Cronbach's Alpha)",
    subtitle = "By Test, Mode, and List") |>
  cols_label(
    Test = "Test",
    Mode = "Mode",
    List = "List",
    N_Items = "N Items",
    Raw_Alpha = "Raw α",
    Std_Alpha = "Std. α",
    Average_r = "Average r") |>
  fmt_number(
    columns = c(Raw_Alpha, Std_Alpha, Average_r),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Note: Some items had no variance or negative correlations with the first PC.",
    locations = cells_title()) |>
  tab_style(
    style = list(
      cell_fill(color = "#f0f0f0")),
    locations = cells_body(
      rows = Test == "Pretest"))
Internal Consistency Reliability (Cronbach's Alpha)
By Test, Mode, and List

| Test     | Mode    | List | N Items | Raw α | Std. α | Average r |
|----------|---------|------|---------|-------|--------|-----------|
| Pretest  | Spoken  | A    | 33      | 0.539 | 0.429  | 0.032     |
| Pretest  | Spoken  | B    | 29      | 0.636 | 0.621  | 0.069     |
| Pretest  | Written | A    | 33      | 0.675 | 0.665  | 0.060     |
| Pretest  | Written | B    | 29      | 0.827 | 0.838  | 0.172     |
| Posttest | Spoken  | A    | 33      | 0.842 | 0.839  | 0.137     |
| Posttest | Spoken  | B    | 29      | 0.832 | 0.837  | 0.155     |
| Posttest | Written | A    | 33      | 0.874 | 0.886  | 0.190     |
| Posttest | Written | B    | 29      | 0.854 | 0.853  | 0.166     |

Note: Some items had no variance or negative correlations with the first PC.

Practice ANOVA (Replication)

Practice.anova <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(
    cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
             Practice_W, Practice_W_Study, Practice_W_Review),
    names_to = "Type",
    values_to = "Practice") |>
  mutate(
    Type = recode(
      Type,
      Practice_S = "Spoken",
      Practice_S_Study = "Spoken_Study",
      Practice_S_Review = "Spoken_Review",
      Practice_W = "Written",
      Practice_W_Study = "Written_Study",
      Practice_W_Review = "Written_Review"),
    Type = factor(
      Type,
      levels = c("Spoken", "Spoken_Study", "Spoken_Review",
                 "Written", "Written_Study", "Written_Review"))) |>
  group_by(ID, Group, Type) |>
  summarize(Practice = mean(Practice), .groups = "drop") |>
  filter(Type %in% c("Spoken", "Written")) |>
  mutate(
    Group = factor(Group),
    Type = factor(Type),
    PracticeLog = log(Practice + 0.001))

anova.practice <- anovaBF(
  PracticeLog ~ Group * Type,
  data = Practice.anova |> as.data.frame()) # ANOVA
anova.practice |>
  BayesFactor::extractBF(logbf = FALSE) |>
  as_tibble(rownames = "Model") |>
  select(-time, -code) |> # drop run-metadata columns (timestamp, internal code)
  mutate(PosteriorProbability = bf / sum(bf)) |>
  rename(
    BF10 = bf,
    MCMC_Error = error) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Model Comparison",
    subtitle = "PracticeLog ~ Group × Type (replication)") |>
  cols_label(
    Model = "Model",
    BF10 = "BF10",
    PosteriorProbability = "Posterior Probability",
    MCMC_Error = "MCMC Error") |>
  fmt_scientific(
    columns = c(BF10, MCMC_Error),
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 3)
Bayesian ANOVA Model Comparison
PracticeLog ~ Group × Type (replication)

| Model                     | BF10         | MCMC Error    | Posterior Probability |
|---------------------------|--------------|---------------|-----------------------|
| Group                     | 1.38         | 1.23 × 10^−4  | 0.000                 |
| Type                      | 3.39 × 10^14 | 1.47 × 10^−22 | 0.022                 |
| Group + Type              | 2.31 × 10^15 | 3.00 × 10^−2  | 0.152                 |
| Group + Type + Group:Type | 1.25 × 10^16 | 1.21 × 10^−2  | 0.825                 |

TOEFL ITP: Posttest Scores

toefl_data <- SpokenRecall |>
  filter(Test == "Post") |>
  select(ID, Group, TOEFL) |>
  distinct()

describeBy(toefl_data$TOEFL, 
           group = toefl_data$Group,
           mat = TRUE,
           digits = 2) |>
  as.data.frame() |>
  select(group1, n, mean, sd, median, min, max, se) |>
  gt() |>
  tab_header(
    title = "TOEFL Descriptive Statistics by Group") |>
  cols_label(
    group1 = "Group",
    n = "N",
    mean = "Mean",
    sd = "SD",
    median = "Median",
    min = "Min",
    max = "Max",
    se = "SE") |>
  fmt_number(
    columns = c(mean, sd, median, se),
    decimals = 2) |>
  fmt_number(
    columns = c(min, max),
    decimals = 0) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16)
TOEFL Descriptive Statistics by Group

| Group   | N  | Mean   | SD    | Median | Min | Max | SE   |
|---------|----|--------|-------|--------|-----|-----|------|
| Spoken  | 30 | 511.03 | 27.23 | 510.00 | 453 | 573 | 4.97 |
| Written | 34 | 520.79 | 22.65 | 520.00 | 460 | 553 | 3.88 |
subset(SpokenRecall, Test == "Post") |>
  group_by(ID, Group, Test, Mode) |>
  summarise(TOEFL = mean(TOEFL)) |> 
  ggplot(aes(x = Group, y = TOEFL, color = Group, group = Group)) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = 'black') +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1, color = "black") +
  stat_summary(fun = mean, geom = "text",
               aes(label = round(after_stat(y), 2)),
               vjust = -4.5, color = "black", size = 3.5) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]")),
               vjust = 5.5, color = "black", size = 3) +
  xlab("Group") +
  ylab("TOEFL ITP Score") +
  theme_bw() +
  theme(legend.position = "bottom")
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.

Bayesian t-test

TOEFL.sum <- subset(SpokenRecall, Test == "Post") |>
  group_by(ID, Group, Test, Mode) |>
  summarise(TOEFL = mean(TOEFL))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
t.test.toefl <- ttestBF(formula = TOEFL ~ Group, data = TOEFL.sum)
## Warning: data coerced from tibble to data frame
post.toefl <- posterior(t.test.toefl, iterations = 10000)
sigma.toefl <- sqrt(post.toefl[, "sig2"])

data.frame(
  Parameter = c("theta (mean difference)", "sigma (standard deviation)"),
  Mean = c(mean(post.toefl[, 2]), mean(sigma.toefl)),
  SD = c(sd(post.toefl[, 2]), sd(sigma.toefl)),
  CI_Lower = c(
    quantile(post.toefl[, 2], probs = 0.025),
    quantile(sigma.toefl, probs = 0.025)),
  CI_Upper = c(
    quantile(post.toefl[, 2], probs = 0.975),
    quantile(sigma.toefl, probs = 0.975)),
  P_negative = c(
    mean(post.toefl[, 2] < 0),
    NA)) |>
  gt() |>
  tab_header(
    title = "Bayesian t-test Results for TOEFL Scores",
    subtitle = "Group Comparison") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    SD = "SD",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper",
    P_negative = "P(parameter < 0)") |>
  fmt_number(
    columns = c(Mean, SD, CI_Lower, CI_Upper, P_negative),
    decimals = 3) |>
  sub_missing(
    columns = P_negative,
    missing_text = "—") |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16)
Bayesian t-test Results for TOEFL Scores
Group Comparison

| Parameter                  | Mean   | SD    | 95% CrI Lower | 95% CrI Upper | P(parameter < 0) |
|----------------------------|--------|-------|---------------|---------------|------------------|
| theta (mean difference)    | −8.334 | 5.972 | −20.237       | 3.266         | 0.922            |
| sigma (standard deviation) | 25.081 | 2.282 | 21.125        | 30.044        | —                |
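
The posterior draws from ttestBF also include the standardized effect size delta, which expresses the group difference in standard-deviation units. A minimal sketch:

# Posterior median and 95% CrI for the standardized difference
quantile(post.toefl[, "delta"], probs = c(0.025, 0.5, 0.975))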

Modeling

rstan_options(auto_write = TRUE) # Cache compiled Stan models on disk
options(mc.cores = parallel::detectCores()) # Run MCMC chains in parallel across cores

Preprocessing data

Spoken.pre <- SpokenRecall |>
  filter(Test == "Pre") |>
  select(ID, Word, Pre = Score)

Spoken.post <- SpokenRecall |>
  filter(Test == "Post") |>
  left_join(Spoken.pre, by = c("ID", "Word"))

Written.pre <- WrittenRecall |>
  filter(Test == "Pre") |>
  select(ID, Word, Pre = Score)

Written.post <- WrittenRecall |>
  filter(Test == "Post") |>
  left_join(Written.pre, by = c("ID", "Word"))

Spoken.post$zTOEFL <- as.numeric(scale(Spoken.post$TOEFL)) # z-standardize; as.numeric() drops scale()'s matrix class
Written.post$zTOEFL <- as.numeric(scale(Written.post$TOEFL))
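
A quick sanity check that the left joins matched every posttest item to a pretest score; any NA in Pre would indicate an unmatched ID-by-Word pair. A minimal sketch:

sum(is.na(Spoken.post$Pre))  # should be 0
sum(is.na(Written.post$Pre)) # should be 0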

Prior setting

Prior <- c(
  prior(normal(0, 2), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(cauchy(0, 1), class = sd))
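
To confirm which parameters these three classes cover before sampling, the default prior table can be inspected with get_prior(), which takes the same formula, data, and family as brm(). A minimal sketch for the spoken model:

brms::get_prior(
  Score ~ Group * Practice_S * Pre + Group * Practice_W * Pre +
    zTOEFL * Pre + (1 | ID) + (1 | Word),
  data = Spoken.post,
  family = bernoulli(link = "logit"))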

Spoken Recall

Fit the spoken model

Fit.spoken <- brm(
  Score ~  
    Group * Practice_S * Pre +
    Group * Practice_W * Pre +
    zTOEFL * Pre +
    (1 | ID) +
    (1 | Word),
  family = bernoulli(link = "logit"),
  iter = 10000, warmup = 5000, chains = 4,
  prior = Prior,
  data = Spoken.post,
  file = "Fit_spoken.rds",
  seed = 123)
## Compiling Stan program...
## Start sampling

Results

fit_summary <- summary(Fit.spoken)

# Extract fixed effects with diagnostics
fixed_effects <- fit_summary$fixed
fixed_df <- as.data.frame(fixed_effects)
fixed_df$Parameter <- rownames(fixed_df)

# Create table with gt
fixed_df |>
  select(Parameter, Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat, Bulk_ESS, Tail_ESS) |>
  gt() |>
  tab_header(
    title = "Bayesian Logistic Regression Results with Diagnostics",
    subtitle = "Spoken Recall Model (Logit Scale)") |>
  cols_label(
    Parameter = "Parameter",
    Estimate = "Estimate",
    Est.Error = "SE",
    `l-95% CI` = "95% CrI Lower",
    `u-95% CI` = "95% CrI Upper",
    Rhat = "R-hat",
    Bulk_ESS = "Bulk ESS",
    Tail_ESS = "Tail ESS") |>
  fmt_number(
    columns = c(Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat),
    decimals = 3) |>
  fmt_number(
    columns = c(Bulk_ESS, Tail_ESS),
    decimals = 0) |>
  tab_style(
    style = list(
      cell_fill(color = "lightyellow"),
      cell_text(weight = "bold")),
    locations = cells_body(
      columns = Rhat,
      rows = Rhat > 1.01)) |>
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      columns = c(Bulk_ESS, Tail_ESS),
      rows = Bulk_ESS < 400 | Tail_ESS < 400)) |>
  tab_options(
    table.font.size = 10,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "R-hat > 1.01 highlighted in yellow; ESS < 400 highlighted in pink",
    locations = cells_title())
Bayesian Logistic Regression Results with Diagnostics
Spoken Recall Model (Logit Scale)

| Parameter                   | Estimate | SE    | 95% CrI Lower | 95% CrI Upper | R-hat | Bulk ESS | Tail ESS |
|-----------------------------|----------|-------|---------------|---------------|-------|----------|----------|
| Intercept                   | −0.502   | 0.243 | −0.975        | −0.025        | 1.001 | 6,296    | 10,148   |
| GroupWritten                | −0.522   | 0.289 | −1.086        | 0.043         | 1.000 | 7,245    | 10,932   |
| Practice_S                  | 0.149    | 0.036 | 0.080         | 0.220         | 1.000 | 15,384   | 14,996   |
| Pre                         | 1.783    | 0.435 | 0.929         | 2.632         | 1.000 | 11,964   | 13,903   |
| Practice_W                  | 0.061    | 0.015 | 0.031         | 0.091         | 1.000 | 11,777   | 14,070   |
| zTOEFL                      | 0.399    | 0.121 | 0.161         | 0.637         | 1.001 | 6,250    | 9,837    |
| GroupWritten:Practice_S     | 0.039    | 0.126 | −0.207        | 0.286         | 1.000 | 14,567   | 13,975   |
| GroupWritten:Pre            | 0.429    | 0.522 | −0.590        | 1.449         | 1.000 | 11,660   | 13,706   |
| Practice_S:Pre              | −0.054   | 0.130 | −0.282        | 0.222         | 1.000 | 19,895   | 14,266   |
| GroupWritten:Practice_W     | 0.009    | 0.020 | −0.030        | 0.048         | 1.000 | 12,509   | 12,816   |
| Pre:Practice_W              | 0.032    | 0.053 | −0.064        | 0.143         | 1.000 | 12,499   | 13,456   |
| Pre:zTOEFL                  | −0.156   | 0.187 | −0.525        | 0.210         | 1.001 | 26,686   | 14,285   |
| GroupWritten:Practice_S:Pre | −0.444   | 0.346 | −1.108        | 0.251         | 1.000 | 21,076   | 14,881   |
| GroupWritten:Pre:Practice_W | 0.018    | 0.070 | −0.119        | 0.153         | 1.000 | 12,595   | 13,287   |

Note: R-hat > 1.01 and ESS < 400 would be flagged; no parameter meets either criterion here.

Extract R² values

r2_spoken <- r2_bayes(Fit.spoken, ci = 0.95)

data.frame(
  Type = c("Conditional R²", "Marginal R²"),
  R2 = c(r2_spoken$R2_Bayes, r2_spoken$R2_Bayes_marginal),
  SE = c(attr(r2_spoken, "SE")$R2_Bayes, 
         attr(r2_spoken, "SE")$R2_Bayes_marginal),
  CI_Lower = c(attr(r2_spoken, "CI")$R2_Bayes$CI_low,
               attr(r2_spoken, "CI")$R2_Bayes_marginal$CI_low),
  CI_Upper = c(attr(r2_spoken, "CI")$R2_Bayes$CI_high,
               attr(r2_spoken, "CI")$R2_Bayes_marginal$CI_high)) |>
  gt() |>
  tab_header(
    title = "Bayesian R² for Spoken Recall Model") |>
  cols_label(
    Type = "R² Type",
    R2 = "Estimate",
    SE = "SE",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper") |>
  fmt_number(
    columns = c(R2, SE, CI_Lower, CI_Upper),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.",
    locations = cells_column_labels(columns = Type))
Bayesian R² for Spoken Recall Model

| R² Type        | Estimate | SE    | 95% CrI Lower | 95% CrI Upper |
|----------------|----------|-------|---------------|---------------|
| Conditional R² | 0.373    | 0.009 | 0.355         | 0.391         |
| Marginal R²    | 0.174    | 0.018 | 0.137         | 0.207         |

Note: Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.
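
As a complement to R², approximate leave-one-out cross-validation estimates out-of-sample predictive performance. A minimal sketch (with 20,000 post-warmup draws this can take a while and may warn about high Pareto-k observations):

loo_spoken <- loo(Fit.spoken) # PSIS-LOO for the fitted brms model
print(loo_spoken)
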
post_spoken <- as_draws_df(Fit.spoken)

directional_probs_spoken <- post_spoken |>
  summarize(
    `P(b_Intercept < 0)` = mean(`b_Intercept` < 0),
    `P(b_GroupWritten < 0)` = mean(`b_GroupWritten` < 0),
    `P(b_Practice_S > 0)` = mean(`b_Practice_S` > 0),
    `P(b_Practice_W > 0)` = mean(`b_Practice_W` > 0),
    `P(b_Pre > 0)` = mean(`b_Pre` > 0),
    `P(b_zTOEFL > 0)` = mean(`b_zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S > 0)` = mean(`b_GroupWritten:Practice_S` > 0),
    `P(b_GroupWritten:Practice_W > 0)` = mean(`b_GroupWritten:Practice_W` > 0),
    `P(b_GroupWritten:Pre > 0)` = mean(`b_GroupWritten:Pre` > 0),
    `P(b_Practice_S:Pre < 0)` = mean(`b_Practice_S:Pre` < 0),
    `P(b_Pre:Practice_W > 0)` = mean(`b_Pre:Practice_W` > 0),
    `P(b_Pre:zTOEFL < 0)` = mean(`b_Pre:zTOEFL` < 0),
    `P(b_GroupWritten:Practice_S:Pre < 0)` = mean(`b_GroupWritten:Practice_S:Pre` < 0),
    `P(b_GroupWritten:Pre:Practice_W > 0)` = mean(`b_GroupWritten:Pre:Practice_W` > 0),
    `P(sd_ID__Intercept > 0)` = mean(`sd_ID__Intercept` > 0),
    `P(sd_Word__Intercept > 0)` = mean(`sd_Word__Intercept` > 0)) |>
  pivot_longer(
    everything(),
    names_to = "Hypothesis",
    values_to = "Probability")

directional_probs_spoken |>
  arrange(desc(Probability)) |>
  gt() |>
  tab_header(
    title = "Directional Posterior Probabilities",
    subtitle = "Spoken Recall Model") |>
  cols_label(
    Hypothesis = "Hypothesis",
    Probability = "Probability") |>
  fmt_number(
    columns = Probability,
    decimals = 2)
Directional Posterior Probabilities
Spoken Recall Model

| Hypothesis                           | Probability |
|--------------------------------------|-------------|
| P(b_Practice_S > 0)                  | 1.00        |
| P(b_Practice_W > 0)                  | 1.00        |
| P(sd_ID__Intercept > 0)              | 1.00        |
| P(sd_Word__Intercept > 0)            | 1.00        |
| P(b_Pre > 0)                         | 1.00        |
| P(b_zTOEFL > 0)                      | 1.00        |
| P(b_Intercept < 0)                   | 0.98        |
| P(b_GroupWritten < 0)                | 0.96        |
| P(b_GroupWritten:Practice_S:Pre < 0) | 0.90        |
| P(b_Pre:zTOEFL < 0)                  | 0.80        |
| P(b_GroupWritten:Pre > 0)            | 0.79        |
| P(b_Pre:Practice_W > 0)              | 0.71        |
| P(b_Practice_S:Pre < 0)              | 0.68        |
| P(b_GroupWritten:Practice_W > 0)     | 0.67        |
| P(b_GroupWritten:Practice_S > 0)     | 0.62        |
| P(b_GroupWritten:Pre:Practice_W > 0) | 0.60        |
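
The same directional probabilities can be computed one at a time with brms::hypothesis(), which additionally reports an evidence ratio. A minimal sketch for a single coefficient:

# Post.Prob should match P(b_Practice_S > 0) in the table above
hypothesis(Fit.spoken, "Practice_S > 0")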

Visualization of effects

S_predict_S <- ggpredict(Fit.spoken, terms = c("Practice_S", "Group", "Pre [0]"))  
## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(S_predict_S) <- c("Practice_S", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
S_filter_S <- S_predict_S |>
  group_by(Group) |>
  filter(Practice_S >= min(SpokenRecall$Practice_S[SpokenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_S <= max(SpokenRecall$Practice_S[SpokenRecall$Group == unique(Group)], na.rm = TRUE))

S_pd <- position_dodge(width = 0.6)
S_predict_S_p <-
  ggplot(S_filter_S, aes(x = Practice_S, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = S_pd) +        
  geom_point(color = "black", position = S_pd) +     
  geom_line(color = "black", position = S_pd) +     
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 18, 2), limits = c(-0.5, 18)) +
  ylab("Predicted spoken accuracy (0-1)") +
  xlab("Spoken practice") +
  annotate("text", x = -Inf, y = Inf, label = "A",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

W_predict_S <-
  ggpredict(Fit.spoken, 
            terms = c("Practice_W", "Group", "Pre [0]"))  
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Data were 'prettified'. Consider using `terms="Practice_W [all]"` to get
##   smooth plots.
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(W_predict_S) <- c("Practice_W", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
W_filter_S <- W_predict_S |>
  group_by(Group) |>
  filter(Practice_W >= min(SpokenRecall$Practice_W[SpokenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_W <= max(SpokenRecall$Practice_W[SpokenRecall$Group == unique(Group)], na.rm = TRUE))

W_pd <- position_dodge(width = 1.2)
W_predict_S_p <-
  ggplot(W_filter_S, aes(x = Practice_W, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = W_pd) +      
  geom_point(color = "black", position = W_pd) +    
  geom_line(color = "black", position = W_pd) +     
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 35, 3), limits = c(-0.5, 35)) +
  ylab("Predicted spoken accuracy (0-1)") +
  xlab("Written practice") +
  annotate("text", x = -Inf, y = Inf, label = "B",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

Effect_S <- 
  grid.arrange(S_predict_S_p, W_predict_S_p, nrow = 1)

Effect_S
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
practice_means_spoken_base <- SpokenRecall |>
  group_by(Group) |>
  summarize(
    Practice_S = mean(Practice_S, na.rm = TRUE),
    Practice_W = mean(Practice_W, na.rm = TRUE),
    .groups = "drop") |>
  mutate(Group = as.character(Group))

practice_means_spoken <- practice_means_spoken_base |>
  pivot_longer(
    cols = c(Practice_S, Practice_W),
    names_to = "Predictor",
    values_to = "PracticeValue") |>
  mutate(Group = as.character(Group))

practice_means_spoken_wide <- practice_means_spoken_base

predicted_spoken <- practice_means_spoken |>
  mutate(
    results = pmap(
      list(Group, Predictor, PracticeValue),
      function(group, predictor, value) {
        group_value <- as.character(group)
        group_means <- practice_means_spoken_wide[practice_means_spoken_wide$Group == group_value, , drop = FALSE]

        practice_s_val <- if (predictor == "Practice_S") value else group_means$Practice_S
        practice_w_val <- if (predictor == "Practice_W") value else group_means$Practice_W

        new_data <- tibble(
          Group = factor(group_value, levels = levels(Spoken.post$Group)),
          Practice_S = practice_s_val,
          Practice_W = practice_w_val,
          Pre = 0,
          zTOEFL = 0)

        draws <- posterior_epred(
          Fit.spoken,
          newdata = new_data,
          re_formula = NA)

        draws_vec <- as.numeric(draws)

        tibble(
          Response = mean(draws_vec),
          SE = sd(draws_vec),
          Lower = quantile(draws_vec, 0.025),
          Upper = quantile(draws_vec, 0.975))
      })) |>
  unnest(results) |>
  select(Group, Predictor, PracticeValue, Response, SE, Lower, Upper)

predicted_spoken |>
  gt() |>
  tab_header(
    title = "Predicted Probabilities by Practice Levels",
    subtitle = "Spoken Recall Model") |>
  cols_label(
    Group = "Group",
    Predictor = "Practice Predictor",
    PracticeValue = "Mean Practice",
    Response = "Predicted Probability",
    SE = "SE",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI") |>
  fmt_number(
    columns = c(PracticeValue, Response, SE, Lower, Upper),
    decimals = 2)
Predicted Probabilities by Practice Levels
Spoken Recall Model

| Group   | Practice Predictor | Mean Practice | Predicted Probability | SE   | Lower 95% CrI | Upper 95% CrI |
|---------|--------------------|---------------|-----------------------|------|---------------|---------------|
| Spoken  | Practice_S         | 2.25          | 0.58                  | 0.05 | 0.48          | 0.67          |
| Spoken  | Practice_W         | 8.14          | 0.58                  | 0.05 | 0.48          | 0.67          |
| Written | Practice_S         | 0.54          | 0.52                  | 0.05 | 0.42          | 0.62          |
| Written | Practice_W         | 8.09          | 0.52                  | 0.05 | 0.42          | 0.62          |

The two rows within each group are identical by construction: each row evaluates the focal predictor at its own group mean while holding the other predictor at its group mean, so both rows pass the same newdata to posterior_epred().

Written Recall

Fit the written model

Fit.written <- brm(
  Score ~  
    Group * Practice_S * Pre +
    Group * Practice_W * Pre +
    zTOEFL * Pre +
    (1 | ID) +
    (1 | Word),
  family = bernoulli(link = "logit"),
  iter = 10000, warmup = 5000, chains = 4,
  prior = Prior,
  data = Written.post,
  file = "Fit_written.rds",
  seed = 234)
## Compiling Stan program...
## Start sampling
## (per-chain sampling progress omitted; each of the four chains ran ~15 s of
## warm-up and ~18 s of sampling, ~33 s total per chain)

Results

fit_summary_W <- summary(Fit.written)

# Extract fixed effects with diagnostics
fixed_effects_W <- fit_summary_W$fixed
fixed_df_W <- as.data.frame(fixed_effects_W)
fixed_df_W$Parameter <- rownames(fixed_df_W)

# Create table with gt
fixed_df_W |>
  select(Parameter, Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat, Bulk_ESS, Tail_ESS) |>
  gt() |>
  tab_header(
    title = "Bayesian Logistic Regression Results with Diagnostics",
    subtitle = "Written Recall Model (Logit Scale)") |>
  cols_label(
    Parameter = "Parameter",
    Estimate = "Estimate",
    Est.Error = "SE",
    `l-95% CI` = "95% CrI Lower",
    `u-95% CI` = "95% CrI Upper",
    Rhat = "R-hat",
    Bulk_ESS = "Bulk ESS",
    Tail_ESS = "Tail ESS") |>
  fmt_number(
    columns = c(Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat),
    decimals = 3) |>
  fmt_number(
    columns = c(Bulk_ESS, Tail_ESS),
    decimals = 0) |>
  tab_style(
    style = list(
      cell_fill(color = "lightyellow"),
      cell_text(weight = "bold")),
    locations = cells_body(
      columns = Rhat,
      rows = Rhat > 1.01)) |>
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      columns = c(Bulk_ESS, Tail_ESS),
      rows = Bulk_ESS < 400 | Tail_ESS < 400)) |>
  tab_options(
    table.font.size = 10,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "R-hat > 1.01 highlighted in yellow; ESS < 400 highlighted in pink",
    locations = cells_title())
Bayesian Logistic Regression Results with Diagnostics
Written Recall Model (Logit Scale)

| Parameter                   | Estimate | SE    | 95% CrI Lower | 95% CrI Upper | R-hat | Bulk ESS | Tail ESS |
|-----------------------------|----------|-------|---------------|---------------|-------|----------|----------|
| Intercept                   | −0.417   | 0.277 | −0.966        | 0.138         | 1.000 | 7,414    | 10,840   |
| GroupWritten                | −0.167   | 0.343 | −0.840        | 0.515         | 1.000 | 7,369    | 11,039   |
| Practice_S                  | 0.028    | 0.037 | −0.044        | 0.100         | 1.000 | 23,733   | 15,578   |
| Pre                         | 1.858    | 0.399 | 1.080         | 2.643         | 1.000 | 16,674   | 15,630   |
| Practice_W                  | 0.071    | 0.016 | 0.040         | 0.103         | 1.000 | 19,100   | 15,296   |
| zTOEFL                      | 0.383    | 0.157 | 0.077         | 0.694         | 1.000 | 6,709    | 10,214   |
| GroupWritten:Practice_S     | 0.185    | 0.138 | −0.084        | 0.456         | 1.000 | 22,854   | 16,725   |
| GroupWritten:Pre            | 0.193    | 0.482 | −0.745        | 1.148         | 1.000 | 16,802   | 17,361   |
| Practice_S:Pre              | 0.083    | 0.109 | −0.112        | 0.314         | 1.000 | 30,104   | 14,929   |
| GroupWritten:Practice_W     | −0.021   | 0.022 | −0.063        | 0.021         | 1.000 | 19,153   | 15,705   |
| Pre:Practice_W              | 0.040    | 0.052 | −0.057        | 0.145         | 1.000 | 17,121   | 16,180   |
| Pre:zTOEFL                  | 0.078    | 0.156 | −0.232        | 0.386         | 1.000 | 32,785   | 14,492   |
| GroupWritten:Practice_S:Pre | −0.115   | 0.301 | −0.684        | 0.491         | 1.001 | 32,230   | 15,658   |
| GroupWritten:Pre:Practice_W | −0.032   | 0.063 | −0.158        | 0.089         | 1.000 | 16,899   | 16,888   |

Note: R-hat > 1.01 and ESS < 400 would be flagged; no parameter meets either criterion here.

Extract R² values

r2_written <- r2_bayes(Fit.written, ci = 0.95)
data.frame(
  Type = c("Conditional R²", "Marginal R²"),
  R2 = c(r2_written$R2_Bayes, r2_written$R2_Bayes_marginal),
  SE = c(attr(r2_written, "SE")$R2_Bayes, 
         attr(r2_written, "SE")$R2_Bayes_marginal),
  CI_Lower = c(attr(r2_written, "CI")$R2_Bayes$CI_low,
               attr(r2_written, "CI")$R2_Bayes_marginal$CI_low),
  CI_Upper = c(attr(r2_written, "CI")$R2_Bayes$CI_high,
               attr(r2_written, "CI")$R2_Bayes_marginal$CI_high)) |>
  gt() |>
  tab_header(
    title = "Bayesian R² for Written Recall Model") |>
  cols_label(
    Type = "R² Type",
    R2 = "Estimate",
    SE = "SE",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper") |>
  fmt_number(
    columns = c(R2, SE, CI_Lower, CI_Upper),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.",
    locations = cells_column_labels(columns = Type))
Bayesian R² for Written Recall Model

| R² Type        | Estimate | SE    | 95% CrI Lower | 95% CrI Upper |
|----------------|----------|-------|---------------|---------------|
| Conditional R² | 0.384    | 0.009 | 0.365         | 0.401         |
| Marginal R²    | 0.173    | 0.023 | 0.129         | 0.217         |

Note: Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.
post_written <- as_draws_df(Fit.written)

directional_probs_written <- post_written |>
  summarize(
    `P(b_Intercept < 0)` = mean(`b_Intercept` < 0),
    `P(b_GroupWritten < 0)` = mean(`b_GroupWritten` < 0),
    `P(b_Practice_S > 0)` = mean(`b_Practice_S` > 0),
    `P(b_Practice_W > 0)` = mean(`b_Practice_W` > 0),
    `P(b_Pre > 0)` = mean(`b_Pre` > 0),
    `P(b_zTOEFL > 0)` = mean(`b_zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S > 0)` = mean(`b_GroupWritten:Practice_S` > 0),
    `P(b_GroupWritten:Practice_W < 0)` = mean(`b_GroupWritten:Practice_W` < 0),
    `P(b_GroupWritten:Pre > 0)` = mean(`b_GroupWritten:Pre` > 0),
    `P(b_Practice_S:Pre > 0)` = mean(`b_Practice_S:Pre` > 0),
    `P(b_Pre:Practice_W > 0)` = mean(`b_Pre:Practice_W` > 0),
    `P(b_Pre:zTOEFL > 0)` = mean(`b_Pre:zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S:Pre < 0)` = mean(`b_GroupWritten:Practice_S:Pre` < 0),
    `P(b_GroupWritten:Pre:Practice_W < 0)` = mean(`b_GroupWritten:Pre:Practice_W` < 0),
    `P(sd_ID__Intercept > 0)` = mean(`sd_ID__Intercept` > 0),
    `P(sd_Word__Intercept > 0)` = mean(`sd_Word__Intercept` > 0)) |>
  pivot_longer(
    everything(),
    names_to = "Hypothesis",
    values_to = "Probability")

directional_probs_written |>
  arrange(desc(Probability)) |>
  gt() |>
  tab_header(
    title = "Directional Posterior Probabilities",
    subtitle = "Written Recall Model") |>
  cols_label(
    Hypothesis = "Hypothesis",
    Probability = "Probability") |>
  fmt_number(
    columns = Probability,
    decimals = 2)
Directional Posterior Probabilities
Written Recall Model

| Hypothesis                           | Probability |
|--------------------------------------|-------------|
| P(b_Practice_W > 0)                  | 1.00        |
| P(b_Pre > 0)                         | 1.00        |
| P(sd_ID__Intercept > 0)              | 1.00        |
| P(sd_Word__Intercept > 0)            | 1.00        |
| P(b_zTOEFL > 0)                      | 0.99        |
| P(b_Intercept < 0)                   | 0.93        |
| P(b_GroupWritten:Practice_S > 0)     | 0.91        |
| P(b_GroupWritten:Practice_W < 0)     | 0.84        |
| P(b_Practice_S > 0)                  | 0.78        |
| P(b_Practice_S:Pre > 0)              | 0.77        |
| P(b_Pre:Practice_W > 0)              | 0.77        |
| P(b_Pre:zTOEFL > 0)                  | 0.70        |
| P(b_GroupWritten:Pre:Practice_W < 0) | 0.69        |
| P(b_GroupWritten < 0)                | 0.69        |
| P(b_GroupWritten:Pre > 0)            | 0.66        |
| P(b_GroupWritten:Practice_S:Pre < 0) | 0.65        |
interaction_written <- post_written |>
  mutate(Interaction = `b_Practice_S` + `b_GroupWritten:Practice_S`) |> # simple slope of Practice_S in the Written group
  summarize(
    Mean = mean(Interaction),
    Lower = quantile(Interaction, probs = 0.025),
    Upper = quantile(Interaction, probs = 0.975),
    `P(Interaction > 0)` = mean(Interaction > 0))

interaction_written |>
  gt() |>
  tab_header(
    title = "Simple Slope of Practice_S in the Written Group",
    subtitle = "Practice_S + GroupWritten:Practice_S, Written Recall Model") |>
  cols_label(
    Mean = "Mean",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI",
    `P(Interaction > 0)` = "P(Interaction > 0)") |>
  fmt_number(
    columns = everything(),
    decimals = 2)
Simple Slope of Practice_S in the Written Group
Practice_S + GroupWritten:Practice_S, Written Recall Model

| Mean | Lower 95% CrI | Upper 95% CrI | P(Interaction > 0) |
|------|---------------|---------------|--------------------|
| 0.21 | −0.05         | 0.48          | 0.94               |
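
The same simple slope can be tested directly with brms::hypothesis(), avoiding the manual draw arithmetic. A minimal sketch:

# Simple slope of spoken practice in the Written group
hypothesis(Fit.written, "Practice_S + GroupWritten:Practice_S > 0")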

Visualization of effects

S_predict_W <- ggpredict(Fit.written, terms = c("Practice_S", "Group", "Pre [0]"))  
## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(S_predict_W) <- c("Practice_S", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
S_filter_W <- S_predict_W |>
  group_by(Group) |>
  filter(Practice_S >= min(WrittenRecall$Practice_S[WrittenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_S <= max(WrittenRecall$Practice_S[WrittenRecall$Group == unique(Group)], na.rm = TRUE))

S_predict_W_p <-
  ggplot(S_filter_W, aes(x = Practice_S, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = S_pd) +      
  geom_point(color = "black", position = S_pd) +     
  geom_line(color = "black", position = S_pd) +      
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 18, 2), limits = c(-0.5, 18)) +
  ylab("Predicted written accuracy (0-1)") +
  xlab("Spoken practice") +
  annotate("text", x = -Inf, y = Inf, label = "C",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

W_predict_W <- ggpredict(Fit.written, terms = c("Practice_W", "Group", "Pre [0]"))  
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Data were 'prettified'. Consider using `terms="Practice_W [all]"` to get
##   smooth plots.
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(W_predict_W) <- c("Practice_W", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
W_filter_W <- W_predict_W |>
  group_by(Group) |>
  filter(Practice_W >= min(WrittenRecall$Practice_W[WrittenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_W <= max(WrittenRecall$Practice_W[WrittenRecall$Group == unique(Group)], na.rm = TRUE))

W_predict_W_p <-
  ggplot(W_filter_W, aes(x = Practice_W, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = W_pd) +       
  geom_point(color = "black", position = W_pd) +     
  geom_line(color = "black", position = W_pd) +      
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 35, 3), limits = c(-0.5, 35)) +
  ylab("Predicted written accuracy (0-1)") +
  xlab("Written practice") +
  annotate("text", x = -Inf, y = Inf, label = "D",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

Effect_W <- 
  grid.arrange(S_predict_W_p, W_predict_W_p, nrow = 1)

Effect_W
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
grid.arrange(Effect_S, Effect_W, nrow = 2)

practice_means_written_base <- WrittenRecall |> # use the written-recall data, matching Fit.written
  group_by(Group) |>
  summarize(
    Practice_S = mean(Practice_S, na.rm = TRUE),
    Practice_W = mean(Practice_W, na.rm = TRUE),
    .groups = "drop") |>
  mutate(Group = as.character(Group))

practice_means_written <- practice_means_written_base |>
  pivot_longer(
    cols = c(Practice_S, Practice_W),
    names_to = "Predictor",
    values_to = "PracticeValue") |>
  mutate(Group = as.character(Group))

practice_means_written_wide <- practice_means_written_base

predicted_written <- practice_means_written |>
  mutate(
    results = pmap(
      list(Group, Predictor, PracticeValue),
      function(group, predictor, value) {
        group_value <- as.character(group)
        group_means <- practice_means_written_wide[practice_means_written_wide$Group == group_value, , drop = FALSE]

        practice_s_val <- if (predictor == "Practice_S") value else group_means$Practice_S
        practice_w_val <- if (predictor == "Practice_W") value else group_means$Practice_W

        new_data <- tibble(
          Group = factor(group_value, levels = levels(Written.post$Group)),
          Practice_S = practice_s_val,
          Practice_W = practice_w_val,
          Pre = 0,
          zTOEFL = 0)

        draws <- posterior_epred(
          Fit.written,
          newdata = new_data,
          re_formula = NA)

        draws_vec <- as.numeric(draws)

        tibble(
          Response = mean(draws_vec),
          SE = sd(draws_vec),
          Lower = quantile(draws_vec, 0.025),
          Upper = quantile(draws_vec, 0.975))
      })) |>
  unnest(results) |>
  select(Group, Predictor, PracticeValue, Response, SE, Lower, Upper)

predicted_written |>
  gt() |>
  tab_header(
    title = "Predicted Probabilities by Practice Levels",
    subtitle = "Written Recall Model") |>
  cols_label(
    Group = "Group",
    Predictor = "Practice Predictor",
    PracticeValue = "Mean Practice",
    Response = "Predicted Probability",
    SE = "SE",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI") |>
  fmt_number(
    columns = c(PracticeValue, Response, SE, Lower, Upper),
    decimals = 2)
Predicted Probabilities by Practice Levels
Written Recall Model

| Group   | Practice Predictor | Mean Practice | Predicted Probability | SE   | Lower 95% CrI | Upper 95% CrI |
|---------|--------------------|---------------|-----------------------|------|---------------|---------------|
| Spoken  | Practice_S         | 2.25          | 0.56                  | 0.06 | 0.44          | 0.67          |
| Spoken  | Practice_W         | 8.14          | 0.56                  | 0.06 | 0.44          | 0.67          |
| Written | Practice_S         | 0.54          | 0.54                  | 0.06 | 0.42          | 0.66          |
| Written | Practice_W         | 8.09          | 0.54                  | 0.06 | 0.42          | 0.66          |

Posterior Predictive Checks

Spoken Recall

brms::pp_check(
  object = Fit.spoken,
  ndraws = 1000,
  type = "stat",
  stat = "mean") +
  theme_test() +
  ylab("Frequency") +
  ggtitle("Posterior Predictive Check: Mean")
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

pp_check(
  object = Fit.spoken,
  ndraws = 100) +
  theme_test() +
  ylab("Density") +
  ggtitle("Posterior Predictive Check: Density")

Trace plots

plot(Fit.spoken, ask = FALSE)
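
plot() pages through every parameter; to overlay the four chains for selected coefficients only, bayesplot can be used on the posterior array. A minimal sketch:

bayesplot::mcmc_trace(
  as.array(Fit.spoken),
  pars = c("b_Intercept", "b_GroupWritten", "b_Practice_S", "b_Practice_W"))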

Written Recall

brms::pp_check(
  object = Fit.written,
  ndraws = 1000,
  type = "stat",
  stat = "mean") +
  theme_test() +
  ylab("Frequency") +
  ggtitle("Posterior Predictive Check: Mean")
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

pp_check(
  object = Fit.written,
  ndraws = 100) +
  theme_test() +
  ylab("Density") +
  ggtitle("Posterior Predictive Check: Density")

plot(Fit.written, ask = FALSE)