R Code for VocabModality

2025-10-14

Data Preprocessing

Load packages

pacman::p_load(
  readxl, tidyverse, psych, BayesFactor, gt, emmeans,
  brms, rstan, bayesplot, ggeffects, performance, gridExtra, sjPlot)

Theme setting

theme_set(theme_bw())
set.seed(123)

Load data

SpokenRecall <- read_excel("SpokenRecall.xlsx")
WrittenRecall <- read_excel("WrittenRecall.xlsx")
Weekly <- read_excel("Weekly.xlsx")

Descriptive Statistics

Weekly Quizzes

Weekly.summary <- Weekly |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Quiz, Group) |>
  summarize(
    mean_score = mean(Score) * 10, # rescale proportion correct to the 0-10 quiz scale
    sd = sd(Score),                # SD stays on the original 0-1 scale
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10, # SE rescaled by 10 to match mean_score
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Quiz'. You can override using the
## `.groups` argument.
Weekly.summary |> 
  gt()
| Quiz | Group   | mean_score | sd        | n   | se         | ci_lower | ci_upper | ci_text   |
|------|---------|------------|-----------|-----|------------|----------|----------|-----------|
| 1    | Spoken  | 8.500000   | 0.3575684 | 360 | 0.01884551 | 8.130628 | 8.869372 | [8.1,8.9] |
| 1    | Written | 8.975610   | 0.3035952 | 410 | 0.01499350 | 8.681737 | 9.269482 | [8.7,9.3] |
| 2    | Spoken  | 8.921053   | 0.3106564 | 380 | 0.01593634 | 8.608700 | 9.233405 | [8.6,9.2] |
| 2    | Written | 9.538462   | 0.2100877 | 390 | 0.01063820 | 9.329953 | 9.746970 | [9.3,9.7] |
| 3    | Spoken  | 8.868421   | 0.3172034 | 380 | 0.01627219 | 8.549486 | 9.187356 | [8.5,9.2] |
| 3    | Written | 9.307692   | 0.2541722 | 390 | 0.01287051 | 9.055430 | 9.559954 | [9.1,9.6] |
| 4    | Spoken  | 9.228571   | 0.2672000 | 350 | 0.01428244 | 8.948636 | 9.508507 | [8.9,9.5] |
| 4    | Written | 9.538462   | 0.2100877 | 390 | 0.01063820 | 9.329953 | 9.746970 | [9.3,9.7] |
| 5    | Spoken  | 8.135135   | 0.3900262 | 370 | 0.02027650 | 7.737716 | 8.532554 | [7.7,8.5] |
| 5    | Written | 8.757576   | 0.3303588 | 330 | 0.01818566 | 8.401137 | 9.114015 | [8.4,9.1] |
| 6    | Spoken  | 8.589744   | 0.3484951 | 390 | 0.01764673 | 8.243868 | 8.935620 | [8.2,8.9] |
| 6    | Written | 7.846154   | 0.4116170 | 390 | 0.02084303 | 7.437630 | 8.254677 | [7.4,8.3] |
| 7    | Spoken  | 8.694444   | 0.3373826 | 360 | 0.01778162 | 8.345925 | 9.042964 | [8.3,9]   |
| 7    | Written | 8.410256   | 0.3661219 | 390 | 0.01853930 | 8.046886 | 8.773627 | [8,8.8]   |
| 8    | Spoken  | 8.514286   | 0.3561747 | 350 | 0.01903834 | 8.141134 | 8.887437 | [8.1,8.9] |
| 8    | Written | 8.945946   | 0.3074909 | 370 | 0.01598569 | 8.632626 | 9.259265 | [8.6,9.3] |
| 9    | Spoken  | 8.631579   | 0.3441337 | 380 | 0.01765369 | 8.285567 | 8.977591 | [8.3,9]   |
| 9    | Written | 8.944444   | 0.3076956 | 360 | 0.01621698 | 8.626592 | 9.262297 | [8.6,9.3] |
| 10   | Spoken  | 7.392857   | 0.4398104 | 280 | 0.02628370 | 6.877697 | 7.908018 | [6.9,7.9] |
| 10   | Written | 8.447368   | 0.3626328 | 380 | 0.01860267 | 8.082756 | 8.811981 | [8.1,8.8] |
ggplot(Weekly.summary, aes(x = Quiz, y = mean_score, color = Group, group = Group)) +
  geom_line(linewidth = 1, position = position_dodge(width = 1)) +
  geom_point(size = 3, position = position_dodge(width = 1)) +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.3,
                position = position_dodge(width = 1)) +
  geom_text(aes(label = round(mean_score, 2)),
            position = position_dodge(width = 1),
            vjust = -2.5,
            size = 3) +
  geom_text(aes(label = paste0("[", round(ci_lower, 2), ", ", round(ci_upper, 2), "]")),
            position = position_dodge(width = 1),
            vjust = 3.5,
            size = 2.75) +
  scale_x_continuous(breaks = 1:10) +
  scale_y_continuous(limits = c(0, 10.5), breaks = 0:10) +
  labs(
    title = "",
    x = "Quiz Week (1-10)",
    y = "Scores for Weekly Quiz (1-10)") +
  theme_bw() +
  theme(
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray90"),
    panel.border = element_rect(fill = NA, color = "black"),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11)
  )

t-test for Weekly Quiz (Total Score)

Weekly.sum <- Weekly |>
  group_by(ID, Group, Quiz) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group'. You can override using the
## `.groups` argument.
t.test.quiz <- ttestBF(formula = Score ~ Group, data = Weekly.sum)
## Warning: data coerced from tibble to data frame
post.quiz <- posterior(t.test.quiz, iterations = 10000)
sigma.quiz <- sqrt(post.quiz[, "sig2"])

data.frame(
  Parameter = c("theta (mean difference)", "sigma (standard deviation)"),
  Mean = c(mean(post.quiz[, 2]), mean(sigma.quiz)),
  `CrI_Lower` = c(
    quantile(post.quiz[, 2], probs = 0.025),
    quantile(sigma.quiz, probs = 0.025)),
  `CrI_Upper` = c(
    quantile(post.quiz[, 2], probs = 0.975),
    quantile(sigma.quiz, probs = 0.975)),
  `P_negative` = c(
    mean(post.quiz[, 2] < 0),
    NA)) |>
  gt() |>
  tab_header(
    title = "Bayesian Estimation Results") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    CrI_Lower = "95% CrI Lower",
    CrI_Upper = "95% CrI Upper",
    P_negative = "P(parameter < 0)") |>
  fmt_number(
    columns = c(Mean, CrI_Lower, CrI_Upper, P_negative),
    decimals = 3) |>
  sub_missing(
    columns = P_negative,
    missing_text = "—")
Bayesian Estimation Results

| Parameter                  | Mean   | 95% CrI Lower | 95% CrI Upper | P(parameter < 0) |
|----------------------------|--------|---------------|---------------|------------------|
| theta (mean difference)    | −0.291 | −0.543        | −0.037        | 0.987            |
| sigma (standard deviation) | 1.792  | 1.703         | 1.886         | —                |
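
The table reports posterior summaries only; the Bayes factor for the group difference itself can be read off the ttestBF object. A minimal sketch, using BayesFactor's extractBF():

extractBF(t.test.quiz)$bf # BF10: alternative (group difference) vs. point null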

Practice

Descriptive statistics for practice counts

Practice.summary <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
                        Practice_W, Practice_W_Study, Practice_W_Review),
               names_to = "Type",
               values_to = "Count") |>
  mutate(Type_label = case_when(
    Type == "Practice_S" ~ "Spoken [Practice]",
    Type == "Practice_S_Study" ~ "Spoken [Study]",
    Type == "Practice_S_Review" ~ "Spoken [Review]",
    Type == "Practice_W" ~ "Written [Practice]",
    Type == "Practice_W_Study" ~ "Written [Study]",
    Type == "Practice_W_Review" ~ "Written [Review]")) |>
  group_by(Group, Type, Type_label) |>
  summarize(
    mean_score = mean(Count), # practice counts stay on their original scale
    sd = sd(Count),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se,
    ci_upper = mean_score + 1.96 * se,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]"),
    .groups = "drop")

Practice.d <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(
    cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
             Practice_W, Practice_W_Study, Practice_W_Review),
    names_to = "Type",
    values_to = "Practice") |>
  mutate(
    Type = recode(
      Type,
      Practice_S = "Spoken",
      Practice_S_Study = "Spoken_Study",
      Practice_S_Review = "Spoken_Review",
      Practice_W = "Written",
      Practice_W_Study = "Written_Study",
      Practice_W_Review = "Written_Review"),
    Type = factor(
      Type,
      levels = c("Spoken", "Spoken_Study", "Spoken_Review",
                 "Written", "Written_Study", "Written_Review")))

Practice.sum <- Practice.d |>
  group_by(ID, Group, Type) |>
  summarize(Practice = mean(Practice), .groups = "drop")

describeBy(Practice.sum$Practice,
           group = list(Group = Practice.sum$Group,
                        Type = Practice.sum$Type),
           mat = TRUE) |>
  select(group1, group2, n, mean, sd, median, min, max, se) |>
  gt() |>
  tab_header(
    title = "Descriptive Statistics by Group and Type") |>
  cols_label(
    group1 = "Group",
    group2 = "Type",
    n = "N",
    mean = "Mean",
    sd = "SD",
    median = "Median",
    min = "Min",
    max = "Max",
    se = "SE") |>
  fmt_number(
    columns = c(mean, sd, median, min, max, se),
    decimals = 2)
Descriptive Statistics by Group and Type

| Group   | Type           | N  | Mean | SD   | Median | Min  | Max   | SE   |
|---------|----------------|----|------|------|--------|------|-------|------|
| Spoken  | Spoken         | 30 | 2.25 | 2.43 | 1.43   | 0.00 | 9.26  | 0.44 |
| Written | Spoken         | 34 | 0.54 | 0.55 | 0.26   | 0.00 | 1.80  | 0.10 |
| Spoken  | Spoken_Study   | 30 | 1.18 | 0.94 | 1.17   | 0.00 | 4.08  | 0.17 |
| Written | Spoken_Study   | 34 | 0.30 | 0.45 | 0.10   | 0.00 | 1.90  | 0.08 |
| Spoken  | Spoken_Review  | 30 | 1.31 | 2.36 | 0.39   | 0.00 | 10.48 | 0.43 |
| Written | Spoken_Review  | 34 | 0.31 | 0.40 | 0.11   | 0.00 | 1.52  | 0.07 |
| Spoken  | Written        | 30 | 8.14 | 4.71 | 6.27   | 1.56 | 19.06 | 0.86 |
| Written | Written        | 34 | 8.09 | 4.59 | 8.00   | 1.12 | 18.18 | 0.79 |
| Spoken  | Written_Study  | 30 | 4.45 | 2.01 | 3.66   | 1.26 | 8.58  | 0.37 |
| Written | Written_Study  | 34 | 1.29 | 1.44 | 0.77   | 0.00 | 7.20  | 0.25 |
| Spoken  | Written_Review | 30 | 3.95 | 3.75 | 2.99   | 0.20 | 17.08 | 0.68 |
| Written | Written_Review | 34 | 7.27 | 4.65 | 7.02   | 0.96 | 21.24 | 0.80 |
ggplot(Practice.sum, aes(x = Group, y = Practice, color = Group)) +
  geom_jitter(width = 0.4, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "black") + 
  xlab("Group") +
  ylab("Practice / Review Counts") +
  facet_wrap(~Type, scales = "free") + theme_bw() +
  theme(legend.position = "bottom") +
  stat_summary(fun = mean, geom = "text",  
               aes(label = round(after_stat(y), 2), group = Group),  
               vjust = -2, size = 4, color = "black",  
               position = position_dodge(width = 0.8)) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",  
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]"), 
                   group = Group),  
               vjust = -1, size = 3, color = "black",  
               position = position_dodge(width = 1))

ggplot(subset(Practice.sum, Type %in% c("Spoken", "Written")), 
       aes(x = Group, y = Practice, color = Group)) +
  geom_jitter(width = 0.4, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = "black") + 
  xlab("Group") +
  ylab("Practice Counts (Spoken & Written)") +
  facet_wrap(~Type, scales = "free") +
  theme_bw() +
  theme(legend.position = "bottom") +
  stat_summary(fun = mean, geom = "text",  
               aes(label = round(after_stat(y), 2), group = Group),  
               vjust = -2, size = 4, color = "black",  
               position = position_dodge(width = 0.8)) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",  
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]"), 
                   group = Group),  
               vjust = -1, size = 3, color = "black",  
               position = position_dodge(width = 1))

anova.practice.d <- Practice.sum |>
  filter(Type %in% c("Spoken", "Written")) |>
  mutate(
    across(c(Group, Type), as.factor),
    PracticeLog = log(Practice + 0.001)) # small offset avoids log(0) for zero-practice cases

anova.practice <- anovaBF(
  PracticeLog ~ Group * Type,
  data = as.data.frame(anova.practice.d)) # Bayesian ANOVA on log practice counts

anova.practice |>
  BayesFactor::extractBF(logbf = FALSE) |>
  as_tibble(rownames = "Model") |>
  select(-time, -code) |> # drop run-metadata columns (timestamp, internal code)
  mutate(PosteriorProbability = bf / sum(bf)) |>
  rename(
    BF10 = bf,
    MCMC_Error = error) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Model Comparison",
    subtitle = "PracticeLog ~ Group × Type") |>
  cols_label(
    Model = "Model",
    BF10 = "BF10",
    PosteriorProbability = "Posterior Probability",
    MCMC_Error = "MCMC Error") |>
  fmt_scientific(
    columns = c(BF10, MCMC_Error),
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 3)
Bayesian ANOVA Model Comparison
PracticeLog ~ Group × Type

| Model                     | BF10         | MCMC Error    | Posterior Probability |
|---------------------------|--------------|---------------|-----------------------|
| Group                     | 1.38         | 1.23 × 10^−4  | 0.000                 |
| Type                      | 3.39 × 10^14 | 1.47 × 10^−22 | 0.022                 |
| Group + Type              | 2.19 × 10^15 | 1.74 × 10^−2  | 0.145                 |
| Group + Type + Group:Type | 1.26 × 10^16 | 3.90 × 10^−2  | 0.833                 |
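
Because every model above shares the intercept-only denominator, the specific evidence for the interaction can be isolated by dividing the full model's BF by that of the main-effects model. A minimal sketch using BayesFactor's object arithmetic:

# BF for adding Group:Type on top of Group + Type
anova.practice[4] / anova.practice[3]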

Recall Test: Pretest & Posttest

Spoken.summary <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Test, Group) |>
  summarize(
    mean_score = mean(Score) * 10,
    sd = sd(Score),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10,
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Test'. You can override using the
## `.groups` argument.
Written.summary <- WrittenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |> 
  group_by(Test, Group) |>
  summarize(
    mean_score = mean(Score) * 10,
    sd = sd(Score),
    n = n(),
    se = sd / sqrt(n),
    ci_lower = mean_score - 1.96 * se * 10,
    ci_upper = mean_score + 1.96 * se * 10,
    ci_text = paste0("[", round(ci_lower, 1), ",", round(ci_upper, 1), "]")) |>
  ungroup()
## `summarise()` has grouped output by 'Test'. You can override using the
## `.groups` argument.
Spoken.sum <- SpokenRecall |>
  group_by(ID, Group, Test, Mode) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
Written.sum <- WrittenRecall |>
  group_by(ID, Group, Test, Mode) |>
  summarise(Score = sum(Score))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
Test.sum <- rbind(Spoken.sum, Written.sum)
Test.sum$Test <- factor(Test.sum$Test, levels = c("Pre", "Post"), labels = c("Pretest", "Posttest"))
Test.sum$Group <- factor(Test.sum$Group, levels = c("Spoken", "Written"), labels = c("Spoken [Group]", "Written [Group]"))
Test.sum$Mode <- factor(Test.sum$Mode, levels = c("Spoken", "Written"), labels = c("Spoken [Test]", "Written [Test]"))

Test.t <- ttestBF(formula = Score ~ Group, data = subset(Test.sum, Test=="Pretest"))
## Warning: data coerced from tibble to data frame
post.Test <- posterior(Test.t, iterations = 1000000L)

summary_combined <- rbind(
  Spoken.summary |> mutate(Recall_Type = "Spoken Recall"),
  Written.summary |> mutate(Recall_Type = "Written Recall"))

summary_combined |>
  select(Recall_Type, Test, Group, n, mean_score, sd, se, ci_text) |>
  gt() |>
  tab_header(
    title = "Descriptive Statistics by Test and Group") |>
  cols_label(
    Recall_Type = "Recall Type",
    Test = "Test",
    Group = "Group",
    n = "N",
    mean_score = "Mean",
    sd = "SD",
    se = "SE",
    ci_text = "95% CI") |>
  fmt_number(
    columns = c(mean_score, sd, se),
    decimals = 2) |>
  tab_options(
    table.font.size = 12)
Descriptive Statistics by Test and Group

| Recall Type    | Test | Group   | N    | Mean | SD   | SE   | 95% CI    |
|----------------|------|---------|------|------|------|------|-----------|
| Spoken Recall  | Post | Spoken  | 1500 | 5.69 | 0.50 | 0.01 | [5.4,5.9] |
| Spoken Recall  | Post | Written | 1700 | 5.01 | 0.50 | 0.01 | [4.8,5.2] |
| Spoken Recall  | Pre  | Spoken  | 1500 | 1.27 | 0.33 | 0.01 | [1.1,1.4] |
| Spoken Recall  | Pre  | Written | 1700 | 1.64 | 0.37 | 0.01 | [1.5,1.8] |
| Written Recall | Post | Spoken  | 1500 | 5.94 | 0.49 | 0.01 | [5.7,6.2] |
| Written Recall | Post | Written | 1700 | 5.68 | 0.50 | 0.01 | [5.4,5.9] |
| Written Recall | Pre  | Spoken  | 1500 | 2.21 | 0.42 | 0.01 | [2,2.4]   |
| Written Recall | Pre  | Written | 1700 | 2.22 | 0.42 | 0.01 | [2,2.4]   |
data.frame(
  Parameter = "theta (mean difference)",
  Mean = mean(post.Test[, 2]),
  SD = sd(post.Test[, 2]),
  CI_Lower = quantile(post.Test[, 2], probs = 0.025),
  CI_Upper = quantile(post.Test[, 2], probs = 0.975),
  P_negative = mean(post.Test[, 2] < 0),
  P_positive = mean(post.Test[, 2] > 0)) |>
  gt() |>
  tab_header(
    title = "Bayesian t-test Results",
    subtitle = "Pretest: Spoken vs Written Group") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    SD = "SD",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper",
    P_negative = "P(θ < 0)",
    P_positive = "P(θ > 0)") |>
  fmt_number(
    columns = c(Mean, SD, CI_Lower, CI_Upper, P_negative, P_positive),
    decimals = 3)
Bayesian t-test Results
Pretest: Spoken vs Written Group

| Parameter               | Mean   | SD    | 95% CrI Lower | 95% CrI Upper | P(θ < 0) | P(θ > 0) |
|-------------------------|--------|-------|---------------|---------------|----------|----------|
| theta (mean difference) | −0.855 | 1.001 | −2.835        | 1.106         | 0.805    | 0.195    |
summary_combined |>
  select(Recall_Type, Test, Group, mean_score, ci_text) |>
  tidyr::pivot_wider(
    names_from = Group,
    values_from = c(mean_score, ci_text),
    names_sep = "_") |>
  gt() |>
  tab_header(
    title = "Mean Scores and 95% Confidence Intervals") |> # normal-approximation CIs, not posterior intervals
  cols_label(
    Recall_Type = "Recall Type",
    Test = "Test",
    mean_score_Spoken = "Spoken Mean",
    ci_text_Spoken = "Spoken 95% CI",
    mean_score_Written = "Written Mean",
    ci_text_Written = "Written 95% CI") |>
  fmt_number(
    columns = starts_with("mean_score"),
    decimals = 2)
Mean Scores and 95% Confidence Intervals

| Recall Type    | Test | Spoken Mean | Written Mean | Spoken 95% CI | Written 95% CI |
|----------------|------|-------------|--------------|---------------|----------------|
| Spoken Recall  | Post | 5.69        | 5.01         | [5.4,5.9]     | [4.8,5.2]      |
| Spoken Recall  | Pre  | 1.27        | 1.64         | [1.1,1.4]     | [1.5,1.8]      |
| Written Recall | Post | 5.94        | 5.68         | [5.7,6.2]     | [5.4,5.9]      |
| Written Recall | Pre  | 2.21        | 2.22         | [2,2.4]       | [2,2.4]        |

Bayesian ANOVA

anova.test <- anovaBF(
  Score ~ Group * Mode,
  data = subset(Test.sum, Test == "Pretest"), 
  whichRandom = "ID")
## Warning: data coerced from tibble to data frame
# Calculate Bayes factors and posterior model probabilities
# (posterior probabilities assume equal prior odds across the four models)
BF <- as.vector(anova.test) # named vector of BFs against the intercept-only model
post_probs <- BF / sum(BF)

anova.results <- data.frame(
  Numerator = names(BF),
  Denominator = "Intercept only",
  BF = as.numeric(BF),
  PosteriorProbability = post_probs)

# Then create the table
anova.results |>
  arrange(desc(PosteriorProbability)) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Results",
    subtitle = "Model Comparison for Pretest Scores") |>
  cols_label(
    Numerator = "Model",
    Denominator = "Baseline",
    BF = "Bayes Factor",
    PosteriorProbability = "Posterior Probability") |>
  fmt_scientific(
    columns = BF,
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 4) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Models are sorted by posterior probability (highest to lowest)",
    locations = cells_title())
Bayesian ANOVA Results
Model Comparison for Pretest Scores

| Model                     | Baseline       | Bayes Factor | Posterior Probability |
|---------------------------|----------------|--------------|-----------------------|
| Mode                      | Intercept only | 1.00 × 10^2  | 0.7199                |
| Group + Mode              | Intercept only | 2.87 × 10^1  | 0.2066                |
| Group + Mode + Group:Mode | Intercept only | 9.93         | 0.0715                |
| Group                     | Intercept only | 2.74 × 10^−1 | 0.0020                |

Note: Models are sorted by posterior probability (highest to lowest).

Internal Consistency

PreA_S <- SpokenRecall[SpokenRecall$List == "A" & SpokenRecall$Test == "Pre", c(1, 4, 5)] # Extract Spoken Pretest A
PreB_S <- SpokenRecall[SpokenRecall$List == "B" & SpokenRecall$Test == "Pre", c(1, 4, 5)] # Extract Spoken Pretest B
PostA_S <- SpokenRecall[SpokenRecall$List == "A" & SpokenRecall$Test == "Post", c(1, 4, 5)] # Extract Spoken Posttest A
PostB_S <- SpokenRecall[SpokenRecall$List == "B" & SpokenRecall$Test == "Post", c(1, 4, 5)] # Extract Spoken Posttest B
PreA_W <- WrittenRecall[WrittenRecall$List == "A" & WrittenRecall$Test == "Pre", c(1, 4, 5)] # Extract Written Pretest A
PreB_W <- WrittenRecall[WrittenRecall$List == "B" & WrittenRecall$Test == "Pre", c(1, 4, 5)] # Extract Written Pretest B
PostA_W <- WrittenRecall[WrittenRecall$List == "A" & WrittenRecall$Test == "Post", c(1, 4, 5)] # Extract Written Posttest A
PostB_W <- WrittenRecall[WrittenRecall$List == "B" & WrittenRecall$Test == "Post", c(1, 4, 5)] # Extract Written Posttest B
PreA_S <- PreA_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PreA_S into the wide format
PreB_S <- PreB_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PreB_S into the wide format
PostA_S <- PostA_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PostA_S into the wide format
PostB_S <- PostB_S |> pivot_wider(names_from = Word, values_from = Score) # Turn PostB_S into the wide format
PreA_W <- PreA_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PreA_W into the wide format
PreB_W <- PreB_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PreB_W into the wide format
PostA_W <- PostA_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PostA_W into the wide format
PostB_W <- PostB_W |> pivot_wider(names_from = Word, values_from = Score) # Turn PostB_W into the wide format

# Calculate all alpha values (suppress warnings for cleaner output)
alpha_PreA_S <- suppressWarnings(psych::alpha(PreA_S[2:34]))
## Some items ( revive assembly contempt incline simplicity savage undo brutal ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PreB_S <- suppressWarnings(psych::alpha(PreB_S[2:30]))
## Some items ( prominent fatigue strain acquisition resign ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
## In smc, smcs < 0 were set to .0
## (the line above was printed 15 times; repeats omitted)
alpha_PreA_W <- suppressWarnings(psych::alpha(PreA_W[2:34]))
## Some items ( limestone moderate shiver convey tolerate elastic plunge bankruptcy ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PreB_W <- suppressWarnings(psych::alpha(PreB_W[2:30]))
alpha_PostA_S <- suppressWarnings(psych::alpha(PostA_S[2:34]))
## Some items ( vacant ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PostB_S <- suppressWarnings(psych::alpha(PostB_S[2:30]))
## Some items ( elderly ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
alpha_PostA_W <- suppressWarnings(psych::alpha(PostA_W[2:34]))
alpha_PostB_W <- suppressWarnings(psych::alpha(PostB_W[2:30]))

data.frame(
  Test = c("Pretest", "Pretest", "Pretest", "Pretest",
           "Posttest", "Posttest", "Posttest", "Posttest"),
  Mode = c("Spoken", "Spoken", "Written", "Written",
           "Spoken", "Spoken", "Written", "Written"),
  List = c("A", "B", "A", "B", "A", "B", "A", "B"),
  N_Items = c(33, 29, 33, 29, 33, 29, 33, 29),
  Raw_Alpha = c(
    alpha_PreA_S$total$raw_alpha,
    alpha_PreB_S$total$raw_alpha,
    alpha_PreA_W$total$raw_alpha,
    alpha_PreB_W$total$raw_alpha,
    alpha_PostA_S$total$raw_alpha,
    alpha_PostB_S$total$raw_alpha,
    alpha_PostA_W$total$raw_alpha,
    alpha_PostB_W$total$raw_alpha),
  Std_Alpha = c(
    alpha_PreA_S$total$std.alpha,
    alpha_PreB_S$total$std.alpha,
    alpha_PreA_W$total$std.alpha,
    alpha_PreB_W$total$std.alpha,
    alpha_PostA_S$total$std.alpha,
    alpha_PostB_S$total$std.alpha,
    alpha_PostA_W$total$std.alpha,
    alpha_PostB_W$total$std.alpha),
  Average_r = c(
    alpha_PreA_S$total$average_r,
    alpha_PreB_S$total$average_r,
    alpha_PreA_W$total$average_r,
    alpha_PreB_W$total$average_r,
    alpha_PostA_S$total$average_r,
    alpha_PostB_S$total$average_r,
    alpha_PostA_W$total$average_r,
    alpha_PostB_W$total$average_r)) |>
  gt() |>
  tab_header(
    title = "Internal Consistency Reliability (Cronbach's Alpha)",
    subtitle = "By Test, Mode, and List") |>
  cols_label(
    Test = "Test",
    Mode = "Mode",
    List = "List",
    N_Items = "N Items",
    Raw_Alpha = "Raw α",
    Std_Alpha = "Std. α",
    Average_r = "Average r") |>
  fmt_number(
    columns = c(Raw_Alpha, Std_Alpha, Average_r),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Note: Some items had no variance or negative correlations with the first PC.",
    locations = cells_title()) |>
  tab_style(
    style = list(
      cell_fill(color = "#f0f0f0")),
    locations = cells_body(
      rows = Test == "Pretest"))
Internal Consistency Reliability (Cronbach's Alpha)
By Test, Mode, and List

| Test     | Mode    | List | N Items | Raw α | Std. α | Average r |
|----------|---------|------|---------|-------|--------|-----------|
| Pretest  | Spoken  | A    | 33      | 0.539 | 0.429  | 0.032     |
| Pretest  | Spoken  | B    | 29      | 0.636 | 0.621  | 0.069     |
| Pretest  | Written | A    | 33      | 0.675 | 0.665  | 0.060     |
| Pretest  | Written | B    | 29      | 0.827 | 0.838  | 0.172     |
| Posttest | Spoken  | A    | 33      | 0.842 | 0.839  | 0.137     |
| Posttest | Spoken  | B    | 29      | 0.832 | 0.837  | 0.155     |
| Posttest | Written | A    | 33      | 0.874 | 0.886  | 0.190     |
| Posttest | Written | B    | 29      | 0.854 | 0.853  | 0.166     |

Note: Some items had no variance or negative correlations with the first PC.

Practice ANOVA (Replication)

Practice.anova <- SpokenRecall |>
  mutate(Group = factor(Group, levels = c("Spoken", "Written"))) |>
  pivot_longer(
    cols = c(Practice_S, Practice_S_Study, Practice_S_Review,
             Practice_W, Practice_W_Study, Practice_W_Review),
    names_to = "Type",
    values_to = "Practice") |>
  mutate(
    Type = recode(
      Type,
      Practice_S = "Spoken",
      Practice_S_Study = "Spoken_Study",
      Practice_S_Review = "Spoken_Review",
      Practice_W = "Written",
      Practice_W_Study = "Written_Study",
      Practice_W_Review = "Written_Review"),
    Type = factor(
      Type,
      levels = c("Spoken", "Spoken_Study", "Spoken_Review",
                 "Written", "Written_Study", "Written_Review"))) |>
  group_by(ID, Group, Type) |>
  summarize(Practice = mean(Practice), .groups = "drop") |>
  filter(Type %in% c("Spoken", "Written")) |>
  mutate(
    Group = factor(Group),
    Type = factor(Type),
    PracticeLog = log(Practice + 0.001))

anova.practice <- anovaBF(
  PracticeLog ~ Group * Type,
  data = Practice.anova |> as.data.frame()) # ANOVA
anova.practice |>
  BayesFactor::extractBF(logbf = FALSE) |>
  as_tibble(rownames = "Model") |>
  select(-time, -code) |> # drop run-metadata columns (timestamp, internal code)
  mutate(PosteriorProbability = bf / sum(bf)) |>
  rename(
    BF10 = bf,
    MCMC_Error = error) |>
  gt() |>
  tab_header(
    title = "Bayesian ANOVA Model Comparison",
    subtitle = "PracticeLog ~ Group × Type (replication)") |>
  cols_label(
    Model = "Model",
    BF10 = "BF10",
    PosteriorProbability = "Posterior Probability",
    MCMC_Error = "MCMC Error") |>
  fmt_scientific(
    columns = c(BF10, MCMC_Error),
    decimals = 2) |>
  fmt_number(
    columns = PosteriorProbability,
    decimals = 3)
Bayesian ANOVA Model Comparison
PracticeLog ~ Group × Type (replication)

| Model                     | BF10         | MCMC Error    | Posterior Probability |
|---------------------------|--------------|---------------|-----------------------|
| Group                     | 1.38         | 1.23 × 10^−4  | 0.000                 |
| Type                      | 3.39 × 10^14 | 1.47 × 10^−22 | 0.022                 |
| Group + Type              | 2.31 × 10^15 | 3.00 × 10^−2  | 0.152                 |
| Group + Type + Group:Type | 1.25 × 10^16 | 1.21 × 10^−2  | 0.825                 |

TOEFL ITP: Posttest Scores

toefl_data <- SpokenRecall |>
  filter(Test == "Post") |>
  select(ID, Group, TOEFL) |>
  distinct()

describeBy(toefl_data$TOEFL, 
           group = toefl_data$Group,
           mat = TRUE,
           digits = 2) |>
  as.data.frame() |>
  select(group1, n, mean, sd, median, min, max, se) |>
  gt() |>
  tab_header(
    title = "TOEFL Descriptive Statistics by Group") |>
  cols_label(
    group1 = "Group",
    n = "N",
    mean = "Mean",
    sd = "SD",
    median = "Median",
    min = "Min",
    max = "Max",
    se = "SE") |>
  fmt_number(
    columns = c(mean, sd, median, se),
    decimals = 2) |>
  fmt_number(
    columns = c(min, max),
    decimals = 0) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16)
TOEFL Descriptive Statistics by Group

| Group   | N  | Mean   | SD    | Median | Min | Max | SE   |
|---------|----|--------|-------|--------|-----|-----|------|
| Spoken  | 30 | 511.03 | 27.23 | 510.00 | 453 | 573 | 4.97 |
| Written | 34 | 520.79 | 22.65 | 520.00 | 460 | 553 | 3.88 |
subset(SpokenRecall, Test == "Post") |>
  group_by(ID, Group, Test, Mode) |>
  summarise(TOEFL = mean(TOEFL)) |> 
  ggplot(aes(x = Group, y = TOEFL, color = Group, group = Group)) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  stat_summary(fun = mean, geom = "point", size = 3, color = 'black') +
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.1, color = "black") +
  stat_summary(fun = mean, geom = "text",
               aes(label = round(after_stat(y), 2)),
               vjust = -4.5, color = "black", size = 3.5) +
  stat_summary(fun.data = mean_cl_boot, geom = "text",
               aes(label = paste0("[", round(after_stat(ymin), 2), ", ", round(after_stat(ymax), 2), "]")),
               vjust = 5.5, color = "black", size = 3) +
  xlab("Group") +
  ylab("TOEFL ITP Score") +
  theme_bw() +
  theme(legend.position = "bottom")
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.

Bayesian t-test

TOEFL.sum <- subset(SpokenRecall, Test == "Post") |>
  group_by(ID, Group, Test, Mode) |>
  summarise(TOEFL = mean(TOEFL))
## `summarise()` has grouped output by 'ID', 'Group', 'Test'. You can override
## using the `.groups` argument.
t.test.toefl <- ttestBF(formula = TOEFL ~ Group, data = TOEFL.sum)
## Warning: data coerced from tibble to data frame
post.toefl <- posterior(t.test.toefl, iterations = 10000)
sigma.toefl <- sqrt(post.toefl[, "sig2"])

data.frame(
  Parameter = c("theta (mean difference)", "sigma (standard deviation)"),
  Mean = c(mean(post.toefl[, 2]), mean(sigma.toefl)),
  SD = c(sd(post.toefl[, 2]), sd(sigma.toefl)),
  CI_Lower = c(
    quantile(post.toefl[, 2], probs = 0.025),
    quantile(sigma.toefl, probs = 0.025)),
  CI_Upper = c(
    quantile(post.toefl[, 2], probs = 0.975),
    quantile(sigma.toefl, probs = 0.975)),
  P_negative = c(
    mean(post.toefl[, 2] < 0),
    NA)) |>
  gt() |>
  tab_header(
    title = "Bayesian t-test Results for TOEFL Scores",
    subtitle = "Group Comparison") |>
  cols_label(
    Parameter = "Parameter",
    Mean = "Mean",
    SD = "SD",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper",
    P_negative = "P(parameter < 0)") |>
  fmt_number(
    columns = c(Mean, SD, CI_Lower, CI_Upper, P_negative),
    decimals = 3) |>
  sub_missing(
    columns = P_negative,
    missing_text = "—") |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16)
Bayesian t-test Results for TOEFL Scores
Group Comparison

| Parameter                  | Mean   | SD    | 95% CrI Lower | 95% CrI Upper | P(parameter < 0) |
|----------------------------|--------|-------|---------------|---------------|------------------|
| theta (mean difference)    | −8.334 | 5.972 | −20.237       | 3.266         | 0.922            |
| sigma (standard deviation) | 25.081 | 2.282 | 21.125        | 30.044        | —                |
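
The posterior draws from ttestBF also include the standardized effect size delta, which expresses the group difference in standard-deviation units. A minimal sketch:

# Posterior median and 95% CrI for the standardized difference
quantile(post.toefl[, "delta"], probs = c(0.025, 0.5, 0.975))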

Modeling

rstan_options(auto_write = TRUE) # Cache compiled Stan models on disk
options(mc.cores = parallel::detectCores()) # Run MCMC chains in parallel across cores

Preprocessing data

Spoken.pre <- SpokenRecall |>
  filter(Test == "Pre") |>
  select(ID, Word, Pre = Score)

Spoken.post <- SpokenRecall |>
  filter(Test == "Post") |>
  left_join(Spoken.pre, by = c("ID", "Word"))

Written.pre <- WrittenRecall |>
  filter(Test == "Pre") |>
  select(ID, Word, Pre = Score)

Written.post <- WrittenRecall |>
  filter(Test == "Post") |>
  left_join(Written.pre, by = c("ID", "Word"))

Spoken.post$zTOEFL <- as.numeric(scale(Spoken.post$TOEFL)) # z-standardize; as.numeric() drops scale()'s matrix class
Written.post$zTOEFL <- as.numeric(scale(Written.post$TOEFL))
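
A quick sanity check that the left joins matched every posttest item to a pretest score; any NA in Pre would indicate an unmatched ID-by-Word pair. A minimal sketch:

sum(is.na(Spoken.post$Pre))  # should be 0
sum(is.na(Written.post$Pre)) # should be 0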

Prior setting

Prior <- c(
  prior(normal(0, 2), class = Intercept),
  prior(normal(0, 1), class = b),
  prior(cauchy(0, 1), class = sd))
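
To confirm which parameters these three classes cover before sampling, the default prior table can be inspected with get_prior(), which takes the same formula, data, and family as brm(). A minimal sketch for the spoken model:

brms::get_prior(
  Score ~ Group * Practice_S * Pre + Group * Practice_W * Pre +
    zTOEFL * Pre + (1 | ID) + (1 | Word),
  data = Spoken.post,
  family = bernoulli(link = "logit"))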

Spoken Recall

Fit the spoken model

Fit.spoken <- brm(
  Score ~  
    Group * Practice_S * Pre +
    Group * Practice_W * Pre +
    zTOEFL * Pre +
    (1 | ID) +
    (1 | Word),
  family = bernoulli(link = "logit"),
  iter = 10000, warmup = 5000, chains = 4,
  prior = Prior,
  data = Spoken.post,
  file = "Fit_spoken.rds",
  seed = 123)
## Compiling Stan program...
## Start sampling

Results

fit_summary <- summary(Fit.spoken)

# Extract fixed effects with diagnostics
fixed_effects <- fit_summary$fixed
fixed_df <- as.data.frame(fixed_effects)
fixed_df$Parameter <- rownames(fixed_df)

# Create table with gt
fixed_df |>
  select(Parameter, Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat, Bulk_ESS, Tail_ESS) |>
  gt() |>
  tab_header(
    title = "Bayesian Logistic Regression Results with Diagnostics",
    subtitle = "Spoken Recall Model (Logit Scale)") |>
  cols_label(
    Parameter = "Parameter",
    Estimate = "Estimate",
    Est.Error = "SE",
    `l-95% CI` = "95% CrI Lower",
    `u-95% CI` = "95% CrI Upper",
    Rhat = "R-hat",
    Bulk_ESS = "Bulk ESS",
    Tail_ESS = "Tail ESS") |>
  fmt_number(
    columns = c(Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat),
    decimals = 3) |>
  fmt_number(
    columns = c(Bulk_ESS, Tail_ESS),
    decimals = 0) |>
  tab_style(
    style = list(
      cell_fill(color = "lightyellow"),
      cell_text(weight = "bold")),
    locations = cells_body(
      columns = Rhat,
      rows = Rhat > 1.01)) |>
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      columns = c(Bulk_ESS, Tail_ESS),
      rows = Bulk_ESS < 400 | Tail_ESS < 400)) |>
  tab_options(
    table.font.size = 10,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "R-hat > 1.01 highlighted in yellow; ESS < 400 highlighted in pink",
    locations = cells_title())
Bayesian Logistic Regression Results with Diagnostics
Spoken Recall Model (Logit Scale)

| Parameter                   | Estimate | SE    | 95% CrI Lower | 95% CrI Upper | R-hat | Bulk ESS | Tail ESS |
|-----------------------------|----------|-------|---------------|---------------|-------|----------|----------|
| Intercept                   | −0.502   | 0.243 | −0.975        | −0.025        | 1.001 | 6,296    | 10,148   |
| GroupWritten                | −0.522   | 0.289 | −1.086        | 0.043         | 1.000 | 7,245    | 10,932   |
| Practice_S                  | 0.149    | 0.036 | 0.080         | 0.220         | 1.000 | 15,384   | 14,996   |
| Pre                         | 1.783    | 0.435 | 0.929         | 2.632         | 1.000 | 11,964   | 13,903   |
| Practice_W                  | 0.061    | 0.015 | 0.031         | 0.091         | 1.000 | 11,777   | 14,070   |
| zTOEFL                      | 0.399    | 0.121 | 0.161         | 0.637         | 1.001 | 6,250    | 9,837    |
| GroupWritten:Practice_S     | 0.039    | 0.126 | −0.207        | 0.286         | 1.000 | 14,567   | 13,975   |
| GroupWritten:Pre            | 0.429    | 0.522 | −0.590        | 1.449         | 1.000 | 11,660   | 13,706   |
| Practice_S:Pre              | −0.054   | 0.130 | −0.282        | 0.222         | 1.000 | 19,895   | 14,266   |
| GroupWritten:Practice_W     | 0.009    | 0.020 | −0.030        | 0.048         | 1.000 | 12,509   | 12,816   |
| Pre:Practice_W              | 0.032    | 0.053 | −0.064        | 0.143         | 1.000 | 12,499   | 13,456   |
| Pre:zTOEFL                  | −0.156   | 0.187 | −0.525        | 0.210         | 1.001 | 26,686   | 14,285   |
| GroupWritten:Practice_S:Pre | −0.444   | 0.346 | −1.108        | 0.251         | 1.000 | 21,076   | 14,881   |
| GroupWritten:Pre:Practice_W | 0.018    | 0.070 | −0.119        | 0.153         | 1.000 | 12,595   | 13,287   |

Note: R-hat > 1.01 and ESS < 400 would be flagged; no parameter meets either criterion here.

Extract R² values

r2_spoken <- r2_bayes(Fit.spoken, ci = 0.95)

data.frame(
  Type = c("Conditional R²", "Marginal R²"),
  R2 = c(r2_spoken$R2_Bayes, r2_spoken$R2_Bayes_marginal),
  SE = c(attr(r2_spoken, "SE")$R2_Bayes, 
         attr(r2_spoken, "SE")$R2_Bayes_marginal),
  CI_Lower = c(attr(r2_spoken, "CI")$R2_Bayes$CI_low,
               attr(r2_spoken, "CI")$R2_Bayes_marginal$CI_low),
  CI_Upper = c(attr(r2_spoken, "CI")$R2_Bayes$CI_high,
               attr(r2_spoken, "CI")$R2_Bayes_marginal$CI_high)) |>
  gt() |>
  tab_header(
    title = "Bayesian R² for Spoken Recall Model") |>
  cols_label(
    Type = "R² Type",
    R2 = "Estimate",
    SE = "SE",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper") |>
  fmt_number(
    columns = c(R2, SE, CI_Lower, CI_Upper),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.",
    locations = cells_column_labels(columns = Type))
Bayesian R² for Spoken Recall Model

| R² Type        | Estimate | SE    | 95% CrI Lower | 95% CrI Upper |
|----------------|----------|-------|---------------|---------------|
| Conditional R² | 0.373    | 0.009 | 0.355         | 0.391         |
| Marginal R²    | 0.174    | 0.018 | 0.137         | 0.207         |

Note: Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.
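
As a complement to R², approximate leave-one-out cross-validation estimates out-of-sample predictive performance. A minimal sketch (with 20,000 post-warmup draws this can take a while and may warn about high Pareto-k observations):

loo_spoken <- loo(Fit.spoken) # PSIS-LOO for the fitted brms model
print(loo_spoken)
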
post_spoken <- as_draws_df(Fit.spoken)

directional_probs_spoken <- post_spoken |>
  summarize(
    `P(b_Intercept < 0)` = mean(`b_Intercept` < 0),
    `P(b_GroupWritten < 0)` = mean(`b_GroupWritten` < 0),
    `P(b_Practice_S > 0)` = mean(`b_Practice_S` > 0),
    `P(b_Practice_W > 0)` = mean(`b_Practice_W` > 0),
    `P(b_Pre > 0)` = mean(`b_Pre` > 0),
    `P(b_zTOEFL > 0)` = mean(`b_zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S > 0)` = mean(`b_GroupWritten:Practice_S` > 0),
    `P(b_GroupWritten:Practice_W > 0)` = mean(`b_GroupWritten:Practice_W` > 0),
    `P(b_GroupWritten:Pre > 0)` = mean(`b_GroupWritten:Pre` > 0),
    `P(b_Practice_S:Pre < 0)` = mean(`b_Practice_S:Pre` < 0),
    `P(b_Pre:Practice_W > 0)` = mean(`b_Pre:Practice_W` > 0),
    `P(b_Pre:zTOEFL < 0)` = mean(`b_Pre:zTOEFL` < 0),
    `P(b_GroupWritten:Practice_S:Pre < 0)` = mean(`b_GroupWritten:Practice_S:Pre` < 0),
    `P(b_GroupWritten:Pre:Practice_W > 0)` = mean(`b_GroupWritten:Pre:Practice_W` > 0),
    `P(sd_ID__Intercept > 0)` = mean(`sd_ID__Intercept` > 0),
    `P(sd_Word__Intercept > 0)` = mean(`sd_Word__Intercept` > 0)) |>
  pivot_longer(
    everything(),
    names_to = "Hypothesis",
    values_to = "Probability")

directional_probs_spoken |>
  arrange(desc(Probability)) |>
  gt() |>
  tab_header(
    title = "Directional Posterior Probabilities",
    subtitle = "Spoken Recall Model") |>
  cols_label(
    Hypothesis = "Hypothesis",
    Probability = "Probability") |>
  fmt_number(
    columns = Probability,
    decimals = 2)
Directional Posterior Probabilities
Spoken Recall Model

| Hypothesis                           | Probability |
|--------------------------------------|-------------|
| P(b_Practice_S > 0)                  | 1.00        |
| P(b_Practice_W > 0)                  | 1.00        |
| P(sd_ID__Intercept > 0)              | 1.00        |
| P(sd_Word__Intercept > 0)            | 1.00        |
| P(b_Pre > 0)                         | 1.00        |
| P(b_zTOEFL > 0)                      | 1.00        |
| P(b_Intercept < 0)                   | 0.98        |
| P(b_GroupWritten < 0)                | 0.96        |
| P(b_GroupWritten:Practice_S:Pre < 0) | 0.90        |
| P(b_Pre:zTOEFL < 0)                  | 0.80        |
| P(b_GroupWritten:Pre > 0)            | 0.79        |
| P(b_Pre:Practice_W > 0)              | 0.71        |
| P(b_Practice_S:Pre < 0)              | 0.68        |
| P(b_GroupWritten:Practice_W > 0)     | 0.67        |
| P(b_GroupWritten:Practice_S > 0)     | 0.62        |
| P(b_GroupWritten:Pre:Practice_W > 0) | 0.60        |
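
The same directional probabilities can be computed one at a time with brms::hypothesis(), which additionally reports an evidence ratio. A minimal sketch for a single coefficient:

# Post.Prob should match P(b_Practice_S > 0) in the table above
hypothesis(Fit.spoken, "Practice_S > 0")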

Visualization of effects

S_predict_S <- ggpredict(Fit.spoken, terms = c("Practice_S", "Group", "Pre [0]"))  
## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(S_predict_S) <- c("Practice_S", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
S_filter_S <- S_predict_S |>
  group_by(Group) |>
  filter(Practice_S >= min(SpokenRecall$Practice_S[SpokenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_S <= max(SpokenRecall$Practice_S[SpokenRecall$Group == unique(Group)], na.rm = TRUE))

S_pd <- position_dodge(width = 0.6)
S_predict_S_p <-
  ggplot(S_filter_S, aes(x = Practice_S, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = S_pd) +        
  geom_point(color = "black", position = S_pd) +     
  geom_line(color = "black", position = S_pd) +     
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 18, 2), limits = c(-0.5, 18)) +
  ylab("Predicted spoken accuracy (0-1)") +
  xlab("Spoken practice") +
  annotate("text", x = -Inf, y = Inf, label = "A",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

W_predict_S <-
  ggpredict(Fit.spoken, 
            terms = c("Practice_W", "Group", "Pre [0]"))  
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Data were 'prettified'. Consider using `terms="Practice_W [all]"` to get
##   smooth plots.
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(W_predict_S) <- c("Practice_W", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
W_filter_S <- W_predict_S |>
  group_by(Group) |>
  filter(Practice_W >= min(SpokenRecall$Practice_W[SpokenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_W <= max(SpokenRecall$Practice_W[SpokenRecall$Group == unique(Group)], na.rm = TRUE))

W_pd <- position_dodge(width = 1.2)
W_predict_S_p <-
  ggplot(W_filter_S, aes(x = Practice_W, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = W_pd) +      
  geom_point(color = "black", position = W_pd) +    
  geom_line(color = "black", position = W_pd) +     
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 35, 3), limits = c(-0.5, 35)) +
  ylab("Predicted spoken accuracy (0-1)") +
  xlab("Written practice") +
  annotate("text", x = -Inf, y = Inf, label = "B",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

Effect_S <- 
  grid.arrange(S_predict_S_p, W_predict_S_p, nrow = 1)

Effect_S
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
practice_means_spoken_base <- SpokenRecall |>
  group_by(Group) |>
  summarize(
    Practice_S = mean(Practice_S, na.rm = TRUE),
    Practice_W = mean(Practice_W, na.rm = TRUE),
    .groups = "drop") |>
  mutate(Group = as.character(Group))

practice_means_spoken <- practice_means_spoken_base |>
  pivot_longer(
    cols = c(Practice_S, Practice_W),
    names_to = "Predictor",
    values_to = "PracticeValue") |>
  mutate(Group = as.character(Group))

practice_means_spoken_wide <- practice_means_spoken_base

predicted_spoken <- practice_means_spoken |>
  mutate(
    results = pmap(
      list(Group, Predictor, PracticeValue),
      function(group, predictor, value) {
        group_value <- as.character(group)
        group_means <- practice_means_spoken_wide[practice_means_spoken_wide$Group == group_value, , drop = FALSE]

        practice_s_val <- if (predictor == "Practice_S") value else group_means$Practice_S
        practice_w_val <- if (predictor == "Practice_W") value else group_means$Practice_W

        new_data <- tibble(
          Group = factor(group_value, levels = levels(Spoken.post$Group)),
          Practice_S = practice_s_val,
          Practice_W = practice_w_val,
          Pre = 0,
          zTOEFL = 0)

        draws <- posterior_epred(
          Fit.spoken,
          newdata = new_data,
          re_formula = NA)

        draws_vec <- as.numeric(draws)

        tibble(
          Response = mean(draws_vec),
          SE = sd(draws_vec),
          Lower = quantile(draws_vec, 0.025),
          Upper = quantile(draws_vec, 0.975))
      })) |>
  unnest(results) |>
  select(Group, Predictor, PracticeValue, Response, SE, Lower, Upper)

predicted_spoken |>
  gt() |>
  tab_header(
    title = "Predicted Probabilities by Practice Levels",
    subtitle = "Spoken Recall Model") |>
  cols_label(
    Group = "Group",
    Predictor = "Practice Predictor",
    PracticeValue = "Mean Practice",
    Response = "Predicted Probability",
    SE = "SE",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI") |>
  fmt_number(
    columns = c(PracticeValue, Response, SE, Lower, Upper),
    decimals = 2)
Predicted Probabilities by Practice Levels
Spoken Recall Model

| Group   | Practice Predictor | Mean Practice | Predicted Probability | SE   | Lower 95% CrI | Upper 95% CrI |
|---------|--------------------|---------------|-----------------------|------|---------------|---------------|
| Spoken  | Practice_S         | 2.25          | 0.58                  | 0.05 | 0.48          | 0.67          |
| Spoken  | Practice_W         | 8.14          | 0.58                  | 0.05 | 0.48          | 0.67          |
| Written | Practice_S         | 0.54          | 0.52                  | 0.05 | 0.42          | 0.62          |
| Written | Practice_W         | 8.09          | 0.52                  | 0.05 | 0.42          | 0.62          |

The two rows within each group are identical by construction: each row evaluates the focal predictor at its own group mean while holding the other predictor at its group mean, so both rows pass the same newdata to posterior_epred().

Written Recall

Fit the written model

Fit.written <- brm(
  Score ~  
    Group * Practice_S * Pre +
    Group * Practice_W * Pre +
    zTOEFL * Pre +
    (1 | ID) +
    (1 | Word),
  family = bernoulli(link = "logit"),
  iter = 10000, warmup = 5000, chains = 4,
  prior = Prior,
  data = Written.post,
  file = "Fit_written.rds",
  seed = 234)
## Compiling Stan program...
## Start sampling
## (per-chain sampling progress omitted; each of the four chains ran ~15 s of
## warm-up and ~18 s of sampling, ~33 s total per chain)

Results

fit_summary_W <- summary(Fit.written)

# Extract fixed effects with diagnostics
fixed_effects_W <- fit_summary_W$fixed
fixed_df_W <- as.data.frame(fixed_effects_W)
fixed_df_W$Parameter <- rownames(fixed_df_W)

# Create table with gt
fixed_df_W |>
  select(Parameter, Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat, Bulk_ESS, Tail_ESS) |>
  gt() |>
  tab_header(
    title = "Bayesian Logistic Regression Results with Diagnostics",
    subtitle = "Written Recall Model (Logit Scale)") |>
  cols_label(
    Parameter = "Parameter",
    Estimate = "Estimate",
    Est.Error = "SE",
    `l-95% CI` = "95% CrI Lower",
    `u-95% CI` = "95% CrI Upper",
    Rhat = "R-hat",
    Bulk_ESS = "Bulk ESS",
    Tail_ESS = "Tail ESS") |>
  fmt_number(
    columns = c(Estimate, Est.Error, `l-95% CI`, `u-95% CI`, Rhat),
    decimals = 3) |>
  fmt_number(
    columns = c(Bulk_ESS, Tail_ESS),
    decimals = 0) |>
  tab_style(
    style = list(
      cell_fill(color = "lightyellow"),
      cell_text(weight = "bold")),
    locations = cells_body(
      columns = Rhat,
      rows = Rhat > 1.01)) |>
  tab_style(
    style = cell_fill(color = "lightpink"),
    locations = cells_body(
      columns = c(Bulk_ESS, Tail_ESS),
      rows = Bulk_ESS < 400 | Tail_ESS < 400)) |>
  tab_options(
    table.font.size = 10,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "R-hat > 1.01 highlighted in yellow; ESS < 400 highlighted in pink",
    locations = cells_title())
Bayesian Logistic Regression Results with Diagnostics
Written Recall Model (Logit Scale)

| Parameter                   | Estimate | SE    | 95% CrI Lower | 95% CrI Upper | R-hat | Bulk ESS | Tail ESS |
|-----------------------------|----------|-------|---------------|---------------|-------|----------|----------|
| Intercept                   | −0.417   | 0.277 | −0.966        | 0.138         | 1.000 | 7,414    | 10,840   |
| GroupWritten                | −0.167   | 0.343 | −0.840        | 0.515         | 1.000 | 7,369    | 11,039   |
| Practice_S                  | 0.028    | 0.037 | −0.044        | 0.100         | 1.000 | 23,733   | 15,578   |
| Pre                         | 1.858    | 0.399 | 1.080         | 2.643         | 1.000 | 16,674   | 15,630   |
| Practice_W                  | 0.071    | 0.016 | 0.040         | 0.103         | 1.000 | 19,100   | 15,296   |
| zTOEFL                      | 0.383    | 0.157 | 0.077         | 0.694         | 1.000 | 6,709    | 10,214   |
| GroupWritten:Practice_S     | 0.185    | 0.138 | −0.084        | 0.456         | 1.000 | 22,854   | 16,725   |
| GroupWritten:Pre            | 0.193    | 0.482 | −0.745        | 1.148         | 1.000 | 16,802   | 17,361   |
| Practice_S:Pre              | 0.083    | 0.109 | −0.112        | 0.314         | 1.000 | 30,104   | 14,929   |
| GroupWritten:Practice_W     | −0.021   | 0.022 | −0.063        | 0.021         | 1.000 | 19,153   | 15,705   |
| Pre:Practice_W              | 0.040    | 0.052 | −0.057        | 0.145         | 1.000 | 17,121   | 16,180   |
| Pre:zTOEFL                  | 0.078    | 0.156 | −0.232        | 0.386         | 1.000 | 32,785   | 14,492   |
| GroupWritten:Practice_S:Pre | −0.115   | 0.301 | −0.684        | 0.491         | 1.001 | 32,230   | 15,658   |
| GroupWritten:Pre:Practice_W | −0.032   | 0.063 | −0.158        | 0.089         | 1.000 | 16,899   | 16,888   |

Note: R-hat > 1.01 and ESS < 400 would be flagged; no parameter meets either criterion here.

Extract R² values

r2_written <- r2_bayes(Fit.written, ci = 0.95)
data.frame(
  Type = c("Conditional R²", "Marginal R²"),
  R2 = c(r2_written$R2_Bayes, r2_written$R2_Bayes_marginal),
  SE = c(attr(r2_written, "SE")$R2_Bayes, 
         attr(r2_written, "SE")$R2_Bayes_marginal),
  CI_Lower = c(attr(r2_written, "CI")$R2_Bayes$CI_low,
               attr(r2_written, "CI")$R2_Bayes_marginal$CI_low),
  CI_Upper = c(attr(r2_written, "CI")$R2_Bayes$CI_high,
               attr(r2_written, "CI")$R2_Bayes_marginal$CI_high)) |>
  gt() |>
  tab_header(
    title = "Bayesian R² for Written Recall Model") |>
  cols_label(
    Type = "R² Type",
    R2 = "Estimate",
    SE = "SE",
    CI_Lower = "95% CrI Lower",
    CI_Upper = "95% CrI Upper") |>
  fmt_number(
    columns = c(R2, SE, CI_Lower, CI_Upper),
    decimals = 3) |>
  tab_options(
    table.font.size = 12,
    heading.title.font.size = 16) |>
  tab_footnote(
    footnote = "Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.",
    locations = cells_column_labels(columns = Type))
Bayesian R² for Written Recall Model

| R² Type        | Estimate | SE    | 95% CrI Lower | 95% CrI Upper |
|----------------|----------|-------|---------------|---------------|
| Conditional R² | 0.384    | 0.009 | 0.365         | 0.401         |
| Marginal R²    | 0.173    | 0.023 | 0.129         | 0.217         |

Note: Conditional R²: variance explained by fixed and random effects. Marginal R²: variance explained by fixed effects only.
post_written <- as_draws_df(Fit.written)

directional_probs_written <- post_written |>
  summarize(
    `P(b_Intercept < 0)` = mean(`b_Intercept` < 0),
    `P(b_GroupWritten < 0)` = mean(`b_GroupWritten` < 0),
    `P(b_Practice_S > 0)` = mean(`b_Practice_S` > 0),
    `P(b_Practice_W > 0)` = mean(`b_Practice_W` > 0),
    `P(b_Pre > 0)` = mean(`b_Pre` > 0),
    `P(b_zTOEFL > 0)` = mean(`b_zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S > 0)` = mean(`b_GroupWritten:Practice_S` > 0),
    `P(b_GroupWritten:Practice_W < 0)` = mean(`b_GroupWritten:Practice_W` < 0),
    `P(b_GroupWritten:Pre > 0)` = mean(`b_GroupWritten:Pre` > 0),
    `P(b_Practice_S:Pre > 0)` = mean(`b_Practice_S:Pre` > 0),
    `P(b_Pre:Practice_W > 0)` = mean(`b_Pre:Practice_W` > 0),
    `P(b_Pre:zTOEFL > 0)` = mean(`b_Pre:zTOEFL` > 0),
    `P(b_GroupWritten:Practice_S:Pre < 0)` = mean(`b_GroupWritten:Practice_S:Pre` < 0),
    `P(b_GroupWritten:Pre:Practice_W < 0)` = mean(`b_GroupWritten:Pre:Practice_W` < 0),
    `P(sd_ID__Intercept > 0)` = mean(`sd_ID__Intercept` > 0),
    `P(sd_Word__Intercept > 0)` = mean(`sd_Word__Intercept` > 0)) |>
  pivot_longer(
    everything(),
    names_to = "Hypothesis",
    values_to = "Probability")

directional_probs_written |>
  arrange(desc(Probability)) |>
  gt() |>
  tab_header(
    title = "Directional Posterior Probabilities",
    subtitle = "Written Recall Model") |>
  cols_label(
    Hypothesis = "Hypothesis",
    Probability = "Probability") |>
  fmt_number(
    columns = Probability,
    decimals = 2)
Directional Posterior Probabilities
Written Recall Model

| Hypothesis                           | Probability |
|--------------------------------------|-------------|
| P(b_Practice_W > 0)                  | 1.00        |
| P(b_Pre > 0)                         | 1.00        |
| P(sd_ID__Intercept > 0)              | 1.00        |
| P(sd_Word__Intercept > 0)            | 1.00        |
| P(b_zTOEFL > 0)                      | 0.99        |
| P(b_Intercept < 0)                   | 0.93        |
| P(b_GroupWritten:Practice_S > 0)     | 0.91        |
| P(b_GroupWritten:Practice_W < 0)     | 0.84        |
| P(b_Practice_S > 0)                  | 0.78        |
| P(b_Practice_S:Pre > 0)              | 0.77        |
| P(b_Pre:Practice_W > 0)              | 0.77        |
| P(b_Pre:zTOEFL > 0)                  | 0.70        |
| P(b_GroupWritten:Pre:Practice_W < 0) | 0.69        |
| P(b_GroupWritten < 0)                | 0.69        |
| P(b_GroupWritten:Pre > 0)            | 0.66        |
| P(b_GroupWritten:Practice_S:Pre < 0) | 0.65        |
interaction_written <- post_written |>
  mutate(Interaction = `b_Practice_S` + `b_GroupWritten:Practice_S`) |> # simple slope of Practice_S in the Written group
  summarize(
    Mean = mean(Interaction),
    Lower = quantile(Interaction, probs = 0.025),
    Upper = quantile(Interaction, probs = 0.975),
    `P(Interaction > 0)` = mean(Interaction > 0))

interaction_written |>
  gt() |>
  tab_header(
    title = "Simple Slope of Practice_S in the Written Group",
    subtitle = "Practice_S + GroupWritten:Practice_S, Written Recall Model") |>
  cols_label(
    Mean = "Mean",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI",
    `P(Interaction > 0)` = "P(Interaction > 0)") |>
  fmt_number(
    columns = everything(),
    decimals = 2)
Simple Slope of Practice_S in the Written Group
Practice_S + GroupWritten:Practice_S, Written Recall Model

| Mean | Lower 95% CrI | Upper 95% CrI | P(Interaction > 0) |
|------|---------------|---------------|--------------------|
| 0.21 | −0.05         | 0.48          | 0.94               |
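
The same simple slope can be tested directly with brms::hypothesis(), avoiding the manual draw arithmetic. A minimal sketch:

# Simple slope of spoken practice in the Written group
hypothesis(Fit.written, "Practice_S + GroupWritten:Practice_S > 0")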

Visualization of effects

S_predict_W <- ggpredict(Fit.written, terms = c("Practice_S", "Group", "Pre [0]"))  
## You are calculating adjusted predictions on the population-level (i.e.
##   `type = "fixed"`) for a *generalized* linear mixed model.
##   This may produce biased estimates due to Jensen's inequality. Consider
##   setting `bias_correction = TRUE` to correct for this bias.
##   See also the documentation of the `bias_correction` argument.
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(S_predict_W) <- c("Practice_S", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
S_filter_W <- S_predict_W |>
  group_by(Group) |>
  filter(Practice_S >= min(WrittenRecall$Practice_S[WrittenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_S <= max(WrittenRecall$Practice_S[WrittenRecall$Group == unique(Group)], na.rm = TRUE))

S_predict_W_p <-
  ggplot(S_filter_W, aes(x = Practice_S, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = S_pd) +      
  geom_point(color = "black", position = S_pd) +     
  geom_line(color = "black", position = S_pd) +      
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 18, 2), limits = c(-0.5, 18)) +
  ylab("Predicted written accuracy (0-1)") +
  xlab("Spoken practice") +
  annotate("text", x = -Inf, y = Inf, label = "C",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

W_predict_W <- ggpredict(Fit.written, terms = c("Practice_W", "Group", "Pre [0]"))  
## Some of the focal terms are of type `character`. This may lead to
##   unexpected results. It is recommended to convert these variables to
##   factors before fitting the model.
##   The following variables are of type character: `Group`
## Data were 'prettified'. Consider using `terms="Practice_W [all]"` to get
##   smooth plots.
## Note: uncertainty of error terms are not taken into account. Consider
##   setting `interval` to "prediction". This will call `posterior_predict()`
##   instead of `posterior_epred()`.
## Warning in glmmTMB::glmmTMB(formula = cond_formula, family = family, data =
## data): some components missing from 'family': downstream methods may fail
colnames(W_predict_W) <- c("Practice_W", "Mean", "SE", "Lower", "Higher", "Group", "Panel")
W_filter_W <- W_predict_W |>
  group_by(Group) |>
  filter(Practice_W >= min(WrittenRecall$Practice_W[WrittenRecall$Group == unique(Group)], na.rm = TRUE) &
           Practice_W <= max(WrittenRecall$Practice_W[WrittenRecall$Group == unique(Group)], na.rm = TRUE))

W_predict_W_p <-
  ggplot(W_filter_W, aes(x = Practice_W, y = Mean, group = Group)) +
  geom_errorbar(aes(ymin = Lower, ymax = Higher, color = Group),
                width = 0, position = W_pd) +       
  geom_point(color = "black", position = W_pd) +     
  geom_line(color = "black", position = W_pd) +      
  theme_classic() +
  theme(legend.position = "bottom") +
  scale_y_continuous(breaks = seq(0, 1, 0.1), limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(0, 35, 3), limits = c(-0.5, 35)) +
  ylab("Predicted written accuracy (0-1)") +
  xlab("Written practice") +
  annotate("text", x = -Inf, y = Inf, label = "D",
           hjust = -1, vjust = 2, size = 7, fontface = "bold")

Effect_W <- 
  grid.arrange(S_predict_W_p, W_predict_W_p, nrow = 1)

Effect_W
## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
grid.arrange(Effect_S, Effect_W, nrow = 2)

practice_means_written_base <- WrittenRecall |> # use the written-recall data, matching Fit.written
  group_by(Group) |>
  summarize(
    Practice_S = mean(Practice_S, na.rm = TRUE),
    Practice_W = mean(Practice_W, na.rm = TRUE),
    .groups = "drop") |>
  mutate(Group = as.character(Group))

practice_means_written <- practice_means_written_base |>
  pivot_longer(
    cols = c(Practice_S, Practice_W),
    names_to = "Predictor",
    values_to = "PracticeValue") |>
  mutate(Group = as.character(Group))

practice_means_written_wide <- practice_means_written_base

predicted_written <- practice_means_written |>
  mutate(
    results = pmap(
      list(Group, Predictor, PracticeValue),
      function(group, predictor, value) {
        group_value <- as.character(group)
        group_means <- practice_means_written_wide[practice_means_written_wide$Group == group_value, , drop = FALSE]

        practice_s_val <- if (predictor == "Practice_S") value else group_means$Practice_S
        practice_w_val <- if (predictor == "Practice_W") value else group_means$Practice_W

        new_data <- tibble(
          Group = factor(group_value, levels = levels(Written.post$Group)),
          Practice_S = practice_s_val,
          Practice_W = practice_w_val,
          Pre = 0,
          zTOEFL = 0)

        draws <- posterior_epred(
          Fit.written,
          newdata = new_data,
          re_formula = NA)

        draws_vec <- as.numeric(draws)

        tibble(
          Response = mean(draws_vec),
          SE = sd(draws_vec),
          Lower = quantile(draws_vec, 0.025),
          Upper = quantile(draws_vec, 0.975))
      })) |>
  unnest(results) |>
  select(Group, Predictor, PracticeValue, Response, SE, Lower, Upper)

predicted_written |>
  gt() |>
  tab_header(
    title = "Predicted Probabilities by Practice Levels",
    subtitle = "Written Recall Model") |>
  cols_label(
    Group = "Group",
    Predictor = "Practice Predictor",
    PracticeValue = "Mean Practice",
    Response = "Predicted Probability",
    SE = "SE",
    Lower = "Lower 95% CrI",
    Upper = "Upper 95% CrI") |>
  fmt_number(
    columns = c(PracticeValue, Response, SE, Lower, Upper),
    decimals = 2)
Predicted Probabilities by Practice Levels
Written Recall Model

| Group   | Practice Predictor | Mean Practice | Predicted Probability | SE   | Lower 95% CrI | Upper 95% CrI |
|---------|--------------------|---------------|-----------------------|------|---------------|---------------|
| Spoken  | Practice_S         | 2.25          | 0.56                  | 0.06 | 0.44          | 0.67          |
| Spoken  | Practice_W         | 8.14          | 0.56                  | 0.06 | 0.44          | 0.67          |
| Written | Practice_S         | 0.54          | 0.54                  | 0.06 | 0.42          | 0.66          |
| Written | Practice_W         | 8.09          | 0.54                  | 0.06 | 0.42          | 0.66          |

Posterior Predictive Checks

Spoken Recall

brms::pp_check(
  object = Fit.spoken,
  ndraws = 1000,
  type = "stat",
  stat = "mean") +
  theme_test() +
  ylab("Frequency") +
  ggtitle("Posterior Predictive Check: Mean")
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

pp_check(
  object = Fit.spoken,
  ndraws = 100) +
  theme_test() +
  ylab("Density") +
  ggtitle("Posterior Predictive Check: Density")

Trace plots

plot(Fit.spoken, ask = FALSE)
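
plot() pages through every parameter; to overlay the four chains for selected coefficients only, bayesplot can be used on the posterior array. A minimal sketch:

bayesplot::mcmc_trace(
  as.array(Fit.spoken),
  pars = c("b_Intercept", "b_GroupWritten", "b_Practice_S", "b_Practice_W"))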

Written Recall

brms::pp_check(
  object = Fit.written,
  ndraws = 1000,
  type = "stat",
  stat = "mean") +
  theme_test() +
  ylab("Frequency") +
  ggtitle("Posterior Predictive Check: Mean")
## Note: in most cases the default test statistic 'mean' is too weak to detect anything of interest.
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

pp_check(
  object = Fit.written,
  ndraws = 100) +
  theme_test() +
  ylab("Density") +
  ggtitle("Posterior Predictive Check: Density")

plot(Fit.written, ask = FALSE)