library(readr)
library(tidyverse)
library(ggplot2)
library(ggridges)
library(ggthemes)
library(car)
library(dplyr)
library(afex) # for aov_car (ANOVA with corrections)
library(lmerTest) # for LMM with p-values
library(effectsize) # for η² etc.
library(emmeans)
Data analysis
Several required packages are loaded here, to be used later.
Preliminary
Firstly, the data were imported and stored as a data frame (df
), and converted to a more workable format.
Code
<- read_csv("2025-W41-6_BMS1052_A3_Data.csv")
df
<- FALSE # Whether to remove the Carryover test data
exclude_carryover
<- df %>%
df_long pivot_longer(
cols = c(`Pre-test`, `Mid-test`, `Post-test`, `Carryover`),
names_to = "Test",
values_to = "Error"
%>%
) if (exclude_carryover) filter(., Test != "Carryover") else . } {
Removing outliers
Code
<- df_long %>%
df_flag group_by(Training, Test) %>%
mutate(
Q1 = quantile(Error, 0.25, na.rm = TRUE),
Q3 = quantile(Error, 0.75, na.rm = TRUE),
IQR = Q3 - Q1,
is_outlier = Error < (Q1 - 1.5 * IQR) | Error > (Q3 + 1.5 * IQR)
%>%
) ungroup()
<- df_flag %>%
outliers_list filter(is_outlier) %>%
arrange(Training, Test, Participant)
<- outliers_list %>%
outliers_summary summarise(
total_outliers = n(),
total_rows = nrow(df_long),
perc = 100 * total_outliers / total_rows
)
<- df_flag %>%
df_flag mutate(Test = factor(Test,
levels = c("Carryover", "Post-test", "Mid-test", "Pre-test")))
<- df_flag %>% filter(!is_outlier)
df_clean <- df_flag %>% filter(is_outlier)
df_out
<- 0.06
point_nudge
ggplot() +
stat_density_ridges(
data = df_clean,
aes(x = Error, y = Test),
fill = "grey85",
colour = "grey40",
alpha = 0.7,
scale = 0.95,
rel_min_height = 0.01,
size = 0.3
+
) geom_point(
data = df_out,
aes(x = Error, y = Test),
shape = 21,
fill = "red",
colour = "white",
stroke = 0.6,
size = 2.3,
position = position_nudge(y = point_nudge)
+
) facet_wrap(~ Training) +
theme_clean() +
theme(
panel.grid.minor = element_blank()
+
) labs(
title = "Ridgeline distributions of the data collected",
subtitle = "Outliers shown as red points",
x = "Absolute error (cm)",
y = "Test phase"
)
Before performing analyses on the data, they were checked for outliers. A total of 12 outliers were found within the data, comprising 3.61 per cent thereof. These were subsequently removed, and are shown in Figure 1.
Normality
Many common statistical analysis models rely on the assumption that the data approximate a normal distribution. To assess whether each group fulfilled this criterion, a Q–Q plot was constructed for each combination of training group and testing phase, as shown in Figure 2.
Code
<- df_flag %>%
df_flag mutate(Test = factor(Test,
levels = c("Pre-test", "Mid-test", "Post-test", "Carryover")))
ggplot(df_flag %>% filter(!is_outlier),
aes(sample = Error)) +
stat_qq(size = 1, colour = "grey30") +
stat_qq_line(colour = "red", linewidth = 0.6) +
facet_grid(Training ~ Test, switch = "both") +
theme_clean() +
labs(
title = "Q–Q plots for normality by Training × Test",
x = "Theoretical quantiles (normal distribution)",
y = "Observed quantiles of error"
+
) theme(
strip.placement = "outside",
strip.background = element_blank(),
panel.grid.minor = element_blank()
)
It is evident that the data mostly fulfil this criterion, as the data points approximate the normal line (red). Of course, given the moderately large size of the data (\(n>20\) per group), small deviations are unlikely to affect the data.
Homogenity of variance
Code
<- df_flag %>%
df_pre filter(Test == "Pre-test", !is.na(Error), !is_outlier) %>%
mutate(Training = as.factor(Training))
<- leveneTest(Error ~ Training, data = df_pre, center = median)
levene_res <- levene_res$`Pr(>F)`[1] levene_p
To assess the baseline variance, Levene’s test was performed, revealing homogeneous variance between each group: \(F(2, 73)=1.002\), \(p=0.37\).
A violin plot (Figure 3) was also included for the purpose of displaying the variance visually.
Code
<- df_flag %>%
df_pre filter(Test == "Pre-test", !is_outlier, !is.na(Error)) %>%
mutate(Training = factor(Training))
ggplot(df_pre, aes(x = Training, y = Error, fill = Training)) +
geom_violin(trim = FALSE, alpha = 0.7) +
geom_boxplot(width = 0.1, colour = "black", fill = "white", outlier.shape = NA) +
theme_clean() +
labs(
title = "Baseline (pre-test) error distributions",
x = "Training group",
y = "Error"
+
) theme(legend.position = "none")
Analyses
Descriptive plots
Adjusting for baseline performance was made possible by taking the difference between later test scores and each participant’s mean pre-test score, as shown in Figure 4.
Code
<- c("Pre-test", "Mid-test", "Post-test", "Carryover")
test_levels
# 1) Baselines per participant (average if somehow >1 Pre row), outliers removed
<- df_flag %>%
pre_vals filter(!is_outlier, Test == "Pre-test") %>%
group_by(Participant, Training) %>%
summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")
# 2) Join baselines back, compute change, drop cases with no baseline
<- df_flag %>%
df_change filter(!is_outlier) %>%
left_join(pre_vals, by = c("Participant", "Training")) %>%
mutate(
Test = factor(Test, levels = test_levels),
Change = Error - pre_val
%>%
) filter(Test != "Pre-test") %>% # no change for Pre
drop_na(pre_val) # remove participants lacking a Pre value
# 3) Summarise and plot
<- df_change %>%
df_change_summary group_by(Training, Test) %>%
summarise(
mean_change = mean(Change, na.rm = TRUE),
se = sd(Change, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
ggplot(df_change_summary,
aes(x = Test, y = mean_change, group = Training, colour = Training)) +
geom_ribbon(aes(ymin = mean_change - se, ymax = mean_change + se),
fill = "grey70", alpha = 0.25, colour = NA) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.6) +
theme_clean() +
labs(
title = "Mean change from baseline (pre-test)",
x = "Test phase",
y = "Change in Error (±SE)",
colour = "Training group"
)
Of course, it may be more intuitive to compare both training groups against the control, as shown in Figure 5. Again, this shows that both training groups had consistently smaller changes from baseline (on average) across all test phases.
Code
<- c("Pre-test", "Mid-test", "Post-test", "Carryover")
test_levels
# --- Baselines (Pre-test) per participant
<- df_flag %>%
pre_vals filter(!is_outlier, Test == "Pre-test") %>%
group_by(Participant, Training) %>%
summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")
# --- Pooled groups: Training vs Control
<- df_flag %>%
df_pooled filter(!is_outlier) %>%
mutate(
Group2 = case_when(
%in% c("Physical", "Visual") ~ "Training",
Training %in% c("Control", "No training", "None") ~ "Control",
Training TRUE ~ Training
)%>%
) left_join(pre_vals, by = c("Participant", "Training")) %>%
mutate(
Test = factor(Test, levels = test_levels),
Change = Error - pre_val
%>%
) drop_na(pre_val) %>%
filter(Group2 %in% c("Training", "Control"))
# --- Summarise mean ± SE
<- df_pooled %>%
df_change_summary group_by(Group2, Test) %>%
summarise(
mean_change = mean(Change, na.rm = TRUE),
se = sd(Change, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
# --- Plot: mean ± SE with colour-matched ribbons
ggplot(df_change_summary,
aes(x = Test, y = mean_change, group = Group2, colour = Group2, fill = Group2)) +
geom_ribbon(aes(ymin = mean_change - se, ymax = mean_change + se),
alpha = 0.2, colour = NA) +
geom_line(linewidth = 1.1) +
geom_point(size = 3) +
geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.6) +
theme_clean() +
labs(
title = "Mean change from baseline (pre-test)",
x = "Test phase",
y = "Change in error from baseline",
colour = "Group",
fill = "Group"
)
Figure 6 clearly shows that (on average) baseline-adjusted errors remained lower than control in physical group (except for in the carryover tests), and higher than control in the visual group.
Code
<- c("Pre-test", "Mid-test", "Post-test", "Carryover")
test_levels
# 1) Get each participant's baseline (after removing outliers)
<- df_flag %>%
pre_vals filter(!is_outlier, Test == "Pre-test") %>%
group_by(Participant, Training) %>%
summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")
# 2) Join baseline, compute normalised score, drop those without a baseline
<- df_flag %>%
df_norm filter(!is_outlier) %>%
left_join(pre_vals, by = c("Participant", "Training")) %>%
mutate(
Test = factor(Test, levels = test_levels),
Norm = Error / pre_val
%>%
) drop_na(pre_val) # ensures Norm is well-defined (no Pre value -> remove)
# 3) Summarise and plot
<- df_norm %>%
df_norm_summary group_by(Training, Test) %>%
summarise(
mean_norm = mean(Norm, na.rm = TRUE),
se = sd(Norm, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
)
ggplot(df_norm_summary,
aes(x = Test, y = mean_norm, group = Training, colour = Training)) +
geom_ribbon(aes(ymin = mean_norm - se, ymax = mean_norm + se),
fill = "grey70", alpha = 0.25, colour = NA) +
geom_line(linewidth = 1) +
geom_point(size = 3) +
geom_hline(yintercept = 1, linetype = "dashed", alpha = 0.6) +
theme_clean() +
labs(
title = "Baseline-normalised error across test phases",
x = "Test phase",
y = "Error relative to baseline",
colour = "Training group"
)
Code
# Baseline per participant (for covariate)
<- df_flag %>%
pre_vals filter(Test == "Pre-test", !is_outlier) %>%
group_by(Participant, Training) %>%
summarise(PreError = mean(Error, na.rm = TRUE), .groups = "drop")
# Merge baseline into full dataset
<- df_flag %>%
df_anova filter(!is_outlier) %>%
left_join(pre_vals, by = c("Participant", "Training")) %>%
mutate(
Training = factor(Training),
Test = factor(Test, levels = c("Pre-test", "Mid-test", "Post-test", "Carryover")),
Participant = factor(Participant)
%>%
) drop_na(Error, PreError)
<- aov_car(
anova_baseline ~ PreError + Training * Test + Error(Participant/Test),
Error data = df_anova,
type = 3, # Type III sums of squares
factorize = FALSE
)
Warning: Numerical variables NOT centered on 0 (matters if variable in interaction):
PreError
Warning: Missing values for 3 ID(s), which were removed before analysis:
4, 28, 67
Below the first few rows (in wide format) of the removed cases with missing data.
Participant PreError Training Pre.test Mid.test Post.test Carryover
# 3 4 26.500 Physical 26.500 NA 15.500 21.875
# 25 28 22.875 Physical 22.875 NA 15.625 23.375
# 61 67 22.250 Control 22.250 NA 15.000 10.000
Contrasts set to contr.sum for the following variables: Training
Code
anova_baseline
Anova Table (Type 3 tests)
Response: Error
Effect df MSE F ges p.value
1 PreError 1, 69 25.02 98.89 *** .379 <.001
2 Training 2, 69 25.02 3.51 * .042 .035
3 Test 2.76, 190.78 12.22 16.01 *** .118 <.001
4 PreError:Test 2.76, 190.78 12.22 36.28 *** .232 <.001
5 Training:Test 5.53, 190.78 12.22 1.84 + .030 .099
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1
Sphericity correction method: GG
Code
# Between-group comparisons within each test
<- emmeans(anova_baseline, ~ Training | Test) emm_train_by_test
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
pairs(emm_train_by_test, adjust = "tukey")
Test = Pre.test:
contrast estimate SE df t.ratio p.value
Control - Physical 0.000 0.00 69 0.729 0.7472
Control - Visual 0.000 0.00 69 0.056 0.9983
Physical - Visual 0.000 0.00 69 -0.736 0.7430
Test = Mid.test:
contrast estimate SE df t.ratio p.value
Control - Physical 1.866 1.38 69 1.352 0.3716
Control - Visual -0.370 1.37 69 -0.270 0.9608
Physical - Visual -2.236 1.26 69 -1.770 0.1872
Test = Post.test:
contrast estimate SE df t.ratio p.value
Control - Physical 2.960 1.29 69 2.295 0.0632
Control - Visual 1.191 1.28 69 0.929 0.6240
Physical - Visual -1.769 1.18 69 -1.499 0.2978
Test = Carryover:
contrast estimate SE df t.ratio p.value
Control - Physical 0.807 1.37 69 0.588 0.8268
Control - Visual -2.269 1.36 69 -1.664 0.2265
Physical - Visual -3.076 1.26 69 -2.450 0.0438
P value adjustment: tukey method for comparing a family of 3 estimates
Code
# Within-group comparisons across tests
<- emmeans(anova_baseline, ~ Test | Training) emm_test_by_train
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
pairs(emm_test_by_train, adjust = "bonferroni")
Training = Control:
contrast estimate SE df t.ratio p.value
Pre.test - Mid.test 2.072 1.030 69 2.003 0.2949
Pre.test - Post.test 2.879 0.967 69 2.978 0.0240
Pre.test - Carryover 2.514 1.030 69 2.445 0.1023
Mid.test - Post.test 0.807 1.080 69 0.748 1.0000
Mid.test - Carryover 0.442 1.100 69 0.402 1.0000
Post.test - Carryover -0.365 1.270 69 -0.288 1.0000
Training = Physical:
contrast estimate SE df t.ratio p.value
Pre.test - Mid.test 3.939 0.874 69 4.507 0.0002
Pre.test - Post.test 5.840 0.817 69 7.151 <.0001
Pre.test - Carryover 3.321 0.868 69 3.824 0.0017
Mid.test - Post.test 1.901 0.911 69 2.088 0.2432
Mid.test - Carryover -0.618 0.929 69 -0.665 1.0000
Post.test - Carryover -2.518 1.070 69 -2.354 0.1285
Training = Visual:
contrast estimate SE df t.ratio p.value
Pre.test - Mid.test 1.702 0.907 69 1.876 0.3889
Pre.test - Post.test 4.070 0.848 69 4.801 0.0001
Pre.test - Carryover 0.245 0.902 69 0.272 1.0000
Mid.test - Post.test 2.368 0.945 69 2.505 0.0877
Mid.test - Carryover -1.457 0.964 69 -1.511 0.8115
Post.test - Carryover -3.825 1.110 69 -3.444 0.0059
P value adjustment: bonferroni method for 6 tests
Code
<- as.data.frame(emmeans(anova_baseline, ~ Training * Test)) emm_df
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
ggplot(emm_df, aes(x = Test, y = emmean, colour = Training, group = Training)) +
geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Training),
alpha = 0.15, colour = NA) +
geom_line(linewidth = 1.1) +
geom_point(size = 3) +
theme_clean() +
labs(
title = "Interaction between Training type and Test phase (baseline-adjusted)",
x = "Test phase",
y = "Estimated marginal mean Error (±95 % CI)",
colour = "Training group",
fill = "Training group"
)
Code
library(dplyr)
library(stringr)
library(ggplot2)
library(ggthemes)
library(emmeans)
# Helper to standardise test labels from emmeans (dot -> hyphen) and order them
<- function(x) {
fix_test_labels %>%
x mutate(Test = str_replace_all(Test, c("Pre\\.test"="Pre-test",
"Mid\\.test"="Mid-test",
"Post\\.test"="Post-test"))) %>%
mutate(Test = factor(Test, levels = c("Pre-test","Mid-test","Post-test","Carryover")))
}
# 0) Get EMMs from your ANOVA and fix labels
<- emmeans(anova_baseline, ~ Training * Test) %>% as.data.frame() %>% fix_test_labels() emm_full
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
# -------------------------------
# 1) Control vs Physical
# -------------------------------
<- emm_full %>% filter(Training %in% c("Control","Physical"))
emm_CP
ggplot(emm_CP, aes(x = Test, y = emmean, colour = Training, group = Training)) +
geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Training),
alpha = 0.15, colour = NA) +
geom_line(linewidth = 1.1) +
geom_point(size = 3) +
theme_clean() +
labs(
title = "Baseline-adjusted means: Control vs Physical",
x = "Test phase",
y = "Estimated marginal mean Error (±95% CI)",
colour = "Group", fill = "Group"
)
Code
# -------------------------------
# 2) Physical vs (Control+Visual pooled)
# -------------------------------
# EMMs by Training | Test (for pooling), then fix labels
<- emmeans(anova_baseline, ~ Training | Test) emm_T_by_Tr
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
<- as.data.frame(emm_T_by_Tr) %>% fix_test_labels()
emm_T_by_Tr_df
# Weighted pooling weights (per-participant counts)
<- df_anova %>% distinct(Participant, Training) %>% count(Training)
n_tbl <- n_tbl$n[n_tbl$Training == "Control"]; if (length(w_C)==0) w_C <- 0
w_C <- n_tbl$n[n_tbl$Training == "Visual"]; if (length(w_V)==0) w_V <- 0
w_V <- w_C + w_V; if (w_sum == 0) { w_C <- 1; w_V <- 1; w_sum <- 2 }
w_sum
# Build contrast vector aligned to emm factor order
<- levels(emm_T_by_Tr)$Training
levs <- setNames(rep(0, length(levs)), levs)
coef_vec "Control"] <- w_C / w_sum
coef_vec["Visual"] <- w_V / w_sum
coef_vec[
<- contrast(emm_T_by_Tr, method = list("Control+Visual" = as.numeric(coef_vec)),
emm_pooled by = "Test", infer = c(TRUE, TRUE)) %>%
as.data.frame() %>%
fix_test_labels() %>%
transmute(Group2 = "Control+Visual", Test, emmean = estimate, lower.CL, upper.CL)
<- emmeans(anova_baseline, ~ Test | Training) %>%
phys_df as.data.frame() %>%
fix_test_labels() %>%
filter(Training == "Physical") %>%
transmute(Group2 = "Physical", Test, emmean, lower.CL, upper.CL)
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
<- bind_rows(emm_pooled, phys_df)
emm_two
ggplot(emm_two, aes(x = Test, y = emmean, colour = Group2, group = Group2)) +
geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Group2),
alpha = 0.18, colour = NA) +
geom_line(linewidth = 1.1) +
geom_point(size = 3) +
theme_clean() +
labs(
title = "Baseline-adjusted means: Physical vs (Control+Visual pooled)",
x = "Test phase",
y = "Estimated marginal mean Error (±95% CI)",
colour = "Group", fill = "Group"
)