Data analysis

Author

Kai Gavriel

Published

Sunday 12 October 2025

Several required packages are loaded here, to be used later.

library(readr)
library(tidyverse)
library(ggplot2)
library(ggridges)
library(ggthemes)
library(car)
library(dplyr)
library(afex)       # for aov_car (ANOVA with corrections)
library(lmerTest)   # for LMM with p-values
library(effectsize) # for η² etc.
library(emmeans)

Preliminary

Firstly, the data were imported and stored as a data frame (df), and converted to a more workable format.

Code
df <- read_csv("2025-W41-6_BMS1052_A3_Data.csv")

exclude_carryover <- FALSE # Whether to remove the Carryover test data

df_long <- df %>%
  pivot_longer(
    cols = c(`Pre-test`, `Mid-test`, `Post-test`, `Carryover`),
    names_to = "Test",
    values_to = "Error"
  ) %>%
  { if (exclude_carryover) filter(., Test != "Carryover") else . }

Removing outliers

Code
df_flag <- df_long %>%
  group_by(Training, Test) %>%
  mutate(
    Q1 = quantile(Error, 0.25, na.rm = TRUE),
    Q3 = quantile(Error, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    is_outlier = Error < (Q1 - 1.5 * IQR) | Error > (Q3 + 1.5 * IQR)
  ) %>%
  ungroup()
outliers_list <- df_flag %>%
  filter(is_outlier) %>%
  arrange(Training, Test, Participant)

outliers_summary <- outliers_list %>%
  summarise(
    total_outliers = n(),
    total_rows = nrow(df_long),
    perc = 100 * total_outliers / total_rows
  )

df_flag <- df_flag %>%
  mutate(Test = factor(Test,
                       levels = c("Carryover", "Post-test", "Mid-test", "Pre-test")))

df_clean <- df_flag %>% filter(!is_outlier)
df_out   <- df_flag %>% filter(is_outlier)

point_nudge <- 0.06

ggplot() +
  stat_density_ridges(
    data = df_clean,
    aes(x = Error, y = Test),
    fill = "grey85",
    colour = "grey40",
    alpha = 0.7,
    scale = 0.95,         
    rel_min_height = 0.01,
    size = 0.3
  ) +
  geom_point(
    data = df_out,
    aes(x = Error, y = Test),
    shape = 21,           
    fill  = "red",
    colour = "white",    
    stroke = 0.6,
    size = 2.3,
    position = position_nudge(y = point_nudge) 
  ) +
  facet_wrap(~ Training) +
  theme_clean() +
  theme(
    panel.grid.minor = element_blank()
  ) +
  labs(
    title = "Ridgeline distributions of the data collected",
    subtitle = "Outliers shown as red points",
    x = "Absolute error (cm)",
    y = "Test phase"
  )
Figure 1: Distrubutions of each testing group by phase, with outliers highlighted

Before performing analyses on the data, they were checked for outliers. A total of 12 outliers were found within the data, comprising 3.61 per cent thereof. These were subsequently removed, and are shown in Figure 1.

Normality

Many common statistical analysis models rely on the assumption that the data approximate a normal distribution. To assess whether each group fulfilled this criterion, a Q–Q plot was constructed for each combination of training group and testing phase, as shown in Figure 2.

Code
df_flag <- df_flag %>%
  mutate(Test = factor(Test,
                       levels = c("Pre-test", "Mid-test", "Post-test", "Carryover")))

ggplot(df_flag %>% filter(!is_outlier),
       aes(sample = Error)) +
  stat_qq(size = 1, colour = "grey30") +
  stat_qq_line(colour = "red", linewidth = 0.6) +
  facet_grid(Training ~ Test, switch = "both") +
  theme_clean() +
  labs(
    title = "Q–Q plots for normality by Training × Test",
    x = "Theoretical quantiles (normal distribution)",
    y = "Observed quantiles of error"
  ) +
  theme(
    strip.placement = "outside",
    strip.background = element_blank(),
    panel.grid.minor = element_blank()
  )
Figure 2: Quantile–quantile (Q–Q) plots for each training group and test phase, describing the normality of each against a theoretical normal distribution

It is evident that the data mostly fulfil this criterion, as the data points approximate the normal line (red). Of course, given the moderately large size of the data (\(n>20\) per group), small deviations are unlikely to affect the data.

Homogenity of variance

Code
df_pre <- df_flag %>%
  filter(Test == "Pre-test", !is.na(Error), !is_outlier) %>%
  mutate(Training = as.factor(Training))

levene_res <- leveneTest(Error ~ Training, data = df_pre, center = median)
levene_p <- levene_res$`Pr(>F)`[1]

To assess the baseline variance, Levene’s test was performed, revealing homogeneous variance between each group: \(F(2, 73)=1.002\), \(p=0.37\).

A violin plot (Figure 3) was also included for the purpose of displaying the variance visually.

Code
df_pre <- df_flag %>%
  filter(Test == "Pre-test", !is_outlier, !is.na(Error)) %>%
  mutate(Training = factor(Training))

ggplot(df_pre, aes(x = Training, y = Error, fill = Training)) +
  geom_violin(trim = FALSE, alpha = 0.7) +
  geom_boxplot(width = 0.1, colour = "black", fill = "white", outlier.shape = NA) +
  theme_clean() +
  labs(
    title = "Baseline (pre-test) error distributions",
    x = "Training group",
    y = "Error"
  ) +
  theme(legend.position = "none")
Figure 3: Error distributions by training group for pre-tests only (outliers removed)

Analyses

Descriptive plots

Adjusting for baseline performance was made possible by taking the difference between later test scores and each participant’s mean pre-test score, as shown in Figure 4.

Code
test_levels <- c("Pre-test", "Mid-test", "Post-test", "Carryover")

# 1) Baselines per participant (average if somehow >1 Pre row), outliers removed
pre_vals <- df_flag %>%
  filter(!is_outlier, Test == "Pre-test") %>%
  group_by(Participant, Training) %>%
  summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")

# 2) Join baselines back, compute change, drop cases with no baseline
df_change <- df_flag %>%
  filter(!is_outlier) %>%
  left_join(pre_vals, by = c("Participant", "Training")) %>%
  mutate(
    Test = factor(Test, levels = test_levels),
    Change = Error - pre_val
  ) %>%
  filter(Test != "Pre-test") %>%          # no change for Pre
  drop_na(pre_val)                        # remove participants lacking a Pre value

# 3) Summarise and plot
df_change_summary <- df_change %>%
  group_by(Training, Test) %>%
  summarise(
    mean_change = mean(Change, na.rm = TRUE),
    se = sd(Change, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

ggplot(df_change_summary,
       aes(x = Test, y = mean_change, group = Training, colour = Training)) +
  geom_ribbon(aes(ymin = mean_change - se, ymax = mean_change + se),
              fill = "grey70", alpha = 0.25, colour = NA) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.6) +
  theme_clean() +
  labs(
    title = "Mean change from baseline (pre-test)",
    x = "Test phase",
    y = "Change in Error (±SE)",
    colour = "Training group"
  )
Figure 4: A plot of mean change from pre-test scores by training group (shaded ribbons show standard error)

Of course, it may be more intuitive to compare both training groups against the control, as shown in Figure 5. Again, this shows that both training groups had consistently smaller changes from baseline (on average) across all test phases.

Code
test_levels <- c("Pre-test", "Mid-test", "Post-test", "Carryover")

# --- Baselines (Pre-test) per participant
pre_vals <- df_flag %>%
  filter(!is_outlier, Test == "Pre-test") %>%
  group_by(Participant, Training) %>%
  summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")

# --- Pooled groups: Training vs Control
df_pooled <- df_flag %>%
  filter(!is_outlier) %>%
  mutate(
    Group2 = case_when(
      Training %in% c("Physical", "Visual") ~ "Training",
      Training %in% c("Control", "No training", "None") ~ "Control",
      TRUE ~ Training
    )
  ) %>%
  left_join(pre_vals, by = c("Participant", "Training")) %>%
  mutate(
    Test = factor(Test, levels = test_levels),
    Change = Error - pre_val
  ) %>%
  drop_na(pre_val) %>%
  filter(Group2 %in% c("Training", "Control"))

# --- Summarise mean ± SE
df_change_summary <- df_pooled %>%
  group_by(Group2, Test) %>%
  summarise(
    mean_change = mean(Change, na.rm = TRUE),
    se = sd(Change, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

# --- Plot: mean ± SE with colour-matched ribbons
ggplot(df_change_summary,
       aes(x = Test, y = mean_change, group = Group2, colour = Group2, fill = Group2)) +
  geom_ribbon(aes(ymin = mean_change - se, ymax = mean_change + se),
              alpha = 0.2, colour = NA) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.6) +
  theme_clean() +
  labs(
    title = "Mean change from baseline (pre-test)",
    x = "Test phase",
    y = "Change in error from baseline",
    colour = "Group",
    fill = "Group"
  )
Figure 5: Mean change from pre-test scores by presence or absence of either training type (ribbons show standard error)

Figure 6 clearly shows that (on average) baseline-adjusted errors remained lower than control in physical group (except for in the carryover tests), and higher than control in the visual group.

Code
test_levels <- c("Pre-test", "Mid-test", "Post-test", "Carryover")

# 1) Get each participant's baseline (after removing outliers)
pre_vals <- df_flag %>%
  filter(!is_outlier, Test == "Pre-test") %>%
  group_by(Participant, Training) %>%
  summarise(pre_val = mean(Error, na.rm = TRUE), .groups = "drop")

# 2) Join baseline, compute normalised score, drop those without a baseline
df_norm <- df_flag %>%
  filter(!is_outlier) %>%
  left_join(pre_vals, by = c("Participant", "Training")) %>%
  mutate(
    Test = factor(Test, levels = test_levels),
    Norm = Error / pre_val
  ) %>%
  drop_na(pre_val)   # ensures Norm is well-defined (no Pre value -> remove)

# 3) Summarise and plot
df_norm_summary <- df_norm %>%
  group_by(Training, Test) %>%
  summarise(
    mean_norm = mean(Norm, na.rm = TRUE),
    se = sd(Norm, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

ggplot(df_norm_summary,
       aes(x = Test, y = mean_norm, group = Training, colour = Training)) +
  geom_ribbon(aes(ymin = mean_norm - se, ymax = mean_norm + se),
              fill = "grey70", alpha = 0.25, colour = NA) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_hline(yintercept = 1, linetype = "dashed", alpha = 0.6) +
  theme_clean() +
  labs(
    title = "Baseline-normalised error across test phases",
    x = "Test phase",
    y = "Error relative to baseline",
    colour = "Training group"
  )
Figure 6: Distribution of error scores after adjustment for baseline (pre-test score) – ribbons show standard error
Code
# Baseline per participant (for covariate)
pre_vals <- df_flag %>%
  filter(Test == "Pre-test", !is_outlier) %>%
  group_by(Participant, Training) %>%
  summarise(PreError = mean(Error, na.rm = TRUE), .groups = "drop")

# Merge baseline into full dataset
df_anova <- df_flag %>%
  filter(!is_outlier) %>%
  left_join(pre_vals, by = c("Participant", "Training")) %>%
  mutate(
    Training = factor(Training),
    Test = factor(Test, levels = c("Pre-test", "Mid-test", "Post-test", "Carryover")),
    Participant = factor(Participant)
  ) %>%
  drop_na(Error, PreError)
anova_baseline <- aov_car(
  Error ~ PreError + Training * Test + Error(Participant/Test),
  data = df_anova,
  type = 3,    # Type III sums of squares
  factorize = FALSE
)
Warning: Numerical variables NOT centered on 0 (matters if variable in interaction):
   PreError
Warning: Missing values for 3 ID(s), which were removed before analysis:
4, 28, 67
Below the first few rows (in wide format) of the removed cases with missing data.
     Participant PreError Training Pre.test Mid.test Post.test Carryover
# 3            4   26.500 Physical   26.500       NA    15.500    21.875
# 25          28   22.875 Physical   22.875       NA    15.625    23.375
# 61          67   22.250  Control   22.250       NA    15.000    10.000
Contrasts set to contr.sum for the following variables: Training
Code
anova_baseline
Anova Table (Type 3 tests)

Response: Error
         Effect           df   MSE         F  ges p.value
1      PreError        1, 69 25.02 98.89 *** .379   <.001
2      Training        2, 69 25.02    3.51 * .042    .035
3          Test 2.76, 190.78 12.22 16.01 *** .118   <.001
4 PreError:Test 2.76, 190.78 12.22 36.28 *** .232   <.001
5 Training:Test 5.53, 190.78 12.22    1.84 + .030    .099
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '+' 0.1 ' ' 1

Sphericity correction method: GG 
Code
# Between-group comparisons within each test
emm_train_by_test <- emmeans(anova_baseline, ~ Training | Test)
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
pairs(emm_train_by_test, adjust = "tukey")
Test = Pre.test:
 contrast           estimate   SE df t.ratio p.value
 Control - Physical    0.000 0.00 69   0.729  0.7472
 Control - Visual      0.000 0.00 69   0.056  0.9983
 Physical - Visual     0.000 0.00 69  -0.736  0.7430

Test = Mid.test:
 contrast           estimate   SE df t.ratio p.value
 Control - Physical    1.866 1.38 69   1.352  0.3716
 Control - Visual     -0.370 1.37 69  -0.270  0.9608
 Physical - Visual    -2.236 1.26 69  -1.770  0.1872

Test = Post.test:
 contrast           estimate   SE df t.ratio p.value
 Control - Physical    2.960 1.29 69   2.295  0.0632
 Control - Visual      1.191 1.28 69   0.929  0.6240
 Physical - Visual    -1.769 1.18 69  -1.499  0.2978

Test = Carryover:
 contrast           estimate   SE df t.ratio p.value
 Control - Physical    0.807 1.37 69   0.588  0.8268
 Control - Visual     -2.269 1.36 69  -1.664  0.2265
 Physical - Visual    -3.076 1.26 69  -2.450  0.0438

P value adjustment: tukey method for comparing a family of 3 estimates 
Code
# Within-group comparisons across tests
emm_test_by_train <- emmeans(anova_baseline, ~ Test | Training)
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
pairs(emm_test_by_train, adjust = "bonferroni")
Training = Control:
 contrast              estimate    SE df t.ratio p.value
 Pre.test - Mid.test      2.072 1.030 69   2.003  0.2949
 Pre.test - Post.test     2.879 0.967 69   2.978  0.0240
 Pre.test - Carryover     2.514 1.030 69   2.445  0.1023
 Mid.test - Post.test     0.807 1.080 69   0.748  1.0000
 Mid.test - Carryover     0.442 1.100 69   0.402  1.0000
 Post.test - Carryover   -0.365 1.270 69  -0.288  1.0000

Training = Physical:
 contrast              estimate    SE df t.ratio p.value
 Pre.test - Mid.test      3.939 0.874 69   4.507  0.0002
 Pre.test - Post.test     5.840 0.817 69   7.151  <.0001
 Pre.test - Carryover     3.321 0.868 69   3.824  0.0017
 Mid.test - Post.test     1.901 0.911 69   2.088  0.2432
 Mid.test - Carryover    -0.618 0.929 69  -0.665  1.0000
 Post.test - Carryover   -2.518 1.070 69  -2.354  0.1285

Training = Visual:
 contrast              estimate    SE df t.ratio p.value
 Pre.test - Mid.test      1.702 0.907 69   1.876  0.3889
 Pre.test - Post.test     4.070 0.848 69   4.801  0.0001
 Pre.test - Carryover     0.245 0.902 69   0.272  1.0000
 Mid.test - Post.test     2.368 0.945 69   2.505  0.0877
 Mid.test - Carryover    -1.457 0.964 69  -1.511  0.8115
 Post.test - Carryover   -3.825 1.110 69  -3.444  0.0059

P value adjustment: bonferroni method for 6 tests 
Code
emm_df <- as.data.frame(emmeans(anova_baseline, ~ Training * Test))
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
ggplot(emm_df, aes(x = Test, y = emmean, colour = Training, group = Training)) +
  geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Training),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  theme_clean() +
  labs(
    title = "Interaction between Training type and Test phase (baseline-adjusted)",
    x = "Test phase",
    y = "Estimated marginal mean Error (±95 % CI)",
    colour = "Training group",
    fill = "Training group"
  )

Code
library(dplyr)
library(stringr)
library(ggplot2)
library(ggthemes)
library(emmeans)

# Helper to standardise test labels from emmeans (dot -> hyphen) and order them
fix_test_labels <- function(x) {
  x %>%
    mutate(Test = str_replace_all(Test, c("Pre\\.test"="Pre-test",
                                          "Mid\\.test"="Mid-test",
                                          "Post\\.test"="Post-test"))) %>%
    mutate(Test = factor(Test, levels = c("Pre-test","Mid-test","Post-test","Carryover")))
}

# 0) Get EMMs from your ANOVA and fix labels
emm_full <- emmeans(anova_baseline, ~ Training * Test) %>% as.data.frame() %>% fix_test_labels()
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
# -------------------------------
# 1) Control vs Physical
# -------------------------------
emm_CP <- emm_full %>% filter(Training %in% c("Control","Physical"))

ggplot(emm_CP, aes(x = Test, y = emmean, colour = Training, group = Training)) +
  geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Training),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  theme_clean() +
  labs(
    title = "Baseline-adjusted means: Control vs Physical",
    x = "Test phase",
    y = "Estimated marginal mean Error (±95% CI)",
    colour = "Group", fill = "Group"
  )

Code
# -------------------------------
# 2) Physical vs (Control+Visual pooled)
# -------------------------------

# EMMs by Training | Test (for pooling), then fix labels
emm_T_by_Tr <- emmeans(anova_baseline, ~ Training | Test)
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
emm_T_by_Tr_df <- as.data.frame(emm_T_by_Tr) %>% fix_test_labels()

# Weighted pooling weights (per-participant counts)
n_tbl <- df_anova %>% distinct(Participant, Training) %>% count(Training)
w_C <- n_tbl$n[n_tbl$Training == "Control"]; if (length(w_C)==0) w_C <- 0
w_V <- n_tbl$n[n_tbl$Training == "Visual"];  if (length(w_V)==0) w_V <- 0
w_sum <- w_C + w_V; if (w_sum == 0) { w_C <- 1; w_V <- 1; w_sum <- 2 }

# Build contrast vector aligned to emm factor order
levs <- levels(emm_T_by_Tr)$Training
coef_vec <- setNames(rep(0, length(levs)), levs)
coef_vec["Control"] <- w_C / w_sum
coef_vec["Visual"]  <- w_V / w_sum

emm_pooled <- contrast(emm_T_by_Tr, method = list("Control+Visual" = as.numeric(coef_vec)),
                       by = "Test", infer = c(TRUE, TRUE)) %>%
  as.data.frame() %>%
  fix_test_labels() %>%
  transmute(Group2 = "Control+Visual", Test, emmean = estimate, lower.CL, upper.CL)

phys_df <- emmeans(anova_baseline, ~ Test | Training) %>%
  as.data.frame() %>%
  fix_test_labels() %>%
  filter(Training == "Physical") %>%
  transmute(Group2 = "Physical", Test, emmean, lower.CL, upper.CL)
Warning in summary.lm(object, ...): essentially perfect fit: summary may be
unreliable
Code
emm_two <- bind_rows(emm_pooled, phys_df)

ggplot(emm_two, aes(x = Test, y = emmean, colour = Group2, group = Group2)) +
  geom_ribbon(aes(ymin = lower.CL, ymax = upper.CL, fill = Group2),
              alpha = 0.18, colour = NA) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  theme_clean() +
  labs(
    title = "Baseline-adjusted means: Physical vs (Control+Visual pooled)",
    x = "Test phase",
    y = "Estimated marginal mean Error (±95% CI)",
    colour = "Group", fill = "Group"
  )