Match Quantity

Prepare data

Set up

Code
library(dplyr)
library(tidyr)
library(here)
library(car)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(lubridate)

probe_dir <- "02-Match_quantity"

Question presentation

Load & clean student responses

  • Remove practice questions and filter response time > 0 second
Code
results_df <- read_csv(here(probe_dir,"match_quantity_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      GradeGroup == "Year 1A" ~ "Foundation A",
      GradeGroup == "Year 1B" ~ "Foundation B", 
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice and responses > 0 sec
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter(`Answer Duration` > 0)
  

  # CHECK: Adding non-attempted questions back in 
  # CHECK: Group items by regular/irregular patterns
  # CHECK: Include target answer in the performance output (correlation between target number and % correct)

# Normalise question identifier 
results_cleaned_df <- results_cleaned_df %>%
  mutate(
    `Question Identifier` = str_replace(
      `Question Identifier`,
      "^MQ1-20-(\\d{3})$",    # e.g. MQ1-20-001
      "MQ1-20_\\1"            # → MQ1-20_001
    )
  )


# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

Map distractor identity

Code
distractor_df <- read_csv(here(probe_dir,"match_quant_distractor_map.csv")) %>% select(-starts_with("..."))  

# Check distractor mapping is consistent
distractor_df %>%
  mutate(
    mapped_stimuli = case_when(
      `Correct Distractor` == 1 ~ `Distractor 1`,
      `Correct Distractor` == 2 ~ `Distractor 2`,
      `Correct Distractor` == 3 ~ `Distractor 3`,
      `Correct Distractor` == 4 ~ `Distractor 4`,
      TRUE                       ~ NA_real_
    )
  ) %>%
  filter(mapped_stimuli != Stimuli) -> inconsistencies

if (nrow(inconsistencies) == 0) {
  message("All ‘Correct Distractor’ entries line up with the Stimuli column.")
} else {
  warning("Found mismatches in the following questions:\n",
          paste0(inconsistencies$`Question Identifier`, collapse = ", "))
}

# Add selected distractor identity
results_cleaned_df <- results_cleaned_df %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             starts_with("Distractor "),
                             Stimuli),
    by = "Question Identifier"
  ) %>%
  mutate(
    Selected_Distractor = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      TRUE                    ~ NA_real_
    ),
    Selected_Correct = (Selected_Distractor == Stimuli)
  )

# results_cleaned_df %>% filter(`Is Answer Correct` != Selected_Correct) # consistency check

write_csv(results_cleaned_df %>% 
            select(Test, `Question Identifier`, `Correct Answer`, 
                   `Distractor 1`, `Distractor 2`, `Distractor 3`, `Distractor 4`,
                   Identifier, GradeGroup, Selected_Distractor, Selected_Correct, `Raw Score`, `Answer Duration`, `Date Completed`), 
          "match_quant_response.csv")

Explore

Distractor selected

Code
# Get all distractors for each question 
full_choices <- distractor_df %>%
  mutate(
    Test = if_else(
      str_starts(`Question Identifier`, "MQ1-10"),
      "Match Quantity 1-10",
      "Match Quantity 1-20"
    )
  ) %>%
  select(Test, `Question Identifier`, Stimuli, starts_with("Distractor")) %>%
  pivot_longer(
    cols      = starts_with("Distractor"),
    names_to  = "ord",               # just a throwaway index
    values_to = "Selected_Distractor"
  ) %>%
  distinct(Test, `Question Identifier`, Stimuli, Selected_Distractor)

counts <- results_cleaned_df %>%
  group_by(Test, `Question Identifier`, Selected_Distractor) %>%
  summarise(n = n(), .groups = "drop")

dist_df <- full_choices %>%
  left_join(counts, by = c("Test", "Question Identifier", "Selected_Distractor")) %>%
  replace_na(list(n = 0)) %>%
  group_by(Test, `Question Identifier`) %>%
  mutate(
    pct        = n / sum(n) * 100,
    is_correct = (Selected_Distractor == Stimuli)
  ) %>%
  ungroup()

tests <- unique(dist_df$Test)
plots <- map(tests, function(tst) {
  dist_df %>%
    filter(Test == tst) %>%
    ggplot(aes(
      x    = factor(Selected_Distractor),
      y    = pct,
      fill = is_correct
    )) +
    geom_col(width = 0.7) +
    scale_fill_manual(
      values = c("TRUE" = "steelblue", "FALSE" = "grey80"),
      labels = c("TRUE" = "Correct", "FALSE" = "Incorrect"),
      name   = NULL
    ) +
    facet_wrap(
      ~ `Question Identifier`,
      scales = "free_x",
      ncol   = 5
    ) +
    labs(
      title = tst,
      x     = "Distractor identity",
      y     = "Percentage of students"
    ) +
    theme_minimal() +
    theme(
      panel.background = element_rect(fill = "white", colour = NA),
      plot.background  = element_rect(fill = "white", colour = NA),
      panel.grid.major.x = element_blank(),
      panel.grid.minor   = element_blank(),
      strip.text         = element_text(size = 8),
    )
})

walk(plots, print)

Code
ggsave(
  filename = "MQ1-10_distribution.png",
  plot     = plots[[1]],
  width    = 12,      # inches
  height   = 8,       
  units    = "in",
  dpi      = 300
)

ggsave(
  filename = "MQ1-20_distribution.png",
  plot     = plots[[2]],
  width    = 12,
  height   = 8,
  units    = "in",
  dpi      = 300
)

Correlation between target number and performance

Code
# 1. Extract percent‐correct by Stimuli for each Test
correct_pct_df <- dist_df %>%
  filter(is_correct) %>%
  select(Test, Stimuli, pct) %>%
  rename(percent_correct = pct)

# 2. Build one plot per Test
make_corr_plot <- function(test_name) {
  correct_pct_df %>%
    filter(Test == test_name) %>%
    ggplot(aes(x = Stimuli, y = percent_correct)) +
    geom_point(size = 2) +
    geom_smooth(method = "lm", se = FALSE, colour = "steelblue") +
    scale_x_continuous(breaks = unique(correct_pct_df$Stimuli)) +
    scale_y_continuous(limit=c(0,100)) +
    labs(
      title = test_name,
      x     = "Target number (Stimuli)",
      y     = "% of students correct"
    ) +
    theme_minimal() +
    theme(
      panel.background   = element_rect(fill = "white", colour = NA),
      plot.background    = element_rect(fill = "white", colour = NA),
      panel.grid.minor   = element_blank(),
      strip.text         = element_text(size = 9)
    )
}

p1 <- make_corr_plot("Match Quantity 1-10")
p2 <- make_corr_plot("Match Quantity 1-20")

# 3. Stack them vertically
(p1 / p2) + 
  plot_annotation(
    title = "Item difficulty vs. target number",
    theme = theme(plot.title = element_text(hjust = 0.5))
  )

Accuracy and RCPM (response correct per min)

Code
student_perf <- results_cleaned_df %>%
  group_by(Identifier, Test, `Test Identifier`) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`),
    Correct       = sum(`Raw Score`, na.rm = TRUE),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

student_perf_long <- student_perf %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

p_acc <- ggplot(student_perf, aes(x = Accuracy, colour = Test, fill = Test)) +
  geom_density(alpha = 0.3, size = 0.5) +
  labs(
    title = "Density of Accuracy by Test",
    x     = "Accuracy",
    y     = "Density"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "right",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

p_rcpm <- ggplot(student_perf, aes(x = RCPM, colour = Test, fill = Test)) +
  geom_density(alpha = 0.3, size = 0.5) +
  labs(
    title = "Density of RCPM by Test",
    x     = "Correct Responses Per Minute",
    y     = "Density"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "none",
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

# Stack them vertically
combined_plot <- p_acc / p_rcpm + 
  plot_layout(heights = c(1, 1))  # equal height panels

ggsave(
  filename = "match_quant_score_density.png",
  plot     = combined_plot,
  width    = 12,
  height   = 8,
  units    = "in",
  dpi      = 300
)

write_csv(student_perf, "match_quant_performance.csv")

print(combined_plot)

Test summary stats

Code
perf_by_test <- student_perf %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`Test Identifier`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `Test Identifier`,
    values_from = Value
  )

knitr::kable(
  perf_by_test,
  digits     = 3,
  caption    = "Test results summary",
  col.names  = c("Statistic", "Match Qty 1-10", "Match Qty 1-20")
)
Test results summary
Statistic Match Qty 1-10 Match Qty 1-20
Acc_mean 0.820 0.620
Acc_sd 0.167 0.232
Acc_skew -1.905 -0.420
Acc_kurt 7.890 2.437
RCPM_mean 6.697 3.212
RCPM_sd 2.655 2.425
RCPM_skew 0.204 3.282
RCPM_kurt 3.059 22.893

Appendix: comparing regular vs irregular 10

Code
results_10 <- results_cleaned_df %>% 
  filter(Stimuli < 11) %>% 
  mutate(
    type_10 = case_when(
      `Test Identifier` == "MQ1-10_2025" ~ "Regular",
      `Test Identifier` == "MQ1-20_2025" ~ "Irregular",
      TRUE ~ NA_character_
    )
  )

results_10 %>% group_by(type_10) %>% summarise(response_count = n())
# A tibble: 2 × 2
  type_10   response_count
  <chr>              <int>
1 Irregular           4906
2 Regular            19031
Code
student_perf_type10 <- results_10 %>%
  group_by(Identifier, Test, `Test Identifier`, type_10) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`),
    Correct       = sum(`Raw Score`, na.rm = TRUE),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

perf_by_type10 <- student_perf_type10 %>%
  group_by(type_10) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`type_10`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `type_10`,
    values_from = Value
  )

knitr::kable(
  perf_by_type10,
  digits     = 2,
  caption    = "Test results summary"
)
Test results summary
Statistic Irregular Regular
Acc_mean 0.76 0.82
Acc_sd 0.27 0.17
Acc_skew -0.89 -1.91
Acc_kurt 3.00 7.89
RCPM_mean 5.35 6.70
RCPM_sd 3.35 2.65
RCPM_skew 1.83 0.20
RCPM_kurt 10.81 3.06
Code
# Plot density
perf_long_type10 <- student_perf_type10 %>%
  select(type_10, Accuracy, RCPM) %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

p_acc_hist <- ggplot(
  filter(perf_long_type10, Metric == "Accuracy"),
  aes(x = Value, fill = type_10)
) +
  geom_histogram(
    aes(y = ..count.. / sum(..count..) * 100),
    position = "identity",
    alpha    = 0.4,
    bins     = 20,
    colour   = NA
  ) +
  scale_fill_manual(
    values = c("Regular" = "steelblue", "Irregular" = "maroon"),
    guide  = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Accuracy Distribution: Regular vs Irregular",
    x     = "Accuracy",
    y     = "Percent of students"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "none",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

p_rcpm_hist <- ggplot(
  filter(perf_long_type10, Metric == "RCPM"),
  aes(x = Value, fill = type_10)
) +
  geom_histogram(
    aes(y = ..count.. / sum(..count..) * 100),
    position = "identity",
    alpha    = 0.4,
    bins     = 20,
    colour   = NA
  ) +
  scale_fill_manual(
    values = c("Regular" = "steelblue", "Irregular" = "maroon"),
    guide  = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Fluency (RCPM) Distribution: Regular vs Irregular",
    x     = "Responses Correct Per Minute",
    y     = "Percent of students"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "bottom",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

# Stack them
(histograms_type10 <- p_acc_hist / p_rcpm_hist + 
  plot_layout(heights = c(1, 1)))

Mean and spread test

  • Welch’s two-sample t-test (as opposed to Student’s) given lack of homogeneity of variance, and two groups are completely independent (no overlap in students)

  • Levene’s test for spread as distribution is not normal (required for F-test)

Code
t_acc  <- t.test(Accuracy ~ type_10,
                 data    = student_perf_type10,
                 var.equal = FALSE)   # Welch’s t-test
t_rcpm <- t.test(RCPM     ~ type_10,
                 data    = student_perf_type10,
                 var.equal = FALSE)

l_acc  <- leveneTest(Accuracy ~ type_10, data = student_perf_type10)
l_rcpm <- leveneTest(RCPM     ~ type_10, data = student_perf_type10)

# 4. Glance at the results
list(
  accuracy_mean_test   = t_acc,
  rcpm_mean_test       = t_rcpm,
  accuracy_levene      = l_acc,
  rcpm_levene          = l_rcpm
)
$accuracy_mean_test

    Welch Two Sample t-test

data:  Accuracy by type_10
t = -6.1644, df = 1994.4, p-value = 8.541e-10
alternative hypothesis: true difference in means between group Irregular and group Regular is not equal to 0
95 percent confidence interval:
 -0.07377433 -0.03816238
sample estimates:
mean in group Irregular   mean in group Regular 
              0.7644619               0.8204303 


$rcpm_mean_test

    Welch Two Sample t-test

data:  RCPM by type_10
t = -11.004, df = 2290.2, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Irregular and group Regular is not equal to 0
95 percent confidence interval:
 -1.583494 -1.104480
sample estimates:
mean in group Irregular   mean in group Regular 
               5.353307                6.697294 


$accuracy_levene
Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    1  394.51 < 2.2e-16 ***
      2467                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

$rcpm_levene
Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    1  13.524 0.0002405 ***
      2467                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Appendix: including non-attempted questions

Code
results_cleaned_df2 <- results_df %>% 
    filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac"))

# Normalise question identifier 
results_cleaned_df2 <- results_cleaned_df2 %>%
  mutate(
    `Question Identifier` = str_replace(
      `Question Identifier`,
      "^MQ1-20-(\\d{3})$",    # e.g. MQ1-20-001
      "MQ1-20_\\1"            # → MQ1-20_001
    )
  )

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df2 %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df2 <- deduped %>% select(-.ts)


# Add selected distractor identity
results_cleaned_df2 <- results_cleaned_df2 %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             starts_with("Distractor "),
                             Stimuli),
    by = "Question Identifier"
  ) %>%
  mutate(
    Selected_Distractor = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      TRUE                    ~ NA_real_
    ),
    Selected_Correct = (Selected_Distractor == Stimuli)
  ) %>% 
  mutate(
    `Raw Score` = case_when(
      Selected_Correct == TRUE ~ 1,
      TRUE ~ 0 #N/A and FALSE
    )
  )

# results_cleaned_df2 %>% filter(`Is Answer Correct` != Selected_Correct) # consistency check

student_perf2 <- results_cleaned_df2 %>%
  group_by(Identifier, Test, `Test Identifier`) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`), # total items per test
    Correct       = sum(`Raw Score`),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

perf_by_test2 <- student_perf2 %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`Test Identifier`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `Test Identifier`,
    values_from = Value
  )

knitr::kable(
  perf_by_test2,
  digits     = 3,
  caption    = "Test results summary",
  col.names  = c("Statistic", "Match Qty 1-10", "Match Qty 1-20")
)
Test results summary
Statistic Match Qty 1-10 Match Qty 1-20
Acc_mean 0.413 0.196
Acc_sd 0.158 0.122
Acc_skew 0.017 1.428
Acc_kurt 2.961 7.053
RCPM_mean 6.697 3.212
RCPM_sd 2.655 2.426
RCPM_skew 0.204 3.280
RCPM_kurt 3.059 22.885