Match Quantity

Prepare data

Set up

Code

library(dplyr)
library(tidyr)
library(here)
library(car)
library(ggplot2)
library(stringr)
library(purrr)
library(stringr)
library(readr)
library(moments)
library(patchwork)
library(scales)
library(kableExtra)
library(knitr)
library(glue)
library(lubridate)

probe_dir <- "02-Match_quantity"

Question presentation

Load & clean student responses

Remove practice questions and filter response time > 0 second

Code

results_df <- read_csv(here(probe_dir,"match_quantity_raw.csv")) %>% select(-starts_with("..."))  

student_df <- read_csv(here(".","01-Students","Student_Assessment.csv"))%>% select(-starts_with("..."))

student_clean <- student_df %>% 
  mutate(
    GradeGroup = case_when(
      str_detect(`Assessment Event Identifier`, "^year-1a-2025$")          ~ "Year 1A",
      str_detect(`Assessment Event Identifier`, "^year-1b-2025$")          ~ "Year 1B",
      str_detect(`Assessment Event Identifier`, "^foundation-a-2025$")     ~ "Foundation A",
      str_detect(`Assessment Event Identifier`, "^foundation-b-2025$")     ~ "Foundation B",
      TRUE                                                                 ~ NA_character_  
    )
  ) %>% 
  select(Identifier, GradeGroup) %>%          
  distinct()

results_df <- results_df %>% 
  left_join(student_clean, by = "Identifier") 

results_df <- results_df %>% 
  mutate(
    GradeGroup = case_when(
      GradeGroup == "Year 1A" ~ "Foundation A",
      GradeGroup == "Year 1B" ~ "Foundation B", 
      TRUE ~ GradeGroup
    )
  ) %>% 
  select(-`Assessment Event Identifier`)

# Remove practice and responses > 0 sec
results_cleaned_df <- results_df %>% 
  filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac")) %>% 
  filter(`Answer Duration` > 0)
  

  # CHECK: Adding non-attempted questions back in 
  # CHECK: Group items by regular/irregular patterns
  # CHECK: Include target answer in the performance output (correlation between target number and % correct)

# Normalise question identifier 
results_cleaned_df <- results_cleaned_df %>%
  mutate(
    `Question Identifier` = str_replace(
      `Question Identifier`,
      "^MQ1-20-(\\d{3})$",    # e.g. MQ1-20-001
      "MQ1-20_\\1"            # → MQ1-20_001
    )
  )


# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df <- deduped %>% select(-.ts)

Map distractor identity

Code

distractor_df <- read_csv(here(probe_dir,"match_quant_distractor_map.csv")) %>% select(-starts_with("..."))  

# Check distractor mapping is consistent
distractor_df %>%
  mutate(
    mapped_stimuli = case_when(
      `Correct Distractor` == 1 ~ `Distractor 1`,
      `Correct Distractor` == 2 ~ `Distractor 2`,
      `Correct Distractor` == 3 ~ `Distractor 3`,
      `Correct Distractor` == 4 ~ `Distractor 4`,
      TRUE                       ~ NA_real_
    )
  ) %>%
  filter(mapped_stimuli != Stimuli) -> inconsistencies

if (nrow(inconsistencies) == 0) {
  message("All ‘Correct Distractor’ entries line up with the Stimuli column.")
} else {
  warning("Found mismatches in the following questions:\n",
          paste0(inconsistencies$`Question Identifier`, collapse = ", "))
}

# Add selected distractor identity
results_cleaned_df <- results_cleaned_df %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             starts_with("Distractor "),
                             Stimuli),
    by = "Question Identifier"
  ) %>%
  mutate(
    Selected_Distractor = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      TRUE                    ~ NA_real_
    ),
    Selected_Correct = (Selected_Distractor == Stimuli)
  )

# results_cleaned_df %>% filter(`Is Answer Correct` != Selected_Correct) # consistency check

write_csv(results_cleaned_df %>% 
            select(Test, `Question Identifier`, `Correct Answer`, 
                   `Distractor 1`, `Distractor 2`, `Distractor 3`, `Distractor 4`,
                   Identifier, GradeGroup, Selected_Distractor, Selected_Correct, `Raw Score`, `Answer Duration`, `Date Completed`), 
          "match_quant_response.csv")

Explore

Distractor selected

Code

# Get all distractors for each question 
full_choices <- distractor_df %>%
  mutate(
    Test = if_else(
      str_starts(`Question Identifier`, "MQ1-10"),
      "Match Quantity 1-10",
      "Match Quantity 1-20"
    )
  ) %>%
  select(Test, `Question Identifier`, Stimuli, starts_with("Distractor")) %>%
  pivot_longer(
    cols      = starts_with("Distractor"),
    names_to  = "ord",               # just a throwaway index
    values_to = "Selected_Distractor"
  ) %>%
  distinct(Test, `Question Identifier`, Stimuli, Selected_Distractor)

counts <- results_cleaned_df %>%
  group_by(Test, `Question Identifier`, Selected_Distractor) %>%
  summarise(n = n(), .groups = "drop")

dist_df <- full_choices %>%
  left_join(counts, by = c("Test", "Question Identifier", "Selected_Distractor")) %>%
  replace_na(list(n = 0)) %>%
  group_by(Test, `Question Identifier`) %>%
  mutate(
    pct        = n / sum(n) * 100,
    is_correct = (Selected_Distractor == Stimuli)
  ) %>%
  ungroup()

tests <- unique(dist_df$Test)
plots <- map(tests, function(tst) {
  dist_df %>%
    filter(Test == tst) %>%
    ggplot(aes(
      x    = factor(Selected_Distractor),
      y    = pct,
      fill = is_correct
    )) +
    geom_col(width = 0.7) +
    scale_fill_manual(
      values = c("TRUE" = "steelblue", "FALSE" = "grey80"),
      labels = c("TRUE" = "Correct", "FALSE" = "Incorrect"),
      name   = NULL
    ) +
    facet_wrap(
      ~ `Question Identifier`,
      scales = "free_x",
      ncol   = 5
    ) +
    labs(
      title = tst,
      x     = "Distractor identity",
      y     = "Percentage of students"
    ) +
    theme_minimal() +
    theme(
      panel.background = element_rect(fill = "white", colour = NA),
      plot.background  = element_rect(fill = "white", colour = NA),
      panel.grid.major.x = element_blank(),
      panel.grid.minor   = element_blank(),
      strip.text         = element_text(size = 8),
    )
})

walk(plots, print)

Code

ggsave(
  filename = "MQ1-10_distribution.png",
  plot     = plots[[1]],
  width    = 12,      # inches
  height   = 8,       
  units    = "in",
  dpi      = 300
)

ggsave(
  filename = "MQ1-20_distribution.png",
  plot     = plots[[2]],
  width    = 12,
  height   = 8,
  units    = "in",
  dpi      = 300
)

Correlation between target number and performance

Code

# 1. Extract percent‐correct by Stimuli for each Test
correct_pct_df <- dist_df %>%
  filter(is_correct) %>%
  select(Test, Stimuli, pct) %>%
  rename(percent_correct = pct)

# 2. Build one plot per Test
make_corr_plot <- function(test_name) {
  correct_pct_df %>%
    filter(Test == test_name) %>%
    ggplot(aes(x = Stimuli, y = percent_correct)) +
    geom_point(size = 2) +
    geom_smooth(method = "lm", se = FALSE, colour = "steelblue") +
    scale_x_continuous(breaks = unique(correct_pct_df$Stimuli)) +
    scale_y_continuous(limit=c(0,100)) +
    labs(
      title = test_name,
      x     = "Target number (Stimuli)",
      y     = "% of students correct"
    ) +
    theme_minimal() +
    theme(
      panel.background   = element_rect(fill = "white", colour = NA),
      plot.background    = element_rect(fill = "white", colour = NA),
      panel.grid.minor   = element_blank(),
      strip.text         = element_text(size = 9)
    )
}

p1 <- make_corr_plot("Match Quantity 1-10")
p2 <- make_corr_plot("Match Quantity 1-20")

# 3. Stack them vertically
(p1 / p2) + 
  plot_annotation(
    title = "Item difficulty vs. target number",
    theme = theme(plot.title = element_text(hjust = 0.5))
  )

Accuracy and RCPM (response correct per min)

Code

student_perf <- results_cleaned_df %>%
  group_by(Identifier, Test, `Test Identifier`) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`),
    Correct       = sum(`Raw Score`, na.rm = TRUE),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

student_perf_long <- student_perf %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

p_acc <- ggplot(student_perf, aes(x = Accuracy, colour = Test, fill = Test)) +
  geom_density(alpha = 0.3, size = 0.5) +
  labs(
    title = "Density of Accuracy by Test",
    x     = "Accuracy",
    y     = "Density"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "right",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

p_rcpm <- ggplot(student_perf, aes(x = RCPM, colour = Test, fill = Test)) +
  geom_density(alpha = 0.3, size = 0.5) +
  labs(
    title = "Density of RCPM by Test",
    x     = "Correct Responses Per Minute",
    y     = "Density"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "none",
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

# Stack them vertically
combined_plot <- p_acc / p_rcpm + 
  plot_layout(heights = c(1, 1))  # equal height panels

ggsave(
  filename = "match_quant_score_density.png",
  plot     = combined_plot,
  width    = 12,
  height   = 8,
  units    = "in",
  dpi      = 300
)

write_csv(student_perf, "match_quant_performance.csv")

print(combined_plot)

Test summary stats

Code

perf_by_test <- student_perf %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`Test Identifier`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `Test Identifier`,
    values_from = Value
  )

knitr::kable(
  perf_by_test,
  digits     = 3,
  caption    = "Test results summary",
  col.names  = c("Statistic", "Match Qty 1-10", "Match Qty 1-20")
)

Test results summary
Statistic	Match Qty 1-10	Match Qty 1-20
Acc_mean	0.820	0.620
Acc_sd	0.167	0.232
Acc_skew	-1.905	-0.420
Acc_kurt	7.890	2.437
RCPM_mean	6.697	3.212
RCPM_sd	2.655	2.425
RCPM_skew	0.204	3.282
RCPM_kurt	3.059	22.893

Appendix: comparing regular vs irregular 10

Code

results_10 <- results_cleaned_df %>% 
  filter(Stimuli < 11) %>% 
  mutate(
    type_10 = case_when(
      `Test Identifier` == "MQ1-10_2025" ~ "Regular",
      `Test Identifier` == "MQ1-20_2025" ~ "Irregular",
      TRUE ~ NA_character_
    )
  )

results_10 %>% group_by(type_10) %>% summarise(response_count = n())

# A tibble: 2 × 2
  type_10   response_count
  <chr>              <int>
1 Irregular           4906
2 Regular            19031

Code

student_perf_type10 <- results_10 %>%
  group_by(Identifier, Test, `Test Identifier`, type_10) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`),
    Correct       = sum(`Raw Score`, na.rm = TRUE),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`, na.rm = TRUE) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

perf_by_type10 <- student_perf_type10 %>%
  group_by(type_10) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`type_10`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `type_10`,
    values_from = Value
  )

knitr::kable(
  perf_by_type10,
  digits     = 2,
  caption    = "Test results summary"
)

Test results summary
Statistic	Irregular	Regular
Acc_mean	0.76	0.82
Acc_sd	0.27	0.17
Acc_skew	-0.89	-1.91
Acc_kurt	3.00	7.89
RCPM_mean	5.35	6.70
RCPM_sd	3.35	2.65
RCPM_skew	1.83	0.20
RCPM_kurt	10.81	3.06

Code

# Plot density
perf_long_type10 <- student_perf_type10 %>%
  select(type_10, Accuracy, RCPM) %>%
  pivot_longer(
    cols      = c(Accuracy, RCPM),
    names_to  = "Metric",
    values_to = "Value"
  )

p_acc_hist <- ggplot(
  filter(perf_long_type10, Metric == "Accuracy"),
  aes(x = Value, fill = type_10)
) +
  geom_histogram(
    aes(y = ..count.. / sum(..count..) * 100),
    position = "identity",
    alpha    = 0.4,
    bins     = 20,
    colour   = NA
  ) +
  scale_fill_manual(
    values = c("Regular" = "steelblue", "Irregular" = "maroon"),
    guide  = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Accuracy Distribution: Regular vs Irregular",
    x     = "Accuracy",
    y     = "Percent of students"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "none",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

p_rcpm_hist <- ggplot(
  filter(perf_long_type10, Metric == "RCPM"),
  aes(x = Value, fill = type_10)
) +
  geom_histogram(
    aes(y = ..count.. / sum(..count..) * 100),
    position = "identity",
    alpha    = 0.4,
    bins     = 20,
    colour   = NA
  ) +
  scale_fill_manual(
    values = c("Regular" = "steelblue", "Irregular" = "maroon"),
    guide  = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Fluency (RCPM) Distribution: Regular vs Irregular",
    x     = "Responses Correct Per Minute",
    y     = "Percent of students"
  ) +
  theme_minimal() +
  theme(
    legend.position    = "bottom",
    legend.title = element_blank(),
    panel.background   = element_rect(fill = "white", colour = NA),
    plot.background    = element_rect(fill = "white", colour = NA),
    panel.grid.minor   = element_blank()
  )

# Stack them
(histograms_type10 <- p_acc_hist / p_rcpm_hist + 
  plot_layout(heights = c(1, 1)))

Mean and spread test

Welch’s two-sample t-test (as opposed to Student’s) given lack of homogeneity of variance, and two groups are completely independent (no overlap in students)
Levene’s test for spread as distribution is not normal (required for F-test)

Code

t_acc  <- t.test(Accuracy ~ type_10,
                 data    = student_perf_type10,
                 var.equal = FALSE)   # Welch’s t-test
t_rcpm <- t.test(RCPM     ~ type_10,
                 data    = student_perf_type10,
                 var.equal = FALSE)

l_acc  <- leveneTest(Accuracy ~ type_10, data = student_perf_type10)
l_rcpm <- leveneTest(RCPM     ~ type_10, data = student_perf_type10)

# 4. Glance at the results
list(
  accuracy_mean_test   = t_acc,
  rcpm_mean_test       = t_rcpm,
  accuracy_levene      = l_acc,
  rcpm_levene          = l_rcpm
)

$accuracy_mean_test

    Welch Two Sample t-test

data:  Accuracy by type_10
t = -6.1644, df = 1994.4, p-value = 8.541e-10
alternative hypothesis: true difference in means between group Irregular and group Regular is not equal to 0
95 percent confidence interval:
 -0.07377433 -0.03816238
sample estimates:
mean in group Irregular   mean in group Regular 
              0.7644619               0.8204303 


$rcpm_mean_test

    Welch Two Sample t-test

data:  RCPM by type_10
t = -11.004, df = 2290.2, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Irregular and group Regular is not equal to 0
95 percent confidence interval:
 -1.583494 -1.104480
sample estimates:
mean in group Irregular   mean in group Regular 
               5.353307                6.697294 


$accuracy_levene
Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    1  394.51 < 2.2e-16 ***
      2467                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

$rcpm_levene
Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    1  13.524 0.0002405 ***
      2467                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Appendix: including non-attempted questions

Code

results_cleaned_df2 <- results_df %>% 
    filter(!str_detect(`Question Identifier`, "TRANS_INTRO|_Prac|_Tch|-Prac|_prac"))

# Normalise question identifier 
results_cleaned_df2 <- results_cleaned_df2 %>%
  mutate(
    `Question Identifier` = str_replace(
      `Question Identifier`,
      "^MQ1-20-(\\d{3})$",    # e.g. MQ1-20-001
      "MQ1-20_\\1"            # → MQ1-20_001
    )
  )

# De-duplicate: keep only the latest per student × test × question
tmp <- results_cleaned_df2 %>%
  mutate(.ts = parse_date_time(`Date Completed`, orders = "dmy HM p"))

deduped <- tmp %>%
  group_by(Identifier, `Test Identifier`, `Question Identifier`) %>%
  slice_max(.ts, n = 1, with_ties = FALSE) %>%
  ungroup()

n_removed <- nrow(tmp) - nrow(deduped)
message(n_removed, " duplicate rows removed; keeping only the most recent per Identifier/Test/Question")

results_cleaned_df2 <- deduped %>% select(-.ts)


# Add selected distractor identity
results_cleaned_df2 <- results_cleaned_df2 %>%
  left_join(
    distractor_df %>% select(`Question Identifier`,
                             starts_with("Distractor "),
                             Stimuli),
    by = "Question Identifier"
  ) %>%
  mutate(
    Selected_Distractor = case_when(
      `Answer Response` == 1 ~ `Distractor 1`,
      `Answer Response` == 2 ~ `Distractor 2`,
      `Answer Response` == 3 ~ `Distractor 3`,
      `Answer Response` == 4 ~ `Distractor 4`,
      TRUE                    ~ NA_real_
    ),
    Selected_Correct = (Selected_Distractor == Stimuli)
  ) %>% 
  mutate(
    `Raw Score` = case_when(
      Selected_Correct == TRUE ~ 1,
      TRUE ~ 0 #N/A and FALSE
    )
  )

# results_cleaned_df2 %>% filter(`Is Answer Correct` != Selected_Correct) # consistency check

student_perf2 <- results_cleaned_df2 %>%
  group_by(Identifier, Test, `Test Identifier`) %>%
  summarise(
    Attempted     = n_distinct(`Question Identifier`), # total items per test
    Correct       = sum(`Raw Score`),
    Accuracy      = Correct / Attempted,
    Time_minutes  = sum(`Answer Duration`) / 60,
    RCPM          = Correct / Time_minutes,
    .groups       = "drop"
  )

perf_by_test2 <- student_perf2 %>%
  group_by(`Test Identifier`) %>%
  summarise(
    Acc_mean   = mean(Accuracy, na.rm = TRUE),
    Acc_sd     = sd(Accuracy,   na.rm = TRUE),
    Acc_skew   = skewness(Accuracy, na.rm = TRUE),
    Acc_kurt   = kurtosis(Accuracy, na.rm = TRUE),
    RCPM_mean  = mean(RCPM, na.rm = TRUE),
    RCPM_sd    = sd(RCPM,   na.rm = TRUE),
    RCPM_skew  = skewness(RCPM, na.rm = TRUE),
    RCPM_kurt  = kurtosis(RCPM, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  pivot_longer(
    cols      = -`Test Identifier`,
    names_to  = "Statistic",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from  = `Test Identifier`,
    values_from = Value
  )

knitr::kable(
  perf_by_test2,
  digits     = 3,
  caption    = "Test results summary",
  col.names  = c("Statistic", "Match Qty 1-10", "Match Qty 1-20")
)

Test results summary
Statistic	Match Qty 1-10	Match Qty 1-20
Acc_mean	0.413	0.196
Acc_sd	0.158	0.122
Acc_skew	0.017	1.428
Acc_kurt	2.961	7.053
RCPM_mean	6.697	3.212
RCPM_sd	2.655	2.426
RCPM_skew	0.204	3.280
RCPM_kurt	3.059	22.885