Overview

This document evaluates the first pilot dataset for the cross-cultural food-image triplet experiment. Four participants (US Young pilot) each completed ~900 random + 90 validation + 10 catch trials. We:

  1. Compute 2D and 3D per-participant embeddings via the Salmon OfflineEmbedding (skipping participants whose CSV already exists).
  2. Visualise 2D embeddings with the actual food images as points.
  3. Visualise 3D embeddings as PC biplots with item loadings.
  4. Procrustes-align every participant to a common reference, cluster participants by SSE, and visualise aligned spaces.
  5. Show item-wise displacement arrows across the key comparisons.

Configuration

root_dir    <- here::here()                   # project root
data_dir    <- file.path(root_dir, "data",    "food_us_young_pilot")
emb_dir     <- file.path(root_dir, "analyses","food_pilot_embeddings")
img_dir     <- file.path(root_dir, "experiment","stimuli","food2")
python_bin  <- "/opt/anaconda3/envs/salmon/bin/python3"

dir.create(emb_dir, showWarnings = FALSE, recursive = TRUE)

# Helper: bare item name → image path
item_to_img <- function(item) {
  file.path(img_dir, paste0(item, ".jpg"))
}

# Helper: strip "stimuli/food2/" and extension
normalize_item <- function(x) {
  x %>%
    str_remove("^stimuli/food2/") %>%
    str_remove("\\.(jpg|jpeg|png)$")
}

1. Compute Embeddings

Set run_embeddings <- TRUE to (re-)run the Python script; set FALSE to load pre-computed CSVs.

run_embeddings <- FALSE   # change to FALSE once embeddings exist

if (run_embeddings) {
  script <- file.path(root_dir, "food_pilot_embeddings.py")
  Sys.setenv(OMP_NUM_THREADS = "1", MKL_NUM_THREADS = "1")   # avoid OpenMP issues on macOS ARM
  cat("Running embedding script …\n")
  exit_code <- system2(python_bin, args = script, stdout = "", stderr = "")
  if (exit_code != 0) {
    stop(sprintf("food_pilot_embeddings.py exited with code %d", exit_code))
  }
  cat("Done.\n")
}

Embedding History

hist_path <- file.path(emb_dir, "food_pilot_embedding_history.csv")
if (file.exists(hist_path)) {
  hist_df <- read_csv(hist_path, show_col_types = FALSE)
  hist_df %>%
    mutate(across(c(lowest_loss), ~ round(.x, 5))) %>%
    kable(caption = "Embedding training history") %>%
    kable_styling(bootstrap_options = "striped", full_width = FALSE)
} else {
  cat("No history file found — set run_embeddings <- TRUE to generate.\n")
}
Embedding training history
worker_id d lowest_loss epoch_stopped counter_from_last_update n_train n_test
netaniel 2 0.39852 49999 3744 794 206
netaniel 3 0.39514 49999 1250 794 206
69cde97d0bb7897221d96495 2 0.66894 49999 48425 812 188
69cde97d0bb7897221d96495 3 0.63222 49999 46508 812 188
649d809191fa2e4b6e84c69e 2 0.58715 49999 46658 803 197
649d809191fa2e4b6e84c69e 3 0.55012 49999 42143 803 197
697a7fb7a5dd76031424daca 2 0.55557 49999 31212 777 223
697a7fb7a5dd76031424daca 3 0.55804 49999 42405 777 223

2. Load Embeddings

path_2d <- file.path(emb_dir, "food_pilot_embeddings_2d.csv")
path_3d <- file.path(emb_dir, "food_pilot_embeddings_3d.csv")

if (!file.exists(path_2d)) {
  stop("food_pilot_embeddings_2d.csv not found. Run the Python script first.")
}

emb_2d_raw <- read_csv(path_2d, show_col_types = FALSE)
emb_3d_raw <- read_csv(path_3d, show_col_types = FALSE)

# Normalize item names
emb_2d_raw$item <- normalize_item(emb_2d_raw$stimulus)
emb_3d_raw$item <- normalize_item(emb_3d_raw$stimulus)

# Z-score each participant's embedding per dimension
emb_2d <- emb_2d_raw %>%
  group_by(worker_id) %>%
  mutate(
    dim_0 = as.numeric(scale(dim_0)),
    dim_1 = as.numeric(scale(dim_1))
  ) %>%
  ungroup()

emb_3d <- emb_3d_raw %>%
  group_by(worker_id) %>%
  mutate(
    dim_0 = as.numeric(scale(dim_0)),
    dim_1 = as.numeric(scale(dim_1)),
    dim_2 = as.numeric(scale(dim_2))
  ) %>%
  ungroup()

# Add image paths
emb_2d$img_path <- item_to_img(emb_2d$item)
emb_3d$img_path <- item_to_img(emb_3d$item)

# Short participant labels (last 6 chars of worker_id)
emb_2d <- emb_2d %>%
  mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))
emb_3d <- emb_3d %>%
  mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))

workers    <- unique(emb_2d$worker_id)
n_workers  <- length(workers)
cat(sprintf("Loaded 2D embeddings: %d items × %d participants\n",
            n_distinct(emb_2d$item), n_workers))
## Loaded 2D embeddings: 45 items × 4 participants
cat(sprintf("Loaded 3D embeddings: %d items × %d participants\n",
            n_distinct(emb_3d$item), length(unique(emb_3d$worker_id))))
## Loaded 3D embeddings: 45 items × 4 participants

3. 2D Embedding Visualisations

Each participant’s 2D semantic embedding plotted with food images at their coordinates.

plot_2d_embedding <- function(df, title) {
  df <- df %>% filter(file.exists(img_path))
  ggplot(df, aes(x = dim_0, y = dim_1)) +
    geom_image(aes(image = img_path), size = 0.07) +
    theme_minimal() +
    theme(
      axis.text  = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5, face = "bold")
    ) +
    labs(title = title)
}
for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n## %s\n\n", short))
  p <- plot_2d_embedding(
    emb_2d %>% filter(worker_id == wid),
    sprintf("2D Embedding — %s", short)
  )
  print(p)
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca

Composite (all participants, text labels)

p_list_2d <- map(workers, function(wid) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  ggplot(emb_2d %>% filter(worker_id == wid),
         aes(x = dim_0, y = dim_1, label = item)) +
    geom_text(size = 2, check_overlap = TRUE) +
    theme_minimal() +
    theme(axis.text  = element_blank(), axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.5, size = 10, face = "bold")) +
    labs(title = short)
})

plot_grid(plotlist = p_list_2d, ncol = 2)


4. 3D Embedding Visualisations

PCA Biplots

We run a joint PCA over the 3D embeddings (one row per item per participant) to find dimensions that capture the most shared variance, then plot item loadings on PC1 × PC2 and PC1 × PC3.

# One row per item, average coordinates across participants for the biplot
emb_3d_avg <- emb_3d %>%
  group_by(item) %>%
  summarise(
    dim_0 = mean(dim_0),
    dim_1 = mean(dim_1),
    dim_2 = mean(dim_2),
    .groups = "drop"
  )

mat_3d <- emb_3d_avg %>% select(dim_0, dim_1, dim_2) %>% as.matrix()
rownames(mat_3d) <- emb_3d_avg$item

pca_res <- prcomp(mat_3d, scale. = TRUE)
pca_df  <- as_tibble(pca_res$x, rownames = "item") %>%
  mutate(img_path = item_to_img(item))

var_exp <- summary(pca_res)$importance[2, ] * 100   # % variance

# PC1 × PC2
p_pc12 <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
  theme_minimal() +
  theme(axis.text  = element_blank(), axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "bold")) +
  labs(
    title = "3D Embedding — PC1 × PC2 (averaged across participants)",
    x = sprintf("PC1 (%.1f%%)", var_exp[1]),
    y = sprintf("PC2 (%.1f%%)", var_exp[2])
  )

# PC1 × PC3
p_pc13 <- ggplot(pca_df, aes(x = PC1, y = PC3)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
  theme_minimal() +
  theme(axis.text  = element_blank(), axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "bold")) +
  labs(
    title = "3D Embedding — PC1 × PC3 (averaged across participants)",
    x = sprintf("PC1 (%.1f%%)", var_exp[1]),
    y = sprintf("PC3 (%.1f%%)", var_exp[3])
  )

plot_grid(p_pc12, p_pc13, ncol = 1)

Per-Participant PC Biplots

for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n### %s\n\n", short))

  mat_w <- emb_3d %>%
    filter(worker_id == wid) %>%
    arrange(item) %>%
    select(dim_0, dim_1, dim_2) %>%
    as.matrix()
  rownames(mat_w) <- emb_3d %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)

  if (nrow(mat_w) < 3) { cat("Not enough items.\n\n"); next }

  pca_w  <- prcomp(mat_w, scale. = TRUE)
  pca_wdf <- as_tibble(pca_w$x, rownames = "item") %>%
    mutate(img_path = item_to_img(item))
  ve_w <- summary(pca_w)$importance[2, ] * 100

  pw12 <- ggplot(pca_wdf, aes(x = PC1, y = PC2)) +
    geom_image(aes(image = img_path), size = 0.08) +
    geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
    theme_minimal() +
    theme(axis.text = element_blank(), axis.ticks = element_blank()) +
    labs(title = sprintf("%s — PC1×PC2", short),
         x = sprintf("PC1 (%.1f%%)", ve_w[1]),
         y = sprintf("PC2 (%.1f%%)", ve_w[2]))

  pw13 <- ggplot(pca_wdf, aes(x = PC1, y = PC3)) +
    geom_image(aes(image = img_path), size = 0.08) +
    geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
    theme_minimal() +
    theme(axis.text = element_blank(), axis.ticks = element_blank()) +
    labs(title = sprintf("%s — PC1×PC3", short),
         x = sprintf("PC1 (%.1f%%)", ve_w[1]),
         y = sprintf("PC3 (%.1f%%)", ve_w[3]))

  print(plot_grid(pw12, pw13, ncol = 2))
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca

Interactive 3D Plot (mean embedding)

pca_3d_df <- as_tibble(pca_res$x, rownames = "item")

plot_ly(
  data = pca_3d_df,
  x = ~PC1, y = ~PC2, z = ~PC3,
  text = ~item,
  type = "scatter3d",
  mode = "markers+text",
  marker = list(size = 4, opacity = 0.8),
  textposition = "top center",
  textfont = list(size = 9)
) %>%
  layout(
    title = "Mean 3D Embedding (PCA-projected)",
    scene = list(
      xaxis = list(title = sprintf("PC1 (%.1f%%)", var_exp[1])),
      yaxis = list(title = sprintf("PC2 (%.1f%%)", var_exp[2])),
      zaxis = list(title = sprintf("PC3 (%.1f%%)", var_exp[3]))
    )
  )

5. Procrustes Alignment & Participant Clustering

We Procrustes-align every participant’s 2D embedding to a common reference (the first participant in the list) using shared items, then cluster participants by their pairwise SSE.

# Build named list of item-sorted matrices (shared items only)
all_items_shared <- emb_2d %>%
  count(item, worker_id) %>%
  group_by(item) %>%
  filter(n() == n_workers) %>%
  pull(item) %>%
  unique() %>%
  sort()

cat(sprintf("Shared items across all %d participants: %d\n",
            n_workers, length(all_items_shared)))
## Shared items across all 4 participants: 45
get_matrix_2d <- function(wid) {
  m <- emb_2d %>%
    filter(worker_id == wid, item %in% all_items_shared) %>%
    arrange(item) %>%
    select(dim_0, dim_1) %>%
    as.matrix()
  rownames(m) <- emb_2d %>%
    filter(worker_id == wid, item %in% all_items_shared) %>%
    arrange(item) %>%
    pull(item)
  m
}

emb_matrices <- setNames(map(workers, get_matrix_2d), workers)

Pairwise SSE Matrix

short_labels <- str_trunc(workers, 10, side = "left", ellipsis = "…")

sse_mat <- matrix(NA, n_workers, n_workers,
                  dimnames = list(short_labels, short_labels))

for (i in seq_len(n_workers)) {
  for (j in seq_len(n_workers)) {
    if (i == j) {
      sse_mat[i, j] <- 0
    } else {
      pro <- procrustes(emb_matrices[[workers[i]]],
                        emb_matrices[[workers[j]]])
      sse_mat[i, j] <- pro$ss
    }
  }
}

sse_df <- as.data.frame(sse_mat) %>%
  rownames_to_column("Part_A") %>%
  pivot_longer(-Part_A, names_to = "Part_B", values_to = "SSE")
sse_df$Part_A <- factor(sse_df$Part_A, levels = short_labels)
sse_df$Part_B <- factor(sse_df$Part_B, levels = short_labels)

ggplot(sse_df, aes(x = Part_A, y = Part_B, fill = SSE)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(SSE, 3)), size = 3.5) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Pairwise Procrustes SSE (2D embeddings)", x = "", y = "")

Participant Clustering by SSE

# Use mean SSE to each other participant as distance
sses_sym <- (sse_mat + t(sse_mat)) / 2
diag(sses_sym) <- 0

hc_parts <- hclust(as.dist(sses_sym), method = "ward.D2")

dend <- as.dendrogram(hc_parts)
dend <- color_branches(dend, k = min(3, n_workers))

par(mar = c(5, 4, 2, 1))
plot(dend,
     main  = "Participant Clustering by Procrustes SSE (ward.D2)",
     ylab  = "SSE distance",
     xlab  = "")

Cluster Assignments

n_clusters <- min(3, n_workers)
cluster_assign <- cutree(hc_parts, k = n_clusters)

tibble(
  participant = names(cluster_assign),
  cluster     = cluster_assign
) %>%
  kable(caption = sprintf("Participant cluster assignments (k=%d)", n_clusters)) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)
Participant cluster assignments (k=3)
participant cluster
netaniel 1
…221d96495 2
…b6e84c69e 3
…31424daca 1

6. Procrustes-Aligned Embedding Visualisations

All participants aligned to the first participant’s space.

ref_wid <- workers[1]
ref_mat <- emb_matrices[[ref_wid]]

aligned_list <- map(workers, function(wid) {
  mat <- emb_matrices[[wid]]
  if (wid == ref_wid) {
    coords <- mat
  } else {
    pro    <- procrustes(ref_mat, mat)
    coords <- pro$Yrot
  }
  tibble(
    item        = rownames(ref_mat),
    dim_0       = coords[, 1],
    dim_1       = coords[, 2],
    worker_id   = wid,
    participant = str_trunc(wid, 10, side = "left", ellipsis = "…"),
    img_path    = item_to_img(rownames(ref_mat))
  )
})

aligned_df <- bind_rows(aligned_list)

Overlay (all participants)

pal_parts <- scales::hue_pal()(n_workers)
names(pal_parts) <- str_trunc(workers, 10, side = "left", ellipsis = "…")

ggplot(aligned_df, aes(x = dim_0, y = dim_1, color = participant)) +
  geom_point(size = 2, alpha = 0.7) +
  geom_text_repel(aes(label = item), size = 2, max.overlaps = 10) +
  scale_color_manual(values = pal_parts) +
  theme_minimal() +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(title = sprintf("Procrustes-Aligned 2D Embeddings (reference: %s)",
                       str_trunc(ref_wid, 10, side = "left", ellipsis = "…")))

Per-Participant (image icons)

for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n### %s\n\n", short))

  df <- aligned_df %>% filter(worker_id == wid, file.exists(img_path))

  p <- ggplot(df, aes(x = dim_0, y = dim_1)) +
    geom_image(aes(image = img_path), size = 0.07) +
    theme_minimal() +
    theme(axis.text  = element_blank(), axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.5, face = "bold")) +
    labs(title = sprintf("Aligned 2D Embedding — %s", short))

  print(p)
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca


7. Item-Wise Displacement Analysis

For each pair of participants we compute the displacement of each item after Procrustes alignment, summarise mean displacement per item and per pair, and show arrow plots.

compute_displacements <- function(wid_a, wid_b) {
  mat_a <- emb_matrices[[wid_a]]
  mat_b <- emb_matrices[[wid_b]]
  pro   <- procrustes(mat_a, mat_b)
  aln_b <- pro$Yrot

  tibble(
    item      = rownames(mat_a),
    x_a       = mat_a[, 1],
    y_a       = mat_a[, 2],
    x_b       = aln_b[, 1],
    y_b       = aln_b[, 2],
    dx        = aln_b[, 1] - mat_a[, 1],
    dy        = aln_b[, 2] - mat_a[, 2],
    magnitude = sqrt(dx^2 + dy^2),
    part_a    = str_trunc(wid_a, 10, side = "left", ellipsis = "…"),
    part_b    = str_trunc(wid_b, 10, side = "left", ellipsis = "…"),
    comparison = paste(
      str_trunc(wid_a, 6, side = "left", ellipsis = "…"),
      "vs",
      str_trunc(wid_b, 6, side = "left", ellipsis = "…")
    )
  )
}

# All pairwise comparisons
pairs_idx <- combn(seq_len(n_workers), 2)
all_displacements <- map_dfr(seq_len(ncol(pairs_idx)), function(k) {
  compute_displacements(workers[pairs_idx[1, k]], workers[pairs_idx[2, k]])
})

Mean Displacement per Pair

disp_summary <- all_displacements %>%
  group_by(comparison) %>%
  summarise(
    n_items      = n(),
    total_dist   = sum(magnitude),
    mean_dist    = mean(magnitude),
    sd_dist      = sd(magnitude),
    se_dist      = sd_dist / sqrt(n_items),
    .groups = "drop"
  )

disp_summary %>%
  mutate(across(c(total_dist, mean_dist, sd_dist, se_dist), ~ round(.x, 4))) %>%
  kable(caption = "Displacement summary per participant pair") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Displacement summary per participant pair
comparison n_items total_dist mean_dist sd_dist se_dist
…4c69e vs …4daca 45 42.6832 0.9485 0.5582 0.0832
…96495 vs …4c69e 45 51.8899 1.1531 0.5623 0.0838
…96495 vs …4daca 45 49.2560 1.0946 0.5077 0.0757
…aniel vs …4c69e 45 41.7480 0.9277 0.5390 0.0803
…aniel vs …4daca 45 38.6923 0.8598 0.5115 0.0763
…aniel vs …96495 45 49.3868 1.0975 0.6012 0.0896

Arrow Plots (pairwise displacements)

comparisons <- unique(all_displacements$comparison)

for (comp in comparisons) {
  cat(sprintf("\n### %s\n\n", comp))
  d <- all_displacements %>% filter(comparison == comp)

  p <- ggplot(d) +
    geom_segment(
      aes(x = x_a, y = y_a, xend = x_b, yend = y_b, color = magnitude),
      arrow    = arrow(length = unit(0.1, "inches")),
      linewidth = 0.6, alpha = 0.85
    ) +
    geom_point(aes(x = x_a, y = y_a), size = 0.8, alpha = 0.5) +
    geom_text_repel(
      aes(x = x_a, y = y_a, label = item),
      size = 2.2, max.overlaps = 20, color = "grey30"
    ) +
    scale_color_viridis_c(option = "plasma") +
    theme_minimal() +
    theme(
      axis.text  = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5, face = "bold")
    ) +
    labs(
      title = sprintf("Item Displacement: %s", comp),
      color = "Displacement"
    )

  print(p)
  cat("\n")
}

…aniel vs …96495

…aniel vs …4c69e

…aniel vs …4daca

…96495 vs …4c69e

…96495 vs …4daca

…4c69e vs …4daca

Top Displaced Items

# Average displacement across all pairwise comparisons per item
item_disp <- all_displacements %>%
  group_by(item) %>%
  summarise(
    mean_displacement = mean(magnitude),
    sd_displacement   = sd(magnitude),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_displacement))

# Bar chart — top 20
item_disp %>%
  slice_head(n = 20) %>%
  mutate(item = fct_reorder(item, mean_displacement)) %>%
  ggplot(aes(x = item, y = mean_displacement,
             ymin = mean_displacement - sd_displacement,
             ymax = mean_displacement + sd_displacement)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  geom_errorbar(width = 0.3) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 20 Most-Displaced Items (mean across all participant pairs)",
    x     = NULL,
    y     = "Mean Displacement (± SD)"
  )

Displacement with Images

top_items <- item_disp %>% slice_head(n = 20) %>% pull(item)

# Show the mean position across participants in the reference space (first participant)
mean_pos <- aligned_df %>%
  filter(item %in% top_items) %>%
  group_by(item) %>%
  summarise(
    x = mean(dim_0),
    y = mean(dim_1),
    img_path = first(img_path),
    .groups = "drop"
  ) %>%
  left_join(item_disp, by = "item")

ggplot(mean_pos, aes(x = x, y = y)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_point(aes(size = mean_displacement, color = mean_displacement),
             alpha = 0.4) +
  scale_size_continuous(range = c(3, 12)) +
  scale_color_viridis_c(option = "plasma") +
  theme_minimal() +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(
    title = "Most-Displaced Items in the Aligned Space\n(larger circle = more variable across participants)",
    size  = "Mean displacement",
    color = "Mean displacement"
  )


8. Distance Matrix Correlations

Pearson and Spearman correlations between pairwise item-distance matrices across participants (computed in aligned space).

build_dist_matrix <- function(wid) {
  m <- aligned_df %>%
    filter(worker_id == wid) %>%
    arrange(item) %>%
    select(dim_0, dim_1) %>%
    as.matrix()
  rownames(m) <- aligned_df %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)
  as.matrix(dist(m))
}

dist_mats <- setNames(map(workers, build_dist_matrix), workers)

upper_tri <- function(m) m[upper.tri(m)]

part_pairs <- combn(workers, 2, simplify = FALSE)

dist_cors <- map_dfr(part_pairs, function(pair) {
  v1 <- upper_tri(dist_mats[[pair[1]]])
  v2 <- upper_tri(dist_mats[[pair[2]]])
  tibble(
    Part_A = str_trunc(pair[1], 10, side = "left", ellipsis = "…"),
    Part_B = str_trunc(pair[2], 10, side = "left", ellipsis = "…"),
    r      = cor(v1, v2, method = "pearson"),
    rho    = cor(v1, v2, method = "spearman")
  )
})

dist_cors %>%
  mutate(across(c(r, rho), ~ round(.x, 3))) %>%
  kable(
    caption  = "Pearson and Spearman correlations between pairwise distance matrices (aligned space)",
    col.names = c("Participant A", "Participant B", "Pearson r", "Spearman ρ")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Pearson and Spearman correlations between pairwise distance matrices (aligned space)
Participant A Participant B Pearson r Spearman ρ
netaniel …221d96495 0.202 0.195
netaniel …b6e84c69e 0.338 0.308
netaniel …31424daca 0.496 0.492
…221d96495 …b6e84c69e 0.098 0.101
…221d96495 …31424daca 0.296 0.264
…b6e84c69e …31424daca 0.291 0.310
short_labs <- str_trunc(workers, 10, side = "left", ellipsis = "…")
cor_mat <- matrix(1, n_workers, n_workers,
                  dimnames = list(short_labs, short_labs))
for (i in seq_len(nrow(dist_cors))) {
  cor_mat[dist_cors$Part_A[i], dist_cors$Part_B[i]] <- dist_cors$r[i]
  cor_mat[dist_cors$Part_B[i], dist_cors$Part_A[i]] <- dist_cors$r[i]
}

cor_df <- as.data.frame(cor_mat) %>%
  rownames_to_column("Part_A") %>%
  pivot_longer(-Part_A, names_to = "Part_B", values_to = "r")
cor_df$Part_A <- factor(cor_df$Part_A, levels = short_labs)
cor_df$Part_B <- factor(cor_df$Part_B, levels = short_labs)

ggplot(cor_df, aes(x = Part_A, y = Part_B, fill = r)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(r, 3)), size = 4) +
  scale_fill_distiller(palette = "RdYlBu", direction = 1, limits = c(0, 1)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Distance-Matrix Similarity (Pearson r)", x = "", y = "", fill = "r")