Overview

This document evaluates the first pilot dataset for the cross-cultural food-image triplet experiment. Four participants (US Young pilot) each completed ~900 random + 90 validation + 10 catch trials. We:

Compute 2D and 3D per-participant embeddings via the Salmon OfflineEmbedding (skipping participants whose CSV already exists).
Visualise 2D embeddings with the actual food images as points.
Visualise 3D embeddings as PC biplots with item loadings.
Procrustes-align every participant to a common reference, cluster participants by SSE, and visualise aligned spaces.
Show item-wise displacement arrows across the key comparisons.

Configuration

root_dir    <- here::here()                   # project root
data_dir    <- file.path(root_dir, "data",    "food_us_young_pilot")
emb_dir     <- file.path(root_dir, "analyses","food_pilot_embeddings")
img_dir     <- file.path(root_dir, "experiment","stimuli","food2")
python_bin  <- "/opt/anaconda3/envs/salmon/bin/python3"

dir.create(emb_dir, showWarnings = FALSE, recursive = TRUE)

# Helper: bare item name → image path
item_to_img <- function(item) {
  file.path(img_dir, paste0(item, ".jpg"))
}

# Helper: strip "stimuli/food2/" and extension
normalize_item <- function(x) {
  x %>%
    str_remove("^stimuli/food2/") %>%
    str_remove("\\.(jpg|jpeg|png)$")
}

1. Compute Embeddings

Set run_embeddings <- TRUE to (re-)run the Python script; set FALSE to load pre-computed CSVs.

run_embeddings <- FALSE   # change to FALSE once embeddings exist

if (run_embeddings) {
  script <- file.path(root_dir, "food_pilot_embeddings.py")
  Sys.setenv(OMP_NUM_THREADS = "1", MKL_NUM_THREADS = "1")   # avoid OpenMP issues on macOS ARM
  cat("Running embedding script …\n")
  exit_code <- system2(python_bin, args = script, stdout = "", stderr = "")
  if (exit_code != 0) {
    stop(sprintf("food_pilot_embeddings.py exited with code %d", exit_code))
  }
  cat("Done.\n")
}

Embedding History

hist_path <- file.path(emb_dir, "food_pilot_embedding_history.csv")
if (file.exists(hist_path)) {
  hist_df <- read_csv(hist_path, show_col_types = FALSE)
  hist_df %>%
    mutate(across(c(lowest_loss), ~ round(.x, 5))) %>%
    kable(caption = "Embedding training history") %>%
    kable_styling(bootstrap_options = "striped", full_width = FALSE)
} else {
  cat("No history file found — set run_embeddings <- TRUE to generate.\n")
}

Embedding training history
worker_id	d	lowest_loss	epoch_stopped	counter_from_last_update	n_train	n_test
netaniel	2	0.39852	49999	3744	794	206
netaniel	3	0.39514	49999	1250	794	206
69cde97d0bb7897221d96495	2	0.66894	49999	48425	812	188
69cde97d0bb7897221d96495	3	0.63222	49999	46508	812	188
649d809191fa2e4b6e84c69e	2	0.58715	49999	46658	803	197
649d809191fa2e4b6e84c69e	3	0.55012	49999	42143	803	197
697a7fb7a5dd76031424daca	2	0.55557	49999	31212	777	223
697a7fb7a5dd76031424daca	3	0.55804	49999	42405	777	223

2. Load Embeddings

path_2d <- file.path(emb_dir, "food_pilot_embeddings_2d.csv")
path_3d <- file.path(emb_dir, "food_pilot_embeddings_3d.csv")

if (!file.exists(path_2d)) {
  stop("food_pilot_embeddings_2d.csv not found. Run the Python script first.")
}

emb_2d_raw <- read_csv(path_2d, show_col_types = FALSE)
emb_3d_raw <- read_csv(path_3d, show_col_types = FALSE)

# Normalize item names
emb_2d_raw$item <- normalize_item(emb_2d_raw$stimulus)
emb_3d_raw$item <- normalize_item(emb_3d_raw$stimulus)

# Z-score each participant's embedding per dimension
emb_2d <- emb_2d_raw %>%
  group_by(worker_id) %>%
  mutate(
    dim_0 = as.numeric(scale(dim_0)),
    dim_1 = as.numeric(scale(dim_1))
  ) %>%
  ungroup()

emb_3d <- emb_3d_raw %>%
  group_by(worker_id) %>%
  mutate(
    dim_0 = as.numeric(scale(dim_0)),
    dim_1 = as.numeric(scale(dim_1)),
    dim_2 = as.numeric(scale(dim_2))
  ) %>%
  ungroup()

# Add image paths
emb_2d$img_path <- item_to_img(emb_2d$item)
emb_3d$img_path <- item_to_img(emb_3d$item)

# Short participant labels (last 6 chars of worker_id)
emb_2d <- emb_2d %>%
  mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))
emb_3d <- emb_3d %>%
  mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))

workers    <- unique(emb_2d$worker_id)
n_workers  <- length(workers)
cat(sprintf("Loaded 2D embeddings: %d items × %d participants\n",
            n_distinct(emb_2d$item), n_workers))

## Loaded 2D embeddings: 45 items × 4 participants

cat(sprintf("Loaded 3D embeddings: %d items × %d participants\n",
            n_distinct(emb_3d$item), length(unique(emb_3d$worker_id))))

## Loaded 3D embeddings: 45 items × 4 participants

3. 2D Embedding Visualisations

Each participant’s 2D semantic embedding plotted with food images at their coordinates.

plot_2d_embedding <- function(df, title) {
  df <- df %>% filter(file.exists(img_path))
  ggplot(df, aes(x = dim_0, y = dim_1)) +
    geom_image(aes(image = img_path), size = 0.07) +
    theme_minimal() +
    theme(
      axis.text  = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5, face = "bold")
    ) +
    labs(title = title)
}

for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n## %s\n\n", short))
  p <- plot_2d_embedding(
    emb_2d %>% filter(worker_id == wid),
    sprintf("2D Embedding — %s", short)
  )
  print(p)
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca

Composite (all participants, text labels)

p_list_2d <- map(workers, function(wid) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  ggplot(emb_2d %>% filter(worker_id == wid),
         aes(x = dim_0, y = dim_1, label = item)) +
    geom_text(size = 2, check_overlap = TRUE) +
    theme_minimal() +
    theme(axis.text  = element_blank(), axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.5, size = 10, face = "bold")) +
    labs(title = short)
})

plot_grid(plotlist = p_list_2d, ncol = 2)

4. 3D Embedding Visualisations

PCA Biplots

We run a joint PCA over the 3D embeddings (one row per item per participant) to find dimensions that capture the most shared variance, then plot item loadings on PC1 × PC2 and PC1 × PC3.

# One row per item, average coordinates across participants for the biplot
emb_3d_avg <- emb_3d %>%
  group_by(item) %>%
  summarise(
    dim_0 = mean(dim_0),
    dim_1 = mean(dim_1),
    dim_2 = mean(dim_2),
    .groups = "drop"
  )

mat_3d <- emb_3d_avg %>% select(dim_0, dim_1, dim_2) %>% as.matrix()
rownames(mat_3d) <- emb_3d_avg$item

pca_res <- prcomp(mat_3d, scale. = TRUE)
pca_df  <- as_tibble(pca_res$x, rownames = "item") %>%
  mutate(img_path = item_to_img(item))

var_exp <- summary(pca_res)$importance[2, ] * 100   # % variance

# PC1 × PC2
p_pc12 <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
  theme_minimal() +
  theme(axis.text  = element_blank(), axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "bold")) +
  labs(
    title = "3D Embedding — PC1 × PC2 (averaged across participants)",
    x = sprintf("PC1 (%.1f%%)", var_exp[1]),
    y = sprintf("PC2 (%.1f%%)", var_exp[2])
  )

# PC1 × PC3
p_pc13 <- ggplot(pca_df, aes(x = PC1, y = PC3)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
  theme_minimal() +
  theme(axis.text  = element_blank(), axis.ticks = element_blank(),
        plot.title = element_text(hjust = 0.5, face = "bold")) +
  labs(
    title = "3D Embedding — PC1 × PC3 (averaged across participants)",
    x = sprintf("PC1 (%.1f%%)", var_exp[1]),
    y = sprintf("PC3 (%.1f%%)", var_exp[3])
  )

plot_grid(p_pc12, p_pc13, ncol = 1)

Per-Participant PC Biplots

for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n### %s\n\n", short))

  mat_w <- emb_3d %>%
    filter(worker_id == wid) %>%
    arrange(item) %>%
    select(dim_0, dim_1, dim_2) %>%
    as.matrix()
  rownames(mat_w) <- emb_3d %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)

  if (nrow(mat_w) < 3) { cat("Not enough items.\n\n"); next }

  pca_w  <- prcomp(mat_w, scale. = TRUE)
  pca_wdf <- as_tibble(pca_w$x, rownames = "item") %>%
    mutate(img_path = item_to_img(item))
  ve_w <- summary(pca_w)$importance[2, ] * 100

  pw12 <- ggplot(pca_wdf, aes(x = PC1, y = PC2)) +
    geom_image(aes(image = img_path), size = 0.08) +
    geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
    theme_minimal() +
    theme(axis.text = element_blank(), axis.ticks = element_blank()) +
    labs(title = sprintf("%s — PC1×PC2", short),
         x = sprintf("PC1 (%.1f%%)", ve_w[1]),
         y = sprintf("PC2 (%.1f%%)", ve_w[2]))

  pw13 <- ggplot(pca_wdf, aes(x = PC1, y = PC3)) +
    geom_image(aes(image = img_path), size = 0.08) +
    geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
    theme_minimal() +
    theme(axis.text = element_blank(), axis.ticks = element_blank()) +
    labs(title = sprintf("%s — PC1×PC3", short),
         x = sprintf("PC1 (%.1f%%)", ve_w[1]),
         y = sprintf("PC3 (%.1f%%)", ve_w[3]))

  print(plot_grid(pw12, pw13, ncol = 2))
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca

Interactive 3D Plot (mean embedding)

pca_3d_df <- as_tibble(pca_res$x, rownames = "item")

plot_ly(
  data = pca_3d_df,
  x = ~PC1, y = ~PC2, z = ~PC3,
  text = ~item,
  type = "scatter3d",
  mode = "markers+text",
  marker = list(size = 4, opacity = 0.8),
  textposition = "top center",
  textfont = list(size = 9)
) %>%
  layout(
    title = "Mean 3D Embedding (PCA-projected)",
    scene = list(
      xaxis = list(title = sprintf("PC1 (%.1f%%)", var_exp[1])),
      yaxis = list(title = sprintf("PC2 (%.1f%%)", var_exp[2])),
      zaxis = list(title = sprintf("PC3 (%.1f%%)", var_exp[3]))
    )
  )

5. Procrustes Alignment & Participant Clustering

We Procrustes-align every participant’s 2D embedding to a common reference (the first participant in the list) using shared items, then cluster participants by their pairwise SSE.

# Build named list of item-sorted matrices (shared items only)
all_items_shared <- emb_2d %>%
  count(item, worker_id) %>%
  group_by(item) %>%
  filter(n() == n_workers) %>%
  pull(item) %>%
  unique() %>%
  sort()

cat(sprintf("Shared items across all %d participants: %d\n",
            n_workers, length(all_items_shared)))

## Shared items across all 4 participants: 45

get_matrix_2d <- function(wid) {
  m <- emb_2d %>%
    filter(worker_id == wid, item %in% all_items_shared) %>%
    arrange(item) %>%
    select(dim_0, dim_1) %>%
    as.matrix()
  rownames(m) <- emb_2d %>%
    filter(worker_id == wid, item %in% all_items_shared) %>%
    arrange(item) %>%
    pull(item)
  m
}

emb_matrices <- setNames(map(workers, get_matrix_2d), workers)

Pairwise SSE Matrix

short_labels <- str_trunc(workers, 10, side = "left", ellipsis = "…")

sse_mat <- matrix(NA, n_workers, n_workers,
                  dimnames = list(short_labels, short_labels))

for (i in seq_len(n_workers)) {
  for (j in seq_len(n_workers)) {
    if (i == j) {
      sse_mat[i, j] <- 0
    } else {
      pro <- procrustes(emb_matrices[[workers[i]]],
                        emb_matrices[[workers[j]]])
      sse_mat[i, j] <- pro$ss
    }
  }
}

sse_df <- as.data.frame(sse_mat) %>%
  rownames_to_column("Part_A") %>%
  pivot_longer(-Part_A, names_to = "Part_B", values_to = "SSE")
sse_df$Part_A <- factor(sse_df$Part_A, levels = short_labels)
sse_df$Part_B <- factor(sse_df$Part_B, levels = short_labels)

ggplot(sse_df, aes(x = Part_A, y = Part_B, fill = SSE)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(SSE, 3)), size = 3.5) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Pairwise Procrustes SSE (2D embeddings)", x = "", y = "")

Participant Clustering by SSE

# Use mean SSE to each other participant as distance
sses_sym <- (sse_mat + t(sse_mat)) / 2
diag(sses_sym) <- 0

hc_parts <- hclust(as.dist(sses_sym), method = "ward.D2")

dend <- as.dendrogram(hc_parts)
dend <- color_branches(dend, k = min(3, n_workers))

par(mar = c(5, 4, 2, 1))
plot(dend,
     main  = "Participant Clustering by Procrustes SSE (ward.D2)",
     ylab  = "SSE distance",
     xlab  = "")

Cluster Assignments

n_clusters <- min(3, n_workers)
cluster_assign <- cutree(hc_parts, k = n_clusters)

tibble(
  participant = names(cluster_assign),
  cluster     = cluster_assign
) %>%
  kable(caption = sprintf("Participant cluster assignments (k=%d)", n_clusters)) %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE)

Participant cluster assignments (k=3)
participant	cluster
netaniel	1
…221d96495	2
…b6e84c69e	3
…31424daca	1

6. Procrustes-Aligned Embedding Visualisations

All participants aligned to the first participant’s space.

ref_wid <- workers[1]
ref_mat <- emb_matrices[[ref_wid]]

aligned_list <- map(workers, function(wid) {
  mat <- emb_matrices[[wid]]
  if (wid == ref_wid) {
    coords <- mat
  } else {
    pro    <- procrustes(ref_mat, mat)
    coords <- pro$Yrot
  }
  tibble(
    item        = rownames(ref_mat),
    dim_0       = coords[, 1],
    dim_1       = coords[, 2],
    worker_id   = wid,
    participant = str_trunc(wid, 10, side = "left", ellipsis = "…"),
    img_path    = item_to_img(rownames(ref_mat))
  )
})

aligned_df <- bind_rows(aligned_list)

Overlay (all participants)

pal_parts <- scales::hue_pal()(n_workers)
names(pal_parts) <- str_trunc(workers, 10, side = "left", ellipsis = "…")

ggplot(aligned_df, aes(x = dim_0, y = dim_1, color = participant)) +
  geom_point(size = 2, alpha = 0.7) +
  geom_text_repel(aes(label = item), size = 2, max.overlaps = 10) +
  scale_color_manual(values = pal_parts) +
  theme_minimal() +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    legend.title = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(title = sprintf("Procrustes-Aligned 2D Embeddings (reference: %s)",
                       str_trunc(ref_wid, 10, side = "left", ellipsis = "…")))

Per-Participant (image icons)

for (wid in workers) {
  short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
  cat(sprintf("\n### %s\n\n", short))

  df <- aligned_df %>% filter(worker_id == wid, file.exists(img_path))

  p <- ggplot(df, aes(x = dim_0, y = dim_1)) +
    geom_image(aes(image = img_path), size = 0.07) +
    theme_minimal() +
    theme(axis.text  = element_blank(), axis.ticks = element_blank(),
          axis.title = element_blank(),
          plot.title = element_text(hjust = 0.5, face = "bold")) +
    labs(title = sprintf("Aligned 2D Embedding — %s", short))

  print(p)
  cat("\n")
}

netaniel

…221d96495

…b6e84c69e

…31424daca

7. Item-Wise Displacement Analysis

For each pair of participants we compute the displacement of each item after Procrustes alignment, summarise mean displacement per item and per pair, and show arrow plots.

compute_displacements <- function(wid_a, wid_b) {
  mat_a <- emb_matrices[[wid_a]]
  mat_b <- emb_matrices[[wid_b]]
  pro   <- procrustes(mat_a, mat_b)
  aln_b <- pro$Yrot

  tibble(
    item      = rownames(mat_a),
    x_a       = mat_a[, 1],
    y_a       = mat_a[, 2],
    x_b       = aln_b[, 1],
    y_b       = aln_b[, 2],
    dx        = aln_b[, 1] - mat_a[, 1],
    dy        = aln_b[, 2] - mat_a[, 2],
    magnitude = sqrt(dx^2 + dy^2),
    part_a    = str_trunc(wid_a, 10, side = "left", ellipsis = "…"),
    part_b    = str_trunc(wid_b, 10, side = "left", ellipsis = "…"),
    comparison = paste(
      str_trunc(wid_a, 6, side = "left", ellipsis = "…"),
      "vs",
      str_trunc(wid_b, 6, side = "left", ellipsis = "…")
    )
  )
}

# All pairwise comparisons
pairs_idx <- combn(seq_len(n_workers), 2)
all_displacements <- map_dfr(seq_len(ncol(pairs_idx)), function(k) {
  compute_displacements(workers[pairs_idx[1, k]], workers[pairs_idx[2, k]])
})

Mean Displacement per Pair

disp_summary <- all_displacements %>%
  group_by(comparison) %>%
  summarise(
    n_items      = n(),
    total_dist   = sum(magnitude),
    mean_dist    = mean(magnitude),
    sd_dist      = sd(magnitude),
    se_dist      = sd_dist / sqrt(n_items),
    .groups = "drop"
  )

disp_summary %>%
  mutate(across(c(total_dist, mean_dist, sd_dist, se_dist), ~ round(.x, 4))) %>%
  kable(caption = "Displacement summary per participant pair") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Displacement summary per participant pair
comparison	n_items	total_dist	mean_dist	sd_dist	se_dist
…4c69e vs …4daca	45	42.6832	0.9485	0.5582	0.0832
…96495 vs …4c69e	45	51.8899	1.1531	0.5623	0.0838
…96495 vs …4daca	45	49.2560	1.0946	0.5077	0.0757
…aniel vs …4c69e	45	41.7480	0.9277	0.5390	0.0803
…aniel vs …4daca	45	38.6923	0.8598	0.5115	0.0763
…aniel vs …96495	45	49.3868	1.0975	0.6012	0.0896

Arrow Plots (pairwise displacements)

comparisons <- unique(all_displacements$comparison)

for (comp in comparisons) {
  cat(sprintf("\n### %s\n\n", comp))
  d <- all_displacements %>% filter(comparison == comp)

  p <- ggplot(d) +
    geom_segment(
      aes(x = x_a, y = y_a, xend = x_b, yend = y_b, color = magnitude),
      arrow    = arrow(length = unit(0.1, "inches")),
      linewidth = 0.6, alpha = 0.85
    ) +
    geom_point(aes(x = x_a, y = y_a), size = 0.8, alpha = 0.5) +
    geom_text_repel(
      aes(x = x_a, y = y_a, label = item),
      size = 2.2, max.overlaps = 20, color = "grey30"
    ) +
    scale_color_viridis_c(option = "plasma") +
    theme_minimal() +
    theme(
      axis.text  = element_blank(),
      axis.ticks = element_blank(),
      axis.title = element_blank(),
      plot.title = element_text(hjust = 0.5, face = "bold")
    ) +
    labs(
      title = sprintf("Item Displacement: %s", comp),
      color = "Displacement"
    )

  print(p)
  cat("\n")
}

…aniel vs …96495

…aniel vs …4c69e

…aniel vs …4daca

…96495 vs …4c69e

…96495 vs …4daca

…4c69e vs …4daca

Top Displaced Items

# Average displacement across all pairwise comparisons per item
item_disp <- all_displacements %>%
  group_by(item) %>%
  summarise(
    mean_displacement = mean(magnitude),
    sd_displacement   = sd(magnitude),
    .groups = "drop"
  ) %>%
  arrange(desc(mean_displacement))

# Bar chart — top 20
item_disp %>%
  slice_head(n = 20) %>%
  mutate(item = fct_reorder(item, mean_displacement)) %>%
  ggplot(aes(x = item, y = mean_displacement,
             ymin = mean_displacement - sd_displacement,
             ymax = mean_displacement + sd_displacement)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  geom_errorbar(width = 0.3) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 20 Most-Displaced Items (mean across all participant pairs)",
    x     = NULL,
    y     = "Mean Displacement (± SD)"
  )

Displacement with Images

top_items <- item_disp %>% slice_head(n = 20) %>% pull(item)

# Show the mean position across participants in the reference space (first participant)
mean_pos <- aligned_df %>%
  filter(item %in% top_items) %>%
  group_by(item) %>%
  summarise(
    x = mean(dim_0),
    y = mean(dim_1),
    img_path = first(img_path),
    .groups = "drop"
  ) %>%
  left_join(item_disp, by = "item")

ggplot(mean_pos, aes(x = x, y = y)) +
  geom_image(aes(image = img_path), size = 0.07) +
  geom_point(aes(size = mean_displacement, color = mean_displacement),
             alpha = 0.4) +
  scale_size_continuous(range = c(3, 12)) +
  scale_color_viridis_c(option = "plasma") +
  theme_minimal() +
  theme(
    axis.text  = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    plot.title = element_text(hjust = 0.5, face = "bold")
  ) +
  labs(
    title = "Most-Displaced Items in the Aligned Space\n(larger circle = more variable across participants)",
    size  = "Mean displacement",
    color = "Mean displacement"
  )

8. Distance Matrix Correlations

Pearson and Spearman correlations between pairwise item-distance matrices across participants (computed in aligned space).

build_dist_matrix <- function(wid) {
  m <- aligned_df %>%
    filter(worker_id == wid) %>%
    arrange(item) %>%
    select(dim_0, dim_1) %>%
    as.matrix()
  rownames(m) <- aligned_df %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)
  as.matrix(dist(m))
}

dist_mats <- setNames(map(workers, build_dist_matrix), workers)

upper_tri <- function(m) m[upper.tri(m)]

part_pairs <- combn(workers, 2, simplify = FALSE)

dist_cors <- map_dfr(part_pairs, function(pair) {
  v1 <- upper_tri(dist_mats[[pair[1]]])
  v2 <- upper_tri(dist_mats[[pair[2]]])
  tibble(
    Part_A = str_trunc(pair[1], 10, side = "left", ellipsis = "…"),
    Part_B = str_trunc(pair[2], 10, side = "left", ellipsis = "…"),
    r      = cor(v1, v2, method = "pearson"),
    rho    = cor(v1, v2, method = "spearman")
  )
})

dist_cors %>%
  mutate(across(c(r, rho), ~ round(.x, 3))) %>%
  kable(
    caption  = "Pearson and Spearman correlations between pairwise distance matrices (aligned space)",
    col.names = c("Participant A", "Participant B", "Pearson r", "Spearman ρ")
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Pearson and Spearman correlations between pairwise distance matrices (aligned space)
Participant A	Participant B	Pearson r	Spearman ρ
netaniel	…221d96495	0.202	0.195
netaniel	…b6e84c69e	0.338	0.308
netaniel	…31424daca	0.496	0.492
…221d96495	…b6e84c69e	0.098	0.101
…221d96495	…31424daca	0.296	0.264
…b6e84c69e	…31424daca	0.291	0.310

short_labs <- str_trunc(workers, 10, side = "left", ellipsis = "…")
cor_mat <- matrix(1, n_workers, n_workers,
                  dimnames = list(short_labs, short_labs))
for (i in seq_len(nrow(dist_cors))) {
  cor_mat[dist_cors$Part_A[i], dist_cors$Part_B[i]] <- dist_cors$r[i]
  cor_mat[dist_cors$Part_B[i], dist_cors$Part_A[i]] <- dist_cors$r[i]
}

cor_df <- as.data.frame(cor_mat) %>%
  rownames_to_column("Part_A") %>%
  pivot_longer(-Part_A, names_to = "Part_B", values_to = "r")
cor_df$Part_A <- factor(cor_df$Part_A, levels = short_labs)
cor_df$Part_B <- factor(cor_df$Part_B, levels = short_labs)

ggplot(cor_df, aes(x = Part_A, y = Part_B, fill = r)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(r, 3)), size = 4) +
  scale_fill_distiller(palette = "RdYlBu", direction = 1, limits = c(0, 1)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  labs(title = "Distance-Matrix Similarity (Pearson r)", x = "", y = "", fill = "r")

Food Pilot: Cross-Participant Semantic Embeddings

Ivette Colón

2026-04-17

Overview

Configuration

1. Compute Embeddings

Embedding History

2. Load Embeddings

3. 2D Embedding Visualisations

netaniel

…221d96495

…b6e84c69e

…31424daca

Composite (all participants, text labels)

4. 3D Embedding Visualisations

PCA Biplots

Per-Participant PC Biplots

netaniel

…221d96495

…b6e84c69e

…31424daca

Interactive 3D Plot (mean embedding)

5. Procrustes Alignment & Participant Clustering

Pairwise SSE Matrix

Participant Clustering by SSE

Cluster Assignments

6. Procrustes-Aligned Embedding Visualisations

Overlay (all participants)

Per-Participant (image icons)

netaniel

…221d96495

…b6e84c69e

…31424daca

7. Item-Wise Displacement Analysis

Mean Displacement per Pair

Arrow Plots (pairwise displacements)

…aniel vs …96495

…aniel vs …4c69e

…aniel vs …4daca

…96495 vs …4c69e

…96495 vs …4daca

…4c69e vs …4daca

Top Displaced Items

Displacement with Images

8. Distance Matrix Correlations