This document evaluates the first pilot dataset for the cross-cultural food-image triplet experiment. Four participants (US Young pilot) each completed ~900 random + 90 validation + 10 catch trials. We:
OfflineEmbedding (skipping participants whose CSV already
exists).root_dir <- here::here() # project root
data_dir <- file.path(root_dir, "data", "food_us_young_pilot")
emb_dir <- file.path(root_dir, "analyses","food_pilot_embeddings")
img_dir <- file.path(root_dir, "experiment","stimuli","food2")
python_bin <- "/opt/anaconda3/envs/salmon/bin/python3"
dir.create(emb_dir, showWarnings = FALSE, recursive = TRUE)
# Helper: bare item name → image path
item_to_img <- function(item) {
file.path(img_dir, paste0(item, ".jpg"))
}
# Helper: strip "stimuli/food2/" and extension
normalize_item <- function(x) {
x %>%
str_remove("^stimuli/food2/") %>%
str_remove("\\.(jpg|jpeg|png)$")
}
Set run_embeddings <- TRUE to (re-)run the Python
script; set FALSE to load pre-computed CSVs.
run_embeddings <- FALSE # change to FALSE once embeddings exist
if (run_embeddings) {
script <- file.path(root_dir, "food_pilot_embeddings.py")
Sys.setenv(OMP_NUM_THREADS = "1", MKL_NUM_THREADS = "1") # avoid OpenMP issues on macOS ARM
cat("Running embedding script …\n")
exit_code <- system2(python_bin, args = script, stdout = "", stderr = "")
if (exit_code != 0) {
stop(sprintf("food_pilot_embeddings.py exited with code %d", exit_code))
}
cat("Done.\n")
}
hist_path <- file.path(emb_dir, "food_pilot_embedding_history.csv")
if (file.exists(hist_path)) {
hist_df <- read_csv(hist_path, show_col_types = FALSE)
hist_df %>%
mutate(across(c(lowest_loss), ~ round(.x, 5))) %>%
kable(caption = "Embedding training history") %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)
} else {
cat("No history file found — set run_embeddings <- TRUE to generate.\n")
}
| worker_id | d | lowest_loss | epoch_stopped | counter_from_last_update | n_train | n_test |
|---|---|---|---|---|---|---|
| netaniel | 2 | 0.39852 | 49999 | 3744 | 794 | 206 |
| netaniel | 3 | 0.39514 | 49999 | 1250 | 794 | 206 |
| 69cde97d0bb7897221d96495 | 2 | 0.66894 | 49999 | 48425 | 812 | 188 |
| 69cde97d0bb7897221d96495 | 3 | 0.63222 | 49999 | 46508 | 812 | 188 |
| 649d809191fa2e4b6e84c69e | 2 | 0.58715 | 49999 | 46658 | 803 | 197 |
| 649d809191fa2e4b6e84c69e | 3 | 0.55012 | 49999 | 42143 | 803 | 197 |
| 697a7fb7a5dd76031424daca | 2 | 0.55557 | 49999 | 31212 | 777 | 223 |
| 697a7fb7a5dd76031424daca | 3 | 0.55804 | 49999 | 42405 | 777 | 223 |
path_2d <- file.path(emb_dir, "food_pilot_embeddings_2d.csv")
path_3d <- file.path(emb_dir, "food_pilot_embeddings_3d.csv")
if (!file.exists(path_2d)) {
stop("food_pilot_embeddings_2d.csv not found. Run the Python script first.")
}
emb_2d_raw <- read_csv(path_2d, show_col_types = FALSE)
emb_3d_raw <- read_csv(path_3d, show_col_types = FALSE)
# Normalize item names
emb_2d_raw$item <- normalize_item(emb_2d_raw$stimulus)
emb_3d_raw$item <- normalize_item(emb_3d_raw$stimulus)
# Z-score each participant's embedding per dimension
emb_2d <- emb_2d_raw %>%
group_by(worker_id) %>%
mutate(
dim_0 = as.numeric(scale(dim_0)),
dim_1 = as.numeric(scale(dim_1))
) %>%
ungroup()
emb_3d <- emb_3d_raw %>%
group_by(worker_id) %>%
mutate(
dim_0 = as.numeric(scale(dim_0)),
dim_1 = as.numeric(scale(dim_1)),
dim_2 = as.numeric(scale(dim_2))
) %>%
ungroup()
# Add image paths
emb_2d$img_path <- item_to_img(emb_2d$item)
emb_3d$img_path <- item_to_img(emb_3d$item)
# Short participant labels (last 6 chars of worker_id)
emb_2d <- emb_2d %>%
mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))
emb_3d <- emb_3d %>%
mutate(participant = str_trunc(worker_id, 8, side = "left", ellipsis = "…"))
workers <- unique(emb_2d$worker_id)
n_workers <- length(workers)
cat(sprintf("Loaded 2D embeddings: %d items × %d participants\n",
n_distinct(emb_2d$item), n_workers))
## Loaded 2D embeddings: 45 items × 4 participants
cat(sprintf("Loaded 3D embeddings: %d items × %d participants\n",
n_distinct(emb_3d$item), length(unique(emb_3d$worker_id))))
## Loaded 3D embeddings: 45 items × 4 participants
Each participant’s 2D semantic embedding plotted with food images at their coordinates.
plot_2d_embedding <- function(df, title) {
df <- df %>% filter(file.exists(img_path))
ggplot(df, aes(x = dim_0, y = dim_1)) +
geom_image(aes(image = img_path), size = 0.07) +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")
) +
labs(title = title)
}
for (wid in workers) {
short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
cat(sprintf("\n## %s\n\n", short))
p <- plot_2d_embedding(
emb_2d %>% filter(worker_id == wid),
sprintf("2D Embedding — %s", short)
)
print(p)
cat("\n")
}
p_list_2d <- map(workers, function(wid) {
short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
ggplot(emb_2d %>% filter(worker_id == wid),
aes(x = dim_0, y = dim_1, label = item)) +
geom_text(size = 2, check_overlap = TRUE) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, size = 10, face = "bold")) +
labs(title = short)
})
plot_grid(plotlist = p_list_2d, ncol = 2)
We run a joint PCA over the 3D embeddings (one row per item per participant) to find dimensions that capture the most shared variance, then plot item loadings on PC1 × PC2 and PC1 × PC3.
# One row per item, average coordinates across participants for the biplot
emb_3d_avg <- emb_3d %>%
group_by(item) %>%
summarise(
dim_0 = mean(dim_0),
dim_1 = mean(dim_1),
dim_2 = mean(dim_2),
.groups = "drop"
)
mat_3d <- emb_3d_avg %>% select(dim_0, dim_1, dim_2) %>% as.matrix()
rownames(mat_3d) <- emb_3d_avg$item
pca_res <- prcomp(mat_3d, scale. = TRUE)
pca_df <- as_tibble(pca_res$x, rownames = "item") %>%
mutate(img_path = item_to_img(item))
var_exp <- summary(pca_res)$importance[2, ] * 100 # % variance
# PC1 × PC2
p_pc12 <- ggplot(pca_df, aes(x = PC1, y = PC2)) +
geom_image(aes(image = img_path), size = 0.07) +
geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")) +
labs(
title = "3D Embedding — PC1 × PC2 (averaged across participants)",
x = sprintf("PC1 (%.1f%%)", var_exp[1]),
y = sprintf("PC2 (%.1f%%)", var_exp[2])
)
# PC1 × PC3
p_pc13 <- ggplot(pca_df, aes(x = PC1, y = PC3)) +
geom_image(aes(image = img_path), size = 0.07) +
geom_text_repel(aes(label = item), size = 2.2, max.overlaps = 20) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")) +
labs(
title = "3D Embedding — PC1 × PC3 (averaged across participants)",
x = sprintf("PC1 (%.1f%%)", var_exp[1]),
y = sprintf("PC3 (%.1f%%)", var_exp[3])
)
plot_grid(p_pc12, p_pc13, ncol = 1)
for (wid in workers) {
short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
cat(sprintf("\n### %s\n\n", short))
mat_w <- emb_3d %>%
filter(worker_id == wid) %>%
arrange(item) %>%
select(dim_0, dim_1, dim_2) %>%
as.matrix()
rownames(mat_w) <- emb_3d %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)
if (nrow(mat_w) < 3) { cat("Not enough items.\n\n"); next }
pca_w <- prcomp(mat_w, scale. = TRUE)
pca_wdf <- as_tibble(pca_w$x, rownames = "item") %>%
mutate(img_path = item_to_img(item))
ve_w <- summary(pca_w)$importance[2, ] * 100
pw12 <- ggplot(pca_wdf, aes(x = PC1, y = PC2)) +
geom_image(aes(image = img_path), size = 0.08) +
geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
labs(title = sprintf("%s — PC1×PC2", short),
x = sprintf("PC1 (%.1f%%)", ve_w[1]),
y = sprintf("PC2 (%.1f%%)", ve_w[2]))
pw13 <- ggplot(pca_wdf, aes(x = PC1, y = PC3)) +
geom_image(aes(image = img_path), size = 0.08) +
geom_text_repel(aes(label = item), size = 2, max.overlaps = 15) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank()) +
labs(title = sprintf("%s — PC1×PC3", short),
x = sprintf("PC1 (%.1f%%)", ve_w[1]),
y = sprintf("PC3 (%.1f%%)", ve_w[3]))
print(plot_grid(pw12, pw13, ncol = 2))
cat("\n")
}
pca_3d_df <- as_tibble(pca_res$x, rownames = "item")
plot_ly(
data = pca_3d_df,
x = ~PC1, y = ~PC2, z = ~PC3,
text = ~item,
type = "scatter3d",
mode = "markers+text",
marker = list(size = 4, opacity = 0.8),
textposition = "top center",
textfont = list(size = 9)
) %>%
layout(
title = "Mean 3D Embedding (PCA-projected)",
scene = list(
xaxis = list(title = sprintf("PC1 (%.1f%%)", var_exp[1])),
yaxis = list(title = sprintf("PC2 (%.1f%%)", var_exp[2])),
zaxis = list(title = sprintf("PC3 (%.1f%%)", var_exp[3]))
)
)
We Procrustes-align every participant’s 2D embedding to a common reference (the first participant in the list) using shared items, then cluster participants by their pairwise SSE.
# Build named list of item-sorted matrices (shared items only)
all_items_shared <- emb_2d %>%
count(item, worker_id) %>%
group_by(item) %>%
filter(n() == n_workers) %>%
pull(item) %>%
unique() %>%
sort()
cat(sprintf("Shared items across all %d participants: %d\n",
n_workers, length(all_items_shared)))
## Shared items across all 4 participants: 45
get_matrix_2d <- function(wid) {
m <- emb_2d %>%
filter(worker_id == wid, item %in% all_items_shared) %>%
arrange(item) %>%
select(dim_0, dim_1) %>%
as.matrix()
rownames(m) <- emb_2d %>%
filter(worker_id == wid, item %in% all_items_shared) %>%
arrange(item) %>%
pull(item)
m
}
emb_matrices <- setNames(map(workers, get_matrix_2d), workers)
short_labels <- str_trunc(workers, 10, side = "left", ellipsis = "…")
sse_mat <- matrix(NA, n_workers, n_workers,
dimnames = list(short_labels, short_labels))
for (i in seq_len(n_workers)) {
for (j in seq_len(n_workers)) {
if (i == j) {
sse_mat[i, j] <- 0
} else {
pro <- procrustes(emb_matrices[[workers[i]]],
emb_matrices[[workers[j]]])
sse_mat[i, j] <- pro$ss
}
}
}
sse_df <- as.data.frame(sse_mat) %>%
rownames_to_column("Part_A") %>%
pivot_longer(-Part_A, names_to = "Part_B", values_to = "SSE")
sse_df$Part_A <- factor(sse_df$Part_A, levels = short_labels)
sse_df$Part_B <- factor(sse_df$Part_B, levels = short_labels)
ggplot(sse_df, aes(x = Part_A, y = Part_B, fill = SSE)) +
geom_tile(color = "white") +
geom_text(aes(label = round(SSE, 3)), size = 3.5) +
scale_fill_distiller(palette = "YlOrRd", direction = 1) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
labs(title = "Pairwise Procrustes SSE (2D embeddings)", x = "", y = "")
# Use mean SSE to each other participant as distance
sses_sym <- (sse_mat + t(sse_mat)) / 2
diag(sses_sym) <- 0
hc_parts <- hclust(as.dist(sses_sym), method = "ward.D2")
dend <- as.dendrogram(hc_parts)
dend <- color_branches(dend, k = min(3, n_workers))
par(mar = c(5, 4, 2, 1))
plot(dend,
main = "Participant Clustering by Procrustes SSE (ward.D2)",
ylab = "SSE distance",
xlab = "")
n_clusters <- min(3, n_workers)
cluster_assign <- cutree(hc_parts, k = n_clusters)
tibble(
participant = names(cluster_assign),
cluster = cluster_assign
) %>%
kable(caption = sprintf("Participant cluster assignments (k=%d)", n_clusters)) %>%
kable_styling(bootstrap_options = "striped", full_width = FALSE)
| participant | cluster |
|---|---|
| netaniel | 1 |
| …221d96495 | 2 |
| …b6e84c69e | 3 |
| …31424daca | 1 |
All participants aligned to the first participant’s space.
ref_wid <- workers[1]
ref_mat <- emb_matrices[[ref_wid]]
aligned_list <- map(workers, function(wid) {
mat <- emb_matrices[[wid]]
if (wid == ref_wid) {
coords <- mat
} else {
pro <- procrustes(ref_mat, mat)
coords <- pro$Yrot
}
tibble(
item = rownames(ref_mat),
dim_0 = coords[, 1],
dim_1 = coords[, 2],
worker_id = wid,
participant = str_trunc(wid, 10, side = "left", ellipsis = "…"),
img_path = item_to_img(rownames(ref_mat))
)
})
aligned_df <- bind_rows(aligned_list)
pal_parts <- scales::hue_pal()(n_workers)
names(pal_parts) <- str_trunc(workers, 10, side = "left", ellipsis = "…")
ggplot(aligned_df, aes(x = dim_0, y = dim_1, color = participant)) +
geom_point(size = 2, alpha = 0.7) +
geom_text_repel(aes(label = item), size = 2, max.overlaps = 10) +
scale_color_manual(values = pal_parts) +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
legend.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")
) +
labs(title = sprintf("Procrustes-Aligned 2D Embeddings (reference: %s)",
str_trunc(ref_wid, 10, side = "left", ellipsis = "…")))
for (wid in workers) {
short <- str_trunc(wid, 10, side = "left", ellipsis = "…")
cat(sprintf("\n### %s\n\n", short))
df <- aligned_df %>% filter(worker_id == wid, file.exists(img_path))
p <- ggplot(df, aes(x = dim_0, y = dim_1)) +
geom_image(aes(image = img_path), size = 0.07) +
theme_minimal() +
theme(axis.text = element_blank(), axis.ticks = element_blank(),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")) +
labs(title = sprintf("Aligned 2D Embedding — %s", short))
print(p)
cat("\n")
}
For each pair of participants we compute the displacement of each item after Procrustes alignment, summarise mean displacement per item and per pair, and show arrow plots.
compute_displacements <- function(wid_a, wid_b) {
mat_a <- emb_matrices[[wid_a]]
mat_b <- emb_matrices[[wid_b]]
pro <- procrustes(mat_a, mat_b)
aln_b <- pro$Yrot
tibble(
item = rownames(mat_a),
x_a = mat_a[, 1],
y_a = mat_a[, 2],
x_b = aln_b[, 1],
y_b = aln_b[, 2],
dx = aln_b[, 1] - mat_a[, 1],
dy = aln_b[, 2] - mat_a[, 2],
magnitude = sqrt(dx^2 + dy^2),
part_a = str_trunc(wid_a, 10, side = "left", ellipsis = "…"),
part_b = str_trunc(wid_b, 10, side = "left", ellipsis = "…"),
comparison = paste(
str_trunc(wid_a, 6, side = "left", ellipsis = "…"),
"vs",
str_trunc(wid_b, 6, side = "left", ellipsis = "…")
)
)
}
# All pairwise comparisons
pairs_idx <- combn(seq_len(n_workers), 2)
all_displacements <- map_dfr(seq_len(ncol(pairs_idx)), function(k) {
compute_displacements(workers[pairs_idx[1, k]], workers[pairs_idx[2, k]])
})
disp_summary <- all_displacements %>%
group_by(comparison) %>%
summarise(
n_items = n(),
total_dist = sum(magnitude),
mean_dist = mean(magnitude),
sd_dist = sd(magnitude),
se_dist = sd_dist / sqrt(n_items),
.groups = "drop"
)
disp_summary %>%
mutate(across(c(total_dist, mean_dist, sd_dist, se_dist), ~ round(.x, 4))) %>%
kable(caption = "Displacement summary per participant pair") %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| comparison | n_items | total_dist | mean_dist | sd_dist | se_dist |
|---|---|---|---|---|---|
| …4c69e vs …4daca | 45 | 42.6832 | 0.9485 | 0.5582 | 0.0832 |
| …96495 vs …4c69e | 45 | 51.8899 | 1.1531 | 0.5623 | 0.0838 |
| …96495 vs …4daca | 45 | 49.2560 | 1.0946 | 0.5077 | 0.0757 |
| …aniel vs …4c69e | 45 | 41.7480 | 0.9277 | 0.5390 | 0.0803 |
| …aniel vs …4daca | 45 | 38.6923 | 0.8598 | 0.5115 | 0.0763 |
| …aniel vs …96495 | 45 | 49.3868 | 1.0975 | 0.6012 | 0.0896 |
comparisons <- unique(all_displacements$comparison)
for (comp in comparisons) {
cat(sprintf("\n### %s\n\n", comp))
d <- all_displacements %>% filter(comparison == comp)
p <- ggplot(d) +
geom_segment(
aes(x = x_a, y = y_a, xend = x_b, yend = y_b, color = magnitude),
arrow = arrow(length = unit(0.1, "inches")),
linewidth = 0.6, alpha = 0.85
) +
geom_point(aes(x = x_a, y = y_a), size = 0.8, alpha = 0.5) +
geom_text_repel(
aes(x = x_a, y = y_a, label = item),
size = 2.2, max.overlaps = 20, color = "grey30"
) +
scale_color_viridis_c(option = "plasma") +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")
) +
labs(
title = sprintf("Item Displacement: %s", comp),
color = "Displacement"
)
print(p)
cat("\n")
}
# Average displacement across all pairwise comparisons per item
item_disp <- all_displacements %>%
group_by(item) %>%
summarise(
mean_displacement = mean(magnitude),
sd_displacement = sd(magnitude),
.groups = "drop"
) %>%
arrange(desc(mean_displacement))
# Bar chart — top 20
item_disp %>%
slice_head(n = 20) %>%
mutate(item = fct_reorder(item, mean_displacement)) %>%
ggplot(aes(x = item, y = mean_displacement,
ymin = mean_displacement - sd_displacement,
ymax = mean_displacement + sd_displacement)) +
geom_col(fill = "steelblue", alpha = 0.8) +
geom_errorbar(width = 0.3) +
coord_flip() +
theme_minimal() +
labs(
title = "Top 20 Most-Displaced Items (mean across all participant pairs)",
x = NULL,
y = "Mean Displacement (± SD)"
)
top_items <- item_disp %>% slice_head(n = 20) %>% pull(item)
# Show the mean position across participants in the reference space (first participant)
mean_pos <- aligned_df %>%
filter(item %in% top_items) %>%
group_by(item) %>%
summarise(
x = mean(dim_0),
y = mean(dim_1),
img_path = first(img_path),
.groups = "drop"
) %>%
left_join(item_disp, by = "item")
ggplot(mean_pos, aes(x = x, y = y)) +
geom_image(aes(image = img_path), size = 0.07) +
geom_point(aes(size = mean_displacement, color = mean_displacement),
alpha = 0.4) +
scale_size_continuous(range = c(3, 12)) +
scale_color_viridis_c(option = "plasma") +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(),
plot.title = element_text(hjust = 0.5, face = "bold")
) +
labs(
title = "Most-Displaced Items in the Aligned Space\n(larger circle = more variable across participants)",
size = "Mean displacement",
color = "Mean displacement"
)
Pearson and Spearman correlations between pairwise item-distance matrices across participants (computed in aligned space).
build_dist_matrix <- function(wid) {
m <- aligned_df %>%
filter(worker_id == wid) %>%
arrange(item) %>%
select(dim_0, dim_1) %>%
as.matrix()
rownames(m) <- aligned_df %>% filter(worker_id == wid) %>% arrange(item) %>% pull(item)
as.matrix(dist(m))
}
dist_mats <- setNames(map(workers, build_dist_matrix), workers)
upper_tri <- function(m) m[upper.tri(m)]
part_pairs <- combn(workers, 2, simplify = FALSE)
dist_cors <- map_dfr(part_pairs, function(pair) {
v1 <- upper_tri(dist_mats[[pair[1]]])
v2 <- upper_tri(dist_mats[[pair[2]]])
tibble(
Part_A = str_trunc(pair[1], 10, side = "left", ellipsis = "…"),
Part_B = str_trunc(pair[2], 10, side = "left", ellipsis = "…"),
r = cor(v1, v2, method = "pearson"),
rho = cor(v1, v2, method = "spearman")
)
})
dist_cors %>%
mutate(across(c(r, rho), ~ round(.x, 3))) %>%
kable(
caption = "Pearson and Spearman correlations between pairwise distance matrices (aligned space)",
col.names = c("Participant A", "Participant B", "Pearson r", "Spearman ρ")
) %>%
kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
| Participant A | Participant B | Pearson r | Spearman ρ |
|---|---|---|---|
| netaniel | …221d96495 | 0.202 | 0.195 |
| netaniel | …b6e84c69e | 0.338 | 0.308 |
| netaniel | …31424daca | 0.496 | 0.492 |
| …221d96495 | …b6e84c69e | 0.098 | 0.101 |
| …221d96495 | …31424daca | 0.296 | 0.264 |
| …b6e84c69e | …31424daca | 0.291 | 0.310 |
short_labs <- str_trunc(workers, 10, side = "left", ellipsis = "…")
cor_mat <- matrix(1, n_workers, n_workers,
dimnames = list(short_labs, short_labs))
for (i in seq_len(nrow(dist_cors))) {
cor_mat[dist_cors$Part_A[i], dist_cors$Part_B[i]] <- dist_cors$r[i]
cor_mat[dist_cors$Part_B[i], dist_cors$Part_A[i]] <- dist_cors$r[i]
}
cor_df <- as.data.frame(cor_mat) %>%
rownames_to_column("Part_A") %>%
pivot_longer(-Part_A, names_to = "Part_B", values_to = "r")
cor_df$Part_A <- factor(cor_df$Part_A, levels = short_labs)
cor_df$Part_B <- factor(cor_df$Part_B, levels = short_labs)
ggplot(cor_df, aes(x = Part_A, y = Part_B, fill = r)) +
geom_tile(color = "white") +
geom_text(aes(label = round(r, 3)), size = 4) +
scale_fill_distiller(palette = "RdYlBu", direction = 1, limits = c(0, 1)) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
labs(title = "Distance-Matrix Similarity (Pearson r)", x = "", y = "", fill = "r")