# H1 and H2 use directional asymmetry metrics as primary, but cosine baselines are kept for systematic comparison.
h1_human <- pair_behavior %>% filter(hypothesis == "H1_asymmetry_ipsative")
h2_human <- pair_behavior %>% filter(hypothesis == "H2_salience")
h1_all <- join_metrics_to_behavior(h1_human)
h2_all <- join_metrics_to_behavior(h2_human)
rank_group_cols <- c(
"metric_id", "metric_source", "metric_family", "metric_name", "metric_short", "variant", "variant_label", "cluster_method",
"global_k_method", "local_k_method", "basis_object", "object", "state_vector_source", "state_type", "metric_direction"
)
h1_rank_all <- cor_by_group(h1_all, rank_group_cols) %>% mutate(analysis_block = "H1 ipsative asymmetry")
h1_rank_directional <- h1_rank_all %>% filter(is_asymmetry_metric(metric_name))
h1_rank_baselines <- h1_rank_all %>% filter(metric_source == "classical_baselines")
h2_rank_all <- cor_by_group(h2_all, rank_group_cols) %>% mutate(analysis_block = "H2 salience")
h2_rank_directional <- h2_rank_all %>% filter(is_asymmetry_metric(metric_name))
h2_rank_baselines <- h2_rank_all %>% filter(metric_source == "classical_baselines")
# H2b: single-concept Likert salience.
# This is concept-level rather than pairwise. Therefore, the relevant predictors are not pairwise cosines,
# but properties of each concept representation: frequency, global dimensionality, and local richness.
salience_metadata_cols <- c(
"ID", "source_file", "source_row", "raw_ID", "participant_id",
"attempt_index_within_source", "n_rows_for_participant_in_source"
)
salience_likert_long <- if (nrow(saliency_2_raw) > 0) {
saliency_2_raw %>%
select(-any_of(salience_metadata_cols), -ends_with("_rt")) %>%
pivot_longer(everything(), names_to = "target_raw", values_to = "salience_likert") %>%
mutate(
target_id = normalize_salience_target(target_raw),
salience_likert = as.numeric(salience_likert)
) %>%
filter(!is.na(salience_likert), salience_likert >= 1, salience_likert <= 9)
} else {
tibble(target_raw = character(), target_id = character(), salience_likert = numeric())
}
salience_likert_summary <- salience_likert_long %>%
group_by(target_id) %>%
summarise(
mean_salience_likert = mean(salience_likert, na.rm = TRUE),
sd_salience_likert = sd(salience_likert, na.rm = TRUE),
n_salience_likert = n(),
se_salience_likert = sd_salience_likert / sqrt(n_salience_likert),
.groups = "drop"
)
local_dimensionality_summary <- if (nrow(local_dimensionality_raw) > 0) {
local_dimensionality_raw %>%
mutate(
is_estimable_local = !is.na(rank_effective) & rank_effective >= 1,
across(any_of(c("gavish_donoho", "horn_wishart", "permutation_pa", "profile_likelihood", "var80", "var90", "var95")), as.numeric)
) %>%
group_by(target_id, cluster_method) %>%
summarise(
n_local_candidates = n(),
n_estimable_local_clusters = sum(is_estimable_local, na.rm = TRUE),
mean_local_gavish_donoho = mean(gavish_donoho[is_estimable_local], na.rm = TRUE),
mean_local_horn_wishart = mean(horn_wishart[is_estimable_local], na.rm = TRUE),
mean_local_permutation_pa = mean(permutation_pa[is_estimable_local], na.rm = TRUE),
sum_local_gavish_donoho = sum(gavish_donoho[is_estimable_local], na.rm = TRUE),
sum_local_horn_wishart = sum(horn_wishart[is_estimable_local], na.rm = TRUE),
sum_local_permutation_pa = sum(permutation_pa[is_estimable_local], na.rm = TRUE),
.groups = "drop"
) %>%
pivot_wider(
names_from = cluster_method,
values_from = c(
n_local_candidates, n_estimable_local_clusters,
mean_local_gavish_donoho, mean_local_horn_wishart, mean_local_permutation_pa,
sum_local_gavish_donoho, sum_local_horn_wishart, sum_local_permutation_pa
),
names_sep = "__"
)
} else {
tibble(target_id = character())
}
salience_likert_dimensionality <- salience_likert_summary %>%
inner_join(global_dimensionality_raw, by = "target_id") %>%
left_join(local_dimensionality_summary, by = "target_id") %>%
mutate(
log10_n_occurrences = log10(n_occurrences),
salience_z = z_score(mean_salience_likert),
log10_n_occurrences_z = z_score(log10_n_occurrences)
)
salience_predictor_cols <- c(
"log10_n_occurrences", "n_occurrences", "rank_effective", "broken_stick", "gavish_donoho",
"horn_wishart", "permutation_pa", "marchenko_pastur", "profile_likelihood",
"individual_var_ge_0.005", "var80", "var90", "var95", "var99",
"n_estimable_local_clusters__bayesian_gmm", "n_estimable_local_clusters__gmm_bic",
"mean_local_gavish_donoho__bayesian_gmm", "mean_local_gavish_donoho__gmm_bic",
"sum_local_gavish_donoho__bayesian_gmm", "sum_local_gavish_donoho__gmm_bic",
"mean_local_horn_wishart__bayesian_gmm", "mean_local_horn_wishart__gmm_bic",
"sum_local_horn_wishart__bayesian_gmm", "sum_local_horn_wishart__gmm_bic"
)
salience_predictor_labels <- c(
log10_n_occurrences = "Log frequency",
n_occurrences = "Raw frequency",
rank_effective = "Effective rank",
broken_stick = "Broken-stick dimensionality",
gavish_donoho = "Gavish-Donoho dimensionality",
horn_wishart = "Horn/Wishart dimensionality",
permutation_pa = "Permutation PA dimensionality",
marchenko_pastur = "Marchenko-Pastur dimensionality",
profile_likelihood = "Profile-likelihood dimensionality",
individual_var_ge_0.005 = "Individual variance ≥ .005",
var80 = "Dimensions for 80% variance",
var90 = "Dimensions for 90% variance",
var95 = "Dimensions for 95% variance",
var99 = "Dimensions for 99% variance",
`n_estimable_local_clusters__bayesian_gmm` = "Estimable local clusters, Bayesian GMM",
`n_estimable_local_clusters__gmm_bic` = "Estimable local clusters, GMM-BIC",
`mean_local_gavish_donoho__bayesian_gmm` = "Mean local Gavish, Bayesian GMM",
`mean_local_gavish_donoho__gmm_bic` = "Mean local Gavish, GMM-BIC",
`sum_local_gavish_donoho__bayesian_gmm` = "Total local Gavish, Bayesian GMM",
`sum_local_gavish_donoho__gmm_bic` = "Total local Gavish, GMM-BIC",
`mean_local_horn_wishart__bayesian_gmm` = "Mean local Horn, Bayesian GMM",
`mean_local_horn_wishart__gmm_bic` = "Mean local Horn, GMM-BIC",
`sum_local_horn_wishart__bayesian_gmm` = "Total local Horn, Bayesian GMM",
`sum_local_horn_wishart__gmm_bic` = "Total local Horn, GMM-BIC"
)
h2_likert_predictor_long <- salience_likert_dimensionality %>%
pivot_longer(
cols = any_of(salience_predictor_cols),
names_to = "metric_name",
values_to = "metric_value"
) %>%
mutate(
metric_value = as.numeric(metric_value),
human_value = mean_salience_likert,
metric_source = "concept_level_dimensionality",
metric_family = case_when(
metric_name %in% c("log10_n_occurrences", "n_occurrences") ~ "Corpus frequency baseline",
str_detect(metric_name, "local|clusters") ~ "Local representational richness",
TRUE ~ "Global representational dimensionality"
),
metric_short = recode(metric_name, !!!salience_predictor_labels, .default = metric_name),
variant = NA_character_,
variant_label = NA_character_,
cluster_method = NA_character_,
global_k_method = NA_character_,
local_k_method = NA_character_,
basis_object = NA_character_,
object = "concept-level representation",
state_vector_source = NA_character_,
state_type = NA_character_,
metric_direction = "concept_level_predictor",
metric_id = paste(metric_source, metric_name, sep = " | ")
) %>%
filter(!is.na(metric_value), is.finite(metric_value), !is.na(human_value))
h2_likert_rank <- cor_by_group(h2_likert_predictor_long, rank_group_cols) %>%
mutate(analysis_block = "H2b Likert salience")
run_frequency_adjusted_salience_model <- function(predictor_name) {
d <- salience_likert_dimensionality %>%
select(target_id, mean_salience_likert, log10_n_occurrences, all_of(predictor_name)) %>%
rename(predictor = all_of(predictor_name)) %>%
mutate(
predictor = as.numeric(predictor),
salience_z = z_score(mean_salience_likert),
predictor_z = z_score(predictor),
log_frequency_z = z_score(log10_n_occurrences)
) %>%
filter(if_all(all_of(c("salience_z", "predictor_z", "log_frequency_z")), ~ !is.na(.x) & is.finite(.x)))
if (nrow(d) < 6 || n_distinct(d$predictor_z) < 2) {
return(tibble(
metric_name = predictor_name,
n = nrow(d),
beta_predictor = NA_real_,
p_predictor = NA_real_,
beta_log_frequency = NA_real_,
p_log_frequency = NA_real_,
r_squared = NA_real_
))
}
fit <- lm(salience_z ~ predictor_z + log_frequency_z, data = d)
td <- broom::tidy(fit)
gl <- broom::glance(fit)
tibble(
metric_name = predictor_name,
n = nrow(d),
beta_predictor = td$estimate[td$term == "predictor_z"],
p_predictor = td$p.value[td$term == "predictor_z"],
beta_log_frequency = td$estimate[td$term == "log_frequency_z"],
p_log_frequency = td$p.value[td$term == "log_frequency_z"],
r_squared = gl$r.squared
)
}
h2_likert_frequency_adjusted <- map_dfr(
setdiff(intersect(salience_predictor_cols, names(salience_likert_dimensionality)), c("log10_n_occurrences", "n_occurrences")),
run_frequency_adjusted_salience_model
) %>%
mutate(
metric_short = recode(metric_name, !!!salience_predictor_labels, .default = metric_name),
p_fdr_predictor = p.adjust(p_predictor, method = "BH"),
abs_beta_predictor = abs(beta_predictor)
) %>%
arrange(p_predictor, desc(abs_beta_predictor))
# H2c: planned salience/similarity condition structure from the thesis annex.
# The annex condition table is entered explicitly because the exported response files do not carry
# this full design metadata. The order here is the theoretical/planned order, not necessarily the
# actual on-screen presentation order in every task file.
annex_salience_conditions <- tibble::tribble(
~planned_concept_1, ~planned_concept_2, ~planned_concept_1_es, ~planned_concept_2_es, ~planned_salience_1, ~planned_salience_2, ~planned_similarity, ~annex_condition, ~condition_source,
"ghana", "luxemburgo", "Ghana", "Luxemburgo", "low", "low", "low", "low_low__low_similarity", "thesis_annex",
"islas_feroe", "singapur", "Islas Feroe", "Singapur", "low", "low", "low", "low_low__low_similarity", "thesis_annex",
"burkina_faso", "guayana_francesa", "Burkina Faso", "Guayana Francesa", "low", "low", "low", "low_low__low_similarity", "thesis_annex",
"mongolia", "bulgaria", "Mongolia", "Bulgaria", "low", "low", "low", "low_low__low_similarity", "thesis_annex",
"oman", "belice", "Omán", "Belice", "low", "low", "low", "low_low__low_similarity", "thesis_annex",
"eslovenia", "eslovaquia", "Eslovenia", "Eslovaquia", "low", "low", "high", "low_low__high_similarity", "thesis_annex",
"zambia", "zimbabue", "Zambia", "Zimbabue", "low", "low", "high", "low_low__high_similarity", "thesis_annex",
"laos", "camboya", "Laos", "Camboya", "low", "low", "high", "low_low__high_similarity", "thesis_annex",
"oman", "yemen", "Omán", "Yemen", "low", "low", "high", "low_low__high_similarity", "thesis_annex",
"turkmenistan", "uzbekistan", "Turkmenistán", "Uzbekistán", "low", "low", "high", "low_low__high_similarity", "thesis_annex",
"francia", "bolivia", "Francia", "Bolivia", "high", "low", "low", "high_low__low_similarity", "thesis_annex",
"estados_unidos", "belice", "Estados Unidos", "Belice", "high", "low", "low", "high_low__low_similarity", "thesis_annex",
"china", "luxemburgo", "China", "Luxemburgo", "high", "low", "low", "high_low__low_similarity", "thesis_annex",
"mexico", "laos", "México", "Laos", "high", "low", "low", "high_low__low_similarity", "thesis_annex",
"rusia", "burkina_faso", "Rusia", "Burkina Faso", "high", "low", "low", "high_low__low_similarity", "thesis_annex",
"francia", "belgica", "Francia", "Bélgica", "high", "low", "high", "high_low__high_similarity", "thesis_annex",
"espana", "andorra", "España", "Andorra", "high", "low", "high", "high_low__high_similarity", "thesis_annex",
"china", "singapur", "China", "Singapur", "high", "low", "high", "high_low__high_similarity", "thesis_annex",
"rusia", "bielorrusia", "Rusia", "Bielorrusia", "high", "low", "high", "high_low__high_similarity", "thesis_annex",
"argentina", "paraguay", "Argentina", "Paraguay", "high", "low", "high", "high_low__high_similarity", "thesis_annex",
"francia", "brasil", "Francia", "Brasil", "high", "high", "low", "high_high__low_similarity", "thesis_annex",
"alemania", "canada", "Alemania", "Canadá", "high", "high", "low", "high_high__low_similarity", "thesis_annex",
"italia", "japon", "Italia", "Japón", "high", "high", "low", "high_high__low_similarity", "thesis_annex",
"marruecos", "argentina", "Marruecos", "Argentina", "high", "high", "low", "high_high__low_similarity", "thesis_annex",
"india", "mexico", "India", "México", "high", "high", "low", "high_high__low_similarity", "thesis_annex",
"italia", "espana", "Italia", "España", "high", "high", "high", "high_high__high_similarity", "thesis_annex",
"reino_unido", "estados_unidos", "Reino Unido", "Estados Unidos", "high", "high", "high", "high_high__high_similarity", "thesis_annex",
"suecia", "noruega", "Suecia", "Noruega", "high", "high", "high", "high_high__high_similarity", "thesis_annex",
"brasil", "portugal", "Brasil", "Portugal", "high", "high", "high", "high_high__high_similarity", "thesis_annex",
"mexico", "colombia", "México", "Colombia", "high", "high", "high", "high_high__high_similarity", "thesis_annex_not_observed_in_current_human_pair_file",
"ecuador", "peru", "Ecuador", "Perú", "high", "high", "high", "high_high__high_similarity", "observed_human_file_replacement_or_extra"
) %>%
mutate(
pair_id = canonical_pair(planned_concept_1, planned_concept_2),
planned_salience_pair = paste(planned_salience_1, planned_salience_2, sep = "_"),
planned_similarity = factor(planned_similarity, levels = c("low", "high")),
planned_salience_pair = factor(planned_salience_pair, levels = c("low_low", "high_low", "high_high")),
annex_condition = factor(
annex_condition,
levels = c(
"low_low__low_similarity", "low_low__high_similarity",
"high_low__low_similarity", "high_low__high_similarity",
"high_high__low_similarity", "high_high__high_similarity"
)
)
)
annex_condition_audit <- annex_salience_conditions %>%
left_join(
pair_behavior %>% distinct(pair_id) %>% mutate(observed_in_human_pair_design = TRUE),
by = "pair_id"
) %>%
mutate(observed_in_human_pair_design = coalesce(observed_in_human_pair_design, FALSE))
pair_behavior_with_conditions <- pair_behavior %>%
left_join(annex_salience_conditions, by = "pair_id") %>%
mutate(
human_first_planned_salience = case_when(
human_first == planned_concept_1 ~ planned_salience_1,
human_first == planned_concept_2 ~ planned_salience_2,
TRUE ~ NA_character_
),
human_second_planned_salience = case_when(
human_second == planned_concept_1 ~ planned_salience_1,
human_second == planned_concept_2 ~ planned_salience_2,
TRUE ~ NA_character_
),
presentation_salience_pattern = if_else(
!is.na(human_first_planned_salience) & !is.na(human_second_planned_salience),
paste(human_first_planned_salience, human_second_planned_salience, sep = "_"),
NA_character_
),
presentation_salience_pattern = factor(
presentation_salience_pattern,
levels = c("low_low", "low_high", "high_low", "high_high")
),
salience_contrast_type = case_when(
presentation_salience_pattern %in% c("high_low", "low_high") ~ "mixed_salience",
presentation_salience_pattern == "high_high" ~ "both_high",
presentation_salience_pattern == "low_low" ~ "both_low",
TRUE ~ NA_character_
),
prop_choose_high_salience = case_when(
hypothesis == "H2_salience" & presentation_salience_pattern == "high_low" ~ human_value,
hypothesis == "H2_salience" & presentation_salience_pattern == "low_high" ~ 1 - human_value,
TRUE ~ NA_real_
),
distance_from_chance = abs(human_value - .5)
)
annex_concept_salience_labels <- annex_salience_conditions %>%
select(pair_id, planned_concept_1, planned_concept_2, planned_salience_1, planned_salience_2) %>%
pivot_longer(
cols = c(planned_concept_1, planned_concept_2),
names_to = "concept_position",
values_to = "target_id"
) %>%
mutate(
planned_salience_label = if_else(concept_position == "planned_concept_1", planned_salience_1, planned_salience_2)
) %>%
distinct(target_id, planned_salience_label) %>%
group_by(target_id) %>%
summarise(
n_high_labels = sum(planned_salience_label == "high"),
n_low_labels = sum(planned_salience_label == "low"),
planned_salience_label = case_when(
n_high_labels > 0 & n_low_labels == 0 ~ "high",
n_low_labels > 0 & n_high_labels == 0 ~ "low",
n_high_labels > 0 & n_low_labels > 0 ~ "mixed_or_inconsistent",
TRUE ~ NA_character_
),
.groups = "drop"
)
group_difference <- function(data, value_col, group_col, high_label = "high", low_label = "low") {
d <- data %>%
select(value = all_of(value_col), group = all_of(group_col)) %>%
filter(!is.na(value), is.finite(value), group %in% c(high_label, low_label)) %>%
mutate(group = as.character(group))
if (nrow(d) < 4 || n_distinct(d$group) < 2) {
return(tibble(
outcome = value_col,
n_high = sum(d$group == high_label),
n_low = sum(d$group == low_label),
mean_high = mean(d$value[d$group == high_label], na.rm = TRUE),
mean_low = mean(d$value[d$group == low_label], na.rm = TRUE),
difference_high_minus_low = NA_real_,
p.value = NA_real_
))
}
tt <- suppressWarnings(t.test(value ~ group, data = d))
tibble(
outcome = value_col,
n_high = sum(d$group == high_label),
n_low = sum(d$group == low_label),
mean_high = mean(d$value[d$group == high_label], na.rm = TRUE),
mean_low = mean(d$value[d$group == low_label], na.rm = TRUE),
difference_high_minus_low = mean_high - mean_low,
p.value = tt$p.value
)
}
h2c_human_condition_summary <- pair_behavior_with_conditions %>%
filter(hypothesis %in% c("H1_asymmetry_ipsative", "H2_salience", "H3_likert_order_asymmetry_H4_triangle_network")) %>%
filter(!is.na(annex_condition)) %>%
group_by(hypothesis, annex_condition, presentation_salience_pattern, planned_similarity) %>%
summarise(
n_items = n_distinct(pair_id),
mean_human_value = mean(human_value, na.rm = TRUE),
mean_abs_deviation_from_chance = mean(distance_from_chance, na.rm = TRUE),
mean_prop_choose_high_salience = mean(prop_choose_high_salience, na.rm = TRUE),
.groups = "drop"
)
h2c_likert_condition_dimensionality <- salience_likert_dimensionality %>%
inner_join(annex_concept_salience_labels, by = "target_id") %>%
filter(planned_salience_label %in% c("high", "low"))
h2c_concept_condition_summary <- h2c_likert_condition_dimensionality %>%
group_by(planned_salience_label) %>%
summarise(
n_concepts = n(),
mean_likert_salience = mean(mean_salience_likert, na.rm = TRUE),
mean_log10_frequency = mean(log10_n_occurrences, na.rm = TRUE),
mean_effective_rank = mean(rank_effective, na.rm = TRUE),
mean_horn_dimensionality = mean(horn_wishart, na.rm = TRUE),
mean_gavish_dimensionality = mean(gavish_donoho, na.rm = TRUE),
mean_permutation_dimensionality = mean(permutation_pa, na.rm = TRUE),
.groups = "drop"
)
h2c_concept_condition_tests <- map_dfr(
intersect(c("mean_salience_likert", "log10_n_occurrences", "rank_effective", "horn_wishart", "gavish_donoho", "permutation_pa", "profile_likelihood"), names(h2c_likert_condition_dimensionality)),
~ group_difference(h2c_likert_condition_dimensionality, .x, "planned_salience_label")
) %>%
mutate(
p_fdr = p.adjust(p.value, method = "BH"),
outcome_label = recode(outcome, !!!salience_predictor_labels, mean_salience_likert = "Mean Likert salience", log10_n_occurrences = "Log frequency", .default = outcome)
) %>%
arrange(p.value)
h2c_similarity_condition_human <- pair_behavior %>%
filter(hypothesis == "H3_likert_order_asymmetry_H4_triangle_network", response_type == "rating_1_9") %>%
group_by(pair_id) %>%
summarise(
human_value = mean(human_value, na.rm = TRUE),
human_sd = sd(human_value, na.rm = TRUE),
human_n = sum(human_n, na.rm = TRUE),
.groups = "drop"
) %>%
inner_join(annex_salience_conditions, by = "pair_id")
h2c_similarity_condition_summary <- h2c_similarity_condition_human %>%
group_by(planned_similarity, planned_salience_pair) %>%
summarise(
n_pairs = n_distinct(pair_id),
mean_human_similarity = mean(human_value, na.rm = TRUE),
sd_human_similarity = sd(human_value, na.rm = TRUE),
.groups = "drop"
)
h2c_similarity_condition_lm <- if (nrow(h2c_similarity_condition_human) >= 6) {
broom::tidy(lm(human_value ~ planned_similarity + planned_salience_pair, data = h2c_similarity_condition_human))
} else {
tibble(term = character(), estimate = numeric(), std.error = numeric(), statistic = numeric(), p.value = numeric())
}
h2c_pairwise_condition_metric_tests <- pairwise_metric_values %>%
inner_join(annex_salience_conditions %>% select(pair_id, planned_similarity, annex_condition, planned_salience_pair), by = "pair_id") %>%
filter(!is_asymmetry_metric(metric_name)) %>%
mutate(metric_value = similarity_orient_value(metric_name, value)) %>%
filter(!is.na(metric_value), is.finite(metric_value)) %>%
group_by(metric_id, metric_source, metric_family, metric_name, metric_short, variant, variant_label, basis_object, object, state_vector_source, state_type) %>%
group_modify(~ group_difference(.x, "metric_value", "planned_similarity")) %>%
ungroup() %>%
mutate(
p_fdr = p.adjust(p.value, method = "BH"),
abs_difference = abs(difference_high_minus_low)
) %>%
arrange(desc(abs_difference), p.value)
# H3a: symmetric pairwise similarity, order-averaged by pair.
h3_similarity_human <- pair_behavior %>%
filter(hypothesis == "H3_likert_order_asymmetry_H4_triangle_network", response_type == "rating_1_9") %>%
group_by(pair_id) %>%
summarise(
human_value = mean(human_value, na.rm = TRUE),
human_sd = sd(human_value, na.rm = TRUE),
human_n = sum(human_n, na.rm = TRUE),
human_first = first(human_first),
human_second = first(human_second),
hypothesis = "H3_rating_similarity_order_averaged",
response_type = "rating_1_9_order_averaged",
.groups = "drop"
)
h3_similarity_metrics <- pairwise_metric_values %>%
filter(!is_asymmetry_metric(metric_name)) %>%
distinct(metric_id, pair_id, .keep_all = TRUE) %>%
inner_join(h3_similarity_human, by = "pair_id") %>%
mutate(
metric_value = similarity_orient_value(metric_name, value),
metric_direction = if_else(is_distance_metric(metric_name), "distance_reversed_to_similarity", "similarity_or_overlap")
)
h3_rating_rank <- cor_by_group(h3_similarity_metrics, rank_group_cols) %>%
mutate(analysis_block = "H3 Likert similarity")
# H3b: human order asymmetry, compared only with directional asymmetry metrics.
h3_order_human <- pair_behavior %>%
filter(hypothesis == "H3_likert_order_asymmetry_H4_triangle_network", response_type == "rating_1_9") %>%
mutate(
order_label = case_when(
str_detect(coalesce(direction_version, ""), "direct|a_direct") ~ "direct",
str_detect(coalesce(direction_version, ""), "reverse|b_reverse") ~ "reverse",
TRUE ~ as.character(direction_version)
)
) %>%
filter(order_label %in% c("direct", "reverse")) %>%
group_by(pair_id) %>%
summarise(
direct_mean = mean(human_value[order_label == "direct"], na.rm = TRUE),
reverse_mean = mean(human_value[order_label == "reverse"], na.rm = TRUE),
human_first = first(human_first),
human_second = first(human_second),
n_orders = n_distinct(order_label),
.groups = "drop"
) %>%
filter(n_orders == 2, is.finite(direct_mean), is.finite(reverse_mean)) %>%
mutate(
human_order_delta = direct_mean - reverse_mean,
human_value = human_order_delta,
hypothesis = "H3_order_asymmetry"
)
h3_order_metrics <- join_metrics_to_behavior(h3_order_human, pairwise_metric_values) %>%
filter(is_asymmetry_metric(metric_name)) %>%
distinct(metric_id, pair_id, .keep_all = TRUE)
h3_order_rank <- cor_by_group(h3_order_metrics, rank_group_cols, y = "human_order_delta") %>%
mutate(analysis_block = "H3 order asymmetry")
h3_order_test <- broom::tidy(t.test(h3_order_human$human_order_delta, mu = 0)) %>%
transmute(
n = nrow(h3_order_human),
mean_delta = mean(h3_order_human$human_order_delta, na.rm = TRUE),
median_abs_delta = median(abs(h3_order_human$human_order_delta), na.rm = TRUE),
statistic,
df = parameter,
p.value
)
# H4: triplet residuals from order-averaged human ratings.
h3_pair_similarity <- h3_similarity_human %>%
left_join(
pair_behavior %>%
filter(hypothesis == "H3_likert_order_asymmetry_H4_triangle_network", response_type == "rating_1_9") %>%
group_by(pair_id) %>%
summarise(
scale_min = suppressWarnings(min(human_scale_min, na.rm = TRUE)),
scale_max = suppressWarnings(max(human_scale_max, na.rm = TRUE)),
.groups = "drop"
),
by = "pair_id"
) %>%
mutate(
scale_min = if_else(is.finite(scale_min), scale_min, 1),
scale_max = if_else(is.finite(scale_max), scale_max, 9),
human_similarity = (human_value - scale_min) / (scale_max - scale_min),
human_distance = 1 - human_similarity
)
human_triplets <- triplet_design %>%
left_join(h3_pair_similarity %>% select(pair_id, sim_endpoint = human_similarity, d_endpoint = human_distance),
by = c("endpoint_pair_id" = "pair_id")) %>%
left_join(h3_pair_similarity %>% select(pair_id, sim_leg_1 = human_similarity, d_leg_1 = human_distance),
by = c("leg_pair_1_id" = "pair_id")) %>%
left_join(h3_pair_similarity %>% select(pair_id, sim_leg_2 = human_similarity, d_leg_2 = human_distance),
by = c("leg_pair_2_id" = "pair_id")) %>%
mutate(
human_triangle_residual = d_endpoint - (d_leg_1 + d_leg_2),
human_triangle_violation = human_triangle_residual > 0,
human_mti_residual = sim_endpoint - (sim_leg_1 * sim_leg_2),
human_mti_lower_than_product = human_mti_residual < 0,
human_mti_higher_than_product = human_mti_residual > 0
)
human_triplet_summary <- human_triplets %>%
summarise(
n_contrasts = n(),
n_unordered_triplets = n_distinct(triplet_unordered_id),
additive_violations = sum(human_triangle_violation, na.rm = TRUE),
mti_lower_than_product = sum(human_mti_lower_than_product, na.rm = TRUE),
mean_triangle_residual = mean(human_triangle_residual, na.rm = TRUE),
mean_mti_residual = mean(human_mti_residual, na.rm = TRUE)
)
triplet_distance_human <- triplet_distance_raw %>%
inner_join(human_triplets %>% select(triplet_id, human_triangle_residual, human_triangle_violation), by = "triplet_id")
h4_distance_rank <- cor_by_group(
triplet_distance_human,
group_cols = c("variant", "basis_object", "distance_metric"),
x = "triangle_residual_ac_minus_ab_bc",
y = "human_triangle_residual"
) %>%
mutate(
analysis_block = "H4 additive triangle",
metric_source = "triplet_distance_violations",
metric_family = "Triplet distance geometry",
metric_name = distance_metric,
metric_short = metric_short_label(distance_metric),
variant_label = variant_short(variant)
)
triplet_mti_human <- triplet_mti_raw %>%
inner_join(
human_triplets %>% select(triplet_id, human_mti_residual, human_mti_lower_than_product, human_mti_higher_than_product),
by = "triplet_id"
)
h4_mti_rank <- cor_by_group(
triplet_mti_human,
group_cols = c("variant", "basis_object", "similarity_metric"),
x = "mti_residual_sac_minus_sab_sbc",
y = "human_mti_residual"
) %>%
mutate(
analysis_block = "H4 multiplicative triangle",
metric_source = "triplet_mti_violations",
metric_family = "Triplet similarity geometry",
metric_name = similarity_metric,
metric_short = metric_short_label(similarity_metric),
variant_label = variant_short(variant)
)
mti_classification <- triplet_mti_human %>%
group_by(variant, basis_object, similarity_metric) %>%
summarise(
n = n(),
true_positive = sum(mti_lower_than_product & human_mti_lower_than_product, na.rm = TRUE),
false_positive = sum(mti_lower_than_product & !human_mti_lower_than_product, na.rm = TRUE),
false_negative = sum(!mti_lower_than_product & human_mti_lower_than_product, na.rm = TRUE),
true_negative = sum(!mti_lower_than_product & !human_mti_lower_than_product, na.rm = TRUE),
accuracy = mean(mti_lower_than_product == human_mti_lower_than_product, na.rm = TRUE),
precision = true_positive / pmax(true_positive + false_positive, 1),
recall = true_positive / pmax(true_positive + false_negative, 1),
f1 = if_else(precision + recall > 0, 2 * precision * recall / (precision + recall), 0),
.groups = "drop"
) %>%
arrange(desc(f1), desc(accuracy))
# H6 static pairwise metrics.
h6_static <- pair_behavior %>% filter(str_detect(hypothesis, "H6_diagnosticity"))
h6_static_metrics <- join_metrics_to_behavior(h6_static, pairwise_metric_values)
h6_static_rank <- cor_by_group(
h6_static_metrics,
group_cols = c("hypothesis", rank_group_cols),
x = "metric_value",
y = "human_value"
) %>%
group_by(hypothesis) %>%
mutate(p_fdr_within_hypothesis = p.adjust(p.value, method = "BH")) %>%
ungroup() %>%
mutate(analysis_block = paste("H6 static", hypothesis))
# H6 contextualized metrics.
diag_human_rows <- pair_behavior %>%
filter(str_detect(hypothesis, "H6_diagnosticity")) %>%
transmute(
hypothesis,
response_type,
task_family,
initial = human_first,
target = human_second,
context,
human_value,
human_n,
item_id,
pair_id
) %>%
filter(!is.na(context), !is.na(initial), !is.na(target))
context_metric_cols <- c(
"contextualized_vector_cosine", "contextualized_vector_dot",
"density_hs_inner", "density_hs_cosine", "density_fidelity",
"density_bures_distance", "density_trace_distance", "density_frobenius_distance"
)
h6_context_long <- diag_context_raw %>%
pivot_longer(cols = any_of(context_metric_cols), names_to = "metric_name", values_to = "value") %>%
filter(!is.na(value), status == "ok") %>%
mutate(
metric_value = similarity_orient_value(metric_name, value),
metric_source = "diagnostic_contextualized_similarity",
metric_family = case_when(
metric_family == "contextualized_vector" ~ "Contextualized vector",
metric_family == "luders_contextualized_density" ~ "Lüders contextualized density",
TRUE ~ as.character(metric_family)
),
metric_short = metric_short_label(metric_name),
metric_id = paste(metric_family, metric_name, variant, basis_object, density_object, state_vector_source, sep = " | "),
variant_label = variant_short(variant)
) %>%
inner_join(diag_human_rows, by = c("initial", "context", "target"))
h6_context_rank <- cor_by_group(
h6_context_long,
group_cols = c(
"hypothesis", "metric_id", "metric_source", "metric_family", "metric_name", "metric_short", "variant",
"variant_label", "cluster_method", "global_k_method", "local_k_method", "basis_object", "density_object", "state_vector_source"
),
x = "metric_value",
y = "human_value"
) %>%
group_by(hypothesis) %>%
mutate(p_fdr_within_hypothesis = p.adjust(p.value, method = "BH")) %>%
ungroup() %>%
mutate(analysis_block = paste("H6 contextualized", hypothesis))
# H6 perplexity-inhibited metrics.
perplexity_metric_cols <- c(
"baseline_projector_containment_initial_to_target",
"perplexity_residual_density_to_target_containment",
"perplexity_residual_projector_to_target_containment",
"delta_residual_density_containment_minus_baseline",
"delta_residual_projector_containment_minus_baseline",
"perplexity_density_hs_inner",
"perplexity_density_hs_cosine",
"perplexity_density_frobenius_distance",
"perplexity_density_fidelity",
"perplexity_density_bures_distance",
"perplexity_density_trace_distance"
)
h6_perplexity_long <- diag_perplexity_raw %>%
mutate(context = str_remove(condition, "^ctx-")) %>%
pivot_longer(cols = any_of(perplexity_metric_cols), names_to = "metric_name", values_to = "value") %>%
filter(!is.na(value)) %>%
mutate(
metric_value = similarity_orient_value(metric_name, value),
metric_source = "diagnostic_perplexity_metrics",
metric_family = "Perplexity-inhibited diagnostic",
metric_short = metric_short_label(metric_name),
metric_id = paste(metric_family, metric_name, variant, basis_object, density_object, sep = " | "),
variant_label = variant_short(variant)
) %>%
inner_join(diag_human_rows, by = c("initial", "context", "target"))
h6_perplexity_rank <- cor_by_group(
h6_perplexity_long,
group_cols = c(
"hypothesis", "metric_id", "metric_source", "metric_family", "metric_name", "metric_short", "variant",
"variant_label", "cluster_method", "global_k_method", "local_k_method", "basis_object", "density_object"
),
x = "metric_value",
y = "human_value"
) %>%
group_by(hypothesis) %>%
mutate(p_fdr_within_hypothesis = p.adjust(p.value, method = "BH")) %>%
ungroup() %>%
mutate(analysis_block = paste("H6 perplexity", hypothesis))
# Diagnostic choice predictions: compare predicted pairwise winner with the human winner.
# The prediction table contains pairwise target choices; the human option values come from
# H6 choice-full rows, where human_value is the proportion selecting each option.
human_option_values <- pair_behavior %>%
filter(hypothesis == "H6_diagnosticity_choice_full") %>%
transmute(
initial = human_first,
context,
target = human_second,
human_value
) %>%
group_by(initial, context, target) %>%
summarise(human_value = mean(human_value, na.rm = TRUE), .groups = "drop")
h6_choice_prediction_human <- diag_choice_raw %>%
mutate(
context = str_remove(condition, "^ctx-"),
diagnostic_choice_set = case_when(
str_detect(trial_id, "choice_candidates_only") ~ "choice_candidates_only",
str_detect(trial_id, "choice_full_with_context") ~ "choice_full_with_context",
TRUE ~ "unknown"
),
hypothesis = "H6_diagnosticity_choice_full"
) %>%
left_join(
human_option_values %>% rename(target_a = target, human_value_a = human_value),
by = c("initial", "context", "target_a")
) %>%
left_join(
human_option_values %>% rename(target_b = target, human_value_b = human_value),
by = c("initial", "context", "target_b")
) %>%
filter(!is.na(human_value_a), !is.na(human_value_b)) %>%
mutate(
human_preferred_choice = if_else(human_value_a >= human_value_b, target_a, target_b),
correct = predicted_choice == human_preferred_choice,
metric_source = source_table,
metric_short = metric_short_label(metric),
metric_family_label = case_when(
metric_family == "contextualized_vector" ~ "Contextualized vector",
metric_family == "luders_contextualized_density" ~ "Lüders contextualized density",
metric_family == "perplexity" ~ "Perplexity-inhibited diagnostic",
TRUE ~ as.character(metric_family)
)
)
h6_choice_accuracy <- h6_choice_prediction_human %>%
group_by(hypothesis, diagnostic_choice_set, metric_family_label, metric, variant) %>%
summarise(
n = n(),
accuracy = mean(correct, na.rm = TRUE),
mean_abs_preference = mean(abs(preference_score_a_minus_b), na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(accuracy), desc(mean_abs_preference))
# Representation alignment and variant sensitivity.
alignment_summary <- representation_alignment %>%
group_by(object_a, object_b) %>%
summarise(
n = n(),
mean_hs_cosine = mean(cosine_hilbert_schmidt, na.rm = TRUE),
sd_hs_cosine = sd(cosine_hilbert_schmidt, na.rm = TRUE),
mean_trace_overlap = mean(symmetric_trace_normalized_overlap, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(mean_hs_cosine))
parse_variant <- function(x, field) {
m <- str_match(x, "clust-(.*?)__g-(.*?)__l-(.*)$")
case_when(
field == "cluster" ~ m[, 2],
field == "global" ~ m[, 3],
field == "local" ~ m[, 4],
TRUE ~ NA_character_
)
}
variant_sensitivity_labeled <- variant_sensitivity %>%
mutate(
cluster_a = parse_variant(variant_a, "cluster"),
cluster_b = parse_variant(variant_b, "cluster"),
global_a = parse_variant(variant_a, "global"),
global_b = parse_variant(variant_b, "global"),
local_a = parse_variant(variant_a, "local"),
local_b = parse_variant(variant_b, "local"),
changed_cluster = cluster_a != cluster_b,
changed_global = global_a != global_b,
changed_local = local_a != local_b,
comparison_type = case_when(
changed_cluster & !changed_global & !changed_local ~ "Clustering only",
!changed_cluster & (changed_global | changed_local) ~ "Dimensionality only",
changed_cluster & (changed_global | changed_local) ~ "Both clustering and dimensionality",
TRUE ~ "No detected change"
)
)
variant_sensitivity_summary <- variant_sensitivity_labeled %>%
group_by(object, comparison_type) %>%
summarise(
n = n(),
mean_hs_cosine = mean(cosine_hilbert_schmidt, na.rm = TRUE),
sd_hs_cosine = sd(cosine_hilbert_schmidt, na.rm = TRUE),
mean_frobenius_distance = mean(frobenius_distance, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(object, comparison_type)
# Combined summaries.
rank_for_summary <- bind_rows(
h1_rank_directional,
h2_rank_directional,
h2_likert_rank,
h3_rating_rank,
h3_order_rank,
h4_distance_rank %>% rename(metric_id = distance_metric),
h4_mti_rank %>% rename(metric_id = similarity_metric),
h6_static_rank,
h6_context_rank,
h6_perplexity_rank
)
best_predictor_by_block <- bind_rows(
h1_rank_directional %>% slice_head(n = 1),
h2_rank_directional %>% slice_head(n = 1),
h2_likert_rank %>% slice_head(n = 1),
h3_rating_rank %>% slice_head(n = 1),
h3_order_rank %>% slice_head(n = 1),
h4_distance_rank %>% slice_head(n = 1),
h4_mti_rank %>% slice_head(n = 1),
h6_static_rank %>% group_by(hypothesis) %>% slice_head(n = 1) %>% ungroup(),
h6_context_rank %>% group_by(hypothesis) %>% slice_head(n = 1) %>% ungroup(),
h6_perplexity_rank %>% group_by(hypothesis) %>% slice_head(n = 1) %>% ungroup()
) %>%
mutate(
metric_family = coalesce(metric_family, metric_family_from_source(metric_source)),
display_block = coalesce(analysis_block, hypothesis),
display_metric = metric_short_label(metric_name)
)
cosine_comparisons <- bind_rows(
compare_quantum_vs_cosine(h1_rank_all) %>% mutate(analysis_block = "H1 ipsative asymmetry"),
compare_quantum_vs_cosine(h2_rank_all) %>% mutate(analysis_block = "H2 salience"),
compare_quantum_vs_cosine(h3_rating_rank) %>% mutate(analysis_block = "H3 Likert similarity"),
compare_quantum_vs_cosine(h6_static_rank %>% filter(hypothesis == "H6_diagnosticity_choice_full")) %>% mutate(analysis_block = "H6 choice full"),
compare_quantum_vs_cosine(h6_static_rank %>% filter(hypothesis == "H6_diagnosticity_rating")) %>% mutate(analysis_block = "H6 rating")
)
# Export full tables.
write_table(h1_rank_all, "h1_all_metric_rankings.csv")
write_table(h1_rank_directional, "h1_directional_metric_rankings.csv")
write_table(h2_rank_all, "h2_all_metric_rankings.csv")
write_table(h2_rank_directional, "h2_directional_metric_rankings.csv")
write_table(h2_likert_rank, "h2b_likert_salience_dimensionality_correlations.csv")
write_table(h2_likert_frequency_adjusted, "h2b_likert_salience_frequency_adjusted_models.csv")
write_table(h3_rating_rank, "h3_rating_similarity_metric_rankings.csv")
write_table(h3_order_rank, "h3_order_asymmetry_metric_rankings.csv")
write_table(h3_order_test, "h3_human_order_asymmetry_test.csv")
write_table(human_triplets, "h4_human_triplet_residuals.csv")
write_table(human_triplet_summary, "h4_human_triplet_summary.csv")
write_table(h4_distance_rank, "h4_additive_triangle_metric_rankings.csv")
write_table(h4_mti_rank, "h4_mti_metric_rankings.csv")
write_table(mti_classification, "h4_mti_classification_performance.csv")
write_table(h6_static_rank, "h6_static_pairwise_metric_rankings.csv")
write_table(h6_context_rank, "h6_contextualized_metric_rankings.csv")
write_table(h6_perplexity_rank, "h6_perplexity_metric_rankings.csv")
write_table(h6_choice_accuracy, "h6_diagnostic_choice_prediction_accuracy.csv")
write_table(alignment_summary, "representation_alignment_summary.csv")
write_table(variant_sensitivity_summary, "variant_sensitivity_summary.csv")
write_table(best_predictor_by_block, "best_predictor_by_analysis_block.csv")
write_table(cosine_comparisons, "cosine_vs_quantum_comparisons.csv")