rm(list = ls())
This script does two things:
Part C runs the full Bucur network
analysis across all drawing types (all, silhouette, skeleton,
mixed) — replicating the structure of
category_fits_by_bucur.Rmd but with corrected data paths
and namespaced variables.
Parts D-F then run the shape-feature
analysis on silhouettes and compare the two models head-to-head
on a matched sample (same image_ids in
both pipelines).
All variable names are prefixed (net_ /
shp_ / cmp_) so nothing is overwritten when
both sets of results coexist.
Capping strategy: Shape data is capped first
(matching the original standalone silhouette script behaviour with
set.seed(42)). The network data then inherits the same
image_ids — no independent capping — so both pipelines
always operate on identical observations.
library(tidyverse)
library(readxl)
library(mclust)
library(vcd)
map <- purrr::map
Update these to match your local file structure.
# -- Network feature CSVs (from extract_network_features.py) --
# NOTE: Original bucur script had BOTH paths pointing at the sirius
# file — corrected here to use the separate D1 and Sirius CSVs.
D1_NETWORK <- "Data/Network_signatures/test_run/network_features_d1_1.csv"
SIR_NETWORK <- "Data/Network_signatures/test_run/network_features_sirius.csv"
# -- Shape feature CSVs (from extract_shape_features_*.py) --
D1_SHAPES <- "Data/Feature_extraction_March26/Drawingpage1/shape_features_s2d1_v2.csv"
SIR_SHAPES <- "Data/Feature_extraction_March26/Sirius/shape_features_sirius.csv"
# -- Label spreadsheets --
D1_EXCEL <- "Data/Drawingpage1_s2/Use/pair_fixed_d1_segtruefixed_relabelled_v7_lowercase_2.xlsx"
SIR_EXCEL <- "Data/Sirius_s1/Use/sirius_pair_fixed_v2_categorised_2_1.xlsx"
# -- Drawing-type manifests --
D1_MANIFEST <- "Data/Drawingpage1_s2/Use/full_review_manifest_5_D1_only.xlsx"
SIR_MANIFEST <- "Data/Sirius_s1/Use/full_review_manifest_5_sirius_only.xlsx"
The network CSVs already contain image_id,
response_id, scene, drawing_type,
and all label columns (drawn_l1-l4) baked in
from extract_network_features.py. No join needed.
cat("=== Loading network features ===\n")
## === Loading network features ===
net_d1_raw <- read_csv(D1_NETWORK, show_col_types = FALSE)
net_sir_raw <- read_csv(SIR_NETWORK, show_col_types = FALSE)
cat("D1:", nrow(net_d1_raw), "| Sirius:", nrow(net_sir_raw), "\n")
## D1: 235 | Sirius: 196
net_combined <- bind_rows(net_d1_raw, net_sir_raw) %>%
mutate(
drawing_type_detailed = drawing_type,
drawing_type = case_when(
drawing_type %in% c("mixed_out", "mixed_in", "mixed_both",
"if the last little edge is fixed") ~ "mixed",
drawing_type == "mixed" ~ "mixed",
is.na(drawing_type) ~ "unknown",
TRUE ~ drawing_type
),
# Harmonise scene codes to match shape pipeline
scene = case_when(
scene == "s2d1" ~ "d1",
TRUE ~ scene
)
)
cat("Network combined:", nrow(net_combined), "\n")
## Network combined: 431
print(table(net_combined$drawing_type, net_combined$scene))
##
## d1 sirius
## mixed 84 90
## silhouette_only 104 64
## skeleton 46 35
## unknown 1 7
Shape CSVs only contain image_id + feature columns.
Labels and drawing type come from the Excel/manifest files.
cat("\n=== Loading shape features + labels + manifests ===\n")
##
## === Loading shape features + labels + manifests ===
# -- Labels --
shp_d1_labels <- read_excel(D1_EXCEL, guess_max = Inf) %>%
transmute(
response_id = response_id,
seg = seg_true_v4,
image_id = paste0("response_", response_id, "_seg_", seg_true_v4),
drawn_l1 = drawn_l1_simple,
drawn_l2 = drawn_l2_interraterDecision,
drawn_l3 = drawn_l3_cat,
drawn_l4 = drawn_l4_cat,
scene = "d1"
) %>% distinct(image_id, .keep_all = TRUE)
shp_sir_labels <- read_excel(SIR_EXCEL, guess_max = Inf) %>%
transmute(
response_id = response_id,
seg = seg_true_v2,
image_id = paste0("response_", response_id, "_seg_", seg_true_v2),
drawn_l1 = drawn_l1_label,
drawn_l2 = drawn_l2_cat,
drawn_l3 = drawn_l3_cat,
drawn_l4 = drawn_l4_cat,
scene = "sirius"
) %>% distinct(image_id, .keep_all = TRUE)
shp_all_labels <- bind_rows(shp_d1_labels, shp_sir_labels)
# -- Manifests (drawing type) --
shp_d1_manifest <- read_excel(D1_MANIFEST) %>%
transmute(
image_id = str_replace(image_id, "\\.png$", ""),
drawing_type = case_when(
correct_type %in% c("mixed_out", "mixed_in", "mixed_both") ~ "mixed",
TRUE ~ correct_type
)
)
shp_sir_manifest <- read_excel(SIR_MANIFEST) %>%
transmute(
image_id = str_replace(image_id, "\\.png$", ""),
drawing_type = case_when(
correct_type %in% c("mixed_out", "mixed_in", "mixed_both") ~ "mixed",
TRUE ~ correct_type
)
)
shp_all_manifest <- bind_rows(shp_d1_manifest, shp_sir_manifest)
# -- Shape feature CSVs --
shp_d1_feats <- read_csv(D1_SHAPES, show_col_types = FALSE)
shp_sir_feats <- read_csv(SIR_SHAPES, show_col_types = FALSE)
shp_all_feats <- bind_rows(shp_d1_feats, shp_sir_feats)
# -- Merge --
shp_combined <- shp_all_labels %>%
inner_join(shp_all_feats, by = "image_id") %>%
inner_join(shp_all_manifest, by = "image_id")
cat("Shape combined:", nrow(shp_combined), "\n")
## Shape combined: 423
print(table(shp_combined$drawing_type, shp_combined$scene))
##
## d1 sirius
## if the last little edge is fixed 0 1
## mixed 84 89
## silhouette_only 104 64
## skeleton 46 35
Strategy: The shape data is capped first with
set.seed(42), exactly matching the standalone silhouette
script’s behaviour. The network data then inherits those same
image_ids — no independent capping — so the RNG-drift
problem is eliminated and both pipelines always operate on identical
observations.
# Cap shape data first (this is the anchor — matches standalone script)
set.seed(42)
shp_combined_capped <- shp_combined %>%
group_by(response_id, scene, drawn_l1) %>%
sample_frac(1) %>% slice_head(n = 5) %>% ungroup()
# Network: keep exactly the same image_ids (no independent capping)
net_combined_capped <- net_combined %>%
filter(image_id %in% shp_combined_capped$image_id)
cat("Shape capped: ", nrow(shp_combined), "->", nrow(shp_combined_capped), "\n")
## Shape capped: 423 -> 402
cat("Network (matched to shape caps):", nrow(net_combined_capped), "\n")
## Network (matched to shape caps): 402
# Check for any image_ids in shape but missing from network
shp_not_in_net <- setdiff(shp_combined_capped$image_id, net_combined_capped$image_id)
net_not_in_shp <- setdiff(net_combined_capped$image_id, shp_combined_capped$image_id)
cat("\nShape IDs not in network:", length(shp_not_in_net), "\n")
##
## Shape IDs not in network: 0
cat("Network IDs not in shape:", length(net_not_in_shp), "\n")
## Network IDs not in shape: 0
if (length(shp_not_in_net) > 0) {
cat("\nThese shape observations have no network features (will be in shape-only runs):\n")
shp_combined_capped %>%
filter(image_id %in% shp_not_in_net) %>%
select(image_id, drawing_type, drawn_l1) %>%
print(n = 20)
}
cat("\nDrawing type breakdown (shape-capped):\n")
##
## Drawing type breakdown (shape-capped):
print(table(shp_combined_capped$drawing_type, shp_combined_capped$scene))
##
## d1 sirius
## if the last little edge is fixed 0 1
## mixed 82 89
## silhouette_only 89 64
## skeleton 42 35
cat("\nDrawing type breakdown (network-matched):\n")
##
## Drawing type breakdown (network-matched):
print(table(net_combined_capped$drawing_type, net_combined_capped$scene))
##
## d1 sirius
## mixed 82 90
## silhouette_only 89 64
## skeleton 42 35
# Silhouettes
net_sil_capped <- net_combined_capped %>% filter(drawing_type == "silhouette_only")
shp_sil_capped <- shp_combined_capped %>% filter(drawing_type == "silhouette_only")
# Verify silhouette match
sil_shared <- intersect(net_sil_capped$image_id, shp_sil_capped$image_id)
sil_net_only <- setdiff(net_sil_capped$image_id, shp_sil_capped$image_id)
sil_shp_only <- setdiff(shp_sil_capped$image_id, net_sil_capped$image_id)
cat("Network silhouettes:", nrow(net_sil_capped), "\n")
## Network silhouettes: 153
cat("Shape silhouettes:", nrow(shp_sil_capped), "\n")
## Shape silhouettes: 153
cat("Shared silhouette IDs:", length(sil_shared), "\n")
## Shared silhouette IDs: 153
cat("Net-only:", length(sil_net_only), "| Shp-only:", length(sil_shp_only), "\n")
## Net-only: 0 | Shp-only: 0
# Matched silhouettes (for head-to-head comparison)
net_sil_matched <- net_sil_capped %>% filter(image_id %in% sil_shared)
shp_sil_matched <- shp_sil_capped %>% filter(image_id %in% sil_shared)
cat("\nMatched silhouettes -- Network:", nrow(net_sil_matched),
"| Shape:", nrow(shp_sil_matched), "\n")
##
## Matched silhouettes -- Network: 153 | Shape: 153
# Verify labels agree
label_check <- inner_join(
net_sil_matched %>% select(image_id, net_l1 = drawn_l1),
shp_sil_matched %>% select(image_id, shp_l1 = drawn_l1),
by = "image_id"
)
n_mismatch <- sum(label_check$net_l1 != label_check$shp_l1, na.rm = TRUE)
cat("Label mismatches (drawn_l1):", n_mismatch, "\n")
## Label mismatches (drawn_l1): 0
if (n_mismatch > 0) {
cat("\nWARNING: Some labels differ between network and shape data.\n")
label_check %>% filter(net_l1 != shp_l1) %>% print(n = 20)
}
# Other subsets for network analysis
net_skel_capped <- net_combined_capped %>% filter(drawing_type == "skeleton")
net_mixed_capped <- net_combined_capped %>% filter(drawing_type == "mixed")
cat("\nNetwork skeletons:", nrow(net_skel_capped), "\n")
##
## Network skeletons: 77
cat("Network mixed: ", nrow(net_mixed_capped), "\n")
## Network mixed: 172
All helpers are prefixed with cmp_ to avoid collisions
with any functions from the original scripts.
cmp_permutation_test <- function(cluster_assignments, labels, n_perm = 5000) {
observed_tab <- table(cluster_assignments, labels)
observed_cv <- vcd::assocstats(observed_tab)$cramer
perm_cvs <- replicate(n_perm, {
shuffled <- sample(labels)
perm_tab <- table(cluster_assignments, shuffled)
tryCatch(vcd::assocstats(perm_tab)$cramer, error = function(e) NA_real_)
})
perm_cvs <- perm_cvs[!is.na(perm_cvs)]
list(
observed_cv = observed_cv,
perm_mean = mean(perm_cvs),
perm_sd = sd(perm_cvs),
p_value = if (length(perm_cvs) > 0) mean(perm_cvs >= observed_cv) else NA_real_,
n_valid_perms = length(perm_cvs)
)
}
cmp_label_distribution <- function(data, cluster_col, label_col = "drawn_l1") {
data %>%
count(!!sym(label_col), !!sym(cluster_col)) %>%
group_by(!!sym(label_col)) %>%
mutate(
total_drawings_of_label = sum(n),
pct_of_label = round(n / total_drawings_of_label * 100, 1)
) %>%
select(!!sym(label_col), !!sym(cluster_col), pct_of_label, total_drawings_of_label) %>%
pivot_wider(
names_from = !!sym(cluster_col),
values_from = pct_of_label,
names_prefix = "Cluster_",
values_fill = 0
) %>%
arrange(desc(total_drawings_of_label))
}
cmp_get_purity <- function(data, cluster_col = "cluster", label_col = "drawn_l1", min_n = 2) {
data %>%
count(!!sym(label_col), !!sym(cluster_col)) %>%
group_by(!!sym(label_col)) %>%
mutate(total = sum(n), modal_n = max(n),
purity = modal_n / total) %>%
filter(n == modal_n) %>%
slice(1) %>%
ungroup() %>%
filter(total >= min_n) %>%
select(!!sym(label_col), total, modal_cluster = !!sym(cluster_col), purity)
}
net_run_pipeline <- function(data, label) {
cat("\n###########################################################\n")
cat(" ", toupper(label), " -- NETWORK FEATURES\n")
cat(" n =", nrow(data), "\n")
cat("###########################################################\n\n")
if (nrow(data) < 10) { cat("Too few observations.\n"); return(NULL) }
net_feat_names <- c(
"s01_n_links", "s02_max_degree", "s03_avg_degree",
"s04_clustering_coeff", "s05_max_core_number",
"s06_n_cycles", "s07_max_cycle_size",
"s09_diameter", "s10_avg_shortest_path",
"s11_edge_connectivity",
"s12_spatial_diameter", "s13_avg_link_length",
"s14_min_angle", "s15_avg_angle",
"s17_avg_magnitude",
"n_nodes", "density"
)
net_feat_mat <- data %>%
select(any_of(net_feat_names)) %>%
mutate(across(everything(), ~ifelse(is.na(.), median(., na.rm = TRUE), .))) %>%
select(where(~var(., na.rm = TRUE) > 1e-10))
cat("Feature matrix:", nrow(net_feat_mat), "x", ncol(net_feat_mat), "\n")
cat("Features used:", paste(names(net_feat_mat), collapse = ", "), "\n")
net_scaled <- scale(net_feat_mat)
net_pca <- prcomp(net_scaled)
net_varexp <- summary(net_pca)$importance[3, ]
net_ncomp <- which(net_varexp >= 0.80)[1]
cat("PCA components for 80% var:", net_ncomp, "\n")
print(round(summary(net_pca)$importance[, 1:min(8, ncol(net_scaled))], 3))
net_scores <- net_pca$x[, 1:net_ncomp]
net_gmm <- Mclust(net_scores)
cat("\n=== GMM Results ===\n")
cat("Model:", net_gmm$modelName, " | Clusters:", net_gmm$G,
" | BIC:", round(net_gmm$bic, 2), "\n")
cat("Mean uncertainty:", round(mean(net_gmm$uncertainty), 3), "\n")
data$cluster <- net_gmm$classification
cat("\nCluster sizes:\n")
print(table(data$cluster))
# Feature means per cluster
cat("\n=== Feature means per cluster ===\n")
data %>%
group_by(cluster) %>%
summarise(across(any_of(names(net_feat_mat)), ~round(mean(., na.rm = TRUE), 3)),
.groups = "drop") %>%
pivot_longer(-cluster) %>%
pivot_wider(names_from = cluster, values_from = value) %>%
print(n = 20)
# Permutation tests
for (level in c("drawn_l1", "drawn_l2", "drawn_l3", "drawn_l4")) {
labels_vec <- data[[level]]
if (all(is.na(labels_vec))) next
cat("\n--- ", toupper(level), " ---\n")
tryCatch({
tab <- table(data$cluster, labels_vec)
print(vcd::assocstats(tab))
perm <- cmp_permutation_test(data$cluster, labels_vec)
cat("Observed V:", round(perm$observed_cv, 3),
" | Perm mean:", round(perm$perm_mean, 3), "+-", round(perm$perm_sd, 3),
" | p:", perm$p_value, "\n")
}, error = function(e) {
cat(" Skipped -- too sparse for this level (", conditionMessage(e), ")\n")
})
}
# Cluster composition (L1)
cat("\n=== Cluster composition (L1) ===\n")
data %>%
count(cluster, drawn_l1) %>%
group_by(cluster) %>%
mutate(cluster_n = sum(n), pct = round(n / cluster_n * 100, 1)) %>%
arrange(cluster, desc(pct)) %>%
print(n = 200)
list(data = data, gmm = net_gmm, pca = net_pca,
feature_names = names(net_feat_mat))
}
set.seed(42)
net_all_results_capped <- net_run_pipeline(net_combined_capped, "All drawings (capped)")
##
## ###########################################################
## ALL DRAWINGS (CAPPED) -- NETWORK FEATURES
## n = 402
## ###########################################################
##
## Feature matrix: 402 x 17
## Features used: s01_n_links, s02_max_degree, s03_avg_degree, s04_clustering_coeff, s05_max_core_number, s06_n_cycles, s07_max_cycle_size, s09_diameter, s10_avg_shortest_path, s11_edge_connectivity, s12_spatial_diameter, s13_avg_link_length, s14_min_angle, s15_avg_angle, s17_avg_magnitude, n_nodes, density
## PCA components for 80% var: 5
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.332 1.880 1.441 1.221 1.189 0.949 0.709 0.630
## Proportion of Variance 0.320 0.208 0.122 0.088 0.083 0.053 0.030 0.023
## Cumulative Proportion 0.320 0.528 0.650 0.738 0.821 0.874 0.903 0.927
##
## === GMM Results ===
## Model: VVV | Clusters: 7 | BIC: -5030.27
## Mean uncertainty: 0.019
##
## Cluster sizes:
##
## 1 2 3 4 5 6 7
## 100 110 21 61 32 42 36
##
## === Feature means per cluster ===
## # A tibble: 17 × 8
## name `1` `2` `3` `4` `5` `6` `7`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 s01_n_links 9.22 16.1 24.8 7.69 32.7 7.38 3.92
## 2 s02_max_degree 2 3.96 2.43 2.43 5.56 3 2.33
## 3 s03_avg_degree 2 2.24 2.02 1.68 2.15 2.00 2.08
## 4 s04_clustering_coeff 0 0.167 0.02 0 0.163 0.221 0.833
## 5 s05_max_core_number 2 1.99 2 1 1.78 2 2
## 6 s06_n_cycles 1 2.73 1.57 0 8.41 1.10 1.25
## 7 s07_max_cycle_size 9.22 7.14 21.6 0 8.16 3.86 3.31
## 8 s09_diameter 4.36 6.31 10.9 6.87 9 4.45 1.39
## 9 s10_avg_shortest_path 2.58 3.00 5.74 3.00 4.03 2.19 1.14
## 10 s11_edge_connectivity 2 1.44 2 1 1.16 1 2
## 11 s12_spatial_diameter 826. 899. 790. 679. 818. 845. 381.
## 12 s13_avg_link_length 256. 234. 112. 186. 215. 299. 267.
## 13 s14_min_angle 41.9 30.0 39.2 59.7 129. 36.9 32.6
## 14 s15_avg_angle 106. 104. 124. 110. 189. 91.5 66.2
## 15 s17_avg_magnitude 4.37 3.63 2.33 3.83 3.10 4.58 3.65
## 16 n_nodes 9.22 14.8 24.7 8.80 25.7 7.36 3.67
## 17 density 0.314 0.241 0.107 0.306 0.35 0.398 0.885
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 853.19 906 8.9433e-01
## Pearson 1250.96 906 1.7719e-13
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.87
## Cramer's V : 0.72
## Observed V: 0.72 | Perm mean: 0.613 +- 0.013 | p: 0
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 526.26 354 7.0523e-09
## Pearson 629.52 354 0.0000e+00
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.781
## Cramer's V : 0.511
## Observed V: 0.511 | Perm mean: 0.383 +- 0.014 | p: 0
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 230.19 84 1.5543e-15
## Pearson 233.49 84 5.5511e-16
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.606
## Cramer's V : 0.311
## Observed V: 0.311 | Perm mean: 0.187 +- 0.014 | p: 0
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 109.97 36 2.0413e-09
## Pearson 105.26 36 1.0416e-08
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.456
## Cramer's V : 0.209
## Observed V: 0.209 | Perm mean: 0.121 +- 0.014 | p: 0
##
## === Cluster composition (L1) ===
## # A tibble: 241 × 5
## # Groups: cluster [7]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 heart 14 100 14
## 2 1 bird 9 100 9
## 3 1 mountain 6 100 6
## 4 1 abstractshape 5 100 5
## 5 1 house 5 100 5
## 6 1 star 4 100 4
## 7 1 lightningbolt 3 100 3
## 8 1 bat 2 100 2
## 9 1 brightstars 2 100 2
## 10 1 butterfly 2 100 2
## 11 1 cat 2 100 2
## 12 1 diamond 2 100 2
## 13 1 rabbit 2 100 2
## 14 1 spaceship 2 100 2
## 15 1 starfish 2 100 2
## 16 1 tear 2 100 2
## 17 1 archerbow 1 100 1
## 18 1 arrow 1 100 1
## 19 1 beaver 1 100 1
## 20 1 boat 1 100 1
## 21 1 box 1 100 1
## 22 1 car 1 100 1
## 23 1 cliff 1 100 1
## 24 1 cloud 1 100 1
## 25 1 cow 1 100 1
## 26 1 determined 1 100 1
## 27 1 drumstick 1 100 1
## 28 1 enzyme 1 100 1
## 29 1 fish 1 100 1
## 30 1 flower 1 100 1
## 31 1 hand 1 100 1
## 32 1 hierarchy 1 100 1
## 33 1 hope 1 100 1
## 34 1 horse 1 100 1
## 35 1 jellyfish 1 100 1
## 36 1 kite 1 100 1
## 37 1 love 1 100 1
## 38 1 missile 1 100 1
## 39 1 mist 1 100 1
## 40 1 pants 1 100 1
## 41 1 person 1 100 1
## 42 1 quilt 1 100 1
## 43 1 ring 1 100 1
## 44 1 rock 1 100 1
## 45 1 shark 1 100 1
## 46 1 shield 1 100 1
## 47 1 stingray 1 100 1
## 48 1 stone 1 100 1
## 49 1 table 1 100 1
## 50 1 victoria 1 100 1
## 51 1 whale 1 100 1
## 52 1 wings 1 100 1
## 53 2 bird 10 110 9.1
## 54 2 person 10 110 9.1
## 55 2 dog 5 110 4.5
## 56 2 kite 5 110 4.5
## 57 2 butterfly 4 110 3.6
## 58 2 cat 3 110 2.7
## 59 2 fox 3 110 2.7
## 60 2 arrow 2 110 1.8
## 61 2 fish 2 110 1.8
## 62 2 hand 2 110 1.8
## 63 2 horse 2 110 1.8
## 64 2 hourglass 2 110 1.8
## 65 2 house 2 110 1.8
## 66 2 leaf 2 110 1.8
## 67 2 mountain 2 110 1.8
## 68 2 rabbit 2 110 1.8
## 69 2 squirrel 2 110 1.8
## 70 2 trapezoid 2 110 1.8
## 71 2 umbrella 2 110 1.8
## 72 2 abstractshape 1 110 0.9
## 73 2 antelope 1 110 0.9
## 74 2 arrow_box 1 110 0.9
## 75 2 assimilator 1 110 0.9
## 76 2 boat 1 110 0.9
## 77 2 book 1 110 0.9
## 78 2 bowl 1 110 0.9
## 79 2 box 1 110 0.9
## 80 2 caduceus 1 110 0.9
## 81 2 cage 1 110 0.9
## 82 2 cake 1 110 0.9
## 83 2 centaur 1 110 0.9
## 84 2 corn 1 110 0.9
## 85 2 diamond 1 110 0.9
## 86 2 doubletriangle 1 110 0.9
## 87 2 dragonfly 1 110 0.9
## 88 2 fairy 1 110 0.9
## 89 2 flower 1 110 0.9
## 90 2 gemini 1 110 0.9
## 91 2 ghost 1 110 0.9
## 92 2 heart 1 110 0.9
## 93 2 infinity 1 110 0.9
## 94 2 island 1 110 0.9
## 95 2 jigsaw 1 110 0.9
## 96 2 letter 1 110 0.9
## 97 2 line 1 110 0.9
## 98 2 lion 1 110 0.9
## 99 2 lionhead 1 110 0.9
## 100 2 man 1 110 0.9
## 101 2 monster 1 110 0.9
## 102 2 plane 1 110 0.9
## 103 2 robotman 1 110 0.9
## 104 2 rocket 1 110 0.9
## 105 2 slingshot 1 110 0.9
## 106 2 snail 1 110 0.9
## 107 2 snake 1 110 0.9
## 108 2 social_media 1 110 0.9
## 109 2 spill 1 110 0.9
## 110 2 stairs 1 110 0.9
## 111 2 star 1 110 0.9
## 112 2 sun 1 110 0.9
## 113 2 sunflower 1 110 0.9
## 114 2 sword 1 110 0.9
## 115 2 teapot 1 110 0.9
## 116 2 tortoise 1 110 0.9
## 117 2 waterfall 1 110 0.9
## 118 3 bird 3 21 14.3
## 119 3 book 3 21 14.3
## 120 3 bag 1 21 4.8
## 121 3 boy 1 21 4.8
## 122 3 cat 1 21 4.8
## 123 3 comma 1 21 4.8
## 124 3 dinosaur 1 21 4.8
## 125 3 eye 1 21 4.8
## 126 3 horse 1 21 4.8
## 127 3 minotaur 1 21 4.8
## 128 3 monster 1 21 4.8
## 129 3 person 1 21 4.8
## 130 3 plane 1 21 4.8
## 131 3 scorpion 1 21 4.8
## 132 3 squirrel 1 21 4.8
## 133 3 tank 1 21 4.8
## 134 3 woman 1 21 4.8
## 135 4 lightningbolt 6 61 9.8
## 136 4 abstractshape 4 61 6.6
## 137 4 bird 4 61 6.6
## 138 4 heart 4 61 6.6
## 139 4 brightstars 3 61 4.9
## 140 4 hand 3 61 4.9
## 141 4 letter 3 61 4.9
## 142 4 arrow 2 61 3.3
## 143 4 dog 2 61 3.3
## 144 4 horse 2 61 3.3
## 145 4 life 2 61 3.3
## 146 4 tree 2 61 3.3
## 147 4 ant 1 61 1.6
## 148 4 bowl 1 61 1.6
## 149 4 cat 1 61 1.6
## 150 4 catears 1 61 1.6
## 151 4 diver 1 61 1.6
## 152 4 face 1 61 1.6
## 153 4 falling 1 61 1.6
## 154 4 fish 1 61 1.6
## 155 4 hook 1 61 1.6
## 156 4 man 1 61 1.6
## 157 4 mermaid tail 1 61 1.6
## 158 4 mountain 1 61 1.6
## 159 4 mouth 1 61 1.6
## 160 4 net 1 61 1.6
## 161 4 person 1 61 1.6
## 162 4 s 1 61 1.6
## 163 4 saucepan 1 61 1.6
## 164 4 seal 1 61 1.6
## 165 4 seven 1 61 1.6
## 166 4 snake 1 61 1.6
## 167 4 triangle 1 61 1.6
## 168 4 wing 1 61 1.6
## 169 4 wings 1 61 1.6
## 170 4 zigzag 1 61 1.6
## 171 5 flower 3 32 9.4
## 172 5 line 2 32 6.2
## 173 5 star 2 32 6.2
## 174 5 abstractshape 1 32 3.1
## 175 5 arrow 1 32 3.1
## 176 5 bat 1 32 3.1
## 177 5 butterfly 1 32 3.1
## 178 5 butterfly_flower 1 32 3.1
## 179 5 c 1 32 3.1
## 180 5 car 1 32 3.1
## 181 5 cat 1 32 3.1
## 182 5 dog 1 32 3.1
## 183 5 dragon 1 32 3.1
## 184 5 eye 1 32 3.1
## 185 5 fairy 1 32 3.1
## 186 5 firework 1 32 3.1
## 187 5 fox 1 32 3.1
## 188 5 hand 1 32 3.1
## 189 5 horse 1 32 3.1
## 190 5 letter 1 32 3.1
## 191 5 link 1 32 3.1
## 192 5 motion 1 32 3.1
## 193 5 pot 1 32 3.1
## 194 5 pyramid 1 32 3.1
## 195 5 rose 1 32 3.1
## 196 5 stage 1 32 3.1
## 197 5 triangle 1 32 3.1
## 198 5 wishes 1 32 3.1
## 199 6 plant 5 42 11.9
## 200 6 kite 4 42 9.5
## # ℹ 41 more rows
if (!is.null(net_all_results_capped)) {
cat("\n=== Network ALL: Label Distribution across Clusters ===\n")
cmp_label_distribution(net_all_results_capped$data, "cluster") %>% print(n = 69)
cat("\n=== Drawing Type x Cluster ===\n")
print(table(net_all_results_capped$data$drawing_type, net_all_results_capped$data$cluster))
}
##
## === Network ALL: Label Distribution across Clusters ===
## # A tibble: 152 × 9
## # Groups: drawn_l1 [152]
## drawn_l1 total_drawings_of_la…¹ Cluster_1 Cluster_2 Cluster_4 Cluster_5
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 bird 31 29 32.3 12.9 0
## 2 heart 19 73.7 5.3 21.1 0
## 3 abstractshape 16 31.2 6.2 25 6.2
## 4 triangle 16 0 0 6.2 6.2
## 5 person 13 7.7 76.9 7.7 0
## 6 kite 10 10 50 0 0
## 7 lightningbolt 10 30 0 60 0
## 8 mountain 10 60 20 10 0
## 9 cat 9 22.2 33.3 11.1 11.1
## 10 arrow 8 12.5 25 25 12.5
## 11 butterfly 8 25 50 0 12.5
## 12 dog 8 0 62.5 25 12.5
## 13 fish 8 12.5 25 12.5 0
## 14 horse 8 12.5 25 25 12.5
## 15 hand 7 14.3 28.6 42.9 14.3
## 16 house 7 71.4 28.6 0 0
## 17 star 7 57.1 14.3 0 28.6
## 18 fox 6 0 50 0 16.7
## 19 brightstars 5 40 0 60 0
## 20 flower 5 20 20 0 60
## 21 letter 5 0 20 60 20
## 22 plant 5 0 0 0 0
## 23 book 4 0 25 0 0
## 24 rabbit 4 50 50 0 0
## 25 tree 4 0 0 50 0
## 26 umbrella 4 0 50 0 0
## 27 wings 4 25 0 25 0
## 28 bat 3 66.7 0 0 33.3
## 29 boat 3 33.3 33.3 0 0
## 30 diamond 3 66.7 33.3 0 0
## 31 hourglass 3 0 66.7 0 0
## 32 line 3 0 33.3 0 66.7
## 33 plane 3 0 33.3 0 0
## 34 squirrel 3 0 66.7 0 0
## 35 tear 3 66.7 0 0 0
## 36 bowl 2 0 50 50 0
## 37 box 2 50 50 0 0
## 38 car 2 50 0 0 50
## 39 corn 2 0 50 0 0
## 40 crab 2 0 0 0 0
## 41 eye 2 0 0 0 50
## 42 fairy 2 0 50 0 50
## 43 leaf 2 0 100 0 0
## 44 life 2 0 0 100 0
## 45 man 2 0 50 50 0
## 46 monster 2 0 50 0 0
## 47 rocket 2 0 50 0 0
## 48 scorpion 2 0 0 0 0
## 49 shovel 2 0 0 0 0
## 50 snake 2 0 50 50 0
## 51 spaceship 2 100 0 0 0
## 52 spear 2 0 0 0 0
## 53 starfish 2 100 0 0 0
## 54 stingray 2 50 0 0 0
## 55 trapezoid 2 0 100 0 0
## 56 angle 1 0 0 0 0
## 57 ant 1 0 0 100 0
## 58 antelope 1 0 100 0 0
## 59 archerbow 1 100 0 0 0
## 60 arrow_box 1 0 100 0 0
## 61 arrowhead 1 0 0 0 0
## 62 assimilator 1 0 100 0 0
## 63 bag 1 0 0 0 0
## 64 beaver 1 100 0 0 0
## 65 boy 1 0 0 0 0
## 66 butterfly_flo… 1 0 0 0 100
## 67 c 1 0 0 0 100
## 68 caduceus 1 0 100 0 0
## 69 cage 1 0 100 0 0
## # ℹ 83 more rows
## # ℹ abbreviated name: ¹total_drawings_of_label
## # ℹ 3 more variables: Cluster_6 <dbl>, Cluster_7 <dbl>, Cluster_3 <dbl>
##
## === Drawing Type x Cluster ===
##
## 1 2 3 4 5 6 7
## mixed 4 93 11 0 19 41 4
## silhouette_only 96 9 10 4 2 1 31
## skeleton 0 8 0 57 11 0 1
set.seed(42)
net_sil_results_capped <- net_run_pipeline(net_sil_capped, "Silhouettes (capped)")
##
## ###########################################################
## SILHOUETTES (CAPPED) -- NETWORK FEATURES
## n = 153
## ###########################################################
##
## Feature matrix: 153 x 17
## Features used: s01_n_links, s02_max_degree, s03_avg_degree, s04_clustering_coeff, s05_max_core_number, s06_n_cycles, s07_max_cycle_size, s09_diameter, s10_avg_shortest_path, s11_edge_connectivity, s12_spatial_diameter, s13_avg_link_length, s14_min_angle, s15_avg_angle, s17_avg_magnitude, n_nodes, density
## PCA components for 80% var: 4
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.473 2.038 1.481 1.205 1.108 0.900 0.605 0.463
## Proportion of Variance 0.360 0.244 0.129 0.085 0.072 0.048 0.022 0.013
## Cumulative Proportion 0.360 0.604 0.733 0.819 0.891 0.938 0.960 0.973
##
## === GMM Results ===
## Model: VVV | Clusters: 7 | BIC: -946.92
## Mean uncertainty: 0.028
##
## Cluster sizes:
##
## 1 2 3 4 5 6 7
## 19 9 20 27 18 39 21
##
## === Feature means per cluster ===
## # A tibble: 17 × 8
## name `1` `2` `3` `4` `5` `6` `7`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 s01_n_links 8.10 29.6 5.3 3 11.8 1.28e+1 7.48
## 2 s02_max_degree 2 2 2 2 3.06 2 e+0 2
## 3 s03_avg_degree 2 2 2 2 2.03 2 e+0 2
## 4 s04_clustering_coeff 0 0 0 1 0.242 0 0
## 5 s05_max_core_number 2 2 2 2 1.67 2 e+0 2
## 6 s06_n_cycles 1 1 1 1 1.61 1 e+0 1
## 7 s07_max_cycle_size 8.10 29.6 5.3 3 4.89 1.28e+1 7.48
## 8 s09_diameter 3.84 14.4 2.5 1 6.5 6.08e+0 3.57
## 9 s10_avg_shortest_path 2.30 7.64 1.62 1 3.00 3.46e+0 2.15
## 10 s11_edge_connectivity 2 2 2 2 1.61 2 e+0 2
## 11 s12_spatial_diameter 1289. 969. 572. 378. 700. 1.13e+3 273.
## 12 s13_avg_link_length 444. 121. 283. 297. 194. 2.74e+2 95.6
## 13 s14_min_angle 25.4 40.9 45.9 35.2 70.7 4.53e+1 54.0
## 14 s15_avg_angle 102. 129. 93.8 60 132. 1.15e+2 116.
## 15 s17_avg_magnitude 6.21 2.45 5.02 3.60 3.53 4.50e+0 2.23
## 16 n_nodes 8.10 29.6 5.3 3 11.3 1.28e+1 7.48
## 17 density 0.343 0.073 0.498 1 0.377 1.84e-1 0.352
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 397.40 402 5.5547e-01
## Pearson 562.97 402 1.8746e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.887
## Cramer's V : 0.783
## Observed V: 0.783 | Perm mean: 0.664 +- 0.016 | p: 0
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 302.94 240 3.6374e-03
## Pearson 385.29 240 7.6555e-09
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.846
## Cramer's V : 0.648
## Observed V: 0.648 | Perm mean: 0.513 +- 0.018 | p: 0
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 161.98 78 7.8525e-08
## Pearson 169.24 78 1.0644e-08
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.725
## Cramer's V : 0.429
## Observed V: 0.429 | Perm mean: 0.292 +- 0.022 | p: 0
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 105.947 36 8.2155e-09
## Pearson 96.978 36 1.6870e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.623
## Cramer's V : 0.325
## Observed V: 0.325 | Perm mean: 0.198 +- 0.022 | p: 0
##
## === Cluster composition (L1) ===
## # A tibble: 95 × 5
## # Groups: cluster [7]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 bird 2 19 10.5
## 2 1 mountain 2 19 10.5
## 3 1 rabbit 2 19 10.5
## 4 1 trapezoid 2 19 10.5
## 5 1 bat 1 19 5.3
## 6 1 brightstars 1 19 5.3
## 7 1 cliff 1 19 5.3
## 8 1 cloud 1 19 5.3
## 9 1 hand 1 19 5.3
## 10 1 jellyfish 1 19 5.3
## 11 1 lightningbolt 1 19 5.3
## 12 1 pants 1 19 5.3
## 13 1 starfish 1 19 5.3
## 14 1 stingray 1 19 5.3
## 15 1 whale 1 19 5.3
## 16 2 bird 2 9 22.2
## 17 2 bag 1 9 11.1
## 18 2 cat 1 9 11.1
## 19 2 dinosaur 1 9 11.1
## 20 2 horse 1 9 11.1
## 21 2 minotaur 1 9 11.1
## 22 2 plane 1 9 11.1
## 23 2 tank 1 9 11.1
## 24 3 heart 4 20 20
## 25 3 diamond 2 20 10
## 26 3 lightningbolt 2 20 10
## 27 3 mountain 2 20 10
## 28 3 abstractshape 1 20 5
## 29 3 bat 1 20 5
## 30 3 box 1 20 5
## 31 3 brightstars 1 20 5
## 32 3 hope 1 20 5
## 33 3 house 1 20 5
## 34 3 kite 1 20 5
## 35 3 shield 1 20 5
## 36 3 stone 1 20 5
## 37 3 wings 1 20 5
## 38 4 triangle 14 27 51.9
## 39 4 abstractshape 4 27 14.8
## 40 4 bird 3 27 11.1
## 41 4 angle 1 27 3.7
## 42 4 carrot 1 27 3.7
## 43 4 cat 1 27 3.7
## 44 4 corn 1 27 3.7
## 45 4 lightningbolt 1 27 3.7
## 46 4 tear 1 27 3.7
## 47 5 fish 4 18 22.2
## 48 5 heart 2 18 11.1
## 49 5 star 2 18 11.1
## 50 5 abstractshape 1 18 5.6
## 51 5 butterfly 1 18 5.6
## 52 5 doubletriangle 1 18 5.6
## 53 5 hourglass 1 18 5.6
## 54 5 infinity 1 18 5.6
## 55 5 mountain 1 18 5.6
## 56 5 person 1 18 5.6
## 57 5 scorpion 1 18 5.6
## 58 5 seal 1 18 5.6
## 59 5 wings 1 18 5.6
## 60 6 bird 7 39 17.9
## 61 6 heart 4 39 10.3
## 62 6 butterfly 2 39 5.1
## 63 6 cat 2 39 5.1
## 64 6 mountain 2 39 5.1
## 65 6 spaceship 2 39 5.1
## 66 6 abstractshape 1 39 2.6
## 67 6 archerbow 1 39 2.6
## 68 6 arrow 1 39 2.6
## 69 6 beaver 1 39 2.6
## 70 6 boat 1 39 2.6
## 71 6 cage 1 39 2.6
## 72 6 car 1 39 2.6
## 73 6 cow 1 39 2.6
## 74 6 drumstick 1 39 2.6
## 75 6 enzyme 1 39 2.6
## 76 6 fish 1 39 2.6
## 77 6 flower 1 39 2.6
## 78 6 hierarchy 1 39 2.6
## 79 6 horse 1 39 2.6
## 80 6 love 1 39 2.6
## 81 6 mist 1 39 2.6
## 82 6 person 1 39 2.6
## 83 6 rock 1 39 2.6
## 84 6 shark 1 39 2.6
## 85 6 victoria 1 39 2.6
## 86 7 heart 6 21 28.6
## 87 7 house 4 21 19
## 88 7 abstractshape 3 21 14.3
## 89 7 tear 2 21 9.5
## 90 7 determined 1 21 4.8
## 91 7 missile 1 21 4.8
## 92 7 quilt 1 21 4.8
## 93 7 ring 1 21 4.8
## 94 7 starfish 1 21 4.8
## 95 7 table 1 21 4.8
if (!is.null(net_sil_results_capped)) {
cat("\n=== Network SILHOUETTES: Label Distribution ===\n")
cmp_label_distribution(net_sil_results_capped$data, "cluster") %>% print(n = 69)
}
##
## === Network SILHOUETTES: Label Distribution ===
## # A tibble: 68 × 9
## # Groups: drawn_l1 [68]
## drawn_l1 total_drawings_of_la…¹ Cluster_3 Cluster_4 Cluster_5 Cluster_6
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 heart 16 25 0 12.5 25
## 2 bird 14 0 21.4 0 50
## 3 triangle 14 0 100 0 0
## 4 abstractshape 10 10 40 10 10
## 5 mountain 7 28.6 0 14.3 28.6
## 6 fish 5 0 0 80 20
## 7 house 5 20 0 0 0
## 8 cat 4 0 25 0 50
## 9 lightningbolt 4 50 25 0 0
## 10 butterfly 3 0 0 33.3 66.7
## 11 tear 3 0 33.3 0 0
## 12 bat 2 50 0 0 0
## 13 brightstars 2 50 0 0 0
## 14 diamond 2 100 0 0 0
## 15 horse 2 0 0 0 50
## 16 person 2 0 0 50 50
## 17 rabbit 2 0 0 0 0
## 18 spaceship 2 0 0 0 100
## 19 star 2 0 0 100 0
## 20 starfish 2 0 0 0 0
## 21 trapezoid 2 0 0 0 0
## 22 wings 2 50 0 50 0
## 23 angle 1 0 100 0 0
## 24 archerbow 1 0 0 0 100
## 25 arrow 1 0 0 0 100
## 26 bag 1 0 0 0 0
## 27 beaver 1 0 0 0 100
## 28 boat 1 0 0 0 100
## 29 box 1 100 0 0 0
## 30 cage 1 0 0 0 100
## 31 car 1 0 0 0 100
## 32 carrot 1 0 100 0 0
## 33 cliff 1 0 0 0 0
## 34 cloud 1 0 0 0 0
## 35 corn 1 0 100 0 0
## 36 cow 1 0 0 0 100
## 37 determined 1 0 0 0 0
## 38 dinosaur 1 0 0 0 0
## 39 doubletriangle 1 0 0 100 0
## 40 drumstick 1 0 0 0 100
## 41 enzyme 1 0 0 0 100
## 42 flower 1 0 0 0 100
## 43 hand 1 0 0 0 0
## 44 hierarchy 1 0 0 0 100
## 45 hope 1 100 0 0 0
## 46 hourglass 1 0 0 100 0
## 47 infinity 1 0 0 100 0
## 48 jellyfish 1 0 0 0 0
## 49 kite 1 100 0 0 0
## 50 love 1 0 0 0 100
## 51 minotaur 1 0 0 0 0
## 52 missile 1 0 0 0 0
## 53 mist 1 0 0 0 100
## 54 pants 1 0 0 0 0
## 55 plane 1 0 0 0 0
## 56 quilt 1 0 0 0 0
## 57 ring 1 0 0 0 0
## 58 rock 1 0 0 0 100
## 59 scorpion 1 0 0 100 0
## 60 seal 1 0 0 100 0
## 61 shark 1 0 0 0 100
## 62 shield 1 100 0 0 0
## 63 stingray 1 0 0 0 0
## 64 stone 1 100 0 0 0
## 65 table 1 0 0 0 0
## 66 tank 1 0 0 0 0
## 67 victoria 1 0 0 0 100
## 68 whale 1 0 0 0 0
## # ℹ abbreviated name: ¹total_drawings_of_label
## # ℹ 3 more variables: Cluster_7 <dbl>, Cluster_2 <dbl>, Cluster_1 <dbl>
set.seed(42)
cat("Skeleton n:", nrow(net_skel_capped), "\n")
## Skeleton n: 77
net_skel_results_capped <- net_run_pipeline(net_skel_capped, "Skeletons (capped)")
##
## ###########################################################
## SKELETONS (CAPPED) -- NETWORK FEATURES
## n = 77
## ###########################################################
##
## Feature matrix: 77 x 17
## Features used: s01_n_links, s02_max_degree, s03_avg_degree, s04_clustering_coeff, s05_max_core_number, s06_n_cycles, s07_max_cycle_size, s09_diameter, s10_avg_shortest_path, s11_edge_connectivity, s12_spatial_diameter, s13_avg_link_length, s14_min_angle, s15_avg_angle, s17_avg_magnitude, n_nodes, density
## PCA components for 80% var: 4
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.397 2.159 1.443 1.364 0.987 0.661 0.630 0.586
## Proportion of Variance 0.338 0.274 0.123 0.109 0.057 0.026 0.023 0.020
## Cumulative Proportion 0.338 0.612 0.735 0.844 0.902 0.927 0.951 0.971
##
## === GMM Results ===
## Model: EEV | Clusters: 6 | BIC: -656.46
## Mean uncertainty: 0.029
##
## Cluster sizes:
##
## 1 2 3 4 5 6
## 24 25 2 12 9 5
##
## === Feature means per cluster ===
## # A tibble: 17 × 7
## name `1` `2` `3` `4` `5` `6`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 s01_n_links 9.38 6.92 10 4.5 1.56 35.6
## 2 s02_max_degree 2.29 2.44 5 3.67 1 3
## 3 s03_avg_degree 1.72 1.69 2 1.56 1 1.90
## 4 s04_clustering_coeff 0 0 0.564 0 0 0
## 5 s05_max_core_number 1 1 2 1 1 1
## 6 s06_n_cycles 0 0 1 0 0 0
## 7 s07_max_cycle_size 0 0 3 0 0 0
## 8 s09_diameter 7.96 6 4 2.75 1 23.8
## 9 s10_avg_shortest_path 3.35 2.71 2.28 1.71 1 8.86
## 10 s11_edge_connectivity 1 1 1.5 1 1 1
## 11 s12_spatial_diameter 559. 990. 452. 363. 410. 1075.
## 12 s13_avg_link_length 107. 271. 168. 170. 309. 122.
## 13 s14_min_angle 59.2 64.8 9.11 53.9 360 36.5
## 14 s15_avg_angle 118. 109. 81.9 97.3 360 125.
## 15 s17_avg_magnitude 2.53 5.37 4.79 3.09 3.97 2.74
## 16 n_nodes 10.7 7.92 10 5.58 3.11 37.6
## 17 density 0.252 0.314 0.562 0.41 0.748 0.058
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 169.62 225 0.997672
## Pearson 260.29 225 0.053167
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.878
## Cramer's V : 0.822
## Observed V: 0.822 | Perm mean: 0.769 +- 0.037 | p: 0.0792
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 111.78 125 0.795354
## Pearson 163.66 125 0.011546
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.825
## Cramer's V : 0.652
## Observed V: 0.652 | Perm mean: 0.573 +- 0.043 | p: 0.0428
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 58.994 50 0.179758
## Pearson 64.638 50 0.079791
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.676
## Cramer's V : 0.41
## Observed V: 0.41 | Perm mean: 0.361 +- 0.036 | p: 0.09
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 44.488 30 0.043049
## Pearson 37.382 30 0.166276
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.572
## Cramer's V : 0.312
## Observed V: 0.312 | Perm mean: 0.279 +- 0.033 | p: 0.16
##
## === Cluster composition (L1) ===
## # A tibble: 62 × 5
## # Groups: cluster [6]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 bird 4 24 16.7
## 2 1 lightningbolt 3 24 12.5
## 3 1 dog 2 24 8.3
## 4 1 hand 2 24 8.3
## 5 1 abstractshape 1 24 4.2
## 6 1 brightstars 1 24 4.2
## 7 1 face 1 24 4.2
## 8 1 fish 1 24 4.2
## 9 1 ghost 1 24 4.2
## 10 1 hook 1 24 4.2
## 11 1 letter 1 24 4.2
## 12 1 line 1 24 4.2
## 13 1 s 1 24 4.2
## 14 1 saucepan 1 24 4.2
## 15 1 tree 1 24 4.2
## 16 1 triangle 1 24 4.2
## 17 1 zigzag 1 24 4.2
## 18 2 brightstars 2 25 8
## 19 2 heart 2 25 8
## 20 2 horse 2 25 8
## 21 2 life 2 25 8
## 22 2 lightningbolt 2 25 8
## 23 2 abstractshape 1 25 4
## 24 2 arrow 1 25 4
## 25 2 bowl 1 25 4
## 26 2 cat 1 25 4
## 27 2 catears 1 25 4
## 28 2 falling 1 25 4
## 29 2 hand 1 25 4
## 30 2 letter 1 25 4
## 31 2 man 1 25 4
## 32 2 mermaid tail 1 25 4
## 33 2 mountain 1 25 4
## 34 2 net 1 25 4
## 35 2 snake 1 25 4
## 36 2 wing 1 25 4
## 37 2 wings 1 25 4
## 38 3 sunflower 1 2 50
## 39 3 wings 1 2 50
## 40 4 hand 2 12 16.7
## 41 4 letter 2 12 16.7
## 42 4 abstractshape 1 12 8.3
## 43 4 arrow 1 12 8.3
## 44 4 diver 1 12 8.3
## 45 4 lightningbolt 1 12 8.3
## 46 4 mouth 1 12 8.3
## 47 4 person 1 12 8.3
## 48 4 seven 1 12 8.3
## 49 4 star 1 12 8.3
## 50 5 line 2 9 22.2
## 51 5 arrow 1 9 11.1
## 52 5 c 1 9 11.1
## 53 5 eye 1 9 11.1
## 54 5 letter 1 9 11.1
## 55 5 link 1 9 11.1
## 56 5 motion 1 9 11.1
## 57 5 wishes 1 9 11.1
## 58 6 ant 1 5 20
## 59 6 centaur 1 5 20
## 60 6 fairy 1 5 20
## 61 6 hand 1 5 20
## 62 6 tree 1 5 20
if (!is.null(net_skel_results_capped)) {
cat("\n=== Network SKELETONS: Label Distribution ===\n")
cmp_label_distribution(net_skel_results_capped$data, "cluster") %>% print(n = 69)
}
##
## === Network SKELETONS: Label Distribution ===
## # A tibble: 46 × 8
## # Groups: drawn_l1 [46]
## drawn_l1 total_drawings_of_label Cluster_1 Cluster_2 Cluster_4 Cluster_6
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 hand 6 33.3 16.7 33.3 16.7
## 2 lightningbolt 6 50 33.3 16.7 0
## 3 letter 5 20 20 40 0
## 4 bird 4 100 0 0 0
## 5 abstractshape 3 33.3 33.3 33.3 0
## 6 arrow 3 0 33.3 33.3 0
## 7 brightstars 3 33.3 66.7 0 0
## 8 line 3 33.3 0 0 0
## 9 dog 2 100 0 0 0
## 10 heart 2 0 100 0 0
## 11 horse 2 0 100 0 0
## 12 life 2 0 100 0 0
## 13 tree 2 50 0 0 50
## 14 wings 2 0 50 0 0
## 15 ant 1 0 0 0 100
## 16 bowl 1 0 100 0 0
## 17 c 1 0 0 0 0
## 18 cat 1 0 100 0 0
## 19 catears 1 0 100 0 0
## 20 centaur 1 0 0 0 100
## 21 diver 1 0 0 100 0
## 22 eye 1 0 0 0 0
## 23 face 1 100 0 0 0
## 24 fairy 1 0 0 0 100
## 25 falling 1 0 100 0 0
## 26 fish 1 100 0 0 0
## 27 ghost 1 100 0 0 0
## 28 hook 1 100 0 0 0
## 29 link 1 0 0 0 0
## 30 man 1 0 100 0 0
## 31 mermaid tail 1 0 100 0 0
## 32 motion 1 0 0 0 0
## 33 mountain 1 0 100 0 0
## 34 mouth 1 0 0 100 0
## 35 net 1 0 100 0 0
## 36 person 1 0 0 100 0
## 37 s 1 100 0 0 0
## 38 saucepan 1 100 0 0 0
## 39 seven 1 0 0 100 0
## 40 snake 1 0 100 0 0
## 41 star 1 0 0 100 0
## 42 sunflower 1 0 0 0 0
## 43 triangle 1 100 0 0 0
## 44 wing 1 0 100 0 0
## 45 wishes 1 0 0 0 0
## 46 zigzag 1 100 0 0 0
## # ℹ 2 more variables: Cluster_5 <dbl>, Cluster_3 <dbl>
set.seed(42)
cat("Mixed n:", nrow(net_mixed_capped), "\n")
## Mixed n: 172
net_mixed_results_capped <- net_run_pipeline(net_mixed_capped, "Mixed (capped)")
##
## ###########################################################
## MIXED (CAPPED) -- NETWORK FEATURES
## n = 172
## ###########################################################
##
## Feature matrix: 172 x 17
## Features used: s01_n_links, s02_max_degree, s03_avg_degree, s04_clustering_coeff, s05_max_core_number, s06_n_cycles, s07_max_cycle_size, s09_diameter, s10_avg_shortest_path, s11_edge_connectivity, s12_spatial_diameter, s13_avg_link_length, s14_min_angle, s15_avg_angle, s17_avg_magnitude, n_nodes, density
## PCA components for 80% var: 5
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.409 1.925 1.393 1.097 1.017 0.868 0.779 0.756
## Proportion of Variance 0.341 0.218 0.114 0.071 0.061 0.044 0.036 0.034
## Cumulative Proportion 0.341 0.559 0.674 0.745 0.805 0.850 0.885 0.919
##
## === GMM Results ===
## Model: VEV | Clusters: 4 | BIC: -2831.21
## Mean uncertainty: 0.042
##
## Cluster sizes:
##
## 1 2 3 4
## 69 40 27 36
##
## === Feature means per cluster ===
## # A tibble: 17 × 5
## name `1` `2` `3` `4`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 s01_n_links 9.97 14.0 38.0 21.1
## 2 s02_max_degree 3.30 3.85 7.22 3.5
## 3 s03_avg_degree 2.05 2.42 2.82 2.05
## 4 s04_clustering_coeff 0.157 0.194 0.346 0.08
## 5 s05_max_core_number 2 2.05 2.37 2
## 6 s06_n_cycles 1.42 3.62 10.8 2.56
## 7 s07_max_cycle_size 5.20 6.25 11.8 11.6
## 8 s09_diameter 5.03 4.45 9.44 8.44
## 9 s10_avg_shortest_path 2.46 2.30 4.33 3.94
## 10 s11_edge_connectivity 1 1.95 1.41 1.42
## 11 s12_spatial_diameter 920. 765. 1008. 668.
## 12 s13_avg_link_length 274. 234. 269. 113.
## 13 s14_min_angle 34.8 22.9 6.91 35.4
## 14 s15_avg_angle 97.7 89.6 91.6 116.
## 15 s17_avg_magnitude 4.31 3.92 3.36 2.30
## 16 n_nodes 9.71 11.6 28.3 20.8
## 17 density 0.314 0.334 0.243 0.127
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 298.20 255 0.0326250
## Pearson 320.76 255 0.0032378
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.807
## Cramer's V : 0.788
## Observed V: 0.788 | Perm mean: 0.704 +- 0.017 | p: 0
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 223.78 144 2.3044e-05
## Pearson 224.17 144 2.1386e-05
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.752
## Cramer's V : 0.659
## Observed V: 0.659 | Perm mean: 0.53 +- 0.023 | p: 0
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 64.324 42 0.014903
## Pearson 58.638 42 0.045519
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.504
## Cramer's V : 0.337
## Observed V: 0.337 | Perm mean: 0.286 +- 0.028 | p: 0.0348
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 33.161 18 0.015959
## Pearson 31.436 18 0.025617
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.393
## Cramer's V : 0.247
## Observed V: 0.247 | Perm mean: 0.185 +- 0.029 | p: 0.0234
##
## === Cluster composition (L1) ===
## # A tibble: 114 × 5
## # Groups: cluster [4]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 bird 7 69 10.1
## 2 1 kite 5 69 7.2
## 3 1 person 5 69 7.2
## 4 1 plant 5 69 7.2
## 5 1 arrow 3 69 4.3
## 6 1 horse 3 69 4.3
## 7 1 boat 2 69 2.9
## 8 1 butterfly 2 69 2.9
## 9 1 crab 2 69 2.9
## 10 1 dog 2 69 2.9
## 11 1 leaf 2 69 2.9
## 12 1 shovel 2 69 2.9
## 13 1 spear 2 69 2.9
## 14 1 tree 2 69 2.9
## 15 1 umbrella 2 69 2.9
## 16 1 abstractshape 1 69 1.4
## 17 1 arrowhead 1 69 1.4
## 18 1 cat 1 69 1.4
## 19 1 fox 1 69 1.4
## 20 1 frog 1 69 1.4
## 21 1 gemini 1 69 1.4
## 22 1 hourglass 1 69 1.4
## 23 1 jigsaw 1 69 1.4
## 24 1 m 1 69 1.4
## 25 1 mountain 1 69 1.4
## 26 1 necklace 1 69 1.4
## 27 1 paralellogram 1 69 1.4
## 28 1 rabbit 1 69 1.4
## 29 1 scales 1 69 1.4
## 30 1 scorpion 1 69 1.4
## 31 1 sheep 1 69 1.4
## 32 1 slingshot 1 69 1.4
## 33 1 snail 1 69 1.4
## 34 1 spoon 1 69 1.4
## 35 1 stairs 1 69 1.4
## 36 1 stingray 1 69 1.4
## 37 1 sword 1 69 1.4
## 38 1 thoughtbubble 1 69 1.4
## 39 2 star 4 40 10
## 40 2 dog 3 40 7.5
## 41 2 bird 2 40 5
## 42 2 fish 2 40 5
## 43 2 fox 2 40 5
## 44 2 house 2 40 5
## 45 2 kite 2 40 5
## 46 2 person 2 40 5
## 47 2 plane 2 40 5
## 48 2 rocket 2 40 5
## 49 2 squirrel 2 40 5
## 50 2 abstractshape 1 40 2.5
## 51 2 book 1 40 2.5
## 52 2 bowl 1 40 2.5
## 53 2 cake 1 40 2.5
## 54 2 cat 1 40 2.5
## 55 2 corn 1 40 2.5
## 56 2 croissant 1 40 2.5
## 57 2 diamond 1 40 2.5
## 58 2 hourglass 1 40 2.5
## 59 2 island 1 40 2.5
## 60 2 lion 1 40 2.5
## 61 2 mountain 1 40 2.5
## 62 2 spill 1 40 2.5
## 63 2 teapot 1 40 2.5
## 64 2 umbrella 1 40 2.5
## 65 3 flower 4 27 14.8
## 66 3 butterfly 2 27 7.4
## 67 3 kite 2 27 7.4
## 68 3 abstractshape 1 27 3.7
## 69 3 arrow 1 27 3.7
## 70 3 bat 1 27 3.7
## 71 3 box 1 27 3.7
## 72 3 butterfly_flower 1 27 3.7
## 73 3 car 1 27 3.7
## 74 3 cat 1 27 3.7
## 75 3 dragon 1 27 3.7
## 76 3 firework 1 27 3.7
## 77 3 fox 1 27 3.7
## 78 3 heart 1 27 3.7
## 79 3 horse 1 27 3.7
## 80 3 monster 1 27 3.7
## 81 3 pot 1 27 3.7
## 82 3 pyramid 1 27 3.7
## 83 3 rose 1 27 3.7
## 84 3 stage 1 27 3.7
## 85 3 triangle 1 27 3.7
## 86 3 woman 1 27 3.7
## 87 4 bird 4 36 11.1
## 88 4 book 3 36 8.3
## 89 4 person 3 36 8.3
## 90 4 fox 2 36 5.6
## 91 4 antelope 1 36 2.8
## 92 4 arrow_box 1 36 2.8
## 93 4 assimilator 1 36 2.8
## 94 4 boy 1 36 2.8
## 95 4 butterfly 1 36 2.8
## 96 4 caduceus 1 36 2.8
## 97 4 cat 1 36 2.8
## 98 4 comma 1 36 2.8
## 99 4 dog 1 36 2.8
## 100 4 dragonfly 1 36 2.8
## 101 4 eye 1 36 2.8
## 102 4 fairy 1 36 2.8
## 103 4 lionhead 1 36 2.8
## 104 4 man 1 36 2.8
## 105 4 monster 1 36 2.8
## 106 4 rabbit 1 36 2.8
## 107 4 robotman 1 36 2.8
## 108 4 snake 1 36 2.8
## 109 4 social_media 1 36 2.8
## 110 4 squirrel 1 36 2.8
## 111 4 sun 1 36 2.8
## 112 4 tortoise 1 36 2.8
## 113 4 umbrella 1 36 2.8
## 114 4 waterfall 1 36 2.8
if (!is.null(net_mixed_results_capped)) {
cat("\n=== Network MIXED: Label Distribution ===\n")
cmp_label_distribution(net_mixed_results_capped$data, "cluster") %>% print(n = 69)
}
##
## === Network MIXED: Label Distribution ===
## # A tibble: 86 × 6
## # Groups: drawn_l1 [86]
## drawn_l1 total_drawings_of_la…¹ Cluster_1 Cluster_2 Cluster_3 Cluster_4
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 bird 13 53.8 15.4 0 30.8
## 2 person 10 50 20 0 30
## 3 kite 9 55.6 22.2 22.2 0
## 4 dog 6 33.3 50 0 16.7
## 5 fox 6 16.7 33.3 16.7 33.3
## 6 butterfly 5 40 0 40 20
## 7 plant 5 100 0 0 0
## 8 arrow 4 75 0 25 0
## 9 book 4 0 25 0 75
## 10 cat 4 25 25 25 25
## 11 flower 4 0 0 100 0
## 12 horse 4 75 0 25 0
## 13 star 4 0 100 0 0
## 14 umbrella 4 50 25 0 25
## 15 abstractshape 3 33.3 33.3 33.3 0
## 16 squirrel 3 0 66.7 0 33.3
## 17 boat 2 100 0 0 0
## 18 crab 2 100 0 0 0
## 19 fish 2 0 100 0 0
## 20 hourglass 2 50 50 0 0
## 21 house 2 0 100 0 0
## 22 leaf 2 100 0 0 0
## 23 monster 2 0 0 50 50
## 24 mountain 2 50 50 0 0
## 25 plane 2 0 100 0 0
## 26 rabbit 2 50 0 0 50
## 27 rocket 2 0 100 0 0
## 28 shovel 2 100 0 0 0
## 29 spear 2 100 0 0 0
## 30 tree 2 100 0 0 0
## 31 antelope 1 0 0 0 100
## 32 arrow_box 1 0 0 0 100
## 33 arrowhead 1 100 0 0 0
## 34 assimilator 1 0 0 0 100
## 35 bat 1 0 0 100 0
## 36 bowl 1 0 100 0 0
## 37 box 1 0 0 100 0
## 38 boy 1 0 0 0 100
## 39 butterfly_flo… 1 0 0 100 0
## 40 caduceus 1 0 0 0 100
## 41 cake 1 0 100 0 0
## 42 car 1 0 0 100 0
## 43 comma 1 0 0 0 100
## 44 corn 1 0 100 0 0
## 45 croissant 1 0 100 0 0
## 46 diamond 1 0 100 0 0
## 47 dragon 1 0 0 100 0
## 48 dragonfly 1 0 0 0 100
## 49 eye 1 0 0 0 100
## 50 fairy 1 0 0 0 100
## 51 firework 1 0 0 100 0
## 52 frog 1 100 0 0 0
## 53 gemini 1 100 0 0 0
## 54 heart 1 0 0 100 0
## 55 island 1 0 100 0 0
## 56 jigsaw 1 100 0 0 0
## 57 lion 1 0 100 0 0
## 58 lionhead 1 0 0 0 100
## 59 m 1 100 0 0 0
## 60 man 1 0 0 0 100
## 61 necklace 1 100 0 0 0
## 62 paralellogram 1 100 0 0 0
## 63 pot 1 0 0 100 0
## 64 pyramid 1 0 0 100 0
## 65 robotman 1 0 0 0 100
## 66 rose 1 0 0 100 0
## 67 scales 1 100 0 0 0
## 68 scorpion 1 100 0 0 0
## 69 sheep 1 100 0 0 0
## # ℹ 17 more rows
## # ℹ abbreviated name: ¹total_drawings_of_label
cat("\n========================================\n")
##
## ========================================
cat("NETWORK ANALYSIS SUMMARY\n")
## NETWORK ANALYSIS SUMMARY
cat("========================================\n")
## ========================================
net_results_list <- list(
"ALL (capped)" = net_all_results_capped,
"SILHOUETTES (capped)" = net_sil_results_capped,
"SKELETONS (capped)" = net_skel_results_capped,
"MIXED (capped)" = net_mixed_results_capped
)
for (name in names(net_results_list)) {
res <- net_results_list[[name]]
if (!is.null(res)) {
cat(paste0("\n", name, ":\n"))
cat(" n:", nrow(res$data), "\n")
cat(" Clusters:", res$gmm$G, "(", res$gmm$modelName, ")\n")
cat(" BIC:", round(res$gmm$bic, 2), "\n")
cat(" Mean uncertainty:", round(mean(res$gmm$uncertainty), 3), "\n")
}
}
##
## ALL (capped):
## n: 402
## Clusters: 7 ( VVV )
## BIC: -5030.27
## Mean uncertainty: 0.019
##
## SILHOUETTES (capped):
## n: 153
## Clusters: 7 ( VVV )
## BIC: -946.92
## Mean uncertainty: 0.028
##
## SKELETONS (capped):
## n: 77
## Clusters: 6 ( EEV )
## BIC: -656.46
## Mean uncertainty: 0.029
##
## MIXED (capped):
## n: 172
## Clusters: 4 ( VEV )
## BIC: -2831.21
## Mean uncertainty: 0.042
cat("========================================\n")
## ========================================
This uses only the image_ids shared between network and
shape datasets for the head-to-head comparison in Part E.
set.seed(42)
net_results <- net_run_pipeline(net_sil_matched, "Silhouettes matched (capped)")
##
## ###########################################################
## SILHOUETTES MATCHED (CAPPED) -- NETWORK FEATURES
## n = 153
## ###########################################################
##
## Feature matrix: 153 x 17
## Features used: s01_n_links, s02_max_degree, s03_avg_degree, s04_clustering_coeff, s05_max_core_number, s06_n_cycles, s07_max_cycle_size, s09_diameter, s10_avg_shortest_path, s11_edge_connectivity, s12_spatial_diameter, s13_avg_link_length, s14_min_angle, s15_avg_angle, s17_avg_magnitude, n_nodes, density
## PCA components for 80% var: 4
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.473 2.038 1.481 1.205 1.108 0.900 0.605 0.463
## Proportion of Variance 0.360 0.244 0.129 0.085 0.072 0.048 0.022 0.013
## Cumulative Proportion 0.360 0.604 0.733 0.819 0.891 0.938 0.960 0.973
##
## === GMM Results ===
## Model: VVV | Clusters: 7 | BIC: -946.92
## Mean uncertainty: 0.028
##
## Cluster sizes:
##
## 1 2 3 4 5 6 7
## 19 9 20 27 18 39 21
##
## === Feature means per cluster ===
## # A tibble: 17 × 8
## name `1` `2` `3` `4` `5` `6` `7`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 s01_n_links 8.10 29.6 5.3 3 11.8 1.28e+1 7.48
## 2 s02_max_degree 2 2 2 2 3.06 2 e+0 2
## 3 s03_avg_degree 2 2 2 2 2.03 2 e+0 2
## 4 s04_clustering_coeff 0 0 0 1 0.242 0 0
## 5 s05_max_core_number 2 2 2 2 1.67 2 e+0 2
## 6 s06_n_cycles 1 1 1 1 1.61 1 e+0 1
## 7 s07_max_cycle_size 8.10 29.6 5.3 3 4.89 1.28e+1 7.48
## 8 s09_diameter 3.84 14.4 2.5 1 6.5 6.08e+0 3.57
## 9 s10_avg_shortest_path 2.30 7.64 1.62 1 3.00 3.46e+0 2.15
## 10 s11_edge_connectivity 2 2 2 2 1.61 2 e+0 2
## 11 s12_spatial_diameter 1289. 969. 572. 378. 700. 1.13e+3 273.
## 12 s13_avg_link_length 444. 121. 283. 297. 194. 2.74e+2 95.6
## 13 s14_min_angle 25.4 40.9 45.9 35.2 70.7 4.53e+1 54.0
## 14 s15_avg_angle 102. 129. 93.8 60 132. 1.15e+2 116.
## 15 s17_avg_magnitude 6.21 2.45 5.02 3.60 3.53 4.50e+0 2.23
## 16 n_nodes 8.10 29.6 5.3 3 11.3 1.28e+1 7.48
## 17 density 0.343 0.073 0.498 1 0.377 1.84e-1 0.352
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 397.40 402 5.5547e-01
## Pearson 562.97 402 1.8746e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.887
## Cramer's V : 0.783
## Observed V: 0.783 | Perm mean: 0.664 +- 0.016 | p: 0
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 302.94 240 3.6374e-03
## Pearson 385.29 240 7.6555e-09
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.846
## Cramer's V : 0.648
## Observed V: 0.648 | Perm mean: 0.513 +- 0.018 | p: 0
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 161.98 78 7.8525e-08
## Pearson 169.24 78 1.0644e-08
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.725
## Cramer's V : 0.429
## Observed V: 0.429 | Perm mean: 0.292 +- 0.022 | p: 0
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 105.947 36 8.2155e-09
## Pearson 96.978 36 1.6870e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.623
## Cramer's V : 0.325
## Observed V: 0.325 | Perm mean: 0.198 +- 0.022 | p: 0
##
## === Cluster composition (L1) ===
## # A tibble: 95 × 5
## # Groups: cluster [7]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 bird 2 19 10.5
## 2 1 mountain 2 19 10.5
## 3 1 rabbit 2 19 10.5
## 4 1 trapezoid 2 19 10.5
## 5 1 bat 1 19 5.3
## 6 1 brightstars 1 19 5.3
## 7 1 cliff 1 19 5.3
## 8 1 cloud 1 19 5.3
## 9 1 hand 1 19 5.3
## 10 1 jellyfish 1 19 5.3
## 11 1 lightningbolt 1 19 5.3
## 12 1 pants 1 19 5.3
## 13 1 starfish 1 19 5.3
## 14 1 stingray 1 19 5.3
## 15 1 whale 1 19 5.3
## 16 2 bird 2 9 22.2
## 17 2 bag 1 9 11.1
## 18 2 cat 1 9 11.1
## 19 2 dinosaur 1 9 11.1
## 20 2 horse 1 9 11.1
## 21 2 minotaur 1 9 11.1
## 22 2 plane 1 9 11.1
## 23 2 tank 1 9 11.1
## 24 3 heart 4 20 20
## 25 3 diamond 2 20 10
## 26 3 lightningbolt 2 20 10
## 27 3 mountain 2 20 10
## 28 3 abstractshape 1 20 5
## 29 3 bat 1 20 5
## 30 3 box 1 20 5
## 31 3 brightstars 1 20 5
## 32 3 hope 1 20 5
## 33 3 house 1 20 5
## 34 3 kite 1 20 5
## 35 3 shield 1 20 5
## 36 3 stone 1 20 5
## 37 3 wings 1 20 5
## 38 4 triangle 14 27 51.9
## 39 4 abstractshape 4 27 14.8
## 40 4 bird 3 27 11.1
## 41 4 angle 1 27 3.7
## 42 4 carrot 1 27 3.7
## 43 4 cat 1 27 3.7
## 44 4 corn 1 27 3.7
## 45 4 lightningbolt 1 27 3.7
## 46 4 tear 1 27 3.7
## 47 5 fish 4 18 22.2
## 48 5 heart 2 18 11.1
## 49 5 star 2 18 11.1
## 50 5 abstractshape 1 18 5.6
## 51 5 butterfly 1 18 5.6
## 52 5 doubletriangle 1 18 5.6
## 53 5 hourglass 1 18 5.6
## 54 5 infinity 1 18 5.6
## 55 5 mountain 1 18 5.6
## 56 5 person 1 18 5.6
## 57 5 scorpion 1 18 5.6
## 58 5 seal 1 18 5.6
## 59 5 wings 1 18 5.6
## 60 6 bird 7 39 17.9
## 61 6 heart 4 39 10.3
## 62 6 butterfly 2 39 5.1
## 63 6 cat 2 39 5.1
## 64 6 mountain 2 39 5.1
## 65 6 spaceship 2 39 5.1
## 66 6 abstractshape 1 39 2.6
## 67 6 archerbow 1 39 2.6
## 68 6 arrow 1 39 2.6
## 69 6 beaver 1 39 2.6
## 70 6 boat 1 39 2.6
## 71 6 cage 1 39 2.6
## 72 6 car 1 39 2.6
## 73 6 cow 1 39 2.6
## 74 6 drumstick 1 39 2.6
## 75 6 enzyme 1 39 2.6
## 76 6 fish 1 39 2.6
## 77 6 flower 1 39 2.6
## 78 6 hierarchy 1 39 2.6
## 79 6 horse 1 39 2.6
## 80 6 love 1 39 2.6
## 81 6 mist 1 39 2.6
## 82 6 person 1 39 2.6
## 83 6 rock 1 39 2.6
## 84 6 shark 1 39 2.6
## 85 6 victoria 1 39 2.6
## 86 7 heart 6 21 28.6
## 87 7 house 4 21 19
## 88 7 abstractshape 3 21 14.3
## 89 7 tear 2 21 9.5
## 90 7 determined 1 21 4.8
## 91 7 missile 1 21 4.8
## 92 7 quilt 1 21 4.8
## 93 7 ring 1 21 4.8
## 94 7 starfish 1 21 4.8
## 95 7 table 1 21 4.8
if (!is.null(net_results)) {
cat("\n=== Network MATCHED SILHOUETTES: Label Distribution ===\n")
cmp_label_distribution(net_results$data, "cluster") %>% print(n = 69)
}
##
## === Network MATCHED SILHOUETTES: Label Distribution ===
## # A tibble: 68 × 9
## # Groups: drawn_l1 [68]
## drawn_l1 total_drawings_of_la…¹ Cluster_3 Cluster_4 Cluster_5 Cluster_6
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 heart 16 25 0 12.5 25
## 2 bird 14 0 21.4 0 50
## 3 triangle 14 0 100 0 0
## 4 abstractshape 10 10 40 10 10
## 5 mountain 7 28.6 0 14.3 28.6
## 6 fish 5 0 0 80 20
## 7 house 5 20 0 0 0
## 8 cat 4 0 25 0 50
## 9 lightningbolt 4 50 25 0 0
## 10 butterfly 3 0 0 33.3 66.7
## 11 tear 3 0 33.3 0 0
## 12 bat 2 50 0 0 0
## 13 brightstars 2 50 0 0 0
## 14 diamond 2 100 0 0 0
## 15 horse 2 0 0 0 50
## 16 person 2 0 0 50 50
## 17 rabbit 2 0 0 0 0
## 18 spaceship 2 0 0 0 100
## 19 star 2 0 0 100 0
## 20 starfish 2 0 0 0 0
## 21 trapezoid 2 0 0 0 0
## 22 wings 2 50 0 50 0
## 23 angle 1 0 100 0 0
## 24 archerbow 1 0 0 0 100
## 25 arrow 1 0 0 0 100
## 26 bag 1 0 0 0 0
## 27 beaver 1 0 0 0 100
## 28 boat 1 0 0 0 100
## 29 box 1 100 0 0 0
## 30 cage 1 0 0 0 100
## 31 car 1 0 0 0 100
## 32 carrot 1 0 100 0 0
## 33 cliff 1 0 0 0 0
## 34 cloud 1 0 0 0 0
## 35 corn 1 0 100 0 0
## 36 cow 1 0 0 0 100
## 37 determined 1 0 0 0 0
## 38 dinosaur 1 0 0 0 0
## 39 doubletriangle 1 0 0 100 0
## 40 drumstick 1 0 0 0 100
## 41 enzyme 1 0 0 0 100
## 42 flower 1 0 0 0 100
## 43 hand 1 0 0 0 0
## 44 hierarchy 1 0 0 0 100
## 45 hope 1 100 0 0 0
## 46 hourglass 1 0 0 100 0
## 47 infinity 1 0 0 100 0
## 48 jellyfish 1 0 0 0 0
## 49 kite 1 100 0 0 0
## 50 love 1 0 0 0 100
## 51 minotaur 1 0 0 0 0
## 52 missile 1 0 0 0 0
## 53 mist 1 0 0 0 100
## 54 pants 1 0 0 0 0
## 55 plane 1 0 0 0 0
## 56 quilt 1 0 0 0 0
## 57 ring 1 0 0 0 0
## 58 rock 1 0 0 0 100
## 59 scorpion 1 0 0 100 0
## 60 seal 1 0 0 100 0
## 61 shark 1 0 0 0 100
## 62 shield 1 100 0 0 0
## 63 stingray 1 0 0 0 0
## 64 stone 1 100 0 0 0
## 65 table 1 0 0 0 0
## 66 tank 1 0 0 0 0
## 67 victoria 1 0 0 0 100
## 68 whale 1 0 0 0 0
## # ℹ abbreviated name: ¹total_drawings_of_label
## # ℹ 3 more variables: Cluster_7 <dbl>, Cluster_2 <dbl>, Cluster_1 <dbl>
shp_run_pipeline <- function(data, label) {
cat("\n###########################################################\n")
cat(" ", toupper(label), " -- SHAPE FEATURES\n")
cat(" n =", nrow(data), "\n")
cat("###########################################################\n\n")
if (nrow(data) < 10) { cat("Too few observations.\n"); return(NULL) }
shp_feat_names <- c(
"rot_aspect_ratio", "elongation", "solidity", "compactness",
"n_defects", "max_defect_depth", "n_corners",
"h_symmetry", "v_symmetry",
"hu1", "hu2", "hu3", "hu4"
)
shp_feat_mat <- data %>%
select(any_of(shp_feat_names)) %>%
mutate(across(everything(), ~ifelse(is.na(.), median(., na.rm = TRUE), .)))
cat("Feature matrix:", nrow(shp_feat_mat), "x", ncol(shp_feat_mat), "\n")
cat("Features used:", paste(names(shp_feat_mat), collapse = ", "), "\n")
shp_scaled <- scale(shp_feat_mat)
shp_pca <- prcomp(shp_scaled)
shp_varexp <- summary(shp_pca)$importance[3, ]
shp_ncomp <- which(shp_varexp >= 0.80)[1]
cat("PCA components for 80% var:", shp_ncomp, "\n")
print(round(summary(shp_pca)$importance[, 1:min(8, ncol(shp_scaled))], 3))
shp_scores <- shp_pca$x[, 1:shp_ncomp]
shp_gmm <- Mclust(shp_scores)
cat("\n=== GMM Results ===\n")
cat("Model:", shp_gmm$modelName, " | Clusters:", shp_gmm$G,
" | BIC:", round(shp_gmm$bic, 2), "\n")
cat("Mean uncertainty:", round(mean(shp_gmm$uncertainty), 3), "\n")
data$cluster <- shp_gmm$classification
cat("\nCluster sizes:\n")
print(table(data$cluster))
# Feature means per cluster
cat("\n=== Feature means per cluster ===\n")
data %>%
group_by(cluster) %>%
summarise(across(any_of(shp_feat_names), ~round(mean(., na.rm = TRUE), 3)),
.groups = "drop") %>%
pivot_longer(-cluster) %>%
pivot_wider(names_from = cluster, values_from = value) %>%
print(n = 20)
# Permutation tests
for (level in c("drawn_l1", "drawn_l2", "drawn_l3", "drawn_l4")) {
labels_vec <- data[[level]]
if (all(is.na(labels_vec))) next
cat("\n--- ", toupper(level), " ---\n")
tryCatch({
tab <- table(data$cluster, labels_vec)
print(vcd::assocstats(tab))
perm <- cmp_permutation_test(data$cluster, labels_vec)
cat("Observed V:", round(perm$observed_cv, 3),
" | Perm mean:", round(perm$perm_mean, 3), "+-", round(perm$perm_sd, 3),
" | p:", perm$p_value, "\n")
}, error = function(e) {
cat(" Skipped -- too sparse for this level (", conditionMessage(e), ")\n")
})
}
# Cluster composition (L1)
cat("\n=== Cluster composition (L1) ===\n")
data %>%
count(cluster, drawn_l1) %>%
group_by(cluster) %>%
mutate(cluster_n = sum(n), pct = round(n / cluster_n * 100, 1)) %>%
arrange(cluster, desc(pct)) %>%
print(n = 200)
list(data = data, gmm = shp_gmm, pca = shp_pca,
feature_names = shp_feat_names)
}
set.seed(42)
shp_results <- shp_run_pipeline(shp_sil_matched, "Silhouettes matched (capped)")
##
## ###########################################################
## SILHOUETTES MATCHED (CAPPED) -- SHAPE FEATURES
## n = 153
## ###########################################################
##
## Feature matrix: 153 x 13
## Features used: rot_aspect_ratio, elongation, solidity, compactness, n_defects, max_defect_depth, n_corners, h_symmetry, v_symmetry, hu1, hu2, hu3, hu4
## PCA components for 80% var: 5
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## Standard deviation 2.004 1.604 1.353 1.132 0.917 0.850 0.676 0.603
## Proportion of Variance 0.309 0.198 0.141 0.098 0.065 0.056 0.035 0.028
## Cumulative Proportion 0.309 0.507 0.648 0.746 0.811 0.866 0.901 0.929
##
## === GMM Results ===
## Model: VEV | Clusters: 3 | BIC: -2197.48
## Mean uncertainty: 0.027
##
## Cluster sizes:
##
## 1 2 3
## 100 48 5
##
## === Feature means per cluster ===
## # A tibble: 13 × 4
## name `1` `2` `3`
## <chr> <dbl> <dbl> <dbl>
## 1 rot_aspect_ratio 1.75 2.71 38.4
## 2 elongation 0.394 0.573 0.501
## 3 solidity 0.723 0.979 0.964
## 4 compactness 0.38 0.464 0.449
## 5 n_defects 1.99 0.104 0
## 6 max_defect_depth 212. 9.67 6.43
## 7 n_corners 16.0 5.54 39
## 8 h_symmetry 0.052 0.028 0.153
## 9 v_symmetry 0.037 0.026 0.081
## 10 hu1 1.61 1.54 1.44
## 11 hu2 4.11 3.52 3.35
## 12 hu3 5.91 5.66 4.82
## 13 hu4 7.13 7.30 5.90
##
## --- DRAWN_L1 ---
## X^2 df P(> X^2)
## Likelihood Ratio 166.94 134 0.0282352
## Pearson 185.91 134 0.0020253
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.741
## Cramer's V : 0.779
## Observed V: 0.779 | Perm mean: 0.661 +- 0.066 | p: 0.0386
##
## --- DRAWN_L2 ---
## X^2 df P(> X^2)
## Likelihood Ratio 121.41 80 0.0019505
## Pearson 124.09 80 0.0011593
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.669
## Cramer's V : 0.637
## Observed V: 0.637 | Perm mean: 0.509 +- 0.064 | p: 0.03
##
## --- DRAWN_L3 ---
## X^2 df P(> X^2)
## Likelihood Ratio 83.224 26 6.6183e-08
## Pearson 71.836 26 3.5726e-06
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.565
## Cramer's V : 0.485
## Observed V: 0.485 | Perm mean: 0.29 +- 0.047 | p: 6e-04
##
## --- DRAWN_L4 ---
## X^2 df P(> X^2)
## Likelihood Ratio 62.329 12 8.4639e-09
## Pearson 55.656 12 1.3800e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.516
## Cramer's V : 0.426
## Observed V: 0.426 | Perm mean: 0.194 +- 0.041 | p: 0
##
## === Cluster composition (L1) ===
## # A tibble: 79 × 5
## # Groups: cluster [3]
## cluster drawn_l1 n cluster_n pct
## <dbl> <chr> <int> <int> <dbl>
## 1 1 heart 16 100 16
## 2 1 bird 12 100 12
## 3 1 mountain 7 100 7
## 4 1 fish 5 100 5
## 5 1 abstractshape 4 100 4
## 6 1 cat 4 100 4
## 7 1 butterfly 3 100 3
## 8 1 horse 2 100 2
## 9 1 person 2 100 2
## 10 1 rabbit 2 100 2
## 11 1 spaceship 2 100 2
## 12 1 triangle 2 100 2
## 13 1 archerbow 1 100 1
## 14 1 arrow 1 100 1
## 15 1 bag 1 100 1
## 16 1 bat 1 100 1
## 17 1 beaver 1 100 1
## 18 1 boat 1 100 1
## 19 1 car 1 100 1
## 20 1 cliff 1 100 1
## 21 1 cow 1 100 1
## 22 1 determined 1 100 1
## 23 1 dinosaur 1 100 1
## 24 1 doubletriangle 1 100 1
## 25 1 drumstick 1 100 1
## 26 1 enzyme 1 100 1
## 27 1 flower 1 100 1
## 28 1 hand 1 100 1
## 29 1 hierarchy 1 100 1
## 30 1 hourglass 1 100 1
## 31 1 house 1 100 1
## 32 1 infinity 1 100 1
## 33 1 jellyfish 1 100 1
## 34 1 lightningbolt 1 100 1
## 35 1 minotaur 1 100 1
## 36 1 mist 1 100 1
## 37 1 pants 1 100 1
## 38 1 plane 1 100 1
## 39 1 quilt 1 100 1
## 40 1 ring 1 100 1
## 41 1 rock 1 100 1
## 42 1 scorpion 1 100 1
## 43 1 seal 1 100 1
## 44 1 shark 1 100 1
## 45 1 star 1 100 1
## 46 1 starfish 1 100 1
## 47 1 stingray 1 100 1
## 48 1 table 1 100 1
## 49 1 tank 1 100 1
## 50 1 whale 1 100 1
## 51 1 wings 1 100 1
## 52 2 triangle 12 48 25
## 53 2 abstractshape 6 48 12.5
## 54 2 house 4 48 8.3
## 55 2 lightningbolt 3 48 6.2
## 56 2 brightstars 2 48 4.2
## 57 2 diamond 2 48 4.2
## 58 2 tear 2 48 4.2
## 59 2 trapezoid 2 48 4.2
## 60 2 angle 1 48 2.1
## 61 2 bat 1 48 2.1
## 62 2 bird 1 48 2.1
## 63 2 box 1 48 2.1
## 64 2 carrot 1 48 2.1
## 65 2 cloud 1 48 2.1
## 66 2 corn 1 48 2.1
## 67 2 hope 1 48 2.1
## 68 2 kite 1 48 2.1
## 69 2 love 1 48 2.1
## 70 2 missile 1 48 2.1
## 71 2 shield 1 48 2.1
## 72 2 stone 1 48 2.1
## 73 2 victoria 1 48 2.1
## 74 2 wings 1 48 2.1
## 75 3 bird 1 5 20
## 76 3 cage 1 5 20
## 77 3 star 1 5 20
## 78 3 starfish 1 5 20
## 79 3 tear 1 5 20
if (!is.null(shp_results)) {
cat("\n=== Shape: Label Distribution across Clusters ===\n")
cmp_label_distribution(shp_results$data, "cluster") %>% print(n = 69)
}
##
## === Shape: Label Distribution across Clusters ===
## # A tibble: 68 × 5
## # Groups: drawn_l1 [68]
## drawn_l1 total_drawings_of_label Cluster_1 Cluster_2 Cluster_3
## <chr> <int> <dbl> <dbl> <dbl>
## 1 heart 16 100 0 0
## 2 bird 14 85.7 7.1 7.1
## 3 triangle 14 14.3 85.7 0
## 4 abstractshape 10 40 60 0
## 5 mountain 7 100 0 0
## 6 fish 5 100 0 0
## 7 house 5 20 80 0
## 8 cat 4 100 0 0
## 9 lightningbolt 4 25 75 0
## 10 butterfly 3 100 0 0
## 11 tear 3 0 66.7 33.3
## 12 bat 2 50 50 0
## 13 brightstars 2 0 100 0
## 14 diamond 2 0 100 0
## 15 horse 2 100 0 0
## 16 person 2 100 0 0
## 17 rabbit 2 100 0 0
## 18 spaceship 2 100 0 0
## 19 star 2 50 0 50
## 20 starfish 2 50 0 50
## 21 trapezoid 2 0 100 0
## 22 wings 2 50 50 0
## 23 angle 1 0 100 0
## 24 archerbow 1 100 0 0
## 25 arrow 1 100 0 0
## 26 bag 1 100 0 0
## 27 beaver 1 100 0 0
## 28 boat 1 100 0 0
## 29 box 1 0 100 0
## 30 cage 1 0 0 100
## 31 car 1 100 0 0
## 32 carrot 1 0 100 0
## 33 cliff 1 100 0 0
## 34 cloud 1 0 100 0
## 35 corn 1 0 100 0
## 36 cow 1 100 0 0
## 37 determined 1 100 0 0
## 38 dinosaur 1 100 0 0
## 39 doubletriangle 1 100 0 0
## 40 drumstick 1 100 0 0
## 41 enzyme 1 100 0 0
## 42 flower 1 100 0 0
## 43 hand 1 100 0 0
## 44 hierarchy 1 100 0 0
## 45 hope 1 0 100 0
## 46 hourglass 1 100 0 0
## 47 infinity 1 100 0 0
## 48 jellyfish 1 100 0 0
## 49 kite 1 0 100 0
## 50 love 1 0 100 0
## 51 minotaur 1 100 0 0
## 52 missile 1 0 100 0
## 53 mist 1 100 0 0
## 54 pants 1 100 0 0
## 55 plane 1 100 0 0
## 56 quilt 1 100 0 0
## 57 ring 1 100 0 0
## 58 rock 1 100 0 0
## 59 scorpion 1 100 0 0
## 60 seal 1 100 0 0
## 61 shark 1 100 0 0
## 62 shield 1 0 100 0
## 63 stingray 1 100 0 0
## 64 stone 1 0 100 0
## 65 table 1 100 0 0
## 66 tank 1 100 0 0
## 67 victoria 1 0 100 0
## 68 whale 1 100 0 0
if (!is.null(shp_results)) {
plot(shp_results$gmm, what = "BIC")
}
cat("=============================================================\n")
## =============================================================
cat(" SIDE-BY-SIDE SUMMARY\n")
## SIDE-BY-SIDE SUMMARY
cat("=============================================================\n\n")
## =============================================================
if (!is.null(net_results) && !is.null(shp_results)) {
net_npc <- which(summary(net_results$pca)$importance[3,] >= 0.80)[1]
shp_npc <- which(summary(shp_results$pca)$importance[3,] >= 0.80)[1]
summary_tbl <- tibble(
Metric = c("n observations",
"Features used",
"PCA components (80% var)",
"GMM model selected",
"Clusters (G)",
"BIC",
"Mean uncertainty"),
Network = c(
nrow(net_results$data),
length(net_results$feature_names),
net_npc,
net_results$gmm$modelName,
net_results$gmm$G,
round(net_results$gmm$bic, 2),
round(mean(net_results$gmm$uncertainty), 3)
),
Shape = c(
nrow(shp_results$data),
length(shp_results$feature_names),
shp_npc,
shp_results$gmm$modelName,
shp_results$gmm$G,
round(shp_results$gmm$bic, 2),
round(mean(shp_results$gmm$uncertainty), 3)
)
)
print(summary_tbl)
}
## # A tibble: 7 × 3
## Metric Network Shape
## <chr> <chr> <chr>
## 1 n observations 153 153
## 2 Features used 17 13
## 3 PCA components (80% var) 4 5
## 4 GMM model selected VVV VEV
## 5 Clusters (G) 7 3
## 6 BIC -946.92 -2197.48
## 7 Mean uncertainty 0.028 0.027
if (!is.null(net_results) && !is.null(shp_results)) {
cv_compare <- map_dfr(c("drawn_l1", "drawn_l2", "drawn_l3", "drawn_l4"), function(lvl) {
net_tab <- table(net_results$data$cluster, net_results$data[[lvl]])
net_cv <- vcd::assocstats(net_tab)$cramer
shp_tab <- table(shp_results$data$cluster, shp_results$data[[lvl]])
shp_cv <- vcd::assocstats(shp_tab)$cramer
tibble(level = lvl, V_network = round(net_cv, 3), V_shape = round(shp_cv, 3),
diff_net_minus_shp = round(net_cv - shp_cv, 3))
})
cat("\n=== Cramer's V: Network vs Shape ===\n")
cat(" (Higher V = stronger cluster-label association)\n")
print(cv_compare)
}
##
## === Cramer's V: Network vs Shape ===
## (Higher V = stronger cluster-label association)
## # A tibble: 4 × 4
## level V_network V_shape diff_net_minus_shp
## <chr> <dbl> <dbl> <dbl>
## 1 drawn_l1 0.783 0.779 0.004
## 2 drawn_l2 0.648 0.637 0.011
## 3 drawn_l3 0.429 0.485 -0.055
## 4 drawn_l4 0.325 0.426 -0.101
if (!is.null(net_results) && !is.null(shp_results)) {
purity_net <- cmp_get_purity(net_results$data)
purity_shp <- cmp_get_purity(shp_results$data)
purity_compare <- inner_join(
purity_net %>% rename(purity_network = purity, n_net = total,
modal_cluster_net = modal_cluster),
purity_shp %>% rename(purity_shape = purity, n_shp = total,
modal_cluster_shp = modal_cluster),
by = "drawn_l1"
)
cat("Labels compared:", nrow(purity_compare), "\n")
cat("(Labels present in both models with n >= 2)\n\n")
print(purity_compare %>% arrange(desc(purity_network - purity_shape)), n = 50)
}
## Labels compared: 22
## (Labels present in both models with n >= 2)
##
## # A tibble: 22 × 7
## drawn_l1 n_net modal_cluster_net purity_network n_shp modal_cluster_shp
## <chr> <int> <dbl> <dbl> <int> <dbl>
## 1 star 2 5 1 2 1
## 2 triangle 14 4 1 14 2
## 3 bat 2 1 0.5 2 1
## 4 diamond 2 3 1 2 2
## 5 house 5 7 0.8 5 2
## 6 rabbit 2 1 1 2 1
## 7 spaceship 2 6 1 2 1
## 8 starfish 2 1 0.5 2 1
## 9 tear 3 7 0.667 3 2
## 10 trapezoid 2 1 1 2 2
## 11 wings 2 3 0.5 2 1
## 12 abstractshape 10 4 0.4 10 2
## 13 fish 5 5 0.8 5 1
## 14 lightningbolt 4 3 0.5 4 2
## 15 butterfly 3 6 0.667 3 1
## 16 bird 14 6 0.5 14 1
## 17 brightstars 2 1 0.5 2 2
## 18 cat 4 6 0.5 4 1
## 19 horse 2 2 0.5 2 1
## 20 person 2 5 0.5 2 1
## 21 heart 16 7 0.375 16 1
## 22 mountain 7 1 0.286 7 1
## # ℹ 1 more variable: purity_shape <dbl>
if (exists("purity_compare") && nrow(purity_compare) >= 5) {
wilcox_result <- wilcox.test(
purity_compare$purity_network,
purity_compare$purity_shape,
paired = TRUE,
alternative = "two.sided",
exact = FALSE
)
cat("\n=== Wilcoxon Signed-Rank Test ===\n")
print(wilcox_result)
cat("Mean purity (network):", round(mean(purity_compare$purity_network), 3), "\n")
cat("Mean purity (shape): ", round(mean(purity_compare$purity_shape), 3), "\n")
cat("Median diff (net - shp):", round(median(purity_compare$purity_network -
purity_compare$purity_shape), 3), "\n")
# Effect size: r = Z / sqrt(N)
z_stat <- qnorm(wilcox_result$p.value / 2) *
sign(median(purity_compare$purity_network - purity_compare$purity_shape))
r_effect <- abs(z_stat) / sqrt(nrow(purity_compare))
cat("Effect size r:", round(r_effect, 3),
ifelse(r_effect < 0.1, "(trivial)",
ifelse(r_effect < 0.3, "(small)",
ifelse(r_effect < 0.5, "(medium)", "(large)"))), "\n")
}
##
## === Wilcoxon Signed-Rank Test ===
##
## Wilcoxon signed rank test with continuity correction
##
## data: purity_compare$purity_network and purity_compare$purity_shape
## V = 10, p-value = 0.01382
## alternative hypothesis: true location shift is not equal to 0
##
## Mean purity (network): 0.659
## Mean purity (shape): 0.842
## Median diff (net - shp): -0.1
## Effect size r: 0.525 (large)
if (exists("purity_compare") && nrow(purity_compare) >= 3) {
purity_long <- purity_compare %>%
select(drawn_l1, purity_network, purity_shape) %>%
pivot_longer(starts_with("purity"), names_to = "model", values_to = "purity") %>%
mutate(
model = recode(model, purity_network = "Network", purity_shape = "Shape"),
drawn_l1 = reorder(drawn_l1, purity, mean)
)
ggplot(purity_long, aes(x = drawn_l1, y = purity, fill = model)) +
geom_col(position = "dodge", alpha = 0.85) +
coord_flip() +
scale_fill_manual(values = c("Network" = "#2E75B6", "Shape" = "#ED7D31")) +
labs(
title = "Per-Label Cluster Purity: Network vs. Shape Features",
subtitle = paste0("Silhouettes only (matched n = ", nrow(purity_compare),
") | Higher = label stays together in one cluster"),
x = NULL, y = "Purity (proportion in modal cluster)", fill = "Model"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "bottom")
ggsave("purity_comparison.pdf", width = 10, height = 8)
}
if (exists("purity_compare")) {
cat("=== Labels where Network >> Shape (net wins by > 0.15) ===\n")
purity_compare %>%
filter(purity_network - purity_shape > 0.15) %>%
arrange(desc(purity_network - purity_shape)) %>%
print(n = 30)
cat("\n=== Labels where Shape >> Network (shape wins by > 0.15) ===\n")
purity_compare %>%
filter(purity_shape - purity_network > 0.15) %>%
arrange(desc(purity_shape - purity_network)) %>%
print(n = 30)
}
## === Labels where Network >> Shape (net wins by > 0.15) ===
## # A tibble: 1 × 7
## drawn_l1 n_net modal_cluster_net purity_network n_shp modal_cluster_shp
## <chr> <int> <dbl> <dbl> <int> <dbl>
## 1 star 2 5 1 2 1
## # ℹ 1 more variable: purity_shape <dbl>
##
## === Labels where Shape >> Network (shape wins by > 0.15) ===
## # A tibble: 11 × 7
## drawn_l1 n_net modal_cluster_net purity_network n_shp modal_cluster_shp
## <chr> <int> <dbl> <dbl> <int> <dbl>
## 1 mountain 7 1 0.286 7 1
## 2 heart 16 7 0.375 16 1
## 3 brightstars 2 1 0.5 2 2
## 4 cat 4 6 0.5 4 1
## 5 horse 2 2 0.5 2 1
## 6 person 2 5 0.5 2 1
## 7 bird 14 6 0.5 14 1
## 8 butterfly 3 6 0.667 3 1
## 9 lightningbolt 4 3 0.5 4 2
## 10 abstractshape 10 4 0.4 10 2
## 11 fish 5 5 0.8 5 1
## # ℹ 1 more variable: purity_shape <dbl>
This extends the top-down analysis to run on both feature sets side-by-side, showing which individual features best discriminate each semantic category level.
net_feature_cols <- if (!is.null(net_results)) net_results$feature_names else character(0)
shp_feature_cols <- if (!is.null(shp_results)) shp_results$feature_names else character(0)
net_sil_data <- if (!is.null(net_results)) net_results$data else NULL
shp_sil_data <- if (!is.null(shp_results)) shp_results$data else NULL
cat("=== Feature Discrimination Power: Kruskal-Wallis ===\n")
## === Feature Discrimination Power: Kruskal-Wallis ===
cat(" (Higher chi-sq = better at separating categories)\n\n")
## (Higher chi-sq = better at separating categories)
for (level in c("drawn_l4", "drawn_l3", "drawn_l2", "drawn_l1")) {
cat("\n============ ", toupper(level), " ============\n")
kw_rows <- list()
if (!is.null(net_sil_data)) {
for (feat in intersect(net_feature_cols, names(net_sil_data))) {
kw <- kruskal.test(net_sil_data[[feat]] ~ factor(net_sil_data[[level]]))
kw_rows[[length(kw_rows) + 1]] <- tibble(
model = "Network", feature = feat,
chi_sq = round(kw$statistic, 2), p_value = kw$p.value,
sig = ifelse(kw$p.value < 0.001, "***",
ifelse(kw$p.value < 0.01, "**",
ifelse(kw$p.value < 0.05, "*", "ns")))
)
}
}
if (!is.null(shp_sil_data)) {
for (feat in intersect(shp_feature_cols, names(shp_sil_data))) {
kw <- kruskal.test(shp_sil_data[[feat]] ~ factor(shp_sil_data[[level]]))
kw_rows[[length(kw_rows) + 1]] <- tibble(
model = "Shape", feature = feat,
chi_sq = round(kw$statistic, 2), p_value = kw$p.value,
sig = ifelse(kw$p.value < 0.001, "***",
ifelse(kw$p.value < 0.01, "**",
ifelse(kw$p.value < 0.05, "*", "ns")))
)
}
}
combined_kw <- bind_rows(kw_rows) %>% arrange(p_value)
print(combined_kw, n = 40)
cat("\n Significant features (p < .05) per model:\n")
combined_kw %>%
filter(p_value < 0.05) %>%
count(model) %>%
print()
}
##
## ============ DRAWN_L4 ============
## # A tibble: 30 × 5
## model feature chi_sq p_value sig
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Network s09_diameter 53.6 8.71e-10 ***
## 2 Network s01_n_links 53.4 9.84e-10 ***
## 3 Network n_nodes 53.4 9.92e-10 ***
## 4 Network density 52.6 1.43e- 9 ***
## 5 Network s10_avg_shortest_path 52.5 1.46e- 9 ***
## 6 Shape max_defect_depth 47.7 1.34e- 8 ***
## 7 Shape solidity 45.2 4.29e- 8 ***
## 8 Shape n_corners 43.3 1.03e- 7 ***
## 9 Network s07_max_cycle_size 41.7 2.08e- 7 ***
## 10 Shape compactness 38.8 7.92e- 7 ***
## 11 Shape n_defects 38.7 8.17e- 7 ***
## 12 Network s14_min_angle 38.3 9.94e- 7 ***
## 13 Network s04_clustering_coeff 37.5 1.42e- 6 ***
## 14 Network s15_avg_angle 33.7 7.56e- 6 ***
## 15 Shape hu3 31.9 1.73e- 5 ***
## 16 Shape hu1 30.7 2.95e- 5 ***
## 17 Shape rot_aspect_ratio 30.1 3.75e- 5 ***
## 18 Shape elongation 29.1 5.81e- 5 ***
## 19 Shape hu2 28.2 8.42e- 5 ***
## 20 Shape hu4 25.3 3.02e- 4 ***
## 21 Network s12_spatial_diameter 22.9 8.24e- 4 ***
## 22 Shape h_symmetry 18.2 5.84e- 3 **
## 23 Network s13_avg_link_length 16.6 1.09e- 2 *
## 24 Network s17_avg_magnitude 10.5 1.05e- 1 ns
## 25 Network s05_max_core_number 8.87 1.81e- 1 ns
## 26 Network s06_n_cycles 8.1 2.31e- 1 ns
## 27 Network s11_edge_connectivity 7.24 2.99e- 1 ns
## 28 Network s03_avg_degree 7.1 3.12e- 1 ns
## 29 Network s02_max_degree 6.84 3.36e- 1 ns
## 30 Shape v_symmetry 5.75 4.52e- 1 ns
##
## Significant features (p < .05) per model:
## # A tibble: 2 × 2
## model n
## <chr> <int>
## 1 Network 11
## 2 Shape 12
##
## ============ DRAWN_L3 ============
## # A tibble: 30 × 5
## model feature chi_sq p_value sig
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Network n_nodes 69.5 9.81e-10 ***
## 2 Network s01_n_links 69.3 1.07e- 9 ***
## 3 Network density 68.6 1.43e- 9 ***
## 4 Network s10_avg_shortest_path 67.4 2.37e- 9 ***
## 5 Network s09_diameter 67.4 2.45e- 9 ***
## 6 Network s07_max_cycle_size 63.0 1.49e- 8 ***
## 7 Shape n_corners 60.2 4.78e- 8 ***
## 8 Shape max_defect_depth 57.0 1.80e- 7 ***
## 9 Shape n_defects 55.8 2.88e- 7 ***
## 10 Shape solidity 51.2 1.86e- 6 ***
## 11 Network s04_clustering_coeff 45.2 1.90e- 5 ***
## 12 Shape hu2 43.3 4.06e- 5 ***
## 13 Shape compactness 42.4 5.70e- 5 ***
## 14 Network s14_min_angle 41.9 6.87e- 5 ***
## 15 Shape hu1 41.2 8.71e- 5 ***
## 16 Shape hu3 38.6 2.33e- 4 ***
## 17 Shape rot_aspect_ratio 38.6 2.33e- 4 ***
## 18 Shape elongation 37.5 3.47e- 4 ***
## 19 Network s12_spatial_diameter 36.6 4.72e- 4 ***
## 20 Network s15_avg_angle 36.1 5.73e- 4 ***
## 21 Shape hu4 25.2 2.15e- 2 *
## 22 Network s17_avg_magnitude 24.6 2.61e- 2 *
## 23 Shape h_symmetry 24.4 2.79e- 2 *
## 24 Network s13_avg_link_length 23.1 4.05e- 2 *
## 25 Network s02_max_degree 22.2 5.28e- 2 ns
## 26 Network s03_avg_degree 15.1 3.04e- 1 ns
## 27 Shape v_symmetry 14.5 3.41e- 1 ns
## 28 Network s06_n_cycles 14.4 3.43e- 1 ns
## 29 Network s11_edge_connectivity 8.43 8.15e- 1 ns
## 30 Network s05_max_core_number 8.24 8.28e- 1 ns
##
## Significant features (p < .05) per model:
## # A tibble: 2 × 2
## model n
## <chr> <int>
## 1 Network 12
## 2 Shape 12
##
## ============ DRAWN_L2 ============
## # A tibble: 30 × 5
## model feature chi_sq p_value sig
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Network n_nodes 93.9 0.00000321 ***
## 2 Network s01_n_links 93.4 0.00000370 ***
## 3 Network density 92.9 0.00000434 ***
## 4 Network s09_diameter 91.2 0.00000728 ***
## 5 Network s10_avg_shortest_path 90.9 0.00000793 ***
## 6 Network s04_clustering_coeff 87.8 0.0000197 ***
## 7 Shape solidity 87 0.0000247 ***
## 8 Network s02_max_degree 86.0 0.0000334 ***
## 9 Network s07_max_cycle_size 85.7 0.0000361 ***
## 10 Network s05_max_core_number 84.8 0.0000468 ***
## 11 Shape n_defects 84.6 0.0000490 ***
## 12 Shape n_corners 82.9 0.0000799 ***
## 13 Network s11_edge_connectivity 82.6 0.0000862 ***
## 14 Network s06_n_cycles 82.2 0.0000953 ***
## 15 Network s03_avg_degree 81.1 0.000130 ***
## 16 Shape max_defect_depth 81.0 0.000136 ***
## 17 Shape compactness 76.2 0.000485 ***
## 18 Shape hu1 75.1 0.000651 ***
## 19 Network s15_avg_angle 73.6 0.000956 ***
## 20 Shape hu2 73.5 0.000975 ***
## 21 Shape hu3 70.6 0.00202 **
## 22 Network s12_spatial_diameter 64.8 0.00773 **
## 23 Network s14_min_angle 64.5 0.00835 **
## 24 Shape rot_aspect_ratio 63.7 0.00999 **
## 25 Shape elongation 63.3 0.0110 *
## 26 Network s13_avg_link_length 61.8 0.0150 *
## 27 Shape hu4 57.7 0.0343 *
## 28 Network s17_avg_magnitude 52.6 0.0883 ns
## 29 Shape v_symmetry 46.0 0.238 ns
## 30 Shape h_symmetry 45.8 0.246 ns
##
## Significant features (p < .05) per model:
## # A tibble: 2 × 2
## model n
## <chr> <int>
## 1 Network 16
## 2 Shape 11
##
## ============ DRAWN_L1 ============
## # A tibble: 30 × 5
## model feature chi_sq p_value sig
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Network s02_max_degree 119. 0.0000866 ***
## 2 Shape hu2 112. 0.000435 ***
## 3 Network n_nodes 111. 0.000539 ***
## 4 Network s01_n_links 111. 0.000603 ***
## 5 Network density 111. 0.000648 ***
## 6 Shape n_defects 110. 0.000679 ***
## 7 Network s10_avg_shortest_path 108. 0.00102 **
## 8 Shape n_corners 107. 0.00126 **
## 9 Shape solidity 107. 0.00131 **
## 10 Network s09_diameter 107. 0.00139 **
## 11 Network s07_max_cycle_size 106. 0.00154 **
## 12 Network s03_avg_degree 106. 0.00166 **
## 13 Network s04_clustering_coeff 102. 0.00406 **
## 14 Shape max_defect_depth 101. 0.00431 **
## 15 Network s06_n_cycles 101. 0.00445 **
## 16 Network s15_avg_angle 100. 0.00519 **
## 17 Shape hu1 98.5 0.00739 **
## 18 Network s12_spatial_diameter 96.5 0.0106 *
## 19 Network s13_avg_link_length 96.2 0.0113 *
## 20 Shape rot_aspect_ratio 94.6 0.0148 *
## 21 Shape elongation 94.2 0.0160 *
## 22 Shape hu3 94.0 0.0166 *
## 23 Shape compactness 93.1 0.0192 *
## 24 Network s14_min_angle 93.1 0.0192 *
## 25 Network s17_avg_magnitude 91.1 0.0268 *
## 26 Shape v_symmetry 86.8 0.0525 ns
## 27 Shape h_symmetry 84.9 0.0690 ns
## 28 Network s05_max_core_number 82.1 0.101 ns
## 29 Shape hu4 80.8 0.120 ns
## 30 Network s11_edge_connectivity 80.3 0.127 ns
##
## Significant features (p < .05) per model:
## # A tibble: 2 × 2
## model n
## <chr> <int>
## 1 Network 15
## 2 Shape 10
if (!is.null(net_sil_data)) {
net_sil_data %>%
mutate(across(any_of(net_feature_cols), scale)) %>%
group_by(drawn_l4) %>%
summarise(across(any_of(net_feature_cols), ~mean(., na.rm = TRUE)), .groups = "drop") %>%
pivot_longer(-drawn_l4, names_to = "feature", values_to = "z_score") %>%
ggplot(aes(x = feature, y = drawn_l4, fill = z_score)) +
geom_tile() +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
geom_text(aes(label = round(z_score, 1)), size = 2.5) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7)) +
labs(title = "Network features by L4 category", fill = "z") -> p
print(p)
}
if (!is.null(shp_sil_data)) {
shp_sil_data %>%
mutate(across(any_of(shp_feature_cols), scale)) %>%
group_by(drawn_l4) %>%
summarise(across(any_of(shp_feature_cols), ~mean(., na.rm = TRUE)), .groups = "drop") %>%
pivot_longer(-drawn_l4, names_to = "feature", values_to = "z_score") %>%
ggplot(aes(x = feature, y = drawn_l4, fill = z_score)) +
geom_tile() +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
geom_text(aes(label = round(z_score, 1)), size = 2.5) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7)) +
labs(title = "Shape features by L4 category", fill = "z") -> p
print(p)
}
if (!is.null(net_sil_data)) {
net_sil_data %>%
group_by(drawn_l1) %>% filter(n() >= 3) %>% ungroup() %>%
mutate(across(any_of(net_feature_cols), scale)) %>%
group_by(drawn_l1) %>%
summarise(across(any_of(net_feature_cols), ~mean(., na.rm = TRUE)), .groups = "drop") %>%
pivot_longer(-drawn_l1, names_to = "feature", values_to = "z_score") %>%
ggplot(aes(x = feature, y = drawn_l1, fill = z_score)) +
geom_tile() +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
geom_text(aes(label = round(z_score, 1)), size = 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7)) +
labs(title = "Network features by L1 label (n >= 3)", fill = "z") -> p
print(p)
}
if (!is.null(shp_sil_data)) {
shp_sil_data %>%
group_by(drawn_l1) %>% filter(n() >= 3) %>% ungroup() %>%
mutate(across(any_of(shp_feature_cols), scale)) %>%
group_by(drawn_l1) %>%
summarise(across(any_of(shp_feature_cols), ~mean(., na.rm = TRUE)), .groups = "drop") %>%
pivot_longer(-drawn_l1, names_to = "feature", values_to = "z_score") %>%
ggplot(aes(x = feature, y = drawn_l1, fill = z_score)) +
geom_tile() +
scale_fill_gradient2(low = "#2166AC", mid = "white", high = "#B2182B", midpoint = 0) +
geom_text(aes(label = round(z_score, 1)), size = 2) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 7)) +
labs(title = "Shape features by L1 label (n >= 3)", fill = "z") -> p
print(p)
}
cat("=== Pairwise L4 Comparisons (top 5 features per model) ===\n\n")
## === Pairwise L4 Comparisons (top 5 features per model) ===
for (model_name in c("Network", "Shape")) {
if (model_name == "Network" && is.null(net_sil_data)) next
if (model_name == "Shape" && is.null(shp_sil_data)) next
sil_dat <- if (model_name == "Network") net_sil_data else shp_sil_data
feat_vec <- if (model_name == "Network") net_feature_cols else shp_feature_cols
cat("\n--- ", model_name, " ---\n")
kw_l4 <- map_dfr(intersect(feat_vec, names(sil_dat)), function(feat) {
kw <- kruskal.test(sil_dat[[feat]] ~ factor(sil_dat$drawn_l4))
tibble(feature = feat, p_value = kw$p.value)
}) %>% arrange(p_value)
for (feat in kw_l4 %>% slice_head(n = 5) %>% pull(feature)) {
cat("\n ", feat, "\n")
pw <- pairwise.wilcox.test(sil_dat[[feat]], sil_dat$drawn_l4, p.adjust.method = "BH")
pmat <- pw$p.value
sig_pairs <- which(pmat < 0.05, arr.ind = TRUE)
if (nrow(sig_pairs) > 0) {
for (r in 1:nrow(sig_pairs)) {
cat(" ", rownames(pmat)[sig_pairs[r,1]], "vs", colnames(pmat)[sig_pairs[r,2]],
": p =", round(pmat[sig_pairs[r,1], sig_pairs[r,2]], 4), "\n")
}
} else { cat(" No significant pairwise differences\n") }
}
}
##
## --- Network ---
##
## s09_diameter
## shapes vs abstract : p = 0.0033
## naturalworld vs creatures : p = 0.0443
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0014
## shapes vs manmadeobjects : p = 0
## shapes vs naturalworld : p = 3e-04
## symbols vs shapes : p = 0
##
## s01_n_links
## shapes vs abstract : p = 0.0058
## naturalworld vs creatures : p = 0.0435
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0012
## shapes vs manmadeobjects : p = 0
## shapes vs naturalworld : p = 3e-04
## symbols vs shapes : p = 0
##
## n_nodes
## shapes vs abstract : p = 0.005
## naturalworld vs creatures : p = 0.044
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0012
## shapes vs manmadeobjects : p = 0
## shapes vs naturalworld : p = 2e-04
## symbols vs shapes : p = 0
##
## density
## shapes vs abstract : p = 0.004
## naturalworld vs creatures : p = 0.0462
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0011
## shapes vs manmadeobjects : p = 0
## shapes vs naturalworld : p = 3e-04
## symbols vs shapes : p = 0
##
## s10_avg_shortest_path
## shapes vs abstract : p = 0.004
## naturalworld vs creatures : p = 0.0472
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0011
## shapes vs manmadeobjects : p = 0
## shapes vs naturalworld : p = 3e-04
## symbols vs shapes : p = 0
##
## --- Shape ---
##
## max_defect_depth
## shapes vs abstract : p = 0.0123
## manmadeobjects vs creatures : p = 0.022
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.007
## shapes vs manmadeobjects : p = 0.0374
## symbols vs manmadeobjects : p = 0.007
## shapes vs naturalworld : p = 0.007
## symbols vs naturalworld : p = 0.022
## symbols vs shapes : p = 0
##
## solidity
## shapes vs abstract : p = 0.0214
## manmadeobjects vs creatures : p = 0.0014
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0037
## shapes vs naturalworld : p = 0.0017
## symbols vs shapes : p = 3e-04
##
## n_corners
## manmadeobjects vs creatures : p = 0.0263
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.0102
## shapes vs naturalworld : p = 0.0088
## symbols vs shapes : p = 0
##
## compactness
## shapes vs abstract : p = 0.0388
## manmadeobjects vs creatures : p = 8e-04
## shapes vs creatures : p = 1e-04
## symbols vs creatures : p = 0.0066
## manmadeobjects vs humanoid : p = 0.0127
## shapes vs humanoid : p = 0.0061
## symbols vs humanoid : p = 0.0127
## naturalworld vs manmadeobjects : p = 0.0127
## shapes vs naturalworld : p = 0.0112
## symbols vs naturalworld : p = 0.029
##
## n_defects
## manmadeobjects vs creatures : p = 0.0229
## shapes vs creatures : p = 0
## shapes vs humanoid : p = 0.036
## shapes vs naturalworld : p = 0.0114
## symbols vs shapes : p = 0
cat("\n\n")
cat("===============================================================\n")
## ===============================================================
cat(" FINAL SUMMARY\n")
## FINAL SUMMARY
cat("===============================================================\n\n")
## ===============================================================
# --- A. Full network analysis ---
cat("-- NETWORK ANALYSIS (all drawing types) --\n\n")
## -- NETWORK ANALYSIS (all drawing types) --
cat("Data: D1:", nrow(net_d1_raw), "| Sirius:", nrow(net_sir_raw),
"| Combined:", nrow(net_combined), "| Capped (matched to shape):", nrow(net_combined_capped), "\n")
## Data: D1: 235 | Sirius: 196 | Combined: 431 | Capped (matched to shape): 402
cat("\nDrawing type split (capped):\n")
##
## Drawing type split (capped):
print(table(net_combined_capped$drawing_type))
##
## mixed silhouette_only skeleton
## 172 153 77
net_all_list <- list(
"ALL (capped)" = net_all_results_capped,
"SILHOUETTES (capped)" = net_sil_results_capped,
"SKELETONS (capped)" = net_skel_results_capped,
"MIXED (capped)" = net_mixed_results_capped
)
for (name in names(net_all_list)) {
res <- net_all_list[[name]]
if (!is.null(res)) {
cat(paste0("\n ", name, ":\n"))
cat(" n:", nrow(res$data), "\n")
cat(" Clusters:", res$gmm$G, "(", res$gmm$modelName, ")\n")
cat(" BIC:", round(res$gmm$bic, 2), "\n")
cat(" Mean uncertainty:", round(mean(res$gmm$uncertainty), 3), "\n")
}
}
##
## ALL (capped):
## n: 402
## Clusters: 7 ( VVV )
## BIC: -5030.27
## Mean uncertainty: 0.019
##
## SILHOUETTES (capped):
## n: 153
## Clusters: 7 ( VVV )
## BIC: -946.92
## Mean uncertainty: 0.028
##
## SKELETONS (capped):
## n: 77
## Clusters: 6 ( EEV )
## BIC: -656.46
## Mean uncertainty: 0.029
##
## MIXED (capped):
## n: 172
## Clusters: 4 ( VEV )
## BIC: -2831.21
## Mean uncertainty: 0.042
# --- B. Head-to-head comparison ---
cat("\n\n-- HEAD-TO-HEAD: NETWORK vs SHAPE (matched silhouettes) --\n\n")
##
##
## -- HEAD-TO-HEAD: NETWORK vs SHAPE (matched silhouettes) --
if (!is.null(net_results) && !is.null(shp_results)) {
cat("Matched sample: n =", nrow(net_sil_matched), "silhouette drawings\n\n")
cat("NETWORK MODEL (matched):\n")
cat(" Features:", length(net_results$feature_names), "\n")
cat(" Clusters:", net_results$gmm$G, "(", net_results$gmm$modelName, ")\n")
cat(" BIC:", round(net_results$gmm$bic, 2), "\n")
cat(" Mean uncertainty:", round(mean(net_results$gmm$uncertainty), 3), "\n")
cat("\nSHAPE MODEL (matched):\n")
cat(" Features:", length(shp_results$feature_names), "\n")
cat(" Clusters:", shp_results$gmm$G, "(", shp_results$gmm$modelName, ")\n")
cat(" BIC:", round(shp_results$gmm$bic, 2), "\n")
cat(" Mean uncertainty:", round(mean(shp_results$gmm$uncertainty), 3), "\n")
}
## Matched sample: n = 153 silhouette drawings
##
## NETWORK MODEL (matched):
## Features: 17
## Clusters: 7 ( VVV )
## BIC: -946.92
## Mean uncertainty: 0.028
##
## SHAPE MODEL (matched):
## Features: 13
## Clusters: 3 ( VEV )
## BIC: -2197.48
## Mean uncertainty: 0.027
if (exists("purity_compare")) {
cat("\nPURITY COMPARISON:\n")
cat(" Labels compared:", nrow(purity_compare), "\n")
cat(" Mean purity -- Network:", round(mean(purity_compare$purity_network), 3), "\n")
cat(" Mean purity -- Shape: ", round(mean(purity_compare$purity_shape), 3), "\n")
if (exists("wilcox_result")) {
cat(" Wilcoxon p:", format.pval(wilcox_result$p.value, digits = 3), "\n")
cat(" Effect size r:", round(r_effect, 3), "\n")
}
}
##
## PURITY COMPARISON:
## Labels compared: 22
## Mean purity -- Network: 0.659
## Mean purity -- Shape: 0.842
## Wilcoxon p: 0.0138
## Effect size r: 0.525
if (exists("cv_compare")) {
cat("\nCRAMER'S V (cluster-label association):\n")
print(cv_compare)
}
##
## CRAMER'S V (cluster-label association):
## # A tibble: 4 × 4
## level V_network V_shape diff_net_minus_shp
## <chr> <dbl> <dbl> <dbl>
## 1 drawn_l1 0.783 0.779 0.004
## 2 drawn_l2 0.648 0.637 0.011
## 3 drawn_l3 0.429 0.485 -0.055
## 4 drawn_l4 0.325 0.426 -0.101
cat("\n===============================================================\n")
##
## ===============================================================
cat(" Objects in environment:\n")
## Objects in environment:
cat(" net_all_results_capped -- network: all drawings\n")
## net_all_results_capped -- network: all drawings
cat(" net_sil_results_capped -- network: silhouettes (full)\n")
## net_sil_results_capped -- network: silhouettes (full)
cat(" net_skel_results_capped -- network: skeletons\n")
## net_skel_results_capped -- network: skeletons
cat(" net_mixed_results_capped-- network: mixed\n")
## net_mixed_results_capped-- network: mixed
cat(" net_results -- network: matched silhouettes\n")
## net_results -- network: matched silhouettes
cat(" shp_results -- shape: matched silhouettes\n")
## shp_results -- shape: matched silhouettes
cat(" purity_compare -- per-label purity joined table\n")
## purity_compare -- per-label purity joined table
cat(" cv_compare -- Cramer's V comparison table\n")
## cv_compare -- Cramer's V comparison table
cat(" net_sil_matched -- matched network silhouette data\n")
## net_sil_matched -- matched network silhouette data
cat(" shp_sil_matched -- matched shape silhouette data\n")
## shp_sil_matched -- matched shape silhouette data
cat("===============================================================\n")
## ===============================================================