Description
This document performs exploratory data analysis on the radiomic
features extracted from our three ultra-high-resolution claustrum
datasets (BigBrain, Edlow and
Lusebrink), each with two hemispheres (left
and right) and three image types (original,
Laplacian of Gaussian (σ = 0.5 mm), and
Laplacian of Gaussian (σ = 1.0 mm).
In preparation for our end goal of PCA followed by spatially-constrained clustering, this exploration is designed to:
Load libraries
#list all the packages we will need
packages <- c(
"tidyverse", # data wrangling + ggplot2
"corrplot", # correlation matrices
"ggcorrplot", # ggplot2-style correlation matrices
"scales", # axis formatting
"knitr", # table rendering
"kableExtra", # styled tables
"DT", # interactive tables
"RColorBrewer", # colour palettes
"patchwork", # combine ggplots
"glue", # string interpolation
"plotly" # hover ggplots
)
#check what's installed
installed <- rownames(installed.packages())
#find what needs to be installed
to_install <- setdiff(packages, installed)
#install missing packages
if (length(to_install) > 0) {
message("Installing missing packages: ", paste(to_install, collapse = ", "))
install.packages(to_install, repos = "https://cloud.r-project.org")
} else {
message("All packages already installed.")
}
#load all packages
invisible(lapply(packages, library, character.only = TRUE))
#clean up env
rm(packages, installed, to_install)Set file paths
#set main data directory (change to 'claustrum' in lab)
DATA_DIR <- "/Users/navona/OneDrive - UHN/projects/project10_claustrumParcellation_OS/output/1_radiomics_features/250um"
#define datasets and hemispheres
DATASETS <- c("bigbrain", "edlow", "lusebrink")
HEMIS <- c("left", "right")
#create a named list of file paths, based on naming convention
files <- setNames(
object = as.list(
outer(DATASETS, HEMIS, function(ds, hemi)
file.path(DATA_DIR, paste0(ds, "_", hemi, "_features_250um.csv"))
)
),
nm = as.vector(outer(DATASETS, HEMIS, paste, sep = "_"))
)Parse the data
#identify meta-data, ie vars not Pyradomics features
META_COLS <- c("x", "y", "z", "y_global", "dist_to_boundary")
#identify feaeture classes of Pyradiomics features
FEATURE_CLASSES <- c("firstorder", "glcm", "glrlm", "glszm", "ngtdm", "gldm")
#specify 3 image types
IMAGE_TYPES <- c(
"original" = "", #no prefix
"log05" = "log-sigma-0-5-mm-3D_",
"log10" = "log-sigma-1-0-mm-3D_"
)
#colour scheme for datasets
DATASET_COLOURS <- c(
"Bigbrain" = "darkgreen",
"Edlow" = "blue",
"Lusebrink" = "purple"
)
#helper function for feature class from col names
get_feature_class <- function(col_name) {
case_when(
str_detect(col_name, "firstorder") ~ "firstorder",
str_detect(col_name, "glcm") ~ "glcm",
str_detect(col_name, "glrlm") ~ "glrlm",
str_detect(col_name, "glszm") ~ "glszm",
str_detect(col_name, "ngtdm") ~ "ngtdm",
str_detect(col_name, "gldm") ~ "gldm",
TRUE ~ "metadata"
)
}
#helper function to get image type from col names
get_image_type <- function(col_name) {
case_when(
str_detect(col_name, "log-sigma-0-5") ~ "log05",
str_detect(col_name, "log-sigma-1-0") ~ "log10",
TRUE ~ "original"
)
}Load data
#read 6 csvs and tag each with dataset and hemi labels
df_list <- imap(files, function(path, key) {
parts <- str_split(key, "_")[[1]]
read_csv(path, show_col_types = FALSE) |>
mutate(
dataset = parts[1],
hemisphere = parts[2],
.before = everything()
)
})
#OPTIONAL: MAKE SMALL FOR TESTING SO FAST - MAKE SURE TO COMMENT OUT IN FINAL
#df_list <- map(df_list, ~ slice_sample(.x, n = 500))
#also combine 6 dfs into one long df, useful for cross-dataset comparisons
df_combined <- bind_rows(df_list)Goal: Understand the structure of our Pyradomics data, i.e., the “feature space”. This is so that we can make smart decisions for our analysis, and also make sure that nothing has gone wrong with the data extraction. The tabs below show many features we have per class and image type, how many voxels per dataset, and how much missingness exists.
Next step: Check out the variable(s) with large proportions of missing data to understand what they are (namely,
JointAverage).
#count voxels in each dataset
voxel_counts <- imap_dfr(df_list, function(df, key) { #like lapply, from purrr
parts <- str_split(key, "_")[[1]]
tibble(
Dataset = str_to_title(parts[1]), #make capital
Hemisphere = str_to_title(parts[2]),
Voxels = nrow(df),
Features = ncol(df) - length(META_COLS) - 2 #subtract meta and dataset/hemi cols
)
})
#make a pretty table
voxel_counts |>
kbl(caption = "Voxel and feature counts per file") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE) |>
column_spec(3:4, bold = TRUE)| Dataset | Hemisphere | Voxels | Features |
|---|---|---|---|
| Bigbrain | Left | 84466 | 276 |
| Edlow | Left | 90943 | 276 |
| Lusebrink | Left | 107879 | 276 |
| Bigbrain | Right | 76505 | 276 |
| Edlow | Right | 92731 | 276 |
| Lusebrink | Right | 94890 | 276 |
#build a feature inventory from the colnames of first file (all files share the same structure)
feature_cols <- names(df_list[[1]]) |>
setdiff(c(META_COLS, "dataset", "hemisphere"))
#pull out image type and feature class
feature_inventory <- tibble(column = feature_cols) |>
mutate(
image_type = get_image_type(column),
feature_class = get_feature_class(column)
)
#make summary table of features by class and image type
class_summary <- feature_inventory |>
count(image_type, feature_class) |>
pivot_wider(names_from = image_type, values_from = n, values_fill = 0) |>
select(feature_class, original, log05, log10) |>
mutate(feature_class = factor(feature_class,
levels = unique(feature_inventory$feature_class))) |>
arrange(feature_class)
#make table have nice formatting
class_summary |>
rename(
`Feature Class` = feature_class,
`Original` = original,
`LoG σ=0.5mm` = log05,
`LoG σ=1.0mm` = log10
) |>
kbl(caption = "Number of features per class and image type") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE) |>
add_header_above(c(" " = 1, "Image Type" = 3)) | Feature Class | Original | LoG σ=0.5mm | LoG σ=1.0mm |
|---|---|---|---|
| firstorder | 18 | 18 | 18 |
| glcm | 23 | 23 | 23 |
| glrlm | 16 | 16 | 16 |
| glszm | 16 | 16 | 16 |
| ngtdm | 5 | 5 | 5 |
| gldm | 14 | 14 | 14 |
#get NA counts per feature class and dataset
na_summary <- imap_dfr(df_list, function(df, key) {
parts <- str_split(key, "_")[[1]]
feat_df <- df |> select(all_of(feature_cols))
feat_df |>
summarise(across(everything(), ~ sum(is.na(.)))) |>
pivot_longer(everything(), names_to = "column", values_to = "n_na") |>
left_join(feature_inventory, by = "column") |>
group_by(image_type, feature_class) |>
summarise(
n_features_with_na = sum(n_na > 0),
total_na_cells = sum(n_na),
.groups = "drop"
) |>
mutate(
dataset = str_to_title(parts[1]),
hemisphere = str_to_title(parts[2])
)
})
#make a summary table - SORTABLE!
na_summary |>
filter(total_na_cells > 0) |>
arrange(dataset, hemisphere, image_type, feature_class) |>
select(Dataset = dataset, Hemisphere = hemisphere,
`Image Type` = image_type, `Feature Class` = feature_class,
`Features with NAs` = n_features_with_na,
`Total NA Cells` = total_na_cells) |>
datatable(
caption = "Missing values by dataset, class, and image type (non-zero only)",
options = list(pageLength = 30, scrollX = TRUE),
rownames = FALSE
)#calculate NA proportion per feature (pooled across datasets)
na_per_feature <- imap_dfr(df_list, function(df, key) {
parts <- str_split(key, "_")[[1]]
df |>
select(all_of(feature_cols)) |>
summarise(across(everything(), ~ mean(is.na(.)))) |>
pivot_longer(everything(), names_to = "column", values_to = "prop_na") |>
mutate(
dataset = str_to_title(parts[1]),
hemisphere = str_to_title(parts[2])
)
}) |>
left_join(feature_inventory, by = "column") |>
group_by(column, dataset, image_type, feature_class) |>
summarise(prop_na = mean(prop_na), .groups = "drop") # average across hemispheres
#interactive heatmap with plotly -
#show any variables that has any amount of NA
na_per_feature |>
#filter(prop_na > 0) |>
filter(column %in% na_per_feature$column[na_per_feature$prop_na > 0]) |> #keep rows constant
mutate(
image_type = factor(image_type,
levels = c("original", "log05", "log10"),
labels = c("Original", "LoG σ=0.5mm", "LoG σ=1.0mm")),
feature_short = str_remove(column, "^log-sigma-[0-9]-[0-9]-mm-3D_"), # shorter label
hover_text = glue::glue("{column}<br>Dataset: {dataset}<br>% Missing: {scales::percent(prop_na, accuracy = 0.1)}")
) |>
plot_ly(
x = ~ dataset, #image_type
y = ~ column,
z = ~ prop_na * 100,
text = ~ hover_text,
type = "heatmap",
hoverinfo = "text",
colorscale = list(c(0, "#fffde7"), c(1, "#d32f2f")),
colorbar = list(title = "% Missing")
) |>
layout(
title = "Proportion of missing values per feature",
xaxis = list(title = "Dataset"),
yaxis = list(title = "", ticks = "", showticklabels = FALSE)
)Goal: Here, we want to identify “outliers”, i.e., voxels with extreme feature values that could distort PCA. Features are Z-scored (mean=0, SD=1) within each dataset before plotting, putting all features on a comparable scale. Outliers are shown in red. We use the IQR (boxplot) method for outlier detection, i.e., a value is flagged as an outlier if it falls below Q1 − 1.5×IQR or above Q3 + 1.5×IQR (put otherwise, a value is an outlier if it falls beyond the whiskers of the boxplot). This method is quite conservative for our feature space, so it’s just for visulization purposes: it’s not clear yet if we want to remove these outliers, as outliers in feature space almost always reflect real image properties, not measurement error - the idea is just to understand how many outliers we have.
Next step: Identify the features, image types, and datasets that have only a small number of wildly abnormal outliers. Compare across datasets and image types to determine if this ought to be ‘expected’. Understand what these outliers ‘mean’ for a given feature. Determine if there is a spatial pattern to the outliers (e.g., are they all at edges, etc). Make a short list of voxels to remove from the analysis of certain features; we can run a ‘sensitivity analysis’, i.e., our PCA and clustering with both these outlying voxels include and excluded.
# z-score all features within each dataset
df_scaled <- imap_dfr(df_list, function(df, key) {
parts <- str_split(key, "_")[[1]]
df |>
select(all_of(META_COLS), all_of(feature_cols)) |>
mutate(across(all_of(feature_cols), ~ scale(.x)[,1])) |>
mutate(
dataset = str_to_title(parts[1]),
hemisphere = str_to_title(parts[2])
)
})#helper: ggplot boxplot for one feature class and image type
plot_feature_boxplots <- function(feat_class, img_type, img_label) {
cols_to_plot <- feature_inventory |>
filter(feature_class == feat_class, image_type == img_type) |>
pull(column)
df_scaled |>
select(dataset, hemisphere, all_of(cols_to_plot)) |>
pivot_longer(all_of(cols_to_plot), names_to = "column", values_to = "value") |>
mutate(
feature_short = str_remove(column, "^log-sigma-[0-9]-[0-9]-mm-3D_") |>
str_remove(paste0(feat_class, "_")),
hemisphere = factor(hemisphere, levels = c("Left", "Right")),
dataset = factor(dataset, levels = names(DATASET_COLOURS))
) |>
ggplot(aes(x = feature_short, y = value, fill = dataset)) +
geom_boxplot(outlier.size = 1, outlier.alpha = 0.3, linewidth = 0.3, outlier.colour = "red") +
facet_grid(hemisphere ~ .) +
scale_fill_manual(values = DATASET_COLOURS, name = "Dataset") +
labs(
title = glue::glue("{feat_class} — {img_label}"),
x = NULL,
y = "Z-score"
) +
theme_minimal(base_size = 11) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
strip.text = element_text(face = "bold"),
legend.position = "top"
)
}Goal: Examine the correlation structure across features to understand redundancy before PCA. We compute Spearman rank correlation — robust to the non-normal, right-skewed distributions seen in Section 3. A random sample of 2000 voxels per dataset is used for speed (compare to the total voxel totals in Section 1); however, correlation estimates are stable at this sample size. High correlation (|r| > 0.9) between two features means they carry nearly identical information. This will not cause us to drop features before PCA — but it will help interpret PCA loadings later, as correlated features will load onto the same principal components.
Next step: Understand if feature classes are highly correlated among themselves. Understand what features / feature classes are negatively correlated. Examine if the image types appear to have similar correlation patterns.
#compute spearman correlation matrix on a random sample of voxels
compute_corr <- function(df, cols, n_sample = NULL, seed = 123) {
set.seed(seed)
df_sub <- df |>
select(all_of(cols)) |>
select(where(~ var(.x, na.rm = TRUE) > 0))
if (!is.null(n_sample)) {
df_sub <- slice_sample(df_sub, n = min(n_sample, nrow(df_sub)))
}
cor(df_sub, use = "pairwise.complete.obs", method = "spearman")
}
#plot correlation matrix as heatmap
plot_corr <- function(corr_mat, title) {
# get feature order from inventory, preserving class grouping
feature_order <- feature_inventory |>
filter(column %in% colnames(corr_mat)) |>
arrange(feature_class) |>
mutate(f_short = str_remove(column, "^log-sigma-[0-9]-[0-9]-mm-3D_"))
corr_df <- corr_mat |>
as.data.frame() |>
rownames_to_column("feature1") |>
pivot_longer(-feature1, names_to = "feature2", values_to = "r") |>
mutate(
f1_short = str_remove(feature1, "^log-sigma-[0-9]-[0-9]-mm-3D_"),
f2_short = str_remove(feature2, "^log-sigma-[0-9]-[0-9]-mm-3D_"),
f1_short = factor(f1_short, levels = feature_order$f_short),
f2_short = factor(f2_short, levels = feature_order$f_short)
)
#heatmap
p_main <- corr_df |>
ggplot(aes(x = f2_short, y = f1_short, fill = r)) +
geom_tile() +
scale_fill_gradient2(
low = "#d73027",
mid = "white",
high = "#1a9850",
midpoint = 0,
limits = c(-1, 1),
name = "Spearman r"
) +
labs(title = title, x = NULL, y = NULL) +
theme_minimal(base_size = 10) +
theme(
axis.text = element_blank(),
axis.ticks = element_blank()
)
#feature class strip along x axis
strip_df <- feature_order |>
mutate(f_short = factor(f_short, levels = feature_order$f_short))
p_strip_x <- strip_df |>
ggplot(aes(x = f_short, y = 1, fill = feature_class)) +
geom_tile() +
scale_fill_brewer(palette = "Set2", name = "Feature Class") +
theme_void() +
theme(
legend.position = "bottom",
legend.title = element_text(size = 9),
legend.text = element_text(size = 8)
)
#feature class strip along y axis
p_strip_y <- strip_df |>
ggplot(aes(y = f_short, x = 1, fill = feature_class)) +
geom_tile() +
scale_fill_brewer(palette = "Set2", guide = "none") +
theme_void()
#combine with patchwork
top_row <- (p_strip_y | p_main) + plot_layout(widths = c(1, 40))
bottom_row <- (plot_spacer() | p_strip_x) + plot_layout(widths = c(1, 40))
top_row / bottom_row + plot_layout(heights = c(20, 1))
}#pre-compute all correlation matrices once
#one matrix per dataset x hemisphere x image type
#note this is run on a fraction of the data, 2000 voxels, just for visualization
#ie correlation values won't be exact but likely close enough
corr_matrices <- imap(df_list, function(df, key) {
map(c("original", "log05", "log10"), function(img_type) {
cols <- feature_inventory |>
filter(image_type == img_type) |>
pull(column)
compute_corr(df, cols, n_sample=2000)
}) |> set_names(c("original", "log05", "log10"))
})plot_corr(corr_matrices[["bigbrain_left"]][["original"]],
"BigBrain Left — Original (Spearman, n=500)")plot_corr(corr_matrices[["lusebrink_left"]][["original"]],
"Lüsebrink Left — Original (Spearman, n=500)")Goal: Write out two voxel-level .csvs for downstream .nii reconstruction in Python: (1) a missingness map showing the count of NA features (out of all 278) at each voxel, and (2) an outlier map showing the count of IQR-flagged outlier features at each voxel (maximum possible 278. Note: both .csvs also carry a
dominant_classcolumn (the feature class contributing the most NAs/outliers) as a secondary diagnostic, in case a separate class-coded .nii is useful later, but the primary NIfTI value for both maps is the count.
#flag whether a value is an outlier by IQR rule
is_iqr_outlier <- function(x) {
q <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
iqr <- q[2] - q[1]
x < (q[1] - 1.5 * iqr) | x > (q[2] + 1.5 * iqr)
}#assign an integer code to each feature class, used to colour the outlier map
#e.g. firstorder = 1, glcm = 2, glrlm = 3, glszm = 4, ngtdm = 5, gldm = 6
class_codes <- setNames(seq_along(FEATURE_CLASSES), FEATURE_CLASSES)
#class_codes |>
#enframe(name = "feature_class", value = "code") |>
#kbl(caption = "If used later: feature class integer code (for NIfTI colour-coding)") |>
#kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)#missingness map: count of how many features (out of all 278) are NA at each voxel
#dominant_class = feature class contributing the most NAs (secondary diagnostic)
missing_map <- imap_dfr(df_list, function(df, key) {
parts <- str_split(key, "_")[[1]]
feat_df <- df |> select(all_of(feature_cols))
na_flags <- map_dfc(feat_df, is.na)
#count NAs per voxel, per feature class
class_counts <- map_dfc(FEATURE_CLASSES, function(cls) {
cls_cols <- feature_inventory |> filter(feature_class == cls) |> pull(column)
tibble("{cls}" := rowSums(na_flags[, cls_cols], na.rm = TRUE))
})
#dominant class = highest NA count; 0 if no NAs at all; ties go to first listed in FEATURE_CLASSES
dominant_class <- apply(class_counts, 1, function(row) {
if (sum(row) == 0) return(0)
class_codes[[FEATURE_CLASSES[which.max(row)]]]
})
tibble(
x = df$x,
y = df$y,
z = df$z,
y_global = df$y_global,
dataset = str_to_title(parts[1]),
hemisphere = str_to_title(parts[2]),
n_missing_features = rowSums(na_flags, na.rm = TRUE),
dominant_class = dominant_class
)
})
#write out, datestamped
EXPORT_DIR <- "/Users/navona/Library/CloudStorage/OneDrive-UHN/projects/project10_claustrumParcellation_OS/output/2_preprocessing"
missing_map_path <- file.path(EXPORT_DIR, glue::glue("missing_map_{format(Sys.Date(), '%Y%m%d')}.csv"))
write_csv(missing_map, missing_map_path)
#quick summary so we can sanity check before handing off to python
missing_map |>
group_by(dataset, hemisphere) |>
summarise(
voxels = n(),
voxels_w_any_na = sum(n_missing_features > 0),
pct_voxels_w_na = scales::percent(mean(n_missing_features > 0), accuracy = 0.1),
mean_n_missing = round(mean(n_missing_features), 2),
max_n_missing = max(n_missing_features),
.groups = "drop"
) |>
kbl(caption = glue::glue("Missingness summary (written to {basename(missing_map_path)})")) |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| dataset | hemisphere | voxels | voxels_w_any_na | pct_voxels_w_na | mean_n_missing | max_n_missing |
|---|---|---|---|---|---|---|
| Bigbrain | Left | 84466 | 7414 | 8.8% | 0.33 | 120 |
| Bigbrain | Right | 76505 | 7660 | 10.0% | 0.35 | 120 |
| Edlow | Left | 90943 | 924 | 1.0% | 0.03 | 3 |
| Edlow | Right | 92731 | 881 | 1.0% | 0.03 | 3 |
| Lusebrink | Left | 107879 | 742 | 0.7% | 0.02 | 3 |
| Lusebrink | Right | 94890 | 548 | 0.6% | 0.02 | 3 |
#secondary diagnostic: which feature class is driving the missingness, per dataset/hemisphere
missing_map |>
mutate(dominant_class_name = c("none", FEATURE_CLASSES)[dominant_class + 1]) |>
count(dataset, hemisphere, dominant_class_name) |>
pivot_wider(names_from = dominant_class_name, values_from = n, values_fill = 0) |>
kbl(caption = "Dominant missingness class counts (secondary diagnostic)") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| dataset | hemisphere | firstorder | glcm | none |
|---|---|---|---|---|
| Bigbrain | Left | 22 | 7392 | 77052 |
| Bigbrain | Right | 24 | 7636 | 68845 |
| Edlow | Left | 0 | 924 | 90019 |
| Edlow | Right | 0 | 881 | 91850 |
| Lusebrink | Left | 0 | 742 | 107137 |
| Lusebrink | Right | 0 | 548 | 94342 |
#outlier map: dominant feature class per voxel, by IQR-flagged count
outlier_map <- imap_dfr(df_list, function(df, key) {
parts <- str_split(key, "_")[[1]]
feat_df <- df |> select(all_of(feature_cols))
flags <- map_dfc(feat_df, is_iqr_outlier)
#count outliers per voxel, per feature class
class_counts <- map_dfc(FEATURE_CLASSES, function(cls) {
cls_cols <- feature_inventory |> filter(feature_class == cls) |> pull(column)
tibble("{cls}" := rowSums(flags[, cls_cols], na.rm = TRUE))
})
#dominant class = highest count; 0 if no outliers at all; ties go to first listed in FEATURE_CLASSES
dominant_class <- apply(class_counts, 1, function(row) {
if (sum(row) == 0) return(0)
class_codes[[FEATURE_CLASSES[which.max(row)]]]
})
tibble(
x = df$x,
y = df$y,
z = df$z,
y_global = df$y_global,
dataset = str_to_title(parts[1]),
hemisphere = str_to_title(parts[2]),
n_outlier_features = rowSums(flags, na.rm = TRUE),
dominant_class = dominant_class
)
})
#write out, datestamped
outlier_map_path <- file.path(EXPORT_DIR, glue::glue("outlier_map_{format(Sys.Date(), '%Y%m%d')}.csv"))
write_csv(outlier_map, outlier_map_path)
#quick summary so we can sanity check before handing off to python
outlier_map |>
group_by(dataset, hemisphere) |>
summarise(
voxels = n(),
voxels_w_any_outlier = sum(n_outlier_features > 0),
pct_voxels_w_outlier = scales::percent(mean(n_outlier_features > 0), accuracy = 0.1),
mean_n_outlier = round(mean(n_outlier_features), 2),
max_n_outlier = max(n_outlier_features),
.groups = "drop"
) |>
kbl(caption = glue::glue("Outlier summary (written to {basename(outlier_map_path)})")) |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| dataset | hemisphere | voxels | voxels_w_any_outlier | pct_voxels_w_outlier | mean_n_outlier | max_n_outlier |
|---|---|---|---|---|---|---|
| Bigbrain | Left | 84466 | 74103 | 87.7% | 10.32 | 107 |
| Bigbrain | Right | 76505 | 65684 | 85.9% | 11.03 | 115 |
| Edlow | Left | 90943 | 79995 | 88.0% | 10.16 | 108 |
| Edlow | Right | 92731 | 81290 | 87.7% | 10.54 | 88 |
| Lusebrink | Left | 107879 | 91961 | 85.2% | 9.11 | 93 |
| Lusebrink | Right | 94890 | 79448 | 83.7% | 8.80 | 88 |
#secondary diagnostic: which feature class is driving the outliers, per dataset/hemisphere
outlier_map |>
mutate(dominant_class_name = c("none", FEATURE_CLASSES)[dominant_class + 1]) |>
count(dataset, hemisphere, dominant_class_name) |>
pivot_wider(names_from = dominant_class_name, values_from = n, values_fill = 0) |>
kbl(caption = "Dominant outlier class counts (secondary diagnostic)") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| dataset | hemisphere | firstorder | glcm | gldm | glrlm | glszm | ngtdm | none |
|---|---|---|---|---|---|---|---|---|
| Bigbrain | Left | 12627 | 26031 | 4296 | 12290 | 16419 | 2440 | 10363 |
| Bigbrain | Right | 11174 | 25821 | 3803 | 9161 | 13691 | 2034 | 10821 |
| Edlow | Left | 13907 | 30872 | 3911 | 11000 | 17718 | 2587 | 10948 |
| Edlow | Right | 9064 | 34822 | 4001 | 13215 | 17702 | 2486 | 11441 |
| Lusebrink | Left | 8985 | 36680 | 6346 | 12930 | 24342 | 2678 | 15918 |
| Lusebrink | Right | 8099 | 32204 | 5590 | 10346 | 20779 | 2430 | 15442 |