1. Descriptives of train set

Here, I will try to analyze the meaning of the clusters found.

1.a Clasification

library(readr)
library(tidyverse)
library(caret)
library(emmeans)

train_subtypes <- read_csv("data_validation/train_subtypes.csv")


train_subtypes <- train_subtypes %>%
  mutate(ml_subtype = ml_subtype+1) %>%
  mutate(
    poor_fit = ifelse(prob_ml_subtype < 0.5, 1, 0),
    ml_subtype_redefined = ifelse(poor_fit == 1, "poor fit", as.character(ml_subtype))
  )

train_subtypes<-train_subtypes %>% filter (Diagnosis!="HC")%>%
  mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE, 
                             labels = paste(seq(0, max(ml_stage)-5, by = 5), 
                                            seq(5, max(ml_stage), by = 5), sep = "-")))

train_subtypes %>%
  ggplot(aes(x = factor(ml_subtype_redefined), y = ml_stage, fill = ml_subtype_redefined)) +
  geom_violin(trim = FALSE, alpha = 0.5) +
  #geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.7) + 
  geom_dotplot(binaxis = "y", stackdir = "center", 
               dotsize = 0.5, alpha = 0.6) +  # Ajusta binwidth para que haya un punto por cada 10 sujetos
  labs(
    title = "Ml_stage by subtype",
    x = "subtype",
    y = "stage"
  ) +
  scale_fill_manual(values=pal)+
  theme_minimal() +
  theme(legend.position = "none")

1.3 Asymmetry

# assymetry index
dataset_con_index <- train_subtypes %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  # Calcular el índice Derecha-Izquierda para cada región
  mutate(
    # Amygdala
    index_amygdala = 200 * (w_rh_amygdala_h - w_lh_amygdala_h) / (w_rh_amygdala_h + w_lh_amygdala_h),
    # Entorhinal
    index_entorhinal = 200 * (w_rh_entorhinal_medial_h - w_lh_entorhinal_medial_h) / (w_rh_entorhinal_medial_h + w_lh_entorhinal_medial_h),
    # Frontal Lateral
    index_frontal_lateral = 200 * (w_rh_frontal_lateral_h - w_lh_frontal_lateral_h) / (w_rh_frontal_lateral_h + w_lh_frontal_lateral_h),
    # Frontal Medial
    index_frontal_medial = 200 * (w_rh_frontal_medial_h - w_lh_frontal_medial_h) / (w_rh_frontal_medial_h + w_lh_frontal_medial_h),
    # Hippocampus
    index_hippocampus = 200 * (w_rh_hippocampus_h - w_lh_hippocampus_h) / (w_rh_hippocampus_h + w_lh_hippocampus_h),
    # Occipital Lateral
    index_occipital_lateral = 200 * (w_rh_occipital_lateral_h - w_lh_occipital_lateral_h) / (w_rh_occipital_lateral_h + w_lh_occipital_lateral_h),
    # Occipital Medial
    index_occipital_medial = 200 * (w_rh_occipital_medial_h - w_lh_occipital_medial_h) / (w_rh_occipital_medial_h + w_lh_occipital_medial_h),
    # Parietal Lateral
    index_parietal_lateral = 200 * (w_rh_parietal_lateral_h - w_lh_parietal_lateral_h) / (w_rh_parietal_lateral_h + w_lh_parietal_lateral_h),
    # Parietal Medial
    index_parietal_medial = 200 * (w_rh_parietal_medial_h - w_lh_parietal_medial_h) / (w_rh_parietal_medial_h + w_lh_parietal_medial_h),
    # Temporal Lateral
    index_temporal_lateral = 200 * (w_rh_temporal_lateral_h - w_lh_temporal_lateral_h) / (w_rh_temporal_lateral_h + w_lh_temporal_lateral_h)) %>%
  # Seleccionar sólo las columnas necesarias
  select(MRI_ID, index_amygdala, index_entorhinal, index_frontal_lateral, index_frontal_medial,
         index_hippocampus, index_occipital_lateral, index_occipital_medial, index_parietal_lateral, index_parietal_medial,
         index_temporal_lateral, ml_subtype_redefined, ml_stage)
# Crear una nueva columna que agrupe ml_stage en intervalos de 10
dataset_con_index <- dataset_con_index %>%
  mutate(stage_group = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 10), include.lowest = TRUE, right = FALSE, labels = FALSE))

# Transformar el dataset a formato largo
dataset_largo <- dataset_con_index %>%
  pivot_longer(cols = starts_with("index"), names_to = "variable", values_to = "value")

# Calcular las medianas por grupo, subtipo y variable
medianas <- dataset_largo %>%
  group_by(stage_group, ml_subtype_redefined, variable) %>%
  summarise(mediana = median(value, na.rm = TRUE), .groups = 'drop')

# Crear el boxplot con las líneas que conectan las medianas
ggplot(dataset_largo, aes(x = factor(stage_group), y = value, fill = ml_subtype_redefined)) +
  geom_boxplot() +
  facet_wrap(~ variable, scales = "free_y") +
  geom_line(data = medianas, aes(x = factor(stage_group), y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined), 
            size = 1, linetype = "solid") +
  labs(
    title = "Rigth-Left index by subtype and stage",
    x = "Stage Group (bins 10)",
    y = "Rigth-Left index",
    fill = "ML Subtype",
    color = "ML Subtype"
  ) +
  theme_minimal() +
  scale_fill_manual(values = pal)+
  scale_color_manual(values=pal)+
  ylim(-200,200)+
  theme(legend.position = "bottom")

There doesn’t seem to be much asymmetry… Question for Rik: Could we be wasting computing power with so many regions?

Clinical features

For this I only used the training subjects on their first MRI, in order to compare the first clinic visit (n=1724)

library(readxl)
clinical_data <- read_excel("647_Sustain_Calandri_clinicaldata_20230825.xlsx")

ids <- read_csv("dat_mri_export_random.csv") %>% select(MRI_ID, I_ID)
first_clinic <- train_subtypes %>% filter(order==1)
first_clinic<-left_join(first_clinic, ids, by="MRI_ID")
first_clinic<-left_join(first_clinic, clinical_data, by="I_ID")

first_clinic<-first_clinic %>%
  mutate(apoe4_status = case_when(
    APOE == "E4E4" ~ "homozygous E4",  # E4E4 es homocigoto
    APOE %in% c("E3E4", "E4E3", "E2E4", "E4E2") ~ "heterozygous E4",  # Cualquier combinación con un E4 es heterocigoto
    !grepl("E4", APOE) ~ "Negative",  # Si no contiene E4, es negativo
    is.na(APOE) ~ NA_character_,  # Si es NA, se mantiene como NA
    TRUE ~ "Other"  # En caso de que haya algún valor no contemplado
  ))

library(NormPsy)

first_clinic$normMMSE<-normMMSE(first_clinic$V_MMSE)

first_clinic %>%
  ggplot(aes(x=ml_subtype_redefined, y=M_age, fill=ml_subtype_redefined))+
  geom_boxplot()+
  scale_fill_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "none")+
  ggtitle("Age distribution")

first_clinic %>%
  filter(ml_subtype_redefined!="poor fit") %>%
  ggplot(aes(color=ml_subtype_redefined, y=normMMSE, x=ml_stage))+
  geom_point()+
  geom_smooth(method = "lm")+
  scale_color_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")+
  ggtitle("normalized MMSE by subtype and stage")

first_clinic %>%
  filter(ml_subtype_redefined!="poor fit") %>%
  ggplot(aes(color=ml_subtype_redefined, y=V_IADL, x=ml_stage))+
  geom_point()+
  geom_smooth(method = "lm")+
  scale_color_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")+
  ggtitle("IADL by subtype and stage")

first_clinic <-first_clinic %>% 
  mutate(clinical_diagnosis=ifelse(diagnoseOms=="Possible AD" |diagnoseOms=="Probable AD", "AD", diagnoseOms))

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit" & ml_stage!=0) %>%
  ggplot(aes(fill = clinical_diagnosis, x = ml_subtype_redefined)) +
  geom_bar(position = "dodge", stat = "count") +  # Corregido para usar position = "dodge"
  scale_fill_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Subtype by clinical diagnosis")

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit"& ml_stage!=0)  %>%
  group_by(ml_subtype_redefined, clinical_diagnosis) %>%
  tally() %>%  # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
  group_by(ml_subtype_redefined) %>%
  mutate(percentage = n / sum(n)) %>%  # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
  ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = clinical_diagnosis)) +
  geom_bar(stat = "identity", position = "stack") +  # Usamos stat = "identity" porque ya tenemos los datos procesados
  geom_text(
    aes(label = scales::percent(percentage, accuracy = 0.1)),
    position = position_stack(vjust = 0.5), size = 3  # Centra las etiquetas en el medio de las barras apiladas
  ) +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Subtype by clinical diagnosis (%)")

library(scales)

# Crear variable stage_interval, agrupar en intervalos de 5 en 5 y remover NA
first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE, 
                             labels = paste(seq(0, max(ml_stage)-5, by = 5), 
                                            seq(5, max(ml_stage), by = 5), sep = "-"))) %>%
  filter(!is.na(stage_interval)) %>%  # Remover filas con NA en stage_interval
  group_by(ml_subtype_redefined, stage_interval, clinical_diagnosis) %>%
  tally() %>%
  group_by(ml_subtype_redefined, stage_interval) %>%
  mutate(percentage = n / sum(n)) %>%  # Calcular el porcentaje por subtipo y stage_interval
  ggplot(aes(x = stage_interval, y = percentage, fill = clinical_diagnosis)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~ml_subtype_redefined) +  # Crear un gráfico por cada subtipo
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Percentage of Clinical Diagnosis by Stage Interval and Subtype")

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit"& ml_stage!=0)  %>%
  group_by(ml_subtype_redefined, apoe4_status) %>%
  tally() %>%  # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
  group_by(ml_subtype_redefined) %>%
  mutate(percentage = n / sum(n)) %>%  # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
  ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = apoe4_status)) +
  geom_bar(stat = "identity", position = "stack") +  # Usamos stat = "identity" porque ya tenemos los datos procesados
  geom_text(
    aes(label = scales::percent(percentage, accuracy = 0.1)),
    position = position_stack(vjust = 0.5), size = 3  # Centra las etiquetas en el medio de las barras apiladas
  ) +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("APOE4 status by subtype (%)")

1.4 Biomarkers

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x=ml_subtype_redefined, y=L_PTAU, fill=ml_subtype_redefined))+
  geom_boxplot()+
  scale_fill_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")

library(dplyr)
library(ggplot2)

# Calcular las medianas por stage_interval y ml_subtype_redefined
medianas <- first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  group_by(stage_interval, ml_subtype_redefined) %>%
  summarise(mediana = median(L_PTAU, na.rm = TRUE), .groups = 'drop')

# Crear el boxplot con la línea para las medianas
first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x = stage_interval, y = L_PTAU, fill = ml_subtype_redefined)) +
  geom_boxplot() +
  geom_line(data = medianas, aes(x = stage_interval, y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined), 
            size = 1, linetype = "solid") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "pTAU by Stage & Subtype",
    x = "Stage Interval",
    y = "pTAU(CSF)",
    fill = "ML Subtype",
    color = "ML Subtype"
  )

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x = L_AB42_corr, y = L_PTAU, color = ml_subtype_redefined)) +
  geom_point() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "pTAU/AB42 by Subtype",
    x = "amyloid (AB42_corr)",
    y = "pTAU(CSF)",
    fill = "ML Subtype",
    color = "ML Subtype"
  )

ask Rik?!

2. In-vivo validation

In this section, I used the additional scans of the subjects (n=1259). The trained model was applied, and classification into stage and subtype was obtained. First, I will evaluate the model’s ability to consistently classify subjects into the same subtype, even when presented with a scan taken before or after the one used for training.

2.1 Subtype congruency

2.1.1 Global

subtypes_vivo <- read_csv("data_validation/subtypes_vivo.csv") %>% rename(I_ID=i_id)
subtypes_vivo$ml_subtype<-subtypes_vivo$ml_subtype+1
dat_mri_export_validation <- read_csv("dat_mri_export_validation.csv")
subtypes_vivo<-left_join(subtypes_vivo, dat_mri_export_validation, by="I_ID")

train_subtypes<-left_join(train_subtypes, ids, by="MRI_ID")

subtypes_vivo$origin<-"test"
train_subtypes$origin<-"train"


validation<-rbind(train_subtypes[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")], subtypes_vivo[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")])

# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, origin)

# Agrupar los datos de train por I_ID y tomar la primera instancia de cada sujeto
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%
  ungroup()

# Filtrar solo los datos de "test"
test_data <- confusion_data %>%
  filter(origin == "test")

# Unir los datos de "train" y "test" en base al I_ID
confusion_data_long <- left_join(test_data, train_data, by = "I_ID", suffix = c("_test", "_train"))

# Crear una matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Plotear la matriz de confusión usando ggplot
conf_matrix_df <- as.data.frame(conf_matrix) %>%
  rename(Train = Var1, Test = Var2, Count = Freq)

ggplot(conf_matrix_df, aes(x = Train, y = Test, fill = Count)) +
  geom_tile() +
  geom_text(aes(label = Count), color = "white", size = 4) +
  scale_fill_gradient(low = "grey", high = "navy") +
  labs(
    title = "Confusion Matrix: Train vs Test Global (ml_subtype)",
    x = "Train ml_subtype",
    y = "Test ml_subtype"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.1.2 Retrovalidation

(when validation MRI precedes train MRI)

# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, order, origin)

# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%   # Selecciona la primera fila (puedes cambiar a otra lógica si es necesario)
  ungroup() %>%
  rename(ml_subtype_train = ml_subtype)

# Filtrar "test" y seleccionar la fila con el menor 'order_test' por sujeto
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Toma la fila con el menor order
  ungroup() %>%
  rename(ml_subtype_test = ml_subtype)

# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
  inner_join(test_data, by = "I_ID")   # Asegura que cada sujeto tenga ambas columnas

# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")

# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), vjust = 1) +
  scale_fill_gradient(low = "grey", high = "navy") +
  labs(
    title = "ML Subtype in Train and Test (previous) set ",
    x = "ML Subtype Train",
    y = "ML Subtype Test",
    fill = "Count"
  ) +
  theme_minimal()

# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
                                     as.factor(confusion_data_long$ml_subtype_train))

# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 498  74  22
##          2  68 152  37
##          3  29  34 136
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7486          
##                  95% CI : (0.7212, 0.7746)
##     No Information Rate : 0.5667          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5692          
##                                           
##  Mcnemar's Test P-Value : 0.7194          
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8370   0.5846   0.6974
## Specificity            0.7890   0.8671   0.9263
## Pos Pred Value         0.8384   0.5914   0.6834
## Neg Pred Value         0.7873   0.8638   0.9307
## Prevalence             0.5667   0.2476   0.1857
## Detection Rate         0.4743   0.1448   0.1295
## Detection Prevalence   0.5657   0.2448   0.1895
## Balanced Accuracy      0.8130   0.7259   0.8119

2.1.3 Forward validation

(when validation MRI follows train MRI)

library(dplyr)
library(caret)
library(ggplot2)

# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, order, origin)

# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%   # Selecciona la primera fila de train por sujeto
  ungroup() %>%
  rename(ml_subtype_train = ml_subtype, order_train = order)

# Filtrar "test" y quedarnos solo con las filas donde order_test > order_train
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  left_join(train_data %>% select(I_ID, order_train), by = "I_ID") %>%  # Agregar order_train
  filter(order > order_train) %>%  # Conservar solo test con order_test > order_train
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Seleccionar la fila de test con el menor order_test mayor a train
  ungroup() %>%
  rename(ml_subtype_test = ml_subtype, order_test = order)

# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
  inner_join(test_data, by = "I_ID")   # Asegura que cada sujeto tenga ambas columnas

# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")

# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), vjust = 1) +
  scale_fill_gradient(low = "grey", high = "blue") +
  labs(
    title = "ML Subtype in Train and Test (later) set",
    x = "ML Subtype Train",
    y = "ML Subtype Test",
    fill = "Count"
  ) +
  theme_minimal()

# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
                                     as.factor(confusion_data_long$ml_subtype_train))

# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 364  59  13
##          2  47  99  28
##          3  21  23 110
## 
## Overall Statistics
##                                           
##                Accuracy : 0.75            
##                  95% CI : (0.7177, 0.7803)
##     No Information Rate : 0.5654          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5716          
##                                           
##  Mcnemar's Test P-Value : 0.292           
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8426   0.5470   0.7285
## Specificity            0.7831   0.8714   0.9282
## Pos Pred Value         0.8349   0.5690   0.7143
## Neg Pred Value         0.7927   0.8610   0.9328
## Prevalence             0.5654   0.2369   0.1976
## Detection Rate         0.4764   0.1296   0.1440
## Detection Prevalence   0.5707   0.2277   0.2016
## Balanced Accuracy      0.8129   0.7092   0.8283

2.2 Stage validation

I had to define what the appropriate classification was here. I defined it as follows: if the subject’s validation MRI was later than the one used for training, the stage of sustain should be higher in the validation than in the training, and vice versa for earlier MRIs.

# Filtrar datos solo de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_stage, order, origin)

# Seleccionar una única fila por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%  # Seleccionamos la primera observación de train
  ungroup() %>%
  rename(ml_stage_train = ml_stage, order_train = order)

# Filtrar "test" y emparejar con train por sujeto
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  left_join(train_data %>% select(I_ID, ml_stage_train, order_train), by = "I_ID") %>%  # Unimos la info de train
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Seleccionar la primera instancia de test disponible
  ungroup() %>%
  rename(ml_stage_test = ml_stage, order_test = order)


test_data <- test_data %>%
  mutate(
    ml_stage_expected = case_when(
      ml_stage_test > ml_stage_train ~ order_test > order_train,
      ml_stage_test < ml_stage_train ~ order_test < order_train,
      TRUE ~ NA
    ),
    validation_time = case_when(
      order_test < order_train ~ "before",
      order_test > order_train ~ "after",
      TRUE ~ NA_character_
    )
  )


# Calcular la proporción de veces que la relación esperada se cumple
accuracy <- mean(test_data$ml_stage_expected, na.rm = TRUE)

test_data %>%
  filter(!is.na(ml_stage_expected)) %>%  # Eliminar NA
  group_by(validation_time, ml_stage_expected) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100) %>%  # Calcular los porcentajes
  ggplot(aes(x = validation_time, y = percentage, fill = factor(ml_stage_expected))) +
  geom_bar(stat = "identity", position = "stack") +  # Barras apiladas
  scale_fill_manual(values = c("TRUE" = "skyblue", "FALSE" = "salmon")) +
  labs(
    title = "Percentage of correct staging clasification by Time of the validation MRI",
    x = "Validation Time",
    y = "Percentage",
    fill = "ml_stage_expected"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

ggplot(test_data, aes(x = ml_stage_train, y = ml_stage_test, color = ml_stage_expected)) +
  geom_point(alpha = 0.7, size = 3) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +  # Línea de identidad
  scale_color_manual(values = c("red", "blue"), labels = c("Incorrecto", "Correcto")) +
  labs(
    title = "Validación de ml_stage como clasificador",
    x = "ml_stage en Train",
    y = "ml_stage en Test",
    color = "Cumple relación esperada"
  ) +
  theme_minimal()

# Estadísticas de la relación esperada
cat("Proportion of subjects with  correct stage direction:", round(accuracy * 100, 2), "%\n")

## Proportion of subjects with  correct stage direction: 63.38 %

# Crear una tabla resumen
table_result <- table(test_data$ml_stage_expected, test_data$validation_time)

# Mostrar tabla de resultados
print(table_result)

##        
##         after before
##   FALSE    70    246
##   TRUE    175    372

#3 Post mortem

neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv")

## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

subtypes_postmortem$i_id <- gsub("PAGE-", "", subtypes_postmortem$i_id)
summary(as.factor(subtypes_postmortem$ml_subtype))

##  0  1  2 
## 38 12 10

# subtypes_postmortem <- subtypes_postmortem %>%
#   mutate(
#     subtype = case_when(
#       ml_subtype == 0 ~ "subtype1",
#       ml_subtype == 1 ~ "subtype2",
#       ml_subtype %in% c(2, 3) ~ "subtype3"
#     )
#   )


Patho_data_combined_Ch2_Shareable <- read_excel("Patho_data_combined_Ch2_Shareable.xlsx") %>%
  rename(i_id=SubjectANW)

## New names:
## • `` -> `...1`

data<-left_join(Patho_data_combined_Ch2_Shareable, subtypes_postmortem, by="i_id") %>% drop_na(ml_subtype)


data$dx_pre<-ifelse(data$Cohort=="Control",1,0)
data$dx_post<-ifelse(data$ml_subtype==0,1,0)
table(data$dx_pre, data$dx_post)

##    
##       0   1
##   0 108 215
##   1  71  62

library(lmerTest)

## Loading required package: lme4

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(lme4)
modelo_completo <- lmer(Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id), data = data)
modelo_sin_ml_stage <- lmer(Amyloid ~ ml_subtype + Region + (1 | i_id), data = data)
anova(modelo_sin_ml_stage, modelo_completo)

## refitting model(s) with ML (instead of REML)

## Data: data
## Models:
## modelo_sin_ml_stage: Amyloid ~ ml_subtype + Region + (1 | i_id)
## modelo_completo: Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id)
##                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## modelo_sin_ml_stage   12 1682.5 1731.9 -829.27   1658.5                       
## modelo_completo       13 1678.3 1731.8 -826.14   1652.3 6.2456  1    0.01245 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

modelo<-lmer(Amyloid ~ as.factor(ml_subtype)* Region+ml_stage  + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Amyloid ~ as.factor(ml_subtype) * Region + ml_stage + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 1649.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3547 -0.5060 -0.0713  0.4301  4.8998 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 3.903    1.976   
##  Residual             1.671    1.293   
## Number of obs: 452, groups:  i_id, 51
## 
## Fixed effects:
##                                       Estimate Std. Error        df t value
## (Intercept)                            2.30668    0.50463  74.75879   4.571
## as.factor(ml_subtype)1                 0.01485    0.83251  88.49932   0.018
## as.factor(ml_subtype)2                -0.82086    0.97347  91.05426  -0.843
## RegionGFM                              0.72694    0.33804 377.22019   2.150
## RegionGPS                              0.28780    0.33804 377.22019   0.851
## RegionGTM                             -0.01857    0.33804 377.22019  -0.055
## RegionHip                             -1.58946    0.33804 377.22019  -4.702
## RegionOC                              -0.87791    0.33804 377.22019  -2.597
## RegionParaHip                         -0.58756    0.34033 377.13008  -1.726
## RegionPCC                              0.25192    0.33804 377.22019   0.745
## RegionPrecun                           0.42201    0.33804 377.22019   1.248
## ml_stage                               0.03201    0.01326  47.03555   2.414
## as.factor(ml_subtype)1:RegionGFM       0.15891    0.63787 377.17321   0.249
## as.factor(ml_subtype)2:RegionGFM       0.15999    0.75188 377.25506   0.213
## as.factor(ml_subtype)1:RegionGPS      -0.29743    0.63787 377.17321  -0.466
## as.factor(ml_subtype)2:RegionGPS      -0.27821    0.75188 377.25506  -0.370
## as.factor(ml_subtype)1:RegionGTM       0.31331    0.63787 377.17321   0.491
## as.factor(ml_subtype)2:RegionGTM       0.10751    0.75188 377.25506   0.143
## as.factor(ml_subtype)1:RegionHip      -0.58092    0.63787 377.17321  -0.911
## as.factor(ml_subtype)2:RegionHip       0.71344    0.75188 377.25506   0.949
## as.factor(ml_subtype)1:RegionOC        0.08202    0.63787 377.17321   0.129
## as.factor(ml_subtype)2:RegionOC       -0.29473    0.75188 377.25506  -0.392
## as.factor(ml_subtype)1:RegionParaHip   0.23002    0.63909 377.15443   0.360
## as.factor(ml_subtype)2:RegionParaHip   0.38868    0.75291 377.24333   0.516
## as.factor(ml_subtype)1:RegionPCC      -0.06203    0.63787 377.17321  -0.097
## as.factor(ml_subtype)2:RegionPCC      -0.07527    0.75188 377.25506  -0.100
## as.factor(ml_subtype)1:RegionPrecun   -0.21929    0.63787 377.17321  -0.344
## as.factor(ml_subtype)2:RegionPrecun   -0.68450    0.77449 377.56992  -0.884
##                                      Pr(>|t|)    
## (Intercept)                          1.88e-05 ***
## as.factor(ml_subtype)1                0.98581    
## as.factor(ml_subtype)2                0.40131    
## RegionGFM                             0.03216 *  
## RegionGPS                             0.39510    
## RegionGTM                             0.95622    
## RegionHip                            3.62e-06 ***
## RegionOC                              0.00977 ** 
## RegionParaHip                         0.08509 .  
## RegionPCC                             0.45659    
## RegionPrecun                          0.21266    
## ml_stage                              0.01973 *  
## as.factor(ml_subtype)1:RegionGFM      0.80340    
## as.factor(ml_subtype)2:RegionGFM      0.83161    
## as.factor(ml_subtype)1:RegionGPS      0.64129    
## as.factor(ml_subtype)2:RegionGPS      0.71158    
## as.factor(ml_subtype)1:RegionGTM      0.62359    
## as.factor(ml_subtype)2:RegionGTM      0.88638    
## as.factor(ml_subtype)1:RegionHip      0.36302    
## as.factor(ml_subtype)2:RegionHip      0.34329    
## as.factor(ml_subtype)1:RegionOC       0.89775    
## as.factor(ml_subtype)2:RegionOC       0.69529    
## as.factor(ml_subtype)1:RegionParaHip  0.71911    
## as.factor(ml_subtype)2:RegionParaHip  0.60600    
## as.factor(ml_subtype)1:RegionPCC      0.92258    
## as.factor(ml_subtype)2:RegionPCC      0.92031    
## as.factor(ml_subtype)1:RegionPrecun   0.73120    
## as.factor(ml_subtype)2:RegionPrecun   0.37736    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 28 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

anova(modelo)

## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          1.755  0.8776     2  47.01  0.5253   0.59480    
## Region                       147.755 18.4693     8 377.06 11.0552 4.732e-14 ***
## ml_stage                       9.734  9.7341     1  47.04  5.8265   0.01973 *  
## as.factor(ml_subtype):Region  14.771  0.9232    16 377.05  0.5526   0.91745    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emmeans(modelo, pairwise ~ ml_subtype | Region)

## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.099 0.437 90.2    2.231     3.97
##           1  3.113 0.699 89.5    1.724     4.50
##           2  2.278 0.862 92.5    0.567     3.99
## 
## Region = GFM:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.826 0.429 84.5    2.972     4.68
##           1  3.999 0.689 84.6    2.630     5.37
##           2  3.165 0.842 84.8    1.491     4.84
## 
## Region = GPS:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.386 0.429 84.5    2.533     4.24
##           1  3.104 0.689 84.6    1.734     4.47
##           2  2.287 0.842 84.8    0.614     3.96
## 
## Region = GTM:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.080 0.429 84.5    2.227     3.93
##           1  3.408 0.689 84.6    2.039     4.78
##           2  2.367 0.842 84.8    0.693     4.04
## 
## Region = Hip:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  1.509 0.429 84.5    0.656     2.36
##           1  0.943 0.689 84.6   -0.426     2.31
##           2  1.402 0.842 84.8   -0.272     3.08
## 
## Region = OC:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  2.221 0.429 84.5    1.368     3.07
##           1  2.318 0.689 84.6    0.948     3.69
##           2  1.105 0.842 84.8   -0.568     2.78
## 
## Region = ParaHip:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  2.511 0.432 86.3    1.653     3.37
##           1  2.756 0.689 84.6    1.386     4.13
##           2  2.079 0.842 84.8    0.406     3.75
## 
## Region = PCC:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.351 0.429 84.5    2.497     4.20
##           1  3.303 0.689 84.6    1.934     4.67
##           2  2.454 0.842 84.8    0.781     4.13
## 
## Region = Precun:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.521 0.429 84.5    2.668     4.37
##           1  3.316 0.689 84.6    1.947     4.69
##           2  2.015 0.861 92.5    0.305     3.73
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.0149 0.833 88.6  -0.018  0.9998
##  ml_subtype0 - ml_subtype2   0.8209 0.973 91.1   0.843  0.6772
##  ml_subtype1 - ml_subtype2   0.8357 1.099 92.4   0.760  0.7283
## 
## Region = GFM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.1738 0.819 83.5  -0.212  0.9755
##  ml_subtype0 - ml_subtype2   0.6609 0.952 83.9   0.694  0.7676
##  ml_subtype1 - ml_subtype2   0.8346 1.078 85.8   0.775  0.7196
## 
## Region = GPS:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2826 0.819 83.5   0.345  0.9366
##  ml_subtype0 - ml_subtype2   1.0991 0.952 83.9   1.154  0.4837
##  ml_subtype1 - ml_subtype2   0.8165 1.078 85.8   0.758  0.7298
## 
## Region = GTM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.3282 0.819 83.5  -0.400  0.9155
##  ml_subtype0 - ml_subtype2   0.7134 0.952 83.9   0.749  0.7349
##  ml_subtype1 - ml_subtype2   1.0415 1.078 85.8   0.967  0.5999
## 
## Region = Hip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.5661 0.819 83.5   0.691  0.7695
##  ml_subtype0 - ml_subtype2   0.1074 0.952 83.9   0.113  0.9930
##  ml_subtype1 - ml_subtype2  -0.4587 1.078 85.8  -0.426  0.9051
## 
## Region = OC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.0969 0.819 83.5  -0.118  0.9923
##  ml_subtype0 - ml_subtype2   1.1156 0.952 83.9   1.172  0.4733
##  ml_subtype1 - ml_subtype2   1.2125 1.078 85.8   1.125  0.5013
## 
## Region = ParaHip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.2449 0.821 84.0  -0.298  0.9522
##  ml_subtype0 - ml_subtype2   0.4322 0.953 84.3   0.453  0.8931
##  ml_subtype1 - ml_subtype2   0.6771 1.078 85.8   0.628  0.8050
## 
## Region = PCC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.0472 0.819 83.5   0.058  0.9982
##  ml_subtype0 - ml_subtype2   0.8961 0.952 83.9   0.941  0.6159
##  ml_subtype1 - ml_subtype2   0.8489 1.078 85.8   0.788  0.7115
## 
## Region = Precun:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2044 0.819 83.5   0.249  0.9663
##  ml_subtype0 - ml_subtype2   1.5054 0.970 89.8   1.552  0.2717
##  ml_subtype1 - ml_subtype2   1.3009 1.093 90.4   1.190  0.4620
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

modelo<-lmer(pTau ~ as.factor(ml_subtype)* Region  + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pTau ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 3429.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.98022 -0.58679 -0.08051  0.52627  2.99100 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 485.6    22.04   
##  Residual             136.0    11.66   
## Number of obs: 439, groups:  i_id, 51
## 
## Fixed effects:
##                                      Estimate Std. Error      df t value
## (Intercept)                            41.348      4.610  81.554   8.969
## as.factor(ml_subtype)1                -14.498      8.696  80.465  -1.667
## as.factor(ml_subtype)2                 -1.586     10.089  78.842  -0.157
## RegionGFM                             -15.559      3.159 364.360  -4.926
## RegionGPS                             -12.596      3.159 364.360  -3.988
## RegionGTM                             -10.463      3.159 364.360  -3.312
## RegionHip                              -8.687      3.199 364.191  -2.716
## RegionOC                              -33.478      3.159 364.360 -10.598
## RegionParaHip                          -2.189      3.271 363.978  -0.669
## RegionPCC                             -10.738      3.159 364.360  -3.399
## RegionPrecun                          -18.947      3.159 364.360  -5.998
## as.factor(ml_subtype)1:RegionGFM       13.438      5.934 364.267   2.265
## as.factor(ml_subtype)2:RegionGFM        8.059      6.841 364.291   1.178
## as.factor(ml_subtype)1:RegionGPS       11.218      5.934 364.267   1.891
## as.factor(ml_subtype)2:RegionGPS        7.709      7.047 364.515   1.094
## as.factor(ml_subtype)1:RegionGTM       14.718      5.934 364.267   2.480
## as.factor(ml_subtype)2:RegionGTM        8.236      6.841 364.291   1.204
## as.factor(ml_subtype)1:RegionHip        3.426      5.955 364.219   0.575
## as.factor(ml_subtype)2:RegionHip      -10.392      7.007 363.975  -1.483
## as.factor(ml_subtype)1:RegionOC        25.585      5.934 364.267   4.312
## as.factor(ml_subtype)2:RegionOC        14.670      6.841 364.291   2.144
## as.factor(ml_subtype)1:RegionParaHip    4.089      6.068 364.038   0.674
## as.factor(ml_subtype)2:RegionParaHip   -4.702      7.040 363.931  -0.668
## as.factor(ml_subtype)1:RegionPCC       10.511      5.934 364.267   1.771
## as.factor(ml_subtype)2:RegionPCC        3.757      6.841 364.291   0.549
## as.factor(ml_subtype)1:RegionPrecun    21.645      5.934 364.267   3.648
## as.factor(ml_subtype)2:RegionPrecun    11.957      6.841 364.291   1.748
##                                      Pr(>|t|)    
## (Intercept)                          8.68e-14 ***
## as.factor(ml_subtype)1               0.099364 .  
## as.factor(ml_subtype)2               0.875459    
## RegionGFM                            1.28e-06 ***
## RegionGPS                            8.07e-05 ***
## RegionGTM                            0.001017 ** 
## RegionHip                            0.006930 ** 
## RegionOC                              < 2e-16 ***
## RegionParaHip                        0.503708    
## RegionPCC                            0.000750 ***
## RegionPrecun                         4.81e-09 ***
## as.factor(ml_subtype)1:RegionGFM     0.024123 *  
## as.factor(ml_subtype)2:RegionGFM     0.239576    
## as.factor(ml_subtype)1:RegionGPS     0.059483 .  
## as.factor(ml_subtype)2:RegionGPS     0.274703    
## as.factor(ml_subtype)1:RegionGTM     0.013576 *  
## as.factor(ml_subtype)2:RegionGTM     0.229409    
## as.factor(ml_subtype)1:RegionHip     0.565443    
## as.factor(ml_subtype)2:RegionHip     0.138933    
## as.factor(ml_subtype)1:RegionOC      2.09e-05 ***
## as.factor(ml_subtype)2:RegionOC      0.032676 *  
## as.factor(ml_subtype)1:RegionParaHip 0.500847    
## as.factor(ml_subtype)2:RegionParaHip 0.504642    
## as.factor(ml_subtype)1:RegionPCC     0.077332 .  
## as.factor(ml_subtype)2:RegionPCC     0.583208    
## as.factor(ml_subtype)1:RegionPrecun  0.000303 ***
## as.factor(ml_subtype)2:RegionPrecun  0.081351 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

anova(modelo)

## Type III Analysis of Variance Table with Satterthwaite's method
##                              Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          42.8   21.41     2  47.93  0.1574 0.8548351    
## Region                       9902.5 1237.81     8 364.09  9.0988 2.033e-11 ***
## as.factor(ml_subtype):Region 6344.0  396.50    16 364.08  2.9146 0.0001537 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emmeans(modelo, pairwise ~ ml_subtype | Region)

## $emmeans
## Region = Fusiform:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  41.35 4.61 81.7    32.18     50.5
##           1  26.85 7.37 80.2    12.18     41.5
##           2  39.76 8.97 78.3    21.90     57.6
## 
## Region = GFM:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  25.79 4.48 73.2    16.86     34.7
##           1  24.73 7.20 73.2    10.38     39.1
##           2  32.26 8.82 73.2    14.69     49.8
## 
## Region = GPS:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  28.75 4.48 73.2    19.83     37.7
##           1  25.47 7.20 73.2    11.13     39.8
##           2  34.87 8.97 78.1    17.02     52.7
## 
## Region = GTM:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  30.88 4.48 73.2    21.96     39.8
##           1  31.10 7.20 73.2    16.76     45.4
##           2  37.53 8.82 73.2    19.97     55.1
## 
## Region = Hip:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  32.66 4.52 75.6    23.66     41.7
##           1  21.59 7.20 73.2     7.25     35.9
##           2  20.68 8.97 78.3     2.82     38.5
## 
## Region = OC:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0   7.87 4.48 73.2    -1.05     16.8
##           1  18.96 7.20 73.2     4.61     33.3
##           2  20.95 8.82 73.2     3.39     38.5
## 
## Region = ParaHip:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  39.16 4.58 80.0    30.03     48.3
##           1  28.75 7.28 76.3    14.25     43.2
##           2  32.87 8.97 78.3    15.01     50.7
## 
## Region = PCC:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  30.61 4.48 73.2    21.69     39.5
##           1  26.62 7.20 73.2    12.28     41.0
##           2  32.78 8.82 73.2    15.21     50.3
## 
## Region = Precun:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  22.40 4.48 73.2    13.48     31.3
##           1  29.55 7.20 73.2    15.20     43.9
##           2  32.77 8.82 73.2    15.20     50.3
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   14.498  8.70 80.6   1.667  0.2241
##  ml_subtype0 - ml_subtype2    1.586 10.09 79.0   0.157  0.9865
##  ml_subtype1 - ml_subtype2  -12.911 11.61 79.0  -1.112  0.5098
## 
## Region = GFM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    1.060  8.48 73.2   0.125  0.9914
##  ml_subtype0 - ml_subtype2   -6.473  9.89 73.2  -0.655  0.7903
##  ml_subtype1 - ml_subtype2   -7.533 11.38 73.2  -0.662  0.7862
## 
## Region = GPS:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    3.280  8.48 73.2   0.387  0.9209
##  ml_subtype0 - ml_subtype2   -6.122 10.02 77.1  -0.611  0.8147
##  ml_subtype1 - ml_subtype2   -9.402 11.50 76.1  -0.818  0.6933
## 
## Region = GTM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -0.220  8.48 73.2  -0.026  0.9996
##  ml_subtype0 - ml_subtype2   -6.650  9.89 73.2  -0.673  0.7801
##  ml_subtype1 - ml_subtype2   -6.430 11.38 73.2  -0.565  0.8391
## 
## Region = Hip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   11.072  8.50 73.8   1.303  0.3981
##  ml_subtype0 - ml_subtype2   11.978 10.05 77.7   1.192  0.4613
##  ml_subtype1 - ml_subtype2    0.906 11.50 76.2   0.079  0.9966
## 
## Region = OC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -11.087  8.48 73.2  -1.308  0.3954
##  ml_subtype0 - ml_subtype2  -13.083  9.89 73.2  -1.323  0.3870
##  ml_subtype1 - ml_subtype2   -1.996 11.38 73.2  -0.175  0.9832
## 
## Region = ParaHip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   10.409  8.60 77.3   1.210  0.4509
##  ml_subtype0 - ml_subtype2    6.288 10.08 78.6   0.624  0.8074
##  ml_subtype1 - ml_subtype2   -4.121 11.55 77.5  -0.357  0.9324
## 
## Region = PCC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    3.987  8.48 73.2   0.470  0.8854
##  ml_subtype0 - ml_subtype2   -2.171  9.89 73.2  -0.220  0.9738
##  ml_subtype1 - ml_subtype2   -6.158 11.38 73.2  -0.541  0.8513
## 
## Region = Precun:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -7.147  8.48 73.2  -0.843  0.6776
##  ml_subtype0 - ml_subtype2  -10.371  9.89 73.2  -1.049  0.5487
##  ml_subtype1 - ml_subtype2   -3.224 11.38 73.2  -0.283  0.9568
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

# Realizar el análisis de los contrastes
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes <- as.data.frame(resultados$contrasts)
# Filtrar los contrastes significativos (p < 0.05)
contrasttau<-contrastes
contrasttau$stain<-"pTau"

modelo<-lmer(NfL ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: NfL ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 2104.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6833 -0.6146  0.0399  0.6263  2.4318 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 8.971    2.995   
##  Residual             7.448    2.729   
## Number of obs: 424, groups:  i_id, 48
## 
## Fixed effects:
##                                      Estimate Std. Error       df t value
## (Intercept)                            9.0297     0.7729 122.9667  11.683
## as.factor(ml_subtype)1                 1.1617     1.4505 134.6848   0.801
## as.factor(ml_subtype)2                 0.2591     1.8095 167.9034   0.143
## RegionGFM                              1.0690     0.7369 352.2398   1.451
## RegionGPS                              2.3087     0.7369 352.2398   3.133
## RegionGTM                              0.9855     0.7369 352.2398   1.337
## RegionHip                              1.9798     0.7369 352.2398   2.687
## RegionOC                               2.2398     0.7369 352.2398   3.040
## RegionParaHip                         -0.1888     0.7369 352.2398  -0.256
## RegionPCC                              1.7966     0.7369 352.2398   2.438
## RegionPrecun                           2.6092     0.7369 352.2398   3.541
## as.factor(ml_subtype)1:RegionGFM      -2.0040     1.3866 352.6609  -1.445
## as.factor(ml_subtype)2:RegionGFM      -2.1051     1.7406 353.7534  -1.209
## as.factor(ml_subtype)1:RegionGPS      -1.5797     1.3866 352.6609  -1.139
## as.factor(ml_subtype)2:RegionGPS      -2.2250     1.7406 353.7534  -1.278
## as.factor(ml_subtype)1:RegionGTM      -1.4176     1.3866 352.6609  -1.022
## as.factor(ml_subtype)2:RegionGTM      -1.4253     1.7406 353.7534  -0.819
## as.factor(ml_subtype)1:RegionHip      -0.7280     1.3866 352.6609  -0.525
## as.factor(ml_subtype)2:RegionHip      -0.5372     1.7406 353.7534  -0.309
## as.factor(ml_subtype)1:RegionOC       -1.8297     1.3866 352.6609  -1.320
## as.factor(ml_subtype)2:RegionOC       -1.4218     1.7406 353.7534  -0.817
## as.factor(ml_subtype)1:RegionParaHip  -1.8677     1.3866 352.6609  -1.347
## as.factor(ml_subtype)2:RegionParaHip  -0.8132     1.7735 353.2396  -0.459
## as.factor(ml_subtype)1:RegionPCC      -1.1741     1.3866 352.6609  -0.847
## as.factor(ml_subtype)2:RegionPCC      -0.7223     1.7885 354.5384  -0.404
## as.factor(ml_subtype)1:RegionPrecun    0.3765     1.3866 352.6609   0.272
## as.factor(ml_subtype)2:RegionPrecun   -0.1201     1.7406 353.7534  -0.069
##                                      Pr(>|t|)    
## (Intercept)                           < 2e-16 ***
## as.factor(ml_subtype)1               0.424597    
## as.factor(ml_subtype)2               0.886309    
## RegionGFM                            0.147761    
## RegionGPS                            0.001875 ** 
## RegionGTM                            0.181960    
## RegionHip                            0.007557 ** 
## RegionOC                             0.002546 ** 
## RegionParaHip                        0.797973    
## RegionPCC                            0.015257 *  
## RegionPrecun                         0.000452 ***
## as.factor(ml_subtype)1:RegionGFM     0.149266    
## as.factor(ml_subtype)2:RegionGFM     0.227302    
## as.factor(ml_subtype)1:RegionGPS     0.255358    
## as.factor(ml_subtype)2:RegionGPS     0.201976    
## as.factor(ml_subtype)1:RegionGTM     0.307317    
## as.factor(ml_subtype)2:RegionGTM     0.413407    
## as.factor(ml_subtype)1:RegionHip     0.599896    
## as.factor(ml_subtype)2:RegionHip     0.757776    
## as.factor(ml_subtype)1:RegionOC      0.187823    
## as.factor(ml_subtype)2:RegionOC      0.414546    
## as.factor(ml_subtype)1:RegionParaHip 0.178843    
## as.factor(ml_subtype)2:RegionParaHip 0.646854    
## as.factor(ml_subtype)1:RegionPCC     0.397718    
## as.factor(ml_subtype)2:RegionPCC     0.686555    
## as.factor(ml_subtype)1:RegionPrecun  0.786160    
## as.factor(ml_subtype)2:RegionPrecun  0.945014    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

anova(modelo)

## Type III Analysis of Variance Table with Satterthwaite's method
##                              Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          3.14   1.570     2  45.34  0.2107    0.8108    
## Region                       366.09  45.761     8 352.50  6.1438 1.996e-07 ***
## as.factor(ml_subtype):Region  65.98   4.124    16 352.42  0.5537    0.9166    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emmeans(modelo, pairwise ~ ml_subtype | Region)

## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   9.03 0.773 123     7.50     10.6
##           1  10.19 1.227 140     7.76     12.6
##           2   9.29 1.636 180     6.06     12.5
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.10 0.766 119     8.58     11.6
##           1   9.26 1.170 119     6.94     11.6
##           2   8.25 1.433 119     5.42     11.1
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.34 0.766 119     9.82     12.9
##           1  10.92 1.170 119     8.60     13.2
##           2   9.37 1.433 119     6.54     12.2
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.02 0.766 119     8.50     11.5
##           1   9.76 1.170 119     7.44     12.1
##           2   8.85 1.433 119     6.01     11.7
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.01 0.766 119     9.49     12.5
##           1  11.44 1.170 119     9.13     13.8
##           2  10.73 1.433 119     7.89     13.6
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.27 0.766 119     9.75     12.8
##           1  10.60 1.170 119     8.29     12.9
##           2  10.11 1.433 119     7.27     12.9
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   8.84 0.766 119     7.32     10.4
##           1   8.13 1.170 119     5.82     10.5
##           2   8.29 1.484 134     5.35     11.2
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.83 0.766 119     9.31     12.3
##           1  10.81 1.170 119     8.50     13.1
##           2  10.36 1.484 134     7.43     13.3
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.64 0.766 119    10.12     13.2
##           1  13.18 1.170 119    10.86     15.5
##           2  11.78 1.433 119     8.94     14.6
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -1.1617 1.45 134  -0.801  0.7031
##  ml_subtype0 - ml_subtype2  -0.2591 1.81 168  -0.143  0.9888
##  ml_subtype1 - ml_subtype2   0.9026 2.05 164   0.441  0.8983
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8423 1.40 119   0.602  0.8191
##  ml_subtype0 - ml_subtype2   1.8460 1.62 119   1.136  0.4937
##  ml_subtype1 - ml_subtype2   1.0037 1.85 119   0.543  0.8504
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4180 1.40 119   0.299  0.9519
##  ml_subtype0 - ml_subtype2   1.9659 1.62 119   1.210  0.4495
##  ml_subtype1 - ml_subtype2   1.5479 1.85 119   0.837  0.6809
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2559 1.40 119   0.183  0.9817
##  ml_subtype0 - ml_subtype2   1.1662 1.62 119   0.718  0.7534
##  ml_subtype1 - ml_subtype2   0.9103 1.85 119   0.492  0.8752
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.4337 1.40 119  -0.310  0.9484
##  ml_subtype0 - ml_subtype2   0.2781 1.62 119   0.171  0.9840
##  ml_subtype1 - ml_subtype2   0.7118 1.85 119   0.385  0.9217
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.6680 1.40 119   0.478  0.8819
##  ml_subtype0 - ml_subtype2   1.1627 1.62 119   0.716  0.7547
##  ml_subtype1 - ml_subtype2   0.4947 1.85 119   0.267  0.9613
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.7060 1.40 119   0.505  0.8691
##  ml_subtype0 - ml_subtype2   0.5541 1.67 130   0.332  0.9412
##  ml_subtype1 - ml_subtype2  -0.1519 1.89 128  -0.080  0.9964
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.0124 1.40 119   0.009  1.0000
##  ml_subtype0 - ml_subtype2   0.4632 1.67 130   0.277  0.9585
##  ml_subtype1 - ml_subtype2   0.4508 1.89 128   0.239  0.9691
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -1.5382 1.40 119  -1.100  0.5159
##  ml_subtype0 - ml_subtype2  -0.1390 1.62 119  -0.086  0.9960
##  ml_subtype1 - ml_subtype2   1.3992 1.85 119   0.757  0.7303
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_NfL <- as.data.frame(resultados$contrasts)
contrastes_NfL$stain<-"NfL"

modelo<-lmer(Myelin ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Myelin ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 2028.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1139 -0.4088  0.0530  0.5048  2.2711 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept)  5.343   2.311   
##  Residual             18.276   4.275   
## Number of obs: 360, groups:  i_id, 50
## 
## Fixed effects:
##                                      Estimate Std. Error       df t value
## (Intercept)                           26.3491     1.5306 332.9959  17.215
## as.factor(ml_subtype)1                 3.6185     2.3472 326.0474   1.542
## as.factor(ml_subtype)2                 3.9309     3.0887 330.3045   1.273
## RegionGFM                              3.6327     1.6709 300.0860   2.174
## RegionGPS                              5.8277     1.6721 298.3059   3.485
## RegionGTM                              3.4172     1.6667 298.7404   2.050
## RegionHip                              1.9984     1.6842 297.3685   1.187
## RegionOC                               5.6778     1.6667 298.7691   3.407
## RegionParaHip                          2.7591     1.7067 296.2327   1.617
## RegionPCC                              5.6114     2.0189 299.7713   2.779
## RegionPrecun                           8.0171     1.6762 299.5069   4.783
## as.factor(ml_subtype)1:RegionGFM      -3.3803     2.7023 293.4649  -1.251
## as.factor(ml_subtype)2:RegionGFM      -2.5849     3.5251 299.4442  -0.733
## as.factor(ml_subtype)1:RegionGPS      -1.4576     2.6753 293.5912  -0.545
## as.factor(ml_subtype)2:RegionGPS      -0.4631     3.4411 296.2714  -0.135
## as.factor(ml_subtype)1:RegionGTM      -1.4173     2.6499 295.2407  -0.535
## as.factor(ml_subtype)2:RegionGTM      -3.1001     3.4384 296.3724  -0.902
## as.factor(ml_subtype)1:RegionHip      -2.5797     2.6829 293.2351  -0.962
## as.factor(ml_subtype)2:RegionHip      -2.6075     3.5028 294.5604  -0.744
## as.factor(ml_subtype)1:RegionOC        0.5786     2.6499 295.2525   0.218
## as.factor(ml_subtype)2:RegionOC       -3.2706     3.4948 295.0017  -0.936
## as.factor(ml_subtype)1:RegionParaHip  -1.7978     2.6970 292.8102  -0.667
## as.factor(ml_subtype)2:RegionParaHip  -4.1558     3.5136 294.3046  -1.183
## as.factor(ml_subtype)1:RegionPCC      -1.3664     3.2768 300.4034  -0.417
## as.factor(ml_subtype)2:RegionPCC      -5.1096     4.1310 301.6974  -1.237
## as.factor(ml_subtype)1:RegionPrecun   -0.9095     2.6559 295.5724  -0.342
## as.factor(ml_subtype)2:RegionPrecun   -0.8459     3.4430 296.5650  -0.246
##                                      Pr(>|t|)    
## (Intercept)                           < 2e-16 ***
## as.factor(ml_subtype)1               0.124132    
## as.factor(ml_subtype)2               0.204029    
## RegionGFM                            0.030478 *  
## RegionGPS                            0.000566 ***
## RegionGTM                            0.041208 *  
## RegionHip                            0.236363    
## RegionOC                             0.000748 ***
## RegionParaHip                        0.107013    
## RegionPCC                            0.005790 ** 
## RegionPrecun                         2.71e-06 ***
## as.factor(ml_subtype)1:RegionGFM     0.211963    
## as.factor(ml_subtype)2:RegionGFM     0.463947    
## as.factor(ml_subtype)1:RegionGPS     0.586291    
## as.factor(ml_subtype)2:RegionGPS     0.893028    
## as.factor(ml_subtype)1:RegionGTM     0.593149    
## as.factor(ml_subtype)2:RegionGTM     0.368003    
## as.factor(ml_subtype)1:RegionHip     0.337074    
## as.factor(ml_subtype)2:RegionHip     0.457219    
## as.factor(ml_subtype)1:RegionOC      0.827318    
## as.factor(ml_subtype)2:RegionOC      0.350123    
## as.factor(ml_subtype)1:RegionParaHip 0.505569    
## as.factor(ml_subtype)2:RegionParaHip 0.237862    
## as.factor(ml_subtype)1:RegionPCC     0.676989    
## as.factor(ml_subtype)2:RegionPCC     0.217090    
## as.factor(ml_subtype)1:RegionPrecun  0.732258    
## as.factor(ml_subtype)2:RegionPrecun  0.806092    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

anova(modelo)

## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F)    
## as.factor(ml_subtype)         105.39  52.695     2  48.041  2.8833 0.06568 .  
## Region                       1527.28 190.910     8 290.172 10.4459 7.2e-13 ***
## as.factor(ml_subtype):Region  171.22  10.701    16 290.104  0.5855 0.89414    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emmeans(modelo, pairwise ~ ml_subtype | Region)

## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   26.3 1.533 333     23.3     29.4
##           1   30.0 1.781 313     26.5     33.5
##           2   30.3 2.686 328     25.0     35.6
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   30.0 0.885 257     28.2     31.7
##           1   30.2 1.521 276     27.2     33.2
##           2   31.3 1.962 275     27.5     35.2
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.2 0.899 262     30.4     33.9
##           1   34.3 1.459 263     31.5     37.2
##           2   35.6 1.837 252     32.0     39.3
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   29.8 0.885 257     28.0     31.5
##           1   32.0 1.403 252     29.2     34.7
##           2   30.6 1.837 252     27.0     34.2
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   28.3 0.928 271     26.5     30.2
##           1   29.4 1.459 263     26.5     32.3
##           2   29.7 1.965 274     25.8     33.5
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.0 0.885 257     30.3     33.8
##           1   36.2 1.403 252     33.5     39.0
##           2   32.7 1.964 274     28.8     36.6
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   29.1 0.977 286     27.2     31.0
##           1   30.9 1.459 263     28.1     33.8
##           2   28.9 1.965 274     25.0     32.8
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.0 1.459 333     29.1     34.8
##           1   34.2 2.078 329     30.1     38.3
##           2   30.8 2.688 328     25.5     36.1
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   34.4 0.899 262     32.6     36.1
##           1   37.1 1.403 252     34.3     39.8
##           2   37.5 1.837 252     33.8     41.1
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -3.619 2.35 326  -1.540  0.2737
##  ml_subtype0 - ml_subtype2   -3.931 3.09 330  -1.271  0.4125
##  ml_subtype1 - ml_subtype2   -0.312 3.22 325  -0.097  0.9948
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -0.238 1.76 272  -0.135  0.9899
##  ml_subtype0 - ml_subtype2   -1.346 2.15 272  -0.625  0.8064
##  ml_subtype1 - ml_subtype2   -1.108 2.48 276  -0.446  0.8961
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.161 1.71 263  -1.261  0.4186
##  ml_subtype0 - ml_subtype2   -3.468 2.04 254  -1.696  0.2088
##  ml_subtype1 - ml_subtype2   -1.307 2.35 257  -0.557  0.8429
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.201 1.66 254  -1.327  0.3816
##  ml_subtype0 - ml_subtype2   -0.831 2.04 253  -0.407  0.9126
##  ml_subtype1 - ml_subtype2    1.370 2.31 252   0.593  0.8241
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -1.039 1.73 266  -0.601  0.8198
##  ml_subtype0 - ml_subtype2   -1.323 2.17 274  -0.609  0.8153
##  ml_subtype1 - ml_subtype2   -0.285 2.45 270  -0.116  0.9926
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -4.197 1.66 254  -2.530  0.0321
##  ml_subtype0 - ml_subtype2   -0.660 2.15 271  -0.307  0.9495
##  ml_subtype1 - ml_subtype2    3.537 2.41 267   1.465  0.3094
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -1.821 1.76 270  -1.037  0.5543
##  ml_subtype0 - ml_subtype2    0.225 2.19 277   0.102  0.9942
##  ml_subtype1 - ml_subtype2    2.046 2.45 270   0.836  0.6811
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.252 2.54 331  -0.887  0.6489
##  ml_subtype0 - ml_subtype2    1.179 3.06 330   0.385  0.9214
##  ml_subtype1 - ml_subtype2    3.431 3.40 328   1.010  0.5711
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.709 1.67 255  -1.626  0.2365
##  ml_subtype0 - ml_subtype2   -3.085 2.04 254  -1.509  0.2885
##  ml_subtype1 - ml_subtype2   -0.376 2.31 252  -0.163  0.9855
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_myelin <- as.data.frame(resultados$contrasts)

contrastes_myelin$stain<-"Myelin"


modelo<-lmer(Microglia ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

summary(modelo)  # Revisa los términos de interacción

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Microglia ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 1988.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4082 -0.5583 -0.0470  0.5327  4.6251 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 11.277   3.358   
##  Residual              9.401   3.066   
## Number of obs: 384, groups:  i_id, 49
## 
## Fixed effects:
##                                       Estimate Std. Error        df t value
## (Intercept)                            7.21751    0.84541 125.63844   8.537
## as.factor(ml_subtype)1                -1.40662    1.65299 144.45862  -0.851
## as.factor(ml_subtype)2                -2.77295    2.06994 152.64047  -1.340
## RegionGFM                             -1.00093    0.81514 312.47136  -1.228
## RegionGPS                             -1.00351    0.80757 312.32373  -1.243
## RegionGTM                             -1.82584    0.81326 312.11639  -2.245
## RegionHip                              2.76047    0.80757 312.32373   3.418
## RegionOC                              -5.08161    0.81522 312.50027  -6.233
## RegionParaHip                          1.23768    0.80757 312.32373   1.533
## RegionPCC                             -2.73298    1.42991 316.52026  -1.911
## RegionPrecun                          -4.59886    0.81522 312.50027  -5.641
## as.factor(ml_subtype)1:RegionGFM       1.62015    1.58925 313.05679   1.019
## as.factor(ml_subtype)2:RegionGFM       3.59163    1.99155 313.71084   1.803
## as.factor(ml_subtype)1:RegionGPS       0.60467    1.58538 313.02149   0.381
## as.factor(ml_subtype)2:RegionGPS       2.20444    1.98847 313.69053   1.109
## as.factor(ml_subtype)1:RegionGTM       3.66547    1.58828 312.96484   2.308
## as.factor(ml_subtype)2:RegionGTM       1.85386    1.99079 313.65307   0.931
## as.factor(ml_subtype)1:RegionHip       0.98722    1.58538 313.02149   0.623
## as.factor(ml_subtype)2:RegionHip       1.07980    1.98847 313.69053   0.543
## as.factor(ml_subtype)1:RegionOC        0.92599    1.58929 313.06433   0.583
## as.factor(ml_subtype)2:RegionOC        2.72006    2.03909 312.98961   1.334
## as.factor(ml_subtype)1:RegionParaHip   0.08371    1.58538 313.02149   0.053
## as.factor(ml_subtype)2:RegionParaHip  -0.19211    2.03604 312.96340  -0.094
## as.factor(ml_subtype)2:RegionPCC       1.90213    3.76725 316.61669   0.505
## as.factor(ml_subtype)1:RegionPrecun    0.60444    1.58929 313.06433   0.380
## as.factor(ml_subtype)2:RegionPrecun    4.10425    2.15524 315.98873   1.904
##                                      Pr(>|t|)    
## (Intercept)                          3.82e-14 ***
## as.factor(ml_subtype)1               0.396202    
## as.factor(ml_subtype)2               0.182357    
## RegionGFM                            0.220397    
## RegionGPS                            0.214933    
## RegionGTM                            0.025461 *  
## RegionHip                            0.000714 ***
## RegionOC                             1.48e-09 ***
## RegionParaHip                        0.126386    
## RegionPCC                            0.056870 .  
## RegionPrecun                         3.78e-08 ***
## as.factor(ml_subtype)1:RegionGFM     0.308780    
## as.factor(ml_subtype)2:RegionGFM     0.072280 .  
## as.factor(ml_subtype)1:RegionGPS     0.703163    
## as.factor(ml_subtype)2:RegionGPS     0.268447    
## as.factor(ml_subtype)1:RegionGTM     0.021661 *  
## as.factor(ml_subtype)2:RegionGTM     0.352456    
## as.factor(ml_subtype)1:RegionHip     0.533934    
## as.factor(ml_subtype)2:RegionHip     0.587494    
## as.factor(ml_subtype)1:RegionOC      0.560555    
## as.factor(ml_subtype)2:RegionOC      0.183189    
## as.factor(ml_subtype)1:RegionParaHip 0.957925    
## as.factor(ml_subtype)2:RegionParaHip 0.924889    
## as.factor(ml_subtype)2:RegionPCC     0.613972    
## as.factor(ml_subtype)1:RegionPrecun  0.703966    
## as.factor(ml_subtype)2:RegionPrecun  0.057777 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 26 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

anova(modelo)

## Missing cells for: as.factor(ml_subtype)1:RegionPCC.  
## Interpret type III hypotheses with care.

## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF   DenDF F value Pr(>F)    
## as.factor(ml_subtype)           3.15   1.575     2  48.951  0.1675 0.8462    
## Region                       1204.11 150.513     8 313.398 16.0108 <2e-16 ***
## as.factor(ml_subtype):Region  157.28  10.486    15 312.975  1.1154 0.3412    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

emmeans(modelo, pairwise ~ ml_subtype | Region)

## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   7.22 0.845 126    5.544     8.89
##           1   5.81 1.421 152    3.004     8.62
##           2   4.44 1.890 159    0.712     8.18
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   6.22 0.837 122    4.559     7.87
##           1   6.43 1.313 118    3.831     9.03
##           2   7.04 1.719 118    3.632    10.44
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   6.21 0.830 118    4.570     7.86
##           1   5.41 1.313 118    2.813     8.01
##           2   5.65 1.719 118    2.242     9.05
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   5.39 0.838 122    3.733     7.05
##           1   7.65 1.313 118    5.051    10.25
##           2   4.47 1.719 118    1.069     7.88
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   9.98 0.830 118    8.334    11.62
##           1   9.56 1.313 118    6.959    12.16
##           2   8.28 1.719 118    4.881    11.69
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   2.14 0.838 122    0.478     3.79
##           1   1.66 1.313 118   -0.944     4.25
##           2   2.08 1.792 136   -1.461     5.63
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   8.46 0.830 118    6.811    10.10
##           1   7.13 1.313 118    4.533     9.73
##           2   5.49 1.792 136    1.946     9.03
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   4.48 1.446 339    1.641     7.33
##           1 nonEst    NA  NA       NA       NA
##           2   3.61 3.458 355   -3.187    10.41
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   2.62 0.838 122    0.961     4.28
##           1   1.82 1.313 118   -0.783     4.42
##           2   3.95 1.888 159    0.221     7.68
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   1.4066 1.65 145   0.851  0.6720
##  ml_subtype0 - ml_subtype2   2.7730 2.07 153   1.339  0.3756
##  ml_subtype1 - ml_subtype2   1.3663 2.36 156   0.578  0.8321
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.2135 1.56 119  -0.137  0.9897
##  ml_subtype0 - ml_subtype2  -0.8187 1.91 119  -0.428  0.9040
##  ml_subtype1 - ml_subtype2  -0.6051 2.16 118  -0.280  0.9578
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8020 1.55 118   0.516  0.8635
##  ml_subtype0 - ml_subtype2   0.5685 1.91 118   0.298  0.9523
##  ml_subtype1 - ml_subtype2  -0.2334 2.16 118  -0.108  0.9936
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -2.2588 1.56 120  -1.451  0.3185
##  ml_subtype0 - ml_subtype2   0.9191 1.91 119   0.481  0.8806
##  ml_subtype1 - ml_subtype2   3.1779 2.16 118   1.469  0.3093
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4194 1.55 118   0.270  0.9606
##  ml_subtype0 - ml_subtype2   1.6932 1.91 118   0.887  0.6495
##  ml_subtype1 - ml_subtype2   1.2738 2.16 118   0.589  0.8263
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4806 1.56 119   0.309  0.9489
##  ml_subtype0 - ml_subtype2   0.0529 1.98 133   0.027  0.9996
##  ml_subtype1 - ml_subtype2  -0.4277 2.22 129  -0.193  0.9798
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   1.3229 1.55 118   0.852  0.6717
##  ml_subtype0 - ml_subtype2   2.9651 1.98 132   1.501  0.2936
##  ml_subtype1 - ml_subtype2   1.6422 2.22 129   0.739  0.7406
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   nonEst   NA  NA      NA      NA
##  ml_subtype0 - ml_subtype2   0.8708 3.75 354   0.232  0.8164
##  ml_subtype1 - ml_subtype2   nonEst   NA  NA      NA      NA
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8022 1.56 119   0.515  0.8641
##  ml_subtype0 - ml_subtype2  -1.3313 2.07 152  -0.645  0.7957
##  ml_subtype1 - ml_subtype2  -2.1335 2.30 144  -0.928  0.6236
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for varying family sizes

resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_micro <- as.data.frame(resultados$contrasts)
contrastes_micro$stain<-"microglia"



contraste<-rbind(contrasttau,contrastes_NfL, contrastes_myelin, contrastes_micro)

neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv")

## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

clinical_data <- read_excel("data_validation/clinical data.xlsx")

## New names:
## • `` -> `...22`

clinical_data$i_id<- ifelse(grepl("^AD", clinical_data$SubjectANW), paste0("PAGE-",  clinical_data$SubjectANW),  clinical_data$SubjectANW)

# data1<-data1 %>% drop_na(data1$Cohort)
# data1$dx_pre<-ifelse(data1$Cohort=="Control",1,0)
# data1$dx_post<-ifelse(data1$ml_subtype==0,1,0)
# 
# confusionMatrix(table(data1$dx_pre, data1$dx_post))


data1<-left_join(subtypes_postmortem,clinical_data, by='i_id')
data1$Subject <- ifelse(grepl("^[0-9]{4}-[0-9]{3}$", data1$Subject), 
                        paste0(substr(data1$Subject, 3, 4), "-", substr(data1$Subject, 6, 8)), 
                        data1$Subject)

# Mostrar el nuevo vector
print(data1$Subject)

##  [1] "ANW019"  NA        "ANW082"  "ANW122"  NA        "ANW232"  "ANW534" 
##  [8] "ANW560"  "ANW569"  NA        NA        NA        NA        NA       
## [15] "ANW777"  "ANW782"  "ANW797"  "ANW880"  "ANW898"  NA        "ANW945" 
## [22] "ANW989"  "ANW999"  "16-035"  "16-105"  "16-131"  "17-018"  "17-024" 
## [29] "17-110"  "ANW-078" "17-133"  "17-147"  "17-154"  "18-016"  "18-032" 
## [36] "18-097"  "18-109"  "18-111"  "18-126"  "19-013"  "19-020"  "19-025" 
## [43] "19-058"  "19-091"  "19-095"  "19-112"  "19-115"  "19-119"  "20-104" 
## [50] "21-006"  "21-066"  "22-036"  "22-049"  "22-063"  "22-078"  "22-081" 
## [57] "22-086"  NA        NA        NA

data<-left_join(data1,neuropath_4G8_AT8_NFL, by="Subject" )

table(data$Cohort.x, data$ml_subtype)

##          
##             0   1   2
##   AD      274  91  65
##   Control  91  65  39

data %>% filter(Region=="Hmed")%>%
  ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
  geom_jitter()+
  xlim(0,35)+
  geom_smooth(method="lm")+
  ggtitle("Hippocampus-ptau")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

data %>% filter(Region=="GPS")%>%
  ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
  geom_jitter()+
  xlim(0,35)+
  geom_smooth(method="lm")+
  ggtitle("Superior parietal gyrus-ptau")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(ggseg)
library(readr)
contraste <- read_csv("contraste.csv")

## New names:
## Rows: 27 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): contrast, Region, stain dbl (6): ...1, estimate, SE, df, t.ratio, p.value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

knitr::kable(contraste)

…1	contrast	Region	estimate	SE	df	t.ratio	p.value	stain
5	ml_subtype1 - ml_subtype3	Fusiform	45.982537	12.554691	98.34538	3.662578	0.0022538	pTau
6	ml_subtype2 - ml_subtype3	Fusiform	49.935292	13.568797	96.28489	3.680156	0.0021440	pTau
7	ml_subtype0 - ml_subtype1	GFM	-24.148178	7.031639	82.41822	-3.434218	0.0050681	pTau
8	ml_subtype0 - ml_subtype2	GFM	-26.947992	8.687179	82.41822	-3.102042	0.0137518	pTau
11	ml_subtype1 - ml_subtype3	GFM	38.436630	11.963579	82.41822	3.212804	0.0099469	pTau
12	ml_subtype2 - ml_subtype3	GFM	41.236444	13.005781	82.41822	3.170624	0.0112649	pTau
13	ml_subtype0 - ml_subtype1	GPS	-22.869909	7.031639	82.41822	-3.252429	0.0088390	pTau
14	ml_subtype0 - ml_subtype2	GPS	-32.332661	8.805493	86.67601	-3.671874	0.0023110	pTau
17	ml_subtype1 - ml_subtype3	GPS	39.364537	11.963579	82.41822	3.290365	0.0078856	pTau
18	ml_subtype2 - ml_subtype3	GPS	48.827289	13.085105	84.31006	3.731517	0.0019220	pTau
19	ml_subtype0 - ml_subtype1	GTM	-22.305273	7.031639	82.41822	-3.172130	0.0112152	pTau
20	ml_subtype0 - ml_subtype2	GTM	-29.303011	8.687179	82.41822	-3.373133	0.0061255	pTau
23	ml_subtype1 - ml_subtype3	GTM	41.886905	11.963579	82.41822	3.501202	0.0041051	pTau
24	ml_subtype2 - ml_subtype3	GTM	48.884643	13.005781	82.41822	3.758686	0.0017764	pTau
32	ml_subtype0 - ml_subtype2	OC	-24.491860	8.687179	82.41822	-2.819311	0.0301074	pTau
41	ml_subtype1 - ml_subtype3	ParaHip	42.151949	12.529962	97.64571	3.364092	0.0059559	pTau
42	ml_subtype2 - ml_subtype3	ParaHip	42.533188	13.568797	96.28489	3.134632	0.0120295	pTau
47	ml_subtype1 - ml_subtype3	PCC	39.710577	11.963579	82.41822	3.319289	0.0072234	pTau
48	ml_subtype2 - ml_subtype3	PCC	40.541571	13.005781	82.41822	3.117196	0.0131628	pTau
49	ml_subtype0 - ml_subtype1	Precun	-21.362250	7.031639	82.41822	-3.038019	0.0165130	pTau
50	ml_subtype0 - ml_subtype2	Precun	-32.165973	8.687179	82.41822	-3.702695	0.0021390	pTau
53	ml_subtype1 - ml_subtype3	Precun	34.764165	11.963579	82.41822	2.905833	0.0238497	pTau
54	ml_subtype2 - ml_subtype3	Precun	45.567887	13.005781	82.41822	3.503664	0.0040732	pTau
1	ml_subtype0 - ml_subtype1	Fusiform	-3.599330	1.307782	147.55607	-2.752239	0.0333480	NfL
171	ml_subtype1 - ml_subtype3	GPS	6.068956	2.119376	135.62871	2.863558	0.0247653	NfL
491	ml_subtype0 - ml_subtype1	Precun	-4.089309	1.273218	135.62871	-3.211790	0.0088449	NfL
25	ml_subtype0 - ml_subtype1	Hip	4.047705	1.463237	120.20155	2.766267	0.0328230	Myelin

Results_first_iteration

Ismael calandri

2025-03-17