1. Descriptives of train set

Here, I will try to analyze the meaning of the clusters found.

1.a Clasification

library(readr)
library(tidyverse)
library(caret)
library(emmeans)

train_subtypes <- read_csv("data_validation/train_subtypes.csv")


train_subtypes <- train_subtypes %>%
  mutate(ml_subtype = ml_subtype+1) %>%
  mutate(
    poor_fit = ifelse(prob_ml_subtype < 0.5, 1, 0),
    ml_subtype_redefined = ifelse(poor_fit == 1, "poor fit", as.character(ml_subtype))
  )

train_subtypes<-train_subtypes %>% filter (Diagnosis!="HC")%>%
  mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE, 
                             labels = paste(seq(0, max(ml_stage)-5, by = 5), 
                                            seq(5, max(ml_stage), by = 5), sep = "-")))
train_subtypes %>%
  ggplot(aes(x = factor(ml_subtype_redefined), y = ml_stage, fill = ml_subtype_redefined)) +
  geom_violin(trim = FALSE, alpha = 0.5) +
  #geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.7) + 
  geom_dotplot(binaxis = "y", stackdir = "center", 
               dotsize = 0.5, alpha = 0.6) +  # Ajusta binwidth para que haya un punto por cada 10 sujetos
  labs(
    title = "Ml_stage by subtype",
    x = "subtype",
    y = "stage"
  ) +
  scale_fill_manual(values=pal)+
  theme_minimal() +
  theme(legend.position = "none") 

1.3 Asymmetry

# assymetry index
dataset_con_index <- train_subtypes %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  # Calcular el índice Derecha-Izquierda para cada región
  mutate(
    # Amygdala
    index_amygdala = 200 * (w_rh_amygdala_h - w_lh_amygdala_h) / (w_rh_amygdala_h + w_lh_amygdala_h),
    # Entorhinal
    index_entorhinal = 200 * (w_rh_entorhinal_medial_h - w_lh_entorhinal_medial_h) / (w_rh_entorhinal_medial_h + w_lh_entorhinal_medial_h),
    # Frontal Lateral
    index_frontal_lateral = 200 * (w_rh_frontal_lateral_h - w_lh_frontal_lateral_h) / (w_rh_frontal_lateral_h + w_lh_frontal_lateral_h),
    # Frontal Medial
    index_frontal_medial = 200 * (w_rh_frontal_medial_h - w_lh_frontal_medial_h) / (w_rh_frontal_medial_h + w_lh_frontal_medial_h),
    # Hippocampus
    index_hippocampus = 200 * (w_rh_hippocampus_h - w_lh_hippocampus_h) / (w_rh_hippocampus_h + w_lh_hippocampus_h),
    # Occipital Lateral
    index_occipital_lateral = 200 * (w_rh_occipital_lateral_h - w_lh_occipital_lateral_h) / (w_rh_occipital_lateral_h + w_lh_occipital_lateral_h),
    # Occipital Medial
    index_occipital_medial = 200 * (w_rh_occipital_medial_h - w_lh_occipital_medial_h) / (w_rh_occipital_medial_h + w_lh_occipital_medial_h),
    # Parietal Lateral
    index_parietal_lateral = 200 * (w_rh_parietal_lateral_h - w_lh_parietal_lateral_h) / (w_rh_parietal_lateral_h + w_lh_parietal_lateral_h),
    # Parietal Medial
    index_parietal_medial = 200 * (w_rh_parietal_medial_h - w_lh_parietal_medial_h) / (w_rh_parietal_medial_h + w_lh_parietal_medial_h),
    # Temporal Lateral
    index_temporal_lateral = 200 * (w_rh_temporal_lateral_h - w_lh_temporal_lateral_h) / (w_rh_temporal_lateral_h + w_lh_temporal_lateral_h)) %>%
  # Seleccionar sólo las columnas necesarias
  select(MRI_ID, index_amygdala, index_entorhinal, index_frontal_lateral, index_frontal_medial,
         index_hippocampus, index_occipital_lateral, index_occipital_medial, index_parietal_lateral, index_parietal_medial,
         index_temporal_lateral, ml_subtype_redefined, ml_stage)
# Crear una nueva columna que agrupe ml_stage en intervalos de 10
dataset_con_index <- dataset_con_index %>%
  mutate(stage_group = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 10), include.lowest = TRUE, right = FALSE, labels = FALSE))

# Transformar el dataset a formato largo
dataset_largo <- dataset_con_index %>%
  pivot_longer(cols = starts_with("index"), names_to = "variable", values_to = "value")

# Calcular las medianas por grupo, subtipo y variable
medianas <- dataset_largo %>%
  group_by(stage_group, ml_subtype_redefined, variable) %>%
  summarise(mediana = median(value, na.rm = TRUE), .groups = 'drop')

# Crear el boxplot con las líneas que conectan las medianas
ggplot(dataset_largo, aes(x = factor(stage_group), y = value, fill = ml_subtype_redefined)) +
  geom_boxplot() +
  facet_wrap(~ variable, scales = "free_y") +
  geom_line(data = medianas, aes(x = factor(stage_group), y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined), 
            size = 1, linetype = "solid") +
  labs(
    title = "Rigth-Left index by subtype and stage",
    x = "Stage Group (bins 10)",
    y = "Rigth-Left index",
    fill = "ML Subtype",
    color = "ML Subtype"
  ) +
  theme_minimal() +
  scale_fill_manual(values = pal)+
  scale_color_manual(values=pal)+
  ylim(-200,200)+
  theme(legend.position = "bottom")

There doesn’t seem to be much asymmetry… Question for Rik: Could we be wasting computing power with so many regions?

Clinical features

For this I only used the training subjects on their first MRI, in order to compare the first clinic visit (n=1724)

library(readxl)
clinical_data <- read_excel("647_Sustain_Calandri_clinicaldata_20230825.xlsx")
ids <- read_csv("dat_mri_export_random.csv") %>% select(MRI_ID, I_ID)
first_clinic <- train_subtypes %>% filter(order==1)
first_clinic<-left_join(first_clinic, ids, by="MRI_ID")
first_clinic<-left_join(first_clinic, clinical_data, by="I_ID")

first_clinic<-first_clinic %>%
  mutate(apoe4_status = case_when(
    APOE == "E4E4" ~ "homozygous E4",  # E4E4 es homocigoto
    APOE %in% c("E3E4", "E4E3", "E2E4", "E4E2") ~ "heterozygous E4",  # Cualquier combinación con un E4 es heterocigoto
    !grepl("E4", APOE) ~ "Negative",  # Si no contiene E4, es negativo
    is.na(APOE) ~ NA_character_,  # Si es NA, se mantiene como NA
    TRUE ~ "Other"  # En caso de que haya algún valor no contemplado
  ))

library(NormPsy)

first_clinic$normMMSE<-normMMSE(first_clinic$V_MMSE)
first_clinic %>%
  ggplot(aes(x=ml_subtype_redefined, y=M_age, fill=ml_subtype_redefined))+
  geom_boxplot()+
  scale_fill_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "none")+
  ggtitle("Age distribution")

first_clinic %>%
  filter(ml_subtype_redefined!="poor fit") %>%
  ggplot(aes(color=ml_subtype_redefined, y=normMMSE, x=ml_stage))+
  geom_point()+
  geom_smooth(method = "lm")+
  scale_color_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")+
  ggtitle("normalized MMSE by subtype and stage")

first_clinic %>%
  filter(ml_subtype_redefined!="poor fit") %>%
  ggplot(aes(color=ml_subtype_redefined, y=V_IADL, x=ml_stage))+
  geom_point()+
  geom_smooth(method = "lm")+
  scale_color_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")+
  ggtitle("IADL by subtype and stage")

first_clinic <-first_clinic %>% 
  mutate(clinical_diagnosis=ifelse(diagnoseOms=="Possible AD" |diagnoseOms=="Probable AD", "AD", diagnoseOms))

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit" & ml_stage!=0) %>%
  ggplot(aes(fill = clinical_diagnosis, x = ml_subtype_redefined)) +
  geom_bar(position = "dodge", stat = "count") +  # Corregido para usar position = "dodge"
  scale_fill_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Subtype by clinical diagnosis")

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit"& ml_stage!=0)  %>%
  group_by(ml_subtype_redefined, clinical_diagnosis) %>%
  tally() %>%  # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
  group_by(ml_subtype_redefined) %>%
  mutate(percentage = n / sum(n)) %>%  # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
  ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = clinical_diagnosis)) +
  geom_bar(stat = "identity", position = "stack") +  # Usamos stat = "identity" porque ya tenemos los datos procesados
  geom_text(
    aes(label = scales::percent(percentage, accuracy = 0.1)),
    position = position_stack(vjust = 0.5), size = 3  # Centra las etiquetas en el medio de las barras apiladas
  ) +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Subtype by clinical diagnosis (%)")

library(scales)

# Crear variable stage_interval, agrupar en intervalos de 5 en 5 y remover NA
first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE, 
                             labels = paste(seq(0, max(ml_stage)-5, by = 5), 
                                            seq(5, max(ml_stage), by = 5), sep = "-"))) %>%
  filter(!is.na(stage_interval)) %>%  # Remover filas con NA en stage_interval
  group_by(ml_subtype_redefined, stage_interval, clinical_diagnosis) %>%
  tally() %>%
  group_by(ml_subtype_redefined, stage_interval) %>%
  mutate(percentage = n / sum(n)) %>%  # Calcular el porcentaje por subtipo y stage_interval
  ggplot(aes(x = stage_interval, y = percentage, fill = clinical_diagnosis)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(~ml_subtype_redefined) +  # Crear un gráfico por cada subtipo
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("Percentage of Clinical Diagnosis by Stage Interval and Subtype")

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit"& ml_stage!=0)  %>%
  group_by(ml_subtype_redefined, apoe4_status) %>%
  tally() %>%  # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
  group_by(ml_subtype_redefined) %>%
  mutate(percentage = n / sum(n)) %>%  # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
  ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = apoe4_status)) +
  geom_bar(stat = "identity", position = "stack") +  # Usamos stat = "identity" porque ya tenemos los datos procesados
  geom_text(
    aes(label = scales::percent(percentage, accuracy = 0.1)),
    position = position_stack(vjust = 0.5), size = 3  # Centra las etiquetas en el medio de las barras apiladas
  ) +
  scale_fill_manual(values = pal) +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  ggtitle("APOE4 status by subtype (%)")

1.4 Biomarkers

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x=ml_subtype_redefined, y=L_PTAU, fill=ml_subtype_redefined))+
  geom_boxplot()+
  scale_fill_manual(values=pal)+
  theme_minimal()+
  theme(legend.position = "bottom")

library(dplyr)
library(ggplot2)

# Calcular las medianas por stage_interval y ml_subtype_redefined
medianas <- first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  group_by(stage_interval, ml_subtype_redefined) %>%
  summarise(mediana = median(L_PTAU, na.rm = TRUE), .groups = 'drop')

# Crear el boxplot con la línea para las medianas
first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x = stage_interval, y = L_PTAU, fill = ml_subtype_redefined)) +
  geom_boxplot() +
  geom_line(data = medianas, aes(x = stage_interval, y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined), 
            size = 1, linetype = "solid") +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "pTAU by Stage & Subtype",
    x = "Stage Interval",
    y = "pTAU(CSF)",
    fill = "ML Subtype",
    color = "ML Subtype"
  )

first_clinic %>%
  filter(ml_subtype_redefined != "poor fit") %>%
  ggplot(aes(x = L_AB42_corr, y = L_PTAU, color = ml_subtype_redefined)) +
  geom_point() +
  scale_fill_manual(values = pal) +
  scale_color_manual(values = pal) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "pTAU/AB42 by Subtype",
    x = "amyloid (AB42_corr)",
    y = "pTAU(CSF)",
    fill = "ML Subtype",
    color = "ML Subtype"
  )

ask Rik?!

2. In-vivo validation

In this section, I used the additional scans of the subjects (n=1259). The trained model was applied, and classification into stage and subtype was obtained. First, I will evaluate the model’s ability to consistently classify subjects into the same subtype, even when presented with a scan taken before or after the one used for training.

2.1 Subtype congruency

2.1.1 Global

subtypes_vivo <- read_csv("data_validation/subtypes_vivo.csv") %>% rename(I_ID=i_id)
subtypes_vivo$ml_subtype<-subtypes_vivo$ml_subtype+1
dat_mri_export_validation <- read_csv("dat_mri_export_validation.csv")
subtypes_vivo<-left_join(subtypes_vivo, dat_mri_export_validation, by="I_ID")

train_subtypes<-left_join(train_subtypes, ids, by="MRI_ID")

subtypes_vivo$origin<-"test"
train_subtypes$origin<-"train"


validation<-rbind(train_subtypes[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")], subtypes_vivo[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")])
# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, origin)

# Agrupar los datos de train por I_ID y tomar la primera instancia de cada sujeto
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%
  ungroup()

# Filtrar solo los datos de "test"
test_data <- confusion_data %>%
  filter(origin == "test")

# Unir los datos de "train" y "test" en base al I_ID
confusion_data_long <- left_join(test_data, train_data, by = "I_ID", suffix = c("_test", "_train"))

# Crear una matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Plotear la matriz de confusión usando ggplot
conf_matrix_df <- as.data.frame(conf_matrix) %>%
  rename(Train = Var1, Test = Var2, Count = Freq)

ggplot(conf_matrix_df, aes(x = Train, y = Test, fill = Count)) +
  geom_tile() +
  geom_text(aes(label = Count), color = "white", size = 4) +
  scale_fill_gradient(low = "grey", high = "navy") +
  labs(
    title = "Confusion Matrix: Train vs Test Global (ml_subtype)",
    x = "Train ml_subtype",
    y = "Test ml_subtype"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.1.2 Retrovalidation

(when validation MRI precedes train MRI)

# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, order, origin)

# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%   # Selecciona la primera fila (puedes cambiar a otra lógica si es necesario)
  ungroup() %>%
  rename(ml_subtype_train = ml_subtype)

# Filtrar "test" y seleccionar la fila con el menor 'order_test' por sujeto
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Toma la fila con el menor order
  ungroup() %>%
  rename(ml_subtype_test = ml_subtype)

# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
  inner_join(test_data, by = "I_ID")   # Asegura que cada sujeto tenga ambas columnas

# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")

# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), vjust = 1) +
  scale_fill_gradient(low = "grey", high = "navy") +
  labs(
    title = "ML Subtype in Train and Test (previous) set ",
    x = "ML Subtype Train",
    y = "ML Subtype Test",
    fill = "Count"
  ) +
  theme_minimal()

# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
                                     as.factor(confusion_data_long$ml_subtype_train))

# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 498  74  22
##          2  68 152  37
##          3  29  34 136
## 
## Overall Statistics
##                                           
##                Accuracy : 0.7486          
##                  95% CI : (0.7212, 0.7746)
##     No Information Rate : 0.5667          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5692          
##                                           
##  Mcnemar's Test P-Value : 0.7194          
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8370   0.5846   0.6974
## Specificity            0.7890   0.8671   0.9263
## Pos Pred Value         0.8384   0.5914   0.6834
## Neg Pred Value         0.7873   0.8638   0.9307
## Prevalence             0.5667   0.2476   0.1857
## Detection Rate         0.4743   0.1448   0.1295
## Detection Prevalence   0.5657   0.2448   0.1895
## Balanced Accuracy      0.8130   0.7259   0.8119

2.1.3 Forward validation

(when validation MRI follows train MRI)

library(dplyr)
library(caret)
library(ggplot2)

# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_subtype, order, origin)

# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%   # Selecciona la primera fila de train por sujeto
  ungroup() %>%
  rename(ml_subtype_train = ml_subtype, order_train = order)

# Filtrar "test" y quedarnos solo con las filas donde order_test > order_train
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  left_join(train_data %>% select(I_ID, order_train), by = "I_ID") %>%  # Agregar order_train
  filter(order > order_train) %>%  # Conservar solo test con order_test > order_train
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Seleccionar la fila de test con el menor order_test mayor a train
  ungroup() %>%
  rename(ml_subtype_test = ml_subtype, order_test = order)

# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
  inner_join(test_data, by = "I_ID")   # Asegura que cada sujeto tenga ambas columnas

# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)

# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")

# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
  geom_tile() +
  geom_text(aes(label = Freq), vjust = 1) +
  scale_fill_gradient(low = "grey", high = "blue") +
  labs(
    title = "ML Subtype in Train and Test (later) set",
    x = "ML Subtype Train",
    y = "ML Subtype Test",
    fill = "Count"
  ) +
  theme_minimal()

# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
                                     as.factor(confusion_data_long$ml_subtype_train))

# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   1   2   3
##          1 364  59  13
##          2  47  99  28
##          3  21  23 110
## 
## Overall Statistics
##                                           
##                Accuracy : 0.75            
##                  95% CI : (0.7177, 0.7803)
##     No Information Rate : 0.5654          
##     P-Value [Acc > NIR] : <2e-16          
##                                           
##                   Kappa : 0.5716          
##                                           
##  Mcnemar's Test P-Value : 0.292           
## 
## Statistics by Class:
## 
##                      Class: 1 Class: 2 Class: 3
## Sensitivity            0.8426   0.5470   0.7285
## Specificity            0.7831   0.8714   0.9282
## Pos Pred Value         0.8349   0.5690   0.7143
## Neg Pred Value         0.7927   0.8610   0.9328
## Prevalence             0.5654   0.2369   0.1976
## Detection Rate         0.4764   0.1296   0.1440
## Detection Prevalence   0.5707   0.2277   0.2016
## Balanced Accuracy      0.8129   0.7092   0.8283

2.2 Stage validation

I had to define what the appropriate classification was here. I defined it as follows: if the subject’s validation MRI was later than the one used for training, the stage of sustain should be higher in the validation than in the training, and vice versa for earlier MRIs.

# Filtrar datos solo de "train" y "test"
confusion_data <- validation %>%
  filter(origin %in% c("train", "test")) %>%
  select(I_ID, ml_stage, order, origin)

# Seleccionar una única fila por sujeto en "train"
train_data <- confusion_data %>%
  filter(origin == "train") %>%
  group_by(I_ID) %>%
  slice(1) %>%  # Seleccionamos la primera observación de train
  ungroup() %>%
  rename(ml_stage_train = ml_stage, order_train = order)

# Filtrar "test" y emparejar con train por sujeto
test_data <- confusion_data %>%
  filter(origin == "test") %>%
  left_join(train_data %>% select(I_ID, ml_stage_train, order_train), by = "I_ID") %>%  # Unimos la info de train
  group_by(I_ID) %>%
  slice_min(order, n = 1) %>%  # Seleccionar la primera instancia de test disponible
  ungroup() %>%
  rename(ml_stage_test = ml_stage, order_test = order)


test_data <- test_data %>%
  mutate(
    ml_stage_expected = case_when(
      ml_stage_test > ml_stage_train ~ order_test > order_train,
      ml_stage_test < ml_stage_train ~ order_test < order_train,
      TRUE ~ NA
    ),
    validation_time = case_when(
      order_test < order_train ~ "before",
      order_test > order_train ~ "after",
      TRUE ~ NA_character_
    )
  )


# Calcular la proporción de veces que la relación esperada se cumple
accuracy <- mean(test_data$ml_stage_expected, na.rm = TRUE)

test_data %>%
  filter(!is.na(ml_stage_expected)) %>%  # Eliminar NA
  group_by(validation_time, ml_stage_expected) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100) %>%  # Calcular los porcentajes
  ggplot(aes(x = validation_time, y = percentage, fill = factor(ml_stage_expected))) +
  geom_bar(stat = "identity", position = "stack") +  # Barras apiladas
  scale_fill_manual(values = c("TRUE" = "skyblue", "FALSE" = "salmon")) +
  labs(
    title = "Percentage of correct staging clasification by Time of the validation MRI",
    x = "Validation Time",
    y = "Percentage",
    fill = "ml_stage_expected"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

ggplot(test_data, aes(x = ml_stage_train, y = ml_stage_test, color = ml_stage_expected)) +
  geom_point(alpha = 0.7, size = 3) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") +  # Línea de identidad
  scale_color_manual(values = c("red", "blue"), labels = c("Incorrecto", "Correcto")) +
  labs(
    title = "Validación de ml_stage como clasificador",
    x = "ml_stage en Train",
    y = "ml_stage en Test",
    color = "Cumple relación esperada"
  ) +
  theme_minimal()

# Estadísticas de la relación esperada
cat("Proportion of subjects with  correct stage direction:", round(accuracy * 100, 2), "%\n")
## Proportion of subjects with  correct stage direction: 63.38 %
# Crear una tabla resumen
table_result <- table(test_data$ml_stage_expected, test_data$validation_time)

# Mostrar tabla de resultados
print(table_result)
##        
##         after before
##   FALSE    70    246
##   TRUE    175    372

#3 Post mortem

neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv") 
## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
subtypes_postmortem$i_id <- gsub("PAGE-", "", subtypes_postmortem$i_id)
summary(as.factor(subtypes_postmortem$ml_subtype))
##  0  1  2 
## 38 12 10
# subtypes_postmortem <- subtypes_postmortem %>%
#   mutate(
#     subtype = case_when(
#       ml_subtype == 0 ~ "subtype1",
#       ml_subtype == 1 ~ "subtype2",
#       ml_subtype %in% c(2, 3) ~ "subtype3"
#     )
#   )


Patho_data_combined_Ch2_Shareable <- read_excel("Patho_data_combined_Ch2_Shareable.xlsx") %>%
  rename(i_id=SubjectANW)
## New names:
## • `` -> `...1`
data<-left_join(Patho_data_combined_Ch2_Shareable, subtypes_postmortem, by="i_id") %>% drop_na(ml_subtype)


data$dx_pre<-ifelse(data$Cohort=="Control",1,0)
data$dx_post<-ifelse(data$ml_subtype==0,1,0)
table(data$dx_pre, data$dx_post)
##    
##       0   1
##   0 108 215
##   1  71  62
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(lme4)
modelo_completo <- lmer(Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id), data = data)
modelo_sin_ml_stage <- lmer(Amyloid ~ ml_subtype + Region + (1 | i_id), data = data)
anova(modelo_sin_ml_stage, modelo_completo)
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## modelo_sin_ml_stage: Amyloid ~ ml_subtype + Region + (1 | i_id)
## modelo_completo: Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id)
##                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
## modelo_sin_ml_stage   12 1682.5 1731.9 -829.27   1658.5                       
## modelo_completo       13 1678.3 1731.8 -826.14   1652.3 6.2456  1    0.01245 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modelo<-lmer(Amyloid ~ as.factor(ml_subtype)* Region+ml_stage  + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Amyloid ~ as.factor(ml_subtype) * Region + ml_stage + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 1649.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3547 -0.5060 -0.0713  0.4301  4.8998 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 3.903    1.976   
##  Residual             1.671    1.293   
## Number of obs: 452, groups:  i_id, 51
## 
## Fixed effects:
##                                       Estimate Std. Error        df t value
## (Intercept)                            2.30668    0.50463  74.75879   4.571
## as.factor(ml_subtype)1                 0.01485    0.83251  88.49932   0.018
## as.factor(ml_subtype)2                -0.82086    0.97347  91.05426  -0.843
## RegionGFM                              0.72694    0.33804 377.22019   2.150
## RegionGPS                              0.28780    0.33804 377.22019   0.851
## RegionGTM                             -0.01857    0.33804 377.22019  -0.055
## RegionHip                             -1.58946    0.33804 377.22019  -4.702
## RegionOC                              -0.87791    0.33804 377.22019  -2.597
## RegionParaHip                         -0.58756    0.34033 377.13008  -1.726
## RegionPCC                              0.25192    0.33804 377.22019   0.745
## RegionPrecun                           0.42201    0.33804 377.22019   1.248
## ml_stage                               0.03201    0.01326  47.03555   2.414
## as.factor(ml_subtype)1:RegionGFM       0.15891    0.63787 377.17321   0.249
## as.factor(ml_subtype)2:RegionGFM       0.15999    0.75188 377.25506   0.213
## as.factor(ml_subtype)1:RegionGPS      -0.29743    0.63787 377.17321  -0.466
## as.factor(ml_subtype)2:RegionGPS      -0.27821    0.75188 377.25506  -0.370
## as.factor(ml_subtype)1:RegionGTM       0.31331    0.63787 377.17321   0.491
## as.factor(ml_subtype)2:RegionGTM       0.10751    0.75188 377.25506   0.143
## as.factor(ml_subtype)1:RegionHip      -0.58092    0.63787 377.17321  -0.911
## as.factor(ml_subtype)2:RegionHip       0.71344    0.75188 377.25506   0.949
## as.factor(ml_subtype)1:RegionOC        0.08202    0.63787 377.17321   0.129
## as.factor(ml_subtype)2:RegionOC       -0.29473    0.75188 377.25506  -0.392
## as.factor(ml_subtype)1:RegionParaHip   0.23002    0.63909 377.15443   0.360
## as.factor(ml_subtype)2:RegionParaHip   0.38868    0.75291 377.24333   0.516
## as.factor(ml_subtype)1:RegionPCC      -0.06203    0.63787 377.17321  -0.097
## as.factor(ml_subtype)2:RegionPCC      -0.07527    0.75188 377.25506  -0.100
## as.factor(ml_subtype)1:RegionPrecun   -0.21929    0.63787 377.17321  -0.344
## as.factor(ml_subtype)2:RegionPrecun   -0.68450    0.77449 377.56992  -0.884
##                                      Pr(>|t|)    
## (Intercept)                          1.88e-05 ***
## as.factor(ml_subtype)1                0.98581    
## as.factor(ml_subtype)2                0.40131    
## RegionGFM                             0.03216 *  
## RegionGPS                             0.39510    
## RegionGTM                             0.95622    
## RegionHip                            3.62e-06 ***
## RegionOC                              0.00977 ** 
## RegionParaHip                         0.08509 .  
## RegionPCC                             0.45659    
## RegionPrecun                          0.21266    
## ml_stage                              0.01973 *  
## as.factor(ml_subtype)1:RegionGFM      0.80340    
## as.factor(ml_subtype)2:RegionGFM      0.83161    
## as.factor(ml_subtype)1:RegionGPS      0.64129    
## as.factor(ml_subtype)2:RegionGPS      0.71158    
## as.factor(ml_subtype)1:RegionGTM      0.62359    
## as.factor(ml_subtype)2:RegionGTM      0.88638    
## as.factor(ml_subtype)1:RegionHip      0.36302    
## as.factor(ml_subtype)2:RegionHip      0.34329    
## as.factor(ml_subtype)1:RegionOC       0.89775    
## as.factor(ml_subtype)2:RegionOC       0.69529    
## as.factor(ml_subtype)1:RegionParaHip  0.71911    
## as.factor(ml_subtype)2:RegionParaHip  0.60600    
## as.factor(ml_subtype)1:RegionPCC      0.92258    
## as.factor(ml_subtype)2:RegionPCC      0.92031    
## as.factor(ml_subtype)1:RegionPrecun   0.73120    
## as.factor(ml_subtype)2:RegionPrecun   0.37736    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 28 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          1.755  0.8776     2  47.01  0.5253   0.59480    
## Region                       147.755 18.4693     8 377.06 11.0552 4.732e-14 ***
## ml_stage                       9.734  9.7341     1  47.04  5.8265   0.01973 *  
## as.factor(ml_subtype):Region  14.771  0.9232    16 377.05  0.5526   0.91745    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.099 0.437 90.2    2.231     3.97
##           1  3.113 0.699 89.5    1.724     4.50
##           2  2.278 0.862 92.5    0.567     3.99
## 
## Region = GFM:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.826 0.429 84.5    2.972     4.68
##           1  3.999 0.689 84.6    2.630     5.37
##           2  3.165 0.842 84.8    1.491     4.84
## 
## Region = GPS:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.386 0.429 84.5    2.533     4.24
##           1  3.104 0.689 84.6    1.734     4.47
##           2  2.287 0.842 84.8    0.614     3.96
## 
## Region = GTM:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.080 0.429 84.5    2.227     3.93
##           1  3.408 0.689 84.6    2.039     4.78
##           2  2.367 0.842 84.8    0.693     4.04
## 
## Region = Hip:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  1.509 0.429 84.5    0.656     2.36
##           1  0.943 0.689 84.6   -0.426     2.31
##           2  1.402 0.842 84.8   -0.272     3.08
## 
## Region = OC:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  2.221 0.429 84.5    1.368     3.07
##           1  2.318 0.689 84.6    0.948     3.69
##           2  1.105 0.842 84.8   -0.568     2.78
## 
## Region = ParaHip:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  2.511 0.432 86.3    1.653     3.37
##           1  2.756 0.689 84.6    1.386     4.13
##           2  2.079 0.842 84.8    0.406     3.75
## 
## Region = PCC:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.351 0.429 84.5    2.497     4.20
##           1  3.303 0.689 84.6    1.934     4.67
##           2  2.454 0.842 84.8    0.781     4.13
## 
## Region = Precun:
##  ml_subtype emmean    SE   df lower.CL upper.CL
##           0  3.521 0.429 84.5    2.668     4.37
##           1  3.316 0.689 84.6    1.947     4.69
##           2  2.015 0.861 92.5    0.305     3.73
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.0149 0.833 88.6  -0.018  0.9998
##  ml_subtype0 - ml_subtype2   0.8209 0.973 91.1   0.843  0.6772
##  ml_subtype1 - ml_subtype2   0.8357 1.099 92.4   0.760  0.7283
## 
## Region = GFM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.1738 0.819 83.5  -0.212  0.9755
##  ml_subtype0 - ml_subtype2   0.6609 0.952 83.9   0.694  0.7676
##  ml_subtype1 - ml_subtype2   0.8346 1.078 85.8   0.775  0.7196
## 
## Region = GPS:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2826 0.819 83.5   0.345  0.9366
##  ml_subtype0 - ml_subtype2   1.0991 0.952 83.9   1.154  0.4837
##  ml_subtype1 - ml_subtype2   0.8165 1.078 85.8   0.758  0.7298
## 
## Region = GTM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.3282 0.819 83.5  -0.400  0.9155
##  ml_subtype0 - ml_subtype2   0.7134 0.952 83.9   0.749  0.7349
##  ml_subtype1 - ml_subtype2   1.0415 1.078 85.8   0.967  0.5999
## 
## Region = Hip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.5661 0.819 83.5   0.691  0.7695
##  ml_subtype0 - ml_subtype2   0.1074 0.952 83.9   0.113  0.9930
##  ml_subtype1 - ml_subtype2  -0.4587 1.078 85.8  -0.426  0.9051
## 
## Region = OC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.0969 0.819 83.5  -0.118  0.9923
##  ml_subtype0 - ml_subtype2   1.1156 0.952 83.9   1.172  0.4733
##  ml_subtype1 - ml_subtype2   1.2125 1.078 85.8   1.125  0.5013
## 
## Region = ParaHip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.2449 0.821 84.0  -0.298  0.9522
##  ml_subtype0 - ml_subtype2   0.4322 0.953 84.3   0.453  0.8931
##  ml_subtype1 - ml_subtype2   0.6771 1.078 85.8   0.628  0.8050
## 
## Region = PCC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.0472 0.819 83.5   0.058  0.9982
##  ml_subtype0 - ml_subtype2   0.8961 0.952 83.9   0.941  0.6159
##  ml_subtype1 - ml_subtype2   0.8489 1.078 85.8   0.788  0.7115
## 
## Region = Precun:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2044 0.819 83.5   0.249  0.9663
##  ml_subtype0 - ml_subtype2   1.5054 0.970 89.8   1.552  0.2717
##  ml_subtype1 - ml_subtype2   1.3009 1.093 90.4   1.190  0.4620
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
modelo<-lmer(pTau ~ as.factor(ml_subtype)* Region  + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pTau ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 3429.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.98022 -0.58679 -0.08051  0.52627  2.99100 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 485.6    22.04   
##  Residual             136.0    11.66   
## Number of obs: 439, groups:  i_id, 51
## 
## Fixed effects:
##                                      Estimate Std. Error      df t value
## (Intercept)                            41.348      4.610  81.554   8.969
## as.factor(ml_subtype)1                -14.498      8.696  80.465  -1.667
## as.factor(ml_subtype)2                 -1.586     10.089  78.842  -0.157
## RegionGFM                             -15.559      3.159 364.360  -4.926
## RegionGPS                             -12.596      3.159 364.360  -3.988
## RegionGTM                             -10.463      3.159 364.360  -3.312
## RegionHip                              -8.687      3.199 364.191  -2.716
## RegionOC                              -33.478      3.159 364.360 -10.598
## RegionParaHip                          -2.189      3.271 363.978  -0.669
## RegionPCC                             -10.738      3.159 364.360  -3.399
## RegionPrecun                          -18.947      3.159 364.360  -5.998
## as.factor(ml_subtype)1:RegionGFM       13.438      5.934 364.267   2.265
## as.factor(ml_subtype)2:RegionGFM        8.059      6.841 364.291   1.178
## as.factor(ml_subtype)1:RegionGPS       11.218      5.934 364.267   1.891
## as.factor(ml_subtype)2:RegionGPS        7.709      7.047 364.515   1.094
## as.factor(ml_subtype)1:RegionGTM       14.718      5.934 364.267   2.480
## as.factor(ml_subtype)2:RegionGTM        8.236      6.841 364.291   1.204
## as.factor(ml_subtype)1:RegionHip        3.426      5.955 364.219   0.575
## as.factor(ml_subtype)2:RegionHip      -10.392      7.007 363.975  -1.483
## as.factor(ml_subtype)1:RegionOC        25.585      5.934 364.267   4.312
## as.factor(ml_subtype)2:RegionOC        14.670      6.841 364.291   2.144
## as.factor(ml_subtype)1:RegionParaHip    4.089      6.068 364.038   0.674
## as.factor(ml_subtype)2:RegionParaHip   -4.702      7.040 363.931  -0.668
## as.factor(ml_subtype)1:RegionPCC       10.511      5.934 364.267   1.771
## as.factor(ml_subtype)2:RegionPCC        3.757      6.841 364.291   0.549
## as.factor(ml_subtype)1:RegionPrecun    21.645      5.934 364.267   3.648
## as.factor(ml_subtype)2:RegionPrecun    11.957      6.841 364.291   1.748
##                                      Pr(>|t|)    
## (Intercept)                          8.68e-14 ***
## as.factor(ml_subtype)1               0.099364 .  
## as.factor(ml_subtype)2               0.875459    
## RegionGFM                            1.28e-06 ***
## RegionGPS                            8.07e-05 ***
## RegionGTM                            0.001017 ** 
## RegionHip                            0.006930 ** 
## RegionOC                              < 2e-16 ***
## RegionParaHip                        0.503708    
## RegionPCC                            0.000750 ***
## RegionPrecun                         4.81e-09 ***
## as.factor(ml_subtype)1:RegionGFM     0.024123 *  
## as.factor(ml_subtype)2:RegionGFM     0.239576    
## as.factor(ml_subtype)1:RegionGPS     0.059483 .  
## as.factor(ml_subtype)2:RegionGPS     0.274703    
## as.factor(ml_subtype)1:RegionGTM     0.013576 *  
## as.factor(ml_subtype)2:RegionGTM     0.229409    
## as.factor(ml_subtype)1:RegionHip     0.565443    
## as.factor(ml_subtype)2:RegionHip     0.138933    
## as.factor(ml_subtype)1:RegionOC      2.09e-05 ***
## as.factor(ml_subtype)2:RegionOC      0.032676 *  
## as.factor(ml_subtype)1:RegionParaHip 0.500847    
## as.factor(ml_subtype)2:RegionParaHip 0.504642    
## as.factor(ml_subtype)1:RegionPCC     0.077332 .  
## as.factor(ml_subtype)2:RegionPCC     0.583208    
## as.factor(ml_subtype)1:RegionPrecun  0.000303 ***
## as.factor(ml_subtype)2:RegionPrecun  0.081351 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
##                              Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          42.8   21.41     2  47.93  0.1574 0.8548351    
## Region                       9902.5 1237.81     8 364.09  9.0988 2.033e-11 ***
## as.factor(ml_subtype):Region 6344.0  396.50    16 364.08  2.9146 0.0001537 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  41.35 4.61 81.7    32.18     50.5
##           1  26.85 7.37 80.2    12.18     41.5
##           2  39.76 8.97 78.3    21.90     57.6
## 
## Region = GFM:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  25.79 4.48 73.2    16.86     34.7
##           1  24.73 7.20 73.2    10.38     39.1
##           2  32.26 8.82 73.2    14.69     49.8
## 
## Region = GPS:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  28.75 4.48 73.2    19.83     37.7
##           1  25.47 7.20 73.2    11.13     39.8
##           2  34.87 8.97 78.1    17.02     52.7
## 
## Region = GTM:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  30.88 4.48 73.2    21.96     39.8
##           1  31.10 7.20 73.2    16.76     45.4
##           2  37.53 8.82 73.2    19.97     55.1
## 
## Region = Hip:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  32.66 4.52 75.6    23.66     41.7
##           1  21.59 7.20 73.2     7.25     35.9
##           2  20.68 8.97 78.3     2.82     38.5
## 
## Region = OC:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0   7.87 4.48 73.2    -1.05     16.8
##           1  18.96 7.20 73.2     4.61     33.3
##           2  20.95 8.82 73.2     3.39     38.5
## 
## Region = ParaHip:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  39.16 4.58 80.0    30.03     48.3
##           1  28.75 7.28 76.3    14.25     43.2
##           2  32.87 8.97 78.3    15.01     50.7
## 
## Region = PCC:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  30.61 4.48 73.2    21.69     39.5
##           1  26.62 7.20 73.2    12.28     41.0
##           2  32.78 8.82 73.2    15.21     50.3
## 
## Region = Precun:
##  ml_subtype emmean   SE   df lower.CL upper.CL
##           0  22.40 4.48 73.2    13.48     31.3
##           1  29.55 7.20 73.2    15.20     43.9
##           2  32.77 8.82 73.2    15.20     50.3
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   14.498  8.70 80.6   1.667  0.2241
##  ml_subtype0 - ml_subtype2    1.586 10.09 79.0   0.157  0.9865
##  ml_subtype1 - ml_subtype2  -12.911 11.61 79.0  -1.112  0.5098
## 
## Region = GFM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    1.060  8.48 73.2   0.125  0.9914
##  ml_subtype0 - ml_subtype2   -6.473  9.89 73.2  -0.655  0.7903
##  ml_subtype1 - ml_subtype2   -7.533 11.38 73.2  -0.662  0.7862
## 
## Region = GPS:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    3.280  8.48 73.2   0.387  0.9209
##  ml_subtype0 - ml_subtype2   -6.122 10.02 77.1  -0.611  0.8147
##  ml_subtype1 - ml_subtype2   -9.402 11.50 76.1  -0.818  0.6933
## 
## Region = GTM:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -0.220  8.48 73.2  -0.026  0.9996
##  ml_subtype0 - ml_subtype2   -6.650  9.89 73.2  -0.673  0.7801
##  ml_subtype1 - ml_subtype2   -6.430 11.38 73.2  -0.565  0.8391
## 
## Region = Hip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   11.072  8.50 73.8   1.303  0.3981
##  ml_subtype0 - ml_subtype2   11.978 10.05 77.7   1.192  0.4613
##  ml_subtype1 - ml_subtype2    0.906 11.50 76.2   0.079  0.9966
## 
## Region = OC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -11.087  8.48 73.2  -1.308  0.3954
##  ml_subtype0 - ml_subtype2  -13.083  9.89 73.2  -1.323  0.3870
##  ml_subtype1 - ml_subtype2   -1.996 11.38 73.2  -0.175  0.9832
## 
## Region = ParaHip:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   10.409  8.60 77.3   1.210  0.4509
##  ml_subtype0 - ml_subtype2    6.288 10.08 78.6   0.624  0.8074
##  ml_subtype1 - ml_subtype2   -4.121 11.55 77.5  -0.357  0.9324
## 
## Region = PCC:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1    3.987  8.48 73.2   0.470  0.8854
##  ml_subtype0 - ml_subtype2   -2.171  9.89 73.2  -0.220  0.9738
##  ml_subtype1 - ml_subtype2   -6.158 11.38 73.2  -0.541  0.8513
## 
## Region = Precun:
##  contrast                  estimate    SE   df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -7.147  8.48 73.2  -0.843  0.6776
##  ml_subtype0 - ml_subtype2  -10.371  9.89 73.2  -1.049  0.5487
##  ml_subtype1 - ml_subtype2   -3.224 11.38 73.2  -0.283  0.9568
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
# Realizar el análisis de los contrastes
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes <- as.data.frame(resultados$contrasts)
# Filtrar los contrastes significativos (p < 0.05)
contrasttau<-contrastes
contrasttau$stain<-"pTau"

modelo<-lmer(NfL ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: NfL ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 2104.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6833 -0.6146  0.0399  0.6263  2.4318 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 8.971    2.995   
##  Residual             7.448    2.729   
## Number of obs: 424, groups:  i_id, 48
## 
## Fixed effects:
##                                      Estimate Std. Error       df t value
## (Intercept)                            9.0297     0.7729 122.9667  11.683
## as.factor(ml_subtype)1                 1.1617     1.4505 134.6848   0.801
## as.factor(ml_subtype)2                 0.2591     1.8095 167.9034   0.143
## RegionGFM                              1.0690     0.7369 352.2398   1.451
## RegionGPS                              2.3087     0.7369 352.2398   3.133
## RegionGTM                              0.9855     0.7369 352.2398   1.337
## RegionHip                              1.9798     0.7369 352.2398   2.687
## RegionOC                               2.2398     0.7369 352.2398   3.040
## RegionParaHip                         -0.1888     0.7369 352.2398  -0.256
## RegionPCC                              1.7966     0.7369 352.2398   2.438
## RegionPrecun                           2.6092     0.7369 352.2398   3.541
## as.factor(ml_subtype)1:RegionGFM      -2.0040     1.3866 352.6609  -1.445
## as.factor(ml_subtype)2:RegionGFM      -2.1051     1.7406 353.7534  -1.209
## as.factor(ml_subtype)1:RegionGPS      -1.5797     1.3866 352.6609  -1.139
## as.factor(ml_subtype)2:RegionGPS      -2.2250     1.7406 353.7534  -1.278
## as.factor(ml_subtype)1:RegionGTM      -1.4176     1.3866 352.6609  -1.022
## as.factor(ml_subtype)2:RegionGTM      -1.4253     1.7406 353.7534  -0.819
## as.factor(ml_subtype)1:RegionHip      -0.7280     1.3866 352.6609  -0.525
## as.factor(ml_subtype)2:RegionHip      -0.5372     1.7406 353.7534  -0.309
## as.factor(ml_subtype)1:RegionOC       -1.8297     1.3866 352.6609  -1.320
## as.factor(ml_subtype)2:RegionOC       -1.4218     1.7406 353.7534  -0.817
## as.factor(ml_subtype)1:RegionParaHip  -1.8677     1.3866 352.6609  -1.347
## as.factor(ml_subtype)2:RegionParaHip  -0.8132     1.7735 353.2396  -0.459
## as.factor(ml_subtype)1:RegionPCC      -1.1741     1.3866 352.6609  -0.847
## as.factor(ml_subtype)2:RegionPCC      -0.7223     1.7885 354.5384  -0.404
## as.factor(ml_subtype)1:RegionPrecun    0.3765     1.3866 352.6609   0.272
## as.factor(ml_subtype)2:RegionPrecun   -0.1201     1.7406 353.7534  -0.069
##                                      Pr(>|t|)    
## (Intercept)                           < 2e-16 ***
## as.factor(ml_subtype)1               0.424597    
## as.factor(ml_subtype)2               0.886309    
## RegionGFM                            0.147761    
## RegionGPS                            0.001875 ** 
## RegionGTM                            0.181960    
## RegionHip                            0.007557 ** 
## RegionOC                             0.002546 ** 
## RegionParaHip                        0.797973    
## RegionPCC                            0.015257 *  
## RegionPrecun                         0.000452 ***
## as.factor(ml_subtype)1:RegionGFM     0.149266    
## as.factor(ml_subtype)2:RegionGFM     0.227302    
## as.factor(ml_subtype)1:RegionGPS     0.255358    
## as.factor(ml_subtype)2:RegionGPS     0.201976    
## as.factor(ml_subtype)1:RegionGTM     0.307317    
## as.factor(ml_subtype)2:RegionGTM     0.413407    
## as.factor(ml_subtype)1:RegionHip     0.599896    
## as.factor(ml_subtype)2:RegionHip     0.757776    
## as.factor(ml_subtype)1:RegionOC      0.187823    
## as.factor(ml_subtype)2:RegionOC      0.414546    
## as.factor(ml_subtype)1:RegionParaHip 0.178843    
## as.factor(ml_subtype)2:RegionParaHip 0.646854    
## as.factor(ml_subtype)1:RegionPCC     0.397718    
## as.factor(ml_subtype)2:RegionPCC     0.686555    
## as.factor(ml_subtype)1:RegionPrecun  0.786160    
## as.factor(ml_subtype)2:RegionPrecun  0.945014    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
##                              Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## as.factor(ml_subtype)          3.14   1.570     2  45.34  0.2107    0.8108    
## Region                       366.09  45.761     8 352.50  6.1438 1.996e-07 ***
## as.factor(ml_subtype):Region  65.98   4.124    16 352.42  0.5537    0.9166    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   9.03 0.773 123     7.50     10.6
##           1  10.19 1.227 140     7.76     12.6
##           2   9.29 1.636 180     6.06     12.5
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.10 0.766 119     8.58     11.6
##           1   9.26 1.170 119     6.94     11.6
##           2   8.25 1.433 119     5.42     11.1
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.34 0.766 119     9.82     12.9
##           1  10.92 1.170 119     8.60     13.2
##           2   9.37 1.433 119     6.54     12.2
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.02 0.766 119     8.50     11.5
##           1   9.76 1.170 119     7.44     12.1
##           2   8.85 1.433 119     6.01     11.7
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.01 0.766 119     9.49     12.5
##           1  11.44 1.170 119     9.13     13.8
##           2  10.73 1.433 119     7.89     13.6
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.27 0.766 119     9.75     12.8
##           1  10.60 1.170 119     8.29     12.9
##           2  10.11 1.433 119     7.27     12.9
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   8.84 0.766 119     7.32     10.4
##           1   8.13 1.170 119     5.82     10.5
##           2   8.29 1.484 134     5.35     11.2
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  10.83 0.766 119     9.31     12.3
##           1  10.81 1.170 119     8.50     13.1
##           2  10.36 1.484 134     7.43     13.3
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0  11.64 0.766 119    10.12     13.2
##           1  13.18 1.170 119    10.86     15.5
##           2  11.78 1.433 119     8.94     14.6
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -1.1617 1.45 134  -0.801  0.7031
##  ml_subtype0 - ml_subtype2  -0.2591 1.81 168  -0.143  0.9888
##  ml_subtype1 - ml_subtype2   0.9026 2.05 164   0.441  0.8983
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8423 1.40 119   0.602  0.8191
##  ml_subtype0 - ml_subtype2   1.8460 1.62 119   1.136  0.4937
##  ml_subtype1 - ml_subtype2   1.0037 1.85 119   0.543  0.8504
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4180 1.40 119   0.299  0.9519
##  ml_subtype0 - ml_subtype2   1.9659 1.62 119   1.210  0.4495
##  ml_subtype1 - ml_subtype2   1.5479 1.85 119   0.837  0.6809
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.2559 1.40 119   0.183  0.9817
##  ml_subtype0 - ml_subtype2   1.1662 1.62 119   0.718  0.7534
##  ml_subtype1 - ml_subtype2   0.9103 1.85 119   0.492  0.8752
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.4337 1.40 119  -0.310  0.9484
##  ml_subtype0 - ml_subtype2   0.2781 1.62 119   0.171  0.9840
##  ml_subtype1 - ml_subtype2   0.7118 1.85 119   0.385  0.9217
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.6680 1.40 119   0.478  0.8819
##  ml_subtype0 - ml_subtype2   1.1627 1.62 119   0.716  0.7547
##  ml_subtype1 - ml_subtype2   0.4947 1.85 119   0.267  0.9613
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.7060 1.40 119   0.505  0.8691
##  ml_subtype0 - ml_subtype2   0.5541 1.67 130   0.332  0.9412
##  ml_subtype1 - ml_subtype2  -0.1519 1.89 128  -0.080  0.9964
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.0124 1.40 119   0.009  1.0000
##  ml_subtype0 - ml_subtype2   0.4632 1.67 130   0.277  0.9585
##  ml_subtype1 - ml_subtype2   0.4508 1.89 128   0.239  0.9691
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -1.5382 1.40 119  -1.100  0.5159
##  ml_subtype0 - ml_subtype2  -0.1390 1.62 119  -0.086  0.9960
##  ml_subtype1 - ml_subtype2   1.3992 1.85 119   0.757  0.7303
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_NfL <- as.data.frame(resultados$contrasts)
contrastes_NfL$stain<-"NfL"

modelo<-lmer(Myelin ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo)  # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Myelin ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 2028.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -6.1139 -0.4088  0.0530  0.5048  2.2711 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept)  5.343   2.311   
##  Residual             18.276   4.275   
## Number of obs: 360, groups:  i_id, 50
## 
## Fixed effects:
##                                      Estimate Std. Error       df t value
## (Intercept)                           26.3491     1.5306 332.9959  17.215
## as.factor(ml_subtype)1                 3.6185     2.3472 326.0474   1.542
## as.factor(ml_subtype)2                 3.9309     3.0887 330.3045   1.273
## RegionGFM                              3.6327     1.6709 300.0860   2.174
## RegionGPS                              5.8277     1.6721 298.3059   3.485
## RegionGTM                              3.4172     1.6667 298.7404   2.050
## RegionHip                              1.9984     1.6842 297.3685   1.187
## RegionOC                               5.6778     1.6667 298.7691   3.407
## RegionParaHip                          2.7591     1.7067 296.2327   1.617
## RegionPCC                              5.6114     2.0189 299.7713   2.779
## RegionPrecun                           8.0171     1.6762 299.5069   4.783
## as.factor(ml_subtype)1:RegionGFM      -3.3803     2.7023 293.4649  -1.251
## as.factor(ml_subtype)2:RegionGFM      -2.5849     3.5251 299.4442  -0.733
## as.factor(ml_subtype)1:RegionGPS      -1.4576     2.6753 293.5912  -0.545
## as.factor(ml_subtype)2:RegionGPS      -0.4631     3.4411 296.2714  -0.135
## as.factor(ml_subtype)1:RegionGTM      -1.4173     2.6499 295.2407  -0.535
## as.factor(ml_subtype)2:RegionGTM      -3.1001     3.4384 296.3724  -0.902
## as.factor(ml_subtype)1:RegionHip      -2.5797     2.6829 293.2351  -0.962
## as.factor(ml_subtype)2:RegionHip      -2.6075     3.5028 294.5604  -0.744
## as.factor(ml_subtype)1:RegionOC        0.5786     2.6499 295.2525   0.218
## as.factor(ml_subtype)2:RegionOC       -3.2706     3.4948 295.0017  -0.936
## as.factor(ml_subtype)1:RegionParaHip  -1.7978     2.6970 292.8102  -0.667
## as.factor(ml_subtype)2:RegionParaHip  -4.1558     3.5136 294.3046  -1.183
## as.factor(ml_subtype)1:RegionPCC      -1.3664     3.2768 300.4034  -0.417
## as.factor(ml_subtype)2:RegionPCC      -5.1096     4.1310 301.6974  -1.237
## as.factor(ml_subtype)1:RegionPrecun   -0.9095     2.6559 295.5724  -0.342
## as.factor(ml_subtype)2:RegionPrecun   -0.8459     3.4430 296.5650  -0.246
##                                      Pr(>|t|)    
## (Intercept)                           < 2e-16 ***
## as.factor(ml_subtype)1               0.124132    
## as.factor(ml_subtype)2               0.204029    
## RegionGFM                            0.030478 *  
## RegionGPS                            0.000566 ***
## RegionGTM                            0.041208 *  
## RegionHip                            0.236363    
## RegionOC                             0.000748 ***
## RegionParaHip                        0.107013    
## RegionPCC                            0.005790 ** 
## RegionPrecun                         2.71e-06 ***
## as.factor(ml_subtype)1:RegionGFM     0.211963    
## as.factor(ml_subtype)2:RegionGFM     0.463947    
## as.factor(ml_subtype)1:RegionGPS     0.586291    
## as.factor(ml_subtype)2:RegionGPS     0.893028    
## as.factor(ml_subtype)1:RegionGTM     0.593149    
## as.factor(ml_subtype)2:RegionGTM     0.368003    
## as.factor(ml_subtype)1:RegionHip     0.337074    
## as.factor(ml_subtype)2:RegionHip     0.457219    
## as.factor(ml_subtype)1:RegionOC      0.827318    
## as.factor(ml_subtype)2:RegionOC      0.350123    
## as.factor(ml_subtype)1:RegionParaHip 0.505569    
## as.factor(ml_subtype)2:RegionParaHip 0.237862    
## as.factor(ml_subtype)1:RegionPCC     0.676989    
## as.factor(ml_subtype)2:RegionPCC     0.217090    
## as.factor(ml_subtype)1:RegionPrecun  0.732258    
## as.factor(ml_subtype)2:RegionPrecun  0.806092    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF   DenDF F value  Pr(>F)    
## as.factor(ml_subtype)         105.39  52.695     2  48.041  2.8833 0.06568 .  
## Region                       1527.28 190.910     8 290.172 10.4459 7.2e-13 ***
## as.factor(ml_subtype):Region  171.22  10.701    16 290.104  0.5855 0.89414    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   26.3 1.533 333     23.3     29.4
##           1   30.0 1.781 313     26.5     33.5
##           2   30.3 2.686 328     25.0     35.6
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   30.0 0.885 257     28.2     31.7
##           1   30.2 1.521 276     27.2     33.2
##           2   31.3 1.962 275     27.5     35.2
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.2 0.899 262     30.4     33.9
##           1   34.3 1.459 263     31.5     37.2
##           2   35.6 1.837 252     32.0     39.3
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   29.8 0.885 257     28.0     31.5
##           1   32.0 1.403 252     29.2     34.7
##           2   30.6 1.837 252     27.0     34.2
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   28.3 0.928 271     26.5     30.2
##           1   29.4 1.459 263     26.5     32.3
##           2   29.7 1.965 274     25.8     33.5
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.0 0.885 257     30.3     33.8
##           1   36.2 1.403 252     33.5     39.0
##           2   32.7 1.964 274     28.8     36.6
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   29.1 0.977 286     27.2     31.0
##           1   30.9 1.459 263     28.1     33.8
##           2   28.9 1.965 274     25.0     32.8
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   32.0 1.459 333     29.1     34.8
##           1   34.2 2.078 329     30.1     38.3
##           2   30.8 2.688 328     25.5     36.1
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   34.4 0.899 262     32.6     36.1
##           1   37.1 1.403 252     34.3     39.8
##           2   37.5 1.837 252     33.8     41.1
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -3.619 2.35 326  -1.540  0.2737
##  ml_subtype0 - ml_subtype2   -3.931 3.09 330  -1.271  0.4125
##  ml_subtype1 - ml_subtype2   -0.312 3.22 325  -0.097  0.9948
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -0.238 1.76 272  -0.135  0.9899
##  ml_subtype0 - ml_subtype2   -1.346 2.15 272  -0.625  0.8064
##  ml_subtype1 - ml_subtype2   -1.108 2.48 276  -0.446  0.8961
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.161 1.71 263  -1.261  0.4186
##  ml_subtype0 - ml_subtype2   -3.468 2.04 254  -1.696  0.2088
##  ml_subtype1 - ml_subtype2   -1.307 2.35 257  -0.557  0.8429
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.201 1.66 254  -1.327  0.3816
##  ml_subtype0 - ml_subtype2   -0.831 2.04 253  -0.407  0.9126
##  ml_subtype1 - ml_subtype2    1.370 2.31 252   0.593  0.8241
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -1.039 1.73 266  -0.601  0.8198
##  ml_subtype0 - ml_subtype2   -1.323 2.17 274  -0.609  0.8153
##  ml_subtype1 - ml_subtype2   -0.285 2.45 270  -0.116  0.9926
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -4.197 1.66 254  -2.530  0.0321
##  ml_subtype0 - ml_subtype2   -0.660 2.15 271  -0.307  0.9495
##  ml_subtype1 - ml_subtype2    3.537 2.41 267   1.465  0.3094
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -1.821 1.76 270  -1.037  0.5543
##  ml_subtype0 - ml_subtype2    0.225 2.19 277   0.102  0.9942
##  ml_subtype1 - ml_subtype2    2.046 2.45 270   0.836  0.6811
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.252 2.54 331  -0.887  0.6489
##  ml_subtype0 - ml_subtype2    1.179 3.06 330   0.385  0.9214
##  ml_subtype1 - ml_subtype2    3.431 3.40 328   1.010  0.5711
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   -2.709 1.67 255  -1.626  0.2365
##  ml_subtype0 - ml_subtype2   -3.085 2.04 254  -1.509  0.2885
##  ml_subtype1 - ml_subtype2   -0.376 2.31 252  -0.163  0.9855
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_myelin <- as.data.frame(resultados$contrasts)

contrastes_myelin$stain<-"Myelin"


modelo<-lmer(Microglia ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
summary(modelo)  # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Microglia ~ as.factor(ml_subtype) * Region + (1 | i_id)
##    Data: data
## 
## REML criterion at convergence: 1988.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4082 -0.5583 -0.0470  0.5327  4.6251 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  i_id     (Intercept) 11.277   3.358   
##  Residual              9.401   3.066   
## Number of obs: 384, groups:  i_id, 49
## 
## Fixed effects:
##                                       Estimate Std. Error        df t value
## (Intercept)                            7.21751    0.84541 125.63844   8.537
## as.factor(ml_subtype)1                -1.40662    1.65299 144.45862  -0.851
## as.factor(ml_subtype)2                -2.77295    2.06994 152.64047  -1.340
## RegionGFM                             -1.00093    0.81514 312.47136  -1.228
## RegionGPS                             -1.00351    0.80757 312.32373  -1.243
## RegionGTM                             -1.82584    0.81326 312.11639  -2.245
## RegionHip                              2.76047    0.80757 312.32373   3.418
## RegionOC                              -5.08161    0.81522 312.50027  -6.233
## RegionParaHip                          1.23768    0.80757 312.32373   1.533
## RegionPCC                             -2.73298    1.42991 316.52026  -1.911
## RegionPrecun                          -4.59886    0.81522 312.50027  -5.641
## as.factor(ml_subtype)1:RegionGFM       1.62015    1.58925 313.05679   1.019
## as.factor(ml_subtype)2:RegionGFM       3.59163    1.99155 313.71084   1.803
## as.factor(ml_subtype)1:RegionGPS       0.60467    1.58538 313.02149   0.381
## as.factor(ml_subtype)2:RegionGPS       2.20444    1.98847 313.69053   1.109
## as.factor(ml_subtype)1:RegionGTM       3.66547    1.58828 312.96484   2.308
## as.factor(ml_subtype)2:RegionGTM       1.85386    1.99079 313.65307   0.931
## as.factor(ml_subtype)1:RegionHip       0.98722    1.58538 313.02149   0.623
## as.factor(ml_subtype)2:RegionHip       1.07980    1.98847 313.69053   0.543
## as.factor(ml_subtype)1:RegionOC        0.92599    1.58929 313.06433   0.583
## as.factor(ml_subtype)2:RegionOC        2.72006    2.03909 312.98961   1.334
## as.factor(ml_subtype)1:RegionParaHip   0.08371    1.58538 313.02149   0.053
## as.factor(ml_subtype)2:RegionParaHip  -0.19211    2.03604 312.96340  -0.094
## as.factor(ml_subtype)2:RegionPCC       1.90213    3.76725 316.61669   0.505
## as.factor(ml_subtype)1:RegionPrecun    0.60444    1.58929 313.06433   0.380
## as.factor(ml_subtype)2:RegionPrecun    4.10425    2.15524 315.98873   1.904
##                                      Pr(>|t|)    
## (Intercept)                          3.82e-14 ***
## as.factor(ml_subtype)1               0.396202    
## as.factor(ml_subtype)2               0.182357    
## RegionGFM                            0.220397    
## RegionGPS                            0.214933    
## RegionGTM                            0.025461 *  
## RegionHip                            0.000714 ***
## RegionOC                             1.48e-09 ***
## RegionParaHip                        0.126386    
## RegionPCC                            0.056870 .  
## RegionPrecun                         3.78e-08 ***
## as.factor(ml_subtype)1:RegionGFM     0.308780    
## as.factor(ml_subtype)2:RegionGFM     0.072280 .  
## as.factor(ml_subtype)1:RegionGPS     0.703163    
## as.factor(ml_subtype)2:RegionGPS     0.268447    
## as.factor(ml_subtype)1:RegionGTM     0.021661 *  
## as.factor(ml_subtype)2:RegionGTM     0.352456    
## as.factor(ml_subtype)1:RegionHip     0.533934    
## as.factor(ml_subtype)2:RegionHip     0.587494    
## as.factor(ml_subtype)1:RegionOC      0.560555    
## as.factor(ml_subtype)2:RegionOC      0.183189    
## as.factor(ml_subtype)1:RegionParaHip 0.957925    
## as.factor(ml_subtype)2:RegionParaHip 0.924889    
## as.factor(ml_subtype)2:RegionPCC     0.613972    
## as.factor(ml_subtype)1:RegionPrecun  0.703966    
## as.factor(ml_subtype)2:RegionPrecun  0.057777 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 26 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
anova(modelo)
## Missing cells for: as.factor(ml_subtype)1:RegionPCC.  
## Interpret type III hypotheses with care.
## Type III Analysis of Variance Table with Satterthwaite's method
##                               Sum Sq Mean Sq NumDF   DenDF F value Pr(>F)    
## as.factor(ml_subtype)           3.15   1.575     2  48.951  0.1675 0.8462    
## Region                       1204.11 150.513     8 313.398 16.0108 <2e-16 ***
## as.factor(ml_subtype):Region  157.28  10.486    15 312.975  1.1154 0.3412    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   7.22 0.845 126    5.544     8.89
##           1   5.81 1.421 152    3.004     8.62
##           2   4.44 1.890 159    0.712     8.18
## 
## Region = GFM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   6.22 0.837 122    4.559     7.87
##           1   6.43 1.313 118    3.831     9.03
##           2   7.04 1.719 118    3.632    10.44
## 
## Region = GPS:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   6.21 0.830 118    4.570     7.86
##           1   5.41 1.313 118    2.813     8.01
##           2   5.65 1.719 118    2.242     9.05
## 
## Region = GTM:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   5.39 0.838 122    3.733     7.05
##           1   7.65 1.313 118    5.051    10.25
##           2   4.47 1.719 118    1.069     7.88
## 
## Region = Hip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   9.98 0.830 118    8.334    11.62
##           1   9.56 1.313 118    6.959    12.16
##           2   8.28 1.719 118    4.881    11.69
## 
## Region = OC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   2.14 0.838 122    0.478     3.79
##           1   1.66 1.313 118   -0.944     4.25
##           2   2.08 1.792 136   -1.461     5.63
## 
## Region = ParaHip:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   8.46 0.830 118    6.811    10.10
##           1   7.13 1.313 118    4.533     9.73
##           2   5.49 1.792 136    1.946     9.03
## 
## Region = PCC:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   4.48 1.446 339    1.641     7.33
##           1 nonEst    NA  NA       NA       NA
##           2   3.61 3.458 355   -3.187    10.41
## 
## Region = Precun:
##  ml_subtype emmean    SE  df lower.CL upper.CL
##           0   2.62 0.838 122    0.961     4.28
##           1   1.82 1.313 118   -0.783     4.42
##           2   3.95 1.888 159    0.221     7.68
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
## Region = Fusiform:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   1.4066 1.65 145   0.851  0.6720
##  ml_subtype0 - ml_subtype2   2.7730 2.07 153   1.339  0.3756
##  ml_subtype1 - ml_subtype2   1.3663 2.36 156   0.578  0.8321
## 
## Region = GFM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -0.2135 1.56 119  -0.137  0.9897
##  ml_subtype0 - ml_subtype2  -0.8187 1.91 119  -0.428  0.9040
##  ml_subtype1 - ml_subtype2  -0.6051 2.16 118  -0.280  0.9578
## 
## Region = GPS:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8020 1.55 118   0.516  0.8635
##  ml_subtype0 - ml_subtype2   0.5685 1.91 118   0.298  0.9523
##  ml_subtype1 - ml_subtype2  -0.2334 2.16 118  -0.108  0.9936
## 
## Region = GTM:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1  -2.2588 1.56 120  -1.451  0.3185
##  ml_subtype0 - ml_subtype2   0.9191 1.91 119   0.481  0.8806
##  ml_subtype1 - ml_subtype2   3.1779 2.16 118   1.469  0.3093
## 
## Region = Hip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4194 1.55 118   0.270  0.9606
##  ml_subtype0 - ml_subtype2   1.6932 1.91 118   0.887  0.6495
##  ml_subtype1 - ml_subtype2   1.2738 2.16 118   0.589  0.8263
## 
## Region = OC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.4806 1.56 119   0.309  0.9489
##  ml_subtype0 - ml_subtype2   0.0529 1.98 133   0.027  0.9996
##  ml_subtype1 - ml_subtype2  -0.4277 2.22 129  -0.193  0.9798
## 
## Region = ParaHip:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   1.3229 1.55 118   0.852  0.6717
##  ml_subtype0 - ml_subtype2   2.9651 1.98 132   1.501  0.2936
##  ml_subtype1 - ml_subtype2   1.6422 2.22 129   0.739  0.7406
## 
## Region = PCC:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   nonEst   NA  NA      NA      NA
##  ml_subtype0 - ml_subtype2   0.8708 3.75 354   0.232  0.8164
##  ml_subtype1 - ml_subtype2   nonEst   NA  NA      NA      NA
## 
## Region = Precun:
##  contrast                  estimate   SE  df t.ratio p.value
##  ml_subtype0 - ml_subtype1   0.8022 1.56 119   0.515  0.8641
##  ml_subtype0 - ml_subtype2  -1.3313 2.07 152  -0.645  0.7957
##  ml_subtype1 - ml_subtype2  -2.1335 2.30 144  -0.928  0.6236
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for varying family sizes
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_micro <- as.data.frame(resultados$contrasts)
contrastes_micro$stain<-"microglia"



contraste<-rbind(contrasttau,contrastes_NfL, contrastes_myelin, contrastes_micro)
neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv") 
## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
clinical_data <- read_excel("data_validation/clinical data.xlsx")
## New names:
## • `` -> `...22`
clinical_data$i_id<- ifelse(grepl("^AD", clinical_data$SubjectANW), paste0("PAGE-",  clinical_data$SubjectANW),  clinical_data$SubjectANW)

# data1<-data1 %>% drop_na(data1$Cohort)
# data1$dx_pre<-ifelse(data1$Cohort=="Control",1,0)
# data1$dx_post<-ifelse(data1$ml_subtype==0,1,0)
# 
# confusionMatrix(table(data1$dx_pre, data1$dx_post))


data1<-left_join(subtypes_postmortem,clinical_data, by='i_id')
data1$Subject <- ifelse(grepl("^[0-9]{4}-[0-9]{3}$", data1$Subject), 
                        paste0(substr(data1$Subject, 3, 4), "-", substr(data1$Subject, 6, 8)), 
                        data1$Subject)

# Mostrar el nuevo vector
print(data1$Subject)
##  [1] "ANW019"  NA        "ANW082"  "ANW122"  NA        "ANW232"  "ANW534" 
##  [8] "ANW560"  "ANW569"  NA        NA        NA        NA        NA       
## [15] "ANW777"  "ANW782"  "ANW797"  "ANW880"  "ANW898"  NA        "ANW945" 
## [22] "ANW989"  "ANW999"  "16-035"  "16-105"  "16-131"  "17-018"  "17-024" 
## [29] "17-110"  "ANW-078" "17-133"  "17-147"  "17-154"  "18-016"  "18-032" 
## [36] "18-097"  "18-109"  "18-111"  "18-126"  "19-013"  "19-020"  "19-025" 
## [43] "19-058"  "19-091"  "19-095"  "19-112"  "19-115"  "19-119"  "20-104" 
## [50] "21-006"  "21-066"  "22-036"  "22-049"  "22-063"  "22-078"  "22-081" 
## [57] "22-086"  NA        NA        NA
data<-left_join(data1,neuropath_4G8_AT8_NFL, by="Subject" )

table(data$Cohort.x, data$ml_subtype)
##          
##             0   1   2
##   AD      274  91  65
##   Control  91  65  39
data %>% filter(Region=="Hmed")%>%
  ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
  geom_jitter()+
  xlim(0,35)+
  geom_smooth(method="lm")+
  ggtitle("Hippocampus-ptau")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

data %>% filter(Region=="GPS")%>%
  ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
  geom_jitter()+
  xlim(0,35)+
  geom_smooth(method="lm")+
  ggtitle("Superior parietal gyrus-ptau")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).

library(ggseg)
library(readr)
contraste <- read_csv("contraste.csv")
## New names:
## Rows: 27 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): contrast, Region, stain dbl (6): ...1, estimate, SE, df, t.ratio, p.value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
knitr::kable(contraste)
…1 contrast Region estimate SE df t.ratio p.value stain
5 ml_subtype1 - ml_subtype3 Fusiform 45.982537 12.554691 98.34538 3.662578 0.0022538 pTau
6 ml_subtype2 - ml_subtype3 Fusiform 49.935292 13.568797 96.28489 3.680156 0.0021440 pTau
7 ml_subtype0 - ml_subtype1 GFM -24.148178 7.031639 82.41822 -3.434218 0.0050681 pTau
8 ml_subtype0 - ml_subtype2 GFM -26.947992 8.687179 82.41822 -3.102042 0.0137518 pTau
11 ml_subtype1 - ml_subtype3 GFM 38.436630 11.963579 82.41822 3.212804 0.0099469 pTau
12 ml_subtype2 - ml_subtype3 GFM 41.236444 13.005781 82.41822 3.170624 0.0112649 pTau
13 ml_subtype0 - ml_subtype1 GPS -22.869909 7.031639 82.41822 -3.252429 0.0088390 pTau
14 ml_subtype0 - ml_subtype2 GPS -32.332661 8.805493 86.67601 -3.671874 0.0023110 pTau
17 ml_subtype1 - ml_subtype3 GPS 39.364537 11.963579 82.41822 3.290365 0.0078856 pTau
18 ml_subtype2 - ml_subtype3 GPS 48.827289 13.085105 84.31006 3.731517 0.0019220 pTau
19 ml_subtype0 - ml_subtype1 GTM -22.305273 7.031639 82.41822 -3.172130 0.0112152 pTau
20 ml_subtype0 - ml_subtype2 GTM -29.303011 8.687179 82.41822 -3.373133 0.0061255 pTau
23 ml_subtype1 - ml_subtype3 GTM 41.886905 11.963579 82.41822 3.501202 0.0041051 pTau
24 ml_subtype2 - ml_subtype3 GTM 48.884643 13.005781 82.41822 3.758686 0.0017764 pTau
32 ml_subtype0 - ml_subtype2 OC -24.491860 8.687179 82.41822 -2.819311 0.0301074 pTau
41 ml_subtype1 - ml_subtype3 ParaHip 42.151949 12.529962 97.64571 3.364092 0.0059559 pTau
42 ml_subtype2 - ml_subtype3 ParaHip 42.533188 13.568797 96.28489 3.134632 0.0120295 pTau
47 ml_subtype1 - ml_subtype3 PCC 39.710577 11.963579 82.41822 3.319289 0.0072234 pTau
48 ml_subtype2 - ml_subtype3 PCC 40.541571 13.005781 82.41822 3.117196 0.0131628 pTau
49 ml_subtype0 - ml_subtype1 Precun -21.362250 7.031639 82.41822 -3.038019 0.0165130 pTau
50 ml_subtype0 - ml_subtype2 Precun -32.165973 8.687179 82.41822 -3.702695 0.0021390 pTau
53 ml_subtype1 - ml_subtype3 Precun 34.764165 11.963579 82.41822 2.905833 0.0238497 pTau
54 ml_subtype2 - ml_subtype3 Precun 45.567887 13.005781 82.41822 3.503664 0.0040732 pTau
1 ml_subtype0 - ml_subtype1 Fusiform -3.599330 1.307782 147.55607 -2.752239 0.0333480 NfL
171 ml_subtype1 - ml_subtype3 GPS 6.068956 2.119376 135.62871 2.863558 0.0247653 NfL
491 ml_subtype0 - ml_subtype1 Precun -4.089309 1.273218 135.62871 -3.211790 0.0088449 NfL
25 ml_subtype0 - ml_subtype1 Hip 4.047705 1.463237 120.20155 2.766267 0.0328230 Myelin