Here, I will try to analyze the meaning of the clusters found.
library(readr)
library(tidyverse)
library(caret)
library(emmeans)
train_subtypes <- read_csv("data_validation/train_subtypes.csv")
train_subtypes <- train_subtypes %>%
mutate(ml_subtype = ml_subtype+1) %>%
mutate(
poor_fit = ifelse(prob_ml_subtype < 0.5, 1, 0),
ml_subtype_redefined = ifelse(poor_fit == 1, "poor fit", as.character(ml_subtype))
)
train_subtypes<-train_subtypes %>% filter (Diagnosis!="HC")%>%
mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE,
labels = paste(seq(0, max(ml_stage)-5, by = 5),
seq(5, max(ml_stage), by = 5), sep = "-")))
train_subtypes %>%
ggplot(aes(x = factor(ml_subtype_redefined), y = ml_stage, fill = ml_subtype_redefined)) +
geom_violin(trim = FALSE, alpha = 0.5) +
#geom_boxplot(width = 0.2, outlier.shape = NA, alpha = 0.7) +
geom_dotplot(binaxis = "y", stackdir = "center",
dotsize = 0.5, alpha = 0.6) + # Ajusta binwidth para que haya un punto por cada 10 sujetos
labs(
title = "Ml_stage by subtype",
x = "subtype",
y = "stage"
) +
scale_fill_manual(values=pal)+
theme_minimal() +
theme(legend.position = "none")
# assymetry index
dataset_con_index <- train_subtypes %>%
filter(ml_subtype_redefined != "poor fit") %>%
# Calcular el índice Derecha-Izquierda para cada región
mutate(
# Amygdala
index_amygdala = 200 * (w_rh_amygdala_h - w_lh_amygdala_h) / (w_rh_amygdala_h + w_lh_amygdala_h),
# Entorhinal
index_entorhinal = 200 * (w_rh_entorhinal_medial_h - w_lh_entorhinal_medial_h) / (w_rh_entorhinal_medial_h + w_lh_entorhinal_medial_h),
# Frontal Lateral
index_frontal_lateral = 200 * (w_rh_frontal_lateral_h - w_lh_frontal_lateral_h) / (w_rh_frontal_lateral_h + w_lh_frontal_lateral_h),
# Frontal Medial
index_frontal_medial = 200 * (w_rh_frontal_medial_h - w_lh_frontal_medial_h) / (w_rh_frontal_medial_h + w_lh_frontal_medial_h),
# Hippocampus
index_hippocampus = 200 * (w_rh_hippocampus_h - w_lh_hippocampus_h) / (w_rh_hippocampus_h + w_lh_hippocampus_h),
# Occipital Lateral
index_occipital_lateral = 200 * (w_rh_occipital_lateral_h - w_lh_occipital_lateral_h) / (w_rh_occipital_lateral_h + w_lh_occipital_lateral_h),
# Occipital Medial
index_occipital_medial = 200 * (w_rh_occipital_medial_h - w_lh_occipital_medial_h) / (w_rh_occipital_medial_h + w_lh_occipital_medial_h),
# Parietal Lateral
index_parietal_lateral = 200 * (w_rh_parietal_lateral_h - w_lh_parietal_lateral_h) / (w_rh_parietal_lateral_h + w_lh_parietal_lateral_h),
# Parietal Medial
index_parietal_medial = 200 * (w_rh_parietal_medial_h - w_lh_parietal_medial_h) / (w_rh_parietal_medial_h + w_lh_parietal_medial_h),
# Temporal Lateral
index_temporal_lateral = 200 * (w_rh_temporal_lateral_h - w_lh_temporal_lateral_h) / (w_rh_temporal_lateral_h + w_lh_temporal_lateral_h)) %>%
# Seleccionar sólo las columnas necesarias
select(MRI_ID, index_amygdala, index_entorhinal, index_frontal_lateral, index_frontal_medial,
index_hippocampus, index_occipital_lateral, index_occipital_medial, index_parietal_lateral, index_parietal_medial,
index_temporal_lateral, ml_subtype_redefined, ml_stage)
# Crear una nueva columna que agrupe ml_stage en intervalos de 10
dataset_con_index <- dataset_con_index %>%
mutate(stage_group = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 10), include.lowest = TRUE, right = FALSE, labels = FALSE))
# Transformar el dataset a formato largo
dataset_largo <- dataset_con_index %>%
pivot_longer(cols = starts_with("index"), names_to = "variable", values_to = "value")
# Calcular las medianas por grupo, subtipo y variable
medianas <- dataset_largo %>%
group_by(stage_group, ml_subtype_redefined, variable) %>%
summarise(mediana = median(value, na.rm = TRUE), .groups = 'drop')
# Crear el boxplot con las líneas que conectan las medianas
ggplot(dataset_largo, aes(x = factor(stage_group), y = value, fill = ml_subtype_redefined)) +
geom_boxplot() +
facet_wrap(~ variable, scales = "free_y") +
geom_line(data = medianas, aes(x = factor(stage_group), y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined),
size = 1, linetype = "solid") +
labs(
title = "Rigth-Left index by subtype and stage",
x = "Stage Group (bins 10)",
y = "Rigth-Left index",
fill = "ML Subtype",
color = "ML Subtype"
) +
theme_minimal() +
scale_fill_manual(values = pal)+
scale_color_manual(values=pal)+
ylim(-200,200)+
theme(legend.position = "bottom")
There doesn’t seem to be much asymmetry… Question for Rik: Could we be wasting computing power with so many regions?
For this I only used the training subjects on their first MRI, in order to compare the first clinic visit (n=1724)
library(readxl)
clinical_data <- read_excel("647_Sustain_Calandri_clinicaldata_20230825.xlsx")
ids <- read_csv("dat_mri_export_random.csv") %>% select(MRI_ID, I_ID)
first_clinic <- train_subtypes %>% filter(order==1)
first_clinic<-left_join(first_clinic, ids, by="MRI_ID")
first_clinic<-left_join(first_clinic, clinical_data, by="I_ID")
first_clinic<-first_clinic %>%
mutate(apoe4_status = case_when(
APOE == "E4E4" ~ "homozygous E4", # E4E4 es homocigoto
APOE %in% c("E3E4", "E4E3", "E2E4", "E4E2") ~ "heterozygous E4", # Cualquier combinación con un E4 es heterocigoto
!grepl("E4", APOE) ~ "Negative", # Si no contiene E4, es negativo
is.na(APOE) ~ NA_character_, # Si es NA, se mantiene como NA
TRUE ~ "Other" # En caso de que haya algún valor no contemplado
))
library(NormPsy)
first_clinic$normMMSE<-normMMSE(first_clinic$V_MMSE)
first_clinic %>%
ggplot(aes(x=ml_subtype_redefined, y=M_age, fill=ml_subtype_redefined))+
geom_boxplot()+
scale_fill_manual(values=pal)+
theme_minimal()+
theme(legend.position = "none")+
ggtitle("Age distribution")
first_clinic %>%
filter(ml_subtype_redefined!="poor fit") %>%
ggplot(aes(color=ml_subtype_redefined, y=normMMSE, x=ml_stage))+
geom_point()+
geom_smooth(method = "lm")+
scale_color_manual(values=pal)+
theme_minimal()+
theme(legend.position = "bottom")+
ggtitle("normalized MMSE by subtype and stage")
first_clinic %>%
filter(ml_subtype_redefined!="poor fit") %>%
ggplot(aes(color=ml_subtype_redefined, y=V_IADL, x=ml_stage))+
geom_point()+
geom_smooth(method = "lm")+
scale_color_manual(values=pal)+
theme_minimal()+
theme(legend.position = "bottom")+
ggtitle("IADL by subtype and stage")
first_clinic <-first_clinic %>%
mutate(clinical_diagnosis=ifelse(diagnoseOms=="Possible AD" |diagnoseOms=="Probable AD", "AD", diagnoseOms))
first_clinic %>%
filter(ml_subtype_redefined != "poor fit" & ml_stage!=0) %>%
ggplot(aes(fill = clinical_diagnosis, x = ml_subtype_redefined)) +
geom_bar(position = "dodge", stat = "count") + # Corregido para usar position = "dodge"
scale_fill_manual(values = pal) +
theme_minimal() +
theme(legend.position = "bottom") +
ggtitle("Subtype by clinical diagnosis")
first_clinic %>%
filter(ml_subtype_redefined != "poor fit"& ml_stage!=0) %>%
group_by(ml_subtype_redefined, clinical_diagnosis) %>%
tally() %>% # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
group_by(ml_subtype_redefined) %>%
mutate(percentage = n / sum(n)) %>% # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = clinical_diagnosis)) +
geom_bar(stat = "identity", position = "stack") + # Usamos stat = "identity" porque ya tenemos los datos procesados
geom_text(
aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5), size = 3 # Centra las etiquetas en el medio de las barras apiladas
) +
scale_fill_manual(values = pal) +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
theme(legend.position = "bottom") +
ggtitle("Subtype by clinical diagnosis (%)")
library(scales)
# Crear variable stage_interval, agrupar en intervalos de 5 en 5 y remover NA
first_clinic %>%
filter(ml_subtype_redefined != "poor fit") %>%
mutate(stage_interval = cut(ml_stage, breaks = seq(0, max(ml_stage), by = 5), right = FALSE,
labels = paste(seq(0, max(ml_stage)-5, by = 5),
seq(5, max(ml_stage), by = 5), sep = "-"))) %>%
filter(!is.na(stage_interval)) %>% # Remover filas con NA en stage_interval
group_by(ml_subtype_redefined, stage_interval, clinical_diagnosis) %>%
tally() %>%
group_by(ml_subtype_redefined, stage_interval) %>%
mutate(percentage = n / sum(n)) %>% # Calcular el porcentaje por subtipo y stage_interval
ggplot(aes(x = stage_interval, y = percentage, fill = clinical_diagnosis)) +
geom_bar(stat = "identity", position = "stack") +
scale_fill_manual(values = pal) +
scale_y_continuous(labels = scales::percent) +
facet_wrap(~ml_subtype_redefined) + # Crear un gráfico por cada subtipo
theme_minimal() +
theme(legend.position = "bottom") +
ggtitle("Percentage of Clinical Diagnosis by Stage Interval and Subtype")
first_clinic %>%
filter(ml_subtype_redefined != "poor fit"& ml_stage!=0) %>%
group_by(ml_subtype_redefined, apoe4_status) %>%
tally() %>% # Contamos la cantidad por cada combinación de ml_subtype_redefined y diagnoseOms
group_by(ml_subtype_redefined) %>%
mutate(percentage = n / sum(n)) %>% # Calculamos el porcentaje dentro de cada grupo ml_subtype_redefined
ggplot(aes(x = ml_subtype_redefined, y = percentage, fill = apoe4_status)) +
geom_bar(stat = "identity", position = "stack") + # Usamos stat = "identity" porque ya tenemos los datos procesados
geom_text(
aes(label = scales::percent(percentage, accuracy = 0.1)),
position = position_stack(vjust = 0.5), size = 3 # Centra las etiquetas en el medio de las barras apiladas
) +
scale_fill_manual(values = pal) +
scale_y_continuous(labels = scales::percent) +
theme_minimal() +
theme(legend.position = "bottom") +
ggtitle("APOE4 status by subtype (%)")
first_clinic %>%
filter(ml_subtype_redefined != "poor fit") %>%
ggplot(aes(x=ml_subtype_redefined, y=L_PTAU, fill=ml_subtype_redefined))+
geom_boxplot()+
scale_fill_manual(values=pal)+
theme_minimal()+
theme(legend.position = "bottom")
library(dplyr)
library(ggplot2)
# Calcular las medianas por stage_interval y ml_subtype_redefined
medianas <- first_clinic %>%
filter(ml_subtype_redefined != "poor fit") %>%
group_by(stage_interval, ml_subtype_redefined) %>%
summarise(mediana = median(L_PTAU, na.rm = TRUE), .groups = 'drop')
# Crear el boxplot con la línea para las medianas
first_clinic %>%
filter(ml_subtype_redefined != "poor fit") %>%
ggplot(aes(x = stage_interval, y = L_PTAU, fill = ml_subtype_redefined)) +
geom_boxplot() +
geom_line(data = medianas, aes(x = stage_interval, y = mediana, group = ml_subtype_redefined, color = ml_subtype_redefined),
size = 1, linetype = "solid") +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
theme_minimal() +
theme(legend.position = "bottom") +
labs(
title = "pTAU by Stage & Subtype",
x = "Stage Interval",
y = "pTAU(CSF)",
fill = "ML Subtype",
color = "ML Subtype"
)
first_clinic %>%
filter(ml_subtype_redefined != "poor fit") %>%
ggplot(aes(x = L_AB42_corr, y = L_PTAU, color = ml_subtype_redefined)) +
geom_point() +
scale_fill_manual(values = pal) +
scale_color_manual(values = pal) +
theme_minimal() +
theme(legend.position = "bottom") +
labs(
title = "pTAU/AB42 by Subtype",
x = "amyloid (AB42_corr)",
y = "pTAU(CSF)",
fill = "ML Subtype",
color = "ML Subtype"
)
ask Rik?!
In this section, I used the additional scans of the subjects (n=1259). The trained model was applied, and classification into stage and subtype was obtained. First, I will evaluate the model’s ability to consistently classify subjects into the same subtype, even when presented with a scan taken before or after the one used for training.
subtypes_vivo <- read_csv("data_validation/subtypes_vivo.csv") %>% rename(I_ID=i_id)
subtypes_vivo$ml_subtype<-subtypes_vivo$ml_subtype+1
dat_mri_export_validation <- read_csv("dat_mri_export_validation.csv")
subtypes_vivo<-left_join(subtypes_vivo, dat_mri_export_validation, by="I_ID")
train_subtypes<-left_join(train_subtypes, ids, by="MRI_ID")
subtypes_vivo$origin<-"test"
train_subtypes$origin<-"train"
validation<-rbind(train_subtypes[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")], subtypes_vivo[,c("I_ID","ml_subtype","ml_stage", "prob_ml_subtype", "prob_ml_stage", "order", "origin")])
# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
filter(origin %in% c("train", "test")) %>%
select(I_ID, ml_subtype, origin)
# Agrupar los datos de train por I_ID y tomar la primera instancia de cada sujeto
train_data <- confusion_data %>%
filter(origin == "train") %>%
group_by(I_ID) %>%
slice(1) %>%
ungroup()
# Filtrar solo los datos de "test"
test_data <- confusion_data %>%
filter(origin == "test")
# Unir los datos de "train" y "test" en base al I_ID
confusion_data_long <- left_join(test_data, train_data, by = "I_ID", suffix = c("_test", "_train"))
# Crear una matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)
# Plotear la matriz de confusión usando ggplot
conf_matrix_df <- as.data.frame(conf_matrix) %>%
rename(Train = Var1, Test = Var2, Count = Freq)
ggplot(conf_matrix_df, aes(x = Train, y = Test, fill = Count)) +
geom_tile() +
geom_text(aes(label = Count), color = "white", size = 4) +
scale_fill_gradient(low = "grey", high = "navy") +
labs(
title = "Confusion Matrix: Train vs Test Global (ml_subtype)",
x = "Train ml_subtype",
y = "Test ml_subtype"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
(when validation MRI precedes train MRI)
# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
filter(origin %in% c("train", "test")) %>%
select(I_ID, ml_subtype, order, origin)
# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
filter(origin == "train") %>%
group_by(I_ID) %>%
slice(1) %>% # Selecciona la primera fila (puedes cambiar a otra lógica si es necesario)
ungroup() %>%
rename(ml_subtype_train = ml_subtype)
# Filtrar "test" y seleccionar la fila con el menor 'order_test' por sujeto
test_data <- confusion_data %>%
filter(origin == "test") %>%
group_by(I_ID) %>%
slice_min(order, n = 1) %>% # Toma la fila con el menor order
ungroup() %>%
rename(ml_subtype_test = ml_subtype)
# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
inner_join(test_data, by = "I_ID") # Asegura que cada sujeto tenga ambas columnas
# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)
# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")
# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), vjust = 1) +
scale_fill_gradient(low = "grey", high = "navy") +
labs(
title = "ML Subtype in Train and Test (previous) set ",
x = "ML Subtype Train",
y = "ML Subtype Test",
fill = "Count"
) +
theme_minimal()
# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
as.factor(confusion_data_long$ml_subtype_train))
# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 498 74 22
## 2 68 152 37
## 3 29 34 136
##
## Overall Statistics
##
## Accuracy : 0.7486
## 95% CI : (0.7212, 0.7746)
## No Information Rate : 0.5667
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5692
##
## Mcnemar's Test P-Value : 0.7194
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.8370 0.5846 0.6974
## Specificity 0.7890 0.8671 0.9263
## Pos Pred Value 0.8384 0.5914 0.6834
## Neg Pred Value 0.7873 0.8638 0.9307
## Prevalence 0.5667 0.2476 0.1857
## Detection Rate 0.4743 0.1448 0.1295
## Detection Prevalence 0.5657 0.2448 0.1895
## Balanced Accuracy 0.8130 0.7259 0.8119
(when validation MRI follows train MRI)
library(dplyr)
library(caret)
library(ggplot2)
# Filtrar los datos de "train" y "test"
confusion_data <- validation %>%
filter(origin %in% c("train", "test")) %>%
select(I_ID, ml_subtype, order, origin)
# Seleccionar solo una instancia por sujeto en "train"
train_data <- confusion_data %>%
filter(origin == "train") %>%
group_by(I_ID) %>%
slice(1) %>% # Selecciona la primera fila de train por sujeto
ungroup() %>%
rename(ml_subtype_train = ml_subtype, order_train = order)
# Filtrar "test" y quedarnos solo con las filas donde order_test > order_train
test_data <- confusion_data %>%
filter(origin == "test") %>%
left_join(train_data %>% select(I_ID, order_train), by = "I_ID") %>% # Agregar order_train
filter(order > order_train) %>% # Conservar solo test con order_test > order_train
group_by(I_ID) %>%
slice_min(order, n = 1) %>% # Seleccionar la fila de test con el menor order_test mayor a train
ungroup() %>%
rename(ml_subtype_test = ml_subtype, order_test = order)
# Unir las filas de test con su correspondiente train
confusion_data_long <- train_data %>%
inner_join(test_data, by = "I_ID") # Asegura que cada sujeto tenga ambas columnas
# Crear la matriz de confusión
conf_matrix <- table(confusion_data_long$ml_subtype_train, confusion_data_long$ml_subtype_test)
# Convertir la matriz en un data frame para ggplot
conf_matrix_df <- as.data.frame(conf_matrix)
colnames(conf_matrix_df) <- c("ml_subtype_train", "ml_subtype_test", "Freq")
# Crear el plot
ggplot(data = conf_matrix_df, aes(x = ml_subtype_train, y = ml_subtype_test, fill = Freq)) +
geom_tile() +
geom_text(aes(label = Freq), vjust = 1) +
scale_fill_gradient(low = "grey", high = "blue") +
labs(
title = "ML Subtype in Train and Test (later) set",
x = "ML Subtype Train",
y = "ML Subtype Test",
fill = "Count"
) +
theme_minimal()
# Obtener estadísticas de la matriz de confusión
conf_matrix_stats <- confusionMatrix(as.factor(confusion_data_long$ml_subtype_test),
as.factor(confusion_data_long$ml_subtype_train))
# Imprimir las estadísticas de la matriz de confusión
print(conf_matrix_stats)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 1 2 3
## 1 364 59 13
## 2 47 99 28
## 3 21 23 110
##
## Overall Statistics
##
## Accuracy : 0.75
## 95% CI : (0.7177, 0.7803)
## No Information Rate : 0.5654
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5716
##
## Mcnemar's Test P-Value : 0.292
##
## Statistics by Class:
##
## Class: 1 Class: 2 Class: 3
## Sensitivity 0.8426 0.5470 0.7285
## Specificity 0.7831 0.8714 0.9282
## Pos Pred Value 0.8349 0.5690 0.7143
## Neg Pred Value 0.7927 0.8610 0.9328
## Prevalence 0.5654 0.2369 0.1976
## Detection Rate 0.4764 0.1296 0.1440
## Detection Prevalence 0.5707 0.2277 0.2016
## Balanced Accuracy 0.8129 0.7092 0.8283
I had to define what the appropriate classification was here. I defined it as follows: if the subject’s validation MRI was later than the one used for training, the stage of sustain should be higher in the validation than in the training, and vice versa for earlier MRIs.
# Filtrar datos solo de "train" y "test"
confusion_data <- validation %>%
filter(origin %in% c("train", "test")) %>%
select(I_ID, ml_stage, order, origin)
# Seleccionar una única fila por sujeto en "train"
train_data <- confusion_data %>%
filter(origin == "train") %>%
group_by(I_ID) %>%
slice(1) %>% # Seleccionamos la primera observación de train
ungroup() %>%
rename(ml_stage_train = ml_stage, order_train = order)
# Filtrar "test" y emparejar con train por sujeto
test_data <- confusion_data %>%
filter(origin == "test") %>%
left_join(train_data %>% select(I_ID, ml_stage_train, order_train), by = "I_ID") %>% # Unimos la info de train
group_by(I_ID) %>%
slice_min(order, n = 1) %>% # Seleccionar la primera instancia de test disponible
ungroup() %>%
rename(ml_stage_test = ml_stage, order_test = order)
test_data <- test_data %>%
mutate(
ml_stage_expected = case_when(
ml_stage_test > ml_stage_train ~ order_test > order_train,
ml_stage_test < ml_stage_train ~ order_test < order_train,
TRUE ~ NA
),
validation_time = case_when(
order_test < order_train ~ "before",
order_test > order_train ~ "after",
TRUE ~ NA_character_
)
)
# Calcular la proporción de veces que la relación esperada se cumple
accuracy <- mean(test_data$ml_stage_expected, na.rm = TRUE)
test_data %>%
filter(!is.na(ml_stage_expected)) %>% # Eliminar NA
group_by(validation_time, ml_stage_expected) %>%
summarise(count = n()) %>%
mutate(percentage = count / sum(count) * 100) %>% # Calcular los porcentajes
ggplot(aes(x = validation_time, y = percentage, fill = factor(ml_stage_expected))) +
geom_bar(stat = "identity", position = "stack") + # Barras apiladas
scale_fill_manual(values = c("TRUE" = "skyblue", "FALSE" = "salmon")) +
labs(
title = "Percentage of correct staging clasification by Time of the validation MRI",
x = "Validation Time",
y = "Percentage",
fill = "ml_stage_expected"
) +
theme_minimal() +
theme(legend.position = "top")
ggplot(test_data, aes(x = ml_stage_train, y = ml_stage_test, color = ml_stage_expected)) +
geom_point(alpha = 0.7, size = 3) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "black") + # Línea de identidad
scale_color_manual(values = c("red", "blue"), labels = c("Incorrecto", "Correcto")) +
labs(
title = "Validación de ml_stage como clasificador",
x = "ml_stage en Train",
y = "ml_stage en Test",
color = "Cumple relación esperada"
) +
theme_minimal()
# Estadísticas de la relación esperada
cat("Proportion of subjects with correct stage direction:", round(accuracy * 100, 2), "%\n")
## Proportion of subjects with correct stage direction: 63.38 %
# Crear una tabla resumen
table_result <- table(test_data$ml_stage_expected, test_data$validation_time)
# Mostrar tabla de resultados
print(table_result)
##
## after before
## FALSE 70 246
## TRUE 175 372
#3 Post mortem
neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv")
## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
subtypes_postmortem$i_id <- gsub("PAGE-", "", subtypes_postmortem$i_id)
summary(as.factor(subtypes_postmortem$ml_subtype))
## 0 1 2
## 38 12 10
# subtypes_postmortem <- subtypes_postmortem %>%
# mutate(
# subtype = case_when(
# ml_subtype == 0 ~ "subtype1",
# ml_subtype == 1 ~ "subtype2",
# ml_subtype %in% c(2, 3) ~ "subtype3"
# )
# )
Patho_data_combined_Ch2_Shareable <- read_excel("Patho_data_combined_Ch2_Shareable.xlsx") %>%
rename(i_id=SubjectANW)
## New names:
## • `` -> `...1`
data<-left_join(Patho_data_combined_Ch2_Shareable, subtypes_postmortem, by="i_id") %>% drop_na(ml_subtype)
data$dx_pre<-ifelse(data$Cohort=="Control",1,0)
data$dx_post<-ifelse(data$ml_subtype==0,1,0)
table(data$dx_pre, data$dx_post)
##
## 0 1
## 0 108 215
## 1 71 62
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(lme4)
modelo_completo <- lmer(Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id), data = data)
modelo_sin_ml_stage <- lmer(Amyloid ~ ml_subtype + Region + (1 | i_id), data = data)
anova(modelo_sin_ml_stage, modelo_completo)
## refitting model(s) with ML (instead of REML)
## Data: data
## Models:
## modelo_sin_ml_stage: Amyloid ~ ml_subtype + Region + (1 | i_id)
## modelo_completo: Amyloid ~ ml_subtype + ml_stage + Region + (1 | i_id)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## modelo_sin_ml_stage 12 1682.5 1731.9 -829.27 1658.5
## modelo_completo 13 1678.3 1731.8 -826.14 1652.3 6.2456 1 0.01245 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modelo<-lmer(Amyloid ~ as.factor(ml_subtype)* Region+ml_stage + (1 | i_id), data=data)
summary(modelo) # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Amyloid ~ as.factor(ml_subtype) * Region + ml_stage + (1 | i_id)
## Data: data
##
## REML criterion at convergence: 1649.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3547 -0.5060 -0.0713 0.4301 4.8998
##
## Random effects:
## Groups Name Variance Std.Dev.
## i_id (Intercept) 3.903 1.976
## Residual 1.671 1.293
## Number of obs: 452, groups: i_id, 51
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 2.30668 0.50463 74.75879 4.571
## as.factor(ml_subtype)1 0.01485 0.83251 88.49932 0.018
## as.factor(ml_subtype)2 -0.82086 0.97347 91.05426 -0.843
## RegionGFM 0.72694 0.33804 377.22019 2.150
## RegionGPS 0.28780 0.33804 377.22019 0.851
## RegionGTM -0.01857 0.33804 377.22019 -0.055
## RegionHip -1.58946 0.33804 377.22019 -4.702
## RegionOC -0.87791 0.33804 377.22019 -2.597
## RegionParaHip -0.58756 0.34033 377.13008 -1.726
## RegionPCC 0.25192 0.33804 377.22019 0.745
## RegionPrecun 0.42201 0.33804 377.22019 1.248
## ml_stage 0.03201 0.01326 47.03555 2.414
## as.factor(ml_subtype)1:RegionGFM 0.15891 0.63787 377.17321 0.249
## as.factor(ml_subtype)2:RegionGFM 0.15999 0.75188 377.25506 0.213
## as.factor(ml_subtype)1:RegionGPS -0.29743 0.63787 377.17321 -0.466
## as.factor(ml_subtype)2:RegionGPS -0.27821 0.75188 377.25506 -0.370
## as.factor(ml_subtype)1:RegionGTM 0.31331 0.63787 377.17321 0.491
## as.factor(ml_subtype)2:RegionGTM 0.10751 0.75188 377.25506 0.143
## as.factor(ml_subtype)1:RegionHip -0.58092 0.63787 377.17321 -0.911
## as.factor(ml_subtype)2:RegionHip 0.71344 0.75188 377.25506 0.949
## as.factor(ml_subtype)1:RegionOC 0.08202 0.63787 377.17321 0.129
## as.factor(ml_subtype)2:RegionOC -0.29473 0.75188 377.25506 -0.392
## as.factor(ml_subtype)1:RegionParaHip 0.23002 0.63909 377.15443 0.360
## as.factor(ml_subtype)2:RegionParaHip 0.38868 0.75291 377.24333 0.516
## as.factor(ml_subtype)1:RegionPCC -0.06203 0.63787 377.17321 -0.097
## as.factor(ml_subtype)2:RegionPCC -0.07527 0.75188 377.25506 -0.100
## as.factor(ml_subtype)1:RegionPrecun -0.21929 0.63787 377.17321 -0.344
## as.factor(ml_subtype)2:RegionPrecun -0.68450 0.77449 377.56992 -0.884
## Pr(>|t|)
## (Intercept) 1.88e-05 ***
## as.factor(ml_subtype)1 0.98581
## as.factor(ml_subtype)2 0.40131
## RegionGFM 0.03216 *
## RegionGPS 0.39510
## RegionGTM 0.95622
## RegionHip 3.62e-06 ***
## RegionOC 0.00977 **
## RegionParaHip 0.08509 .
## RegionPCC 0.45659
## RegionPrecun 0.21266
## ml_stage 0.01973 *
## as.factor(ml_subtype)1:RegionGFM 0.80340
## as.factor(ml_subtype)2:RegionGFM 0.83161
## as.factor(ml_subtype)1:RegionGPS 0.64129
## as.factor(ml_subtype)2:RegionGPS 0.71158
## as.factor(ml_subtype)1:RegionGTM 0.62359
## as.factor(ml_subtype)2:RegionGTM 0.88638
## as.factor(ml_subtype)1:RegionHip 0.36302
## as.factor(ml_subtype)2:RegionHip 0.34329
## as.factor(ml_subtype)1:RegionOC 0.89775
## as.factor(ml_subtype)2:RegionOC 0.69529
## as.factor(ml_subtype)1:RegionParaHip 0.71911
## as.factor(ml_subtype)2:RegionParaHip 0.60600
## as.factor(ml_subtype)1:RegionPCC 0.92258
## as.factor(ml_subtype)2:RegionPCC 0.92031
## as.factor(ml_subtype)1:RegionPrecun 0.73120
## as.factor(ml_subtype)2:RegionPrecun 0.37736
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 28 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## as.factor(ml_subtype) 1.755 0.8776 2 47.01 0.5253 0.59480
## Region 147.755 18.4693 8 377.06 11.0552 4.732e-14 ***
## ml_stage 9.734 9.7341 1 47.04 5.8265 0.01973 *
## as.factor(ml_subtype):Region 14.771 0.9232 16 377.05 0.5526 0.91745
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.099 0.437 90.2 2.231 3.97
## 1 3.113 0.699 89.5 1.724 4.50
## 2 2.278 0.862 92.5 0.567 3.99
##
## Region = GFM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.826 0.429 84.5 2.972 4.68
## 1 3.999 0.689 84.6 2.630 5.37
## 2 3.165 0.842 84.8 1.491 4.84
##
## Region = GPS:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.386 0.429 84.5 2.533 4.24
## 1 3.104 0.689 84.6 1.734 4.47
## 2 2.287 0.842 84.8 0.614 3.96
##
## Region = GTM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.080 0.429 84.5 2.227 3.93
## 1 3.408 0.689 84.6 2.039 4.78
## 2 2.367 0.842 84.8 0.693 4.04
##
## Region = Hip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 1.509 0.429 84.5 0.656 2.36
## 1 0.943 0.689 84.6 -0.426 2.31
## 2 1.402 0.842 84.8 -0.272 3.08
##
## Region = OC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 2.221 0.429 84.5 1.368 3.07
## 1 2.318 0.689 84.6 0.948 3.69
## 2 1.105 0.842 84.8 -0.568 2.78
##
## Region = ParaHip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 2.511 0.432 86.3 1.653 3.37
## 1 2.756 0.689 84.6 1.386 4.13
## 2 2.079 0.842 84.8 0.406 3.75
##
## Region = PCC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.351 0.429 84.5 2.497 4.20
## 1 3.303 0.689 84.6 1.934 4.67
## 2 2.454 0.842 84.8 0.781 4.13
##
## Region = Precun:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 3.521 0.429 84.5 2.668 4.37
## 1 3.316 0.689 84.6 1.947 4.69
## 2 2.015 0.861 92.5 0.305 3.73
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## Region = Fusiform:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.0149 0.833 88.6 -0.018 0.9998
## ml_subtype0 - ml_subtype2 0.8209 0.973 91.1 0.843 0.6772
## ml_subtype1 - ml_subtype2 0.8357 1.099 92.4 0.760 0.7283
##
## Region = GFM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.1738 0.819 83.5 -0.212 0.9755
## ml_subtype0 - ml_subtype2 0.6609 0.952 83.9 0.694 0.7676
## ml_subtype1 - ml_subtype2 0.8346 1.078 85.8 0.775 0.7196
##
## Region = GPS:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.2826 0.819 83.5 0.345 0.9366
## ml_subtype0 - ml_subtype2 1.0991 0.952 83.9 1.154 0.4837
## ml_subtype1 - ml_subtype2 0.8165 1.078 85.8 0.758 0.7298
##
## Region = GTM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.3282 0.819 83.5 -0.400 0.9155
## ml_subtype0 - ml_subtype2 0.7134 0.952 83.9 0.749 0.7349
## ml_subtype1 - ml_subtype2 1.0415 1.078 85.8 0.967 0.5999
##
## Region = Hip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.5661 0.819 83.5 0.691 0.7695
## ml_subtype0 - ml_subtype2 0.1074 0.952 83.9 0.113 0.9930
## ml_subtype1 - ml_subtype2 -0.4587 1.078 85.8 -0.426 0.9051
##
## Region = OC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.0969 0.819 83.5 -0.118 0.9923
## ml_subtype0 - ml_subtype2 1.1156 0.952 83.9 1.172 0.4733
## ml_subtype1 - ml_subtype2 1.2125 1.078 85.8 1.125 0.5013
##
## Region = ParaHip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.2449 0.821 84.0 -0.298 0.9522
## ml_subtype0 - ml_subtype2 0.4322 0.953 84.3 0.453 0.8931
## ml_subtype1 - ml_subtype2 0.6771 1.078 85.8 0.628 0.8050
##
## Region = PCC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.0472 0.819 83.5 0.058 0.9982
## ml_subtype0 - ml_subtype2 0.8961 0.952 83.9 0.941 0.6159
## ml_subtype1 - ml_subtype2 0.8489 1.078 85.8 0.788 0.7115
##
## Region = Precun:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.2044 0.819 83.5 0.249 0.9663
## ml_subtype0 - ml_subtype2 1.5054 0.970 89.8 1.552 0.2717
## ml_subtype1 - ml_subtype2 1.3009 1.093 90.4 1.190 0.4620
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
modelo<-lmer(pTau ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo) # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: pTau ~ as.factor(ml_subtype) * Region + (1 | i_id)
## Data: data
##
## REML criterion at convergence: 3429.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.98022 -0.58679 -0.08051 0.52627 2.99100
##
## Random effects:
## Groups Name Variance Std.Dev.
## i_id (Intercept) 485.6 22.04
## Residual 136.0 11.66
## Number of obs: 439, groups: i_id, 51
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 41.348 4.610 81.554 8.969
## as.factor(ml_subtype)1 -14.498 8.696 80.465 -1.667
## as.factor(ml_subtype)2 -1.586 10.089 78.842 -0.157
## RegionGFM -15.559 3.159 364.360 -4.926
## RegionGPS -12.596 3.159 364.360 -3.988
## RegionGTM -10.463 3.159 364.360 -3.312
## RegionHip -8.687 3.199 364.191 -2.716
## RegionOC -33.478 3.159 364.360 -10.598
## RegionParaHip -2.189 3.271 363.978 -0.669
## RegionPCC -10.738 3.159 364.360 -3.399
## RegionPrecun -18.947 3.159 364.360 -5.998
## as.factor(ml_subtype)1:RegionGFM 13.438 5.934 364.267 2.265
## as.factor(ml_subtype)2:RegionGFM 8.059 6.841 364.291 1.178
## as.factor(ml_subtype)1:RegionGPS 11.218 5.934 364.267 1.891
## as.factor(ml_subtype)2:RegionGPS 7.709 7.047 364.515 1.094
## as.factor(ml_subtype)1:RegionGTM 14.718 5.934 364.267 2.480
## as.factor(ml_subtype)2:RegionGTM 8.236 6.841 364.291 1.204
## as.factor(ml_subtype)1:RegionHip 3.426 5.955 364.219 0.575
## as.factor(ml_subtype)2:RegionHip -10.392 7.007 363.975 -1.483
## as.factor(ml_subtype)1:RegionOC 25.585 5.934 364.267 4.312
## as.factor(ml_subtype)2:RegionOC 14.670 6.841 364.291 2.144
## as.factor(ml_subtype)1:RegionParaHip 4.089 6.068 364.038 0.674
## as.factor(ml_subtype)2:RegionParaHip -4.702 7.040 363.931 -0.668
## as.factor(ml_subtype)1:RegionPCC 10.511 5.934 364.267 1.771
## as.factor(ml_subtype)2:RegionPCC 3.757 6.841 364.291 0.549
## as.factor(ml_subtype)1:RegionPrecun 21.645 5.934 364.267 3.648
## as.factor(ml_subtype)2:RegionPrecun 11.957 6.841 364.291 1.748
## Pr(>|t|)
## (Intercept) 8.68e-14 ***
## as.factor(ml_subtype)1 0.099364 .
## as.factor(ml_subtype)2 0.875459
## RegionGFM 1.28e-06 ***
## RegionGPS 8.07e-05 ***
## RegionGTM 0.001017 **
## RegionHip 0.006930 **
## RegionOC < 2e-16 ***
## RegionParaHip 0.503708
## RegionPCC 0.000750 ***
## RegionPrecun 4.81e-09 ***
## as.factor(ml_subtype)1:RegionGFM 0.024123 *
## as.factor(ml_subtype)2:RegionGFM 0.239576
## as.factor(ml_subtype)1:RegionGPS 0.059483 .
## as.factor(ml_subtype)2:RegionGPS 0.274703
## as.factor(ml_subtype)1:RegionGTM 0.013576 *
## as.factor(ml_subtype)2:RegionGTM 0.229409
## as.factor(ml_subtype)1:RegionHip 0.565443
## as.factor(ml_subtype)2:RegionHip 0.138933
## as.factor(ml_subtype)1:RegionOC 2.09e-05 ***
## as.factor(ml_subtype)2:RegionOC 0.032676 *
## as.factor(ml_subtype)1:RegionParaHip 0.500847
## as.factor(ml_subtype)2:RegionParaHip 0.504642
## as.factor(ml_subtype)1:RegionPCC 0.077332 .
## as.factor(ml_subtype)2:RegionPCC 0.583208
## as.factor(ml_subtype)1:RegionPrecun 0.000303 ***
## as.factor(ml_subtype)2:RegionPrecun 0.081351 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## as.factor(ml_subtype) 42.8 21.41 2 47.93 0.1574 0.8548351
## Region 9902.5 1237.81 8 364.09 9.0988 2.033e-11 ***
## as.factor(ml_subtype):Region 6344.0 396.50 16 364.08 2.9146 0.0001537 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 41.35 4.61 81.7 32.18 50.5
## 1 26.85 7.37 80.2 12.18 41.5
## 2 39.76 8.97 78.3 21.90 57.6
##
## Region = GFM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 25.79 4.48 73.2 16.86 34.7
## 1 24.73 7.20 73.2 10.38 39.1
## 2 32.26 8.82 73.2 14.69 49.8
##
## Region = GPS:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 28.75 4.48 73.2 19.83 37.7
## 1 25.47 7.20 73.2 11.13 39.8
## 2 34.87 8.97 78.1 17.02 52.7
##
## Region = GTM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 30.88 4.48 73.2 21.96 39.8
## 1 31.10 7.20 73.2 16.76 45.4
## 2 37.53 8.82 73.2 19.97 55.1
##
## Region = Hip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 32.66 4.52 75.6 23.66 41.7
## 1 21.59 7.20 73.2 7.25 35.9
## 2 20.68 8.97 78.3 2.82 38.5
##
## Region = OC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 7.87 4.48 73.2 -1.05 16.8
## 1 18.96 7.20 73.2 4.61 33.3
## 2 20.95 8.82 73.2 3.39 38.5
##
## Region = ParaHip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 39.16 4.58 80.0 30.03 48.3
## 1 28.75 7.28 76.3 14.25 43.2
## 2 32.87 8.97 78.3 15.01 50.7
##
## Region = PCC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 30.61 4.48 73.2 21.69 39.5
## 1 26.62 7.20 73.2 12.28 41.0
## 2 32.78 8.82 73.2 15.21 50.3
##
## Region = Precun:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 22.40 4.48 73.2 13.48 31.3
## 1 29.55 7.20 73.2 15.20 43.9
## 2 32.77 8.82 73.2 15.20 50.3
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## Region = Fusiform:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 14.498 8.70 80.6 1.667 0.2241
## ml_subtype0 - ml_subtype2 1.586 10.09 79.0 0.157 0.9865
## ml_subtype1 - ml_subtype2 -12.911 11.61 79.0 -1.112 0.5098
##
## Region = GFM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 1.060 8.48 73.2 0.125 0.9914
## ml_subtype0 - ml_subtype2 -6.473 9.89 73.2 -0.655 0.7903
## ml_subtype1 - ml_subtype2 -7.533 11.38 73.2 -0.662 0.7862
##
## Region = GPS:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 3.280 8.48 73.2 0.387 0.9209
## ml_subtype0 - ml_subtype2 -6.122 10.02 77.1 -0.611 0.8147
## ml_subtype1 - ml_subtype2 -9.402 11.50 76.1 -0.818 0.6933
##
## Region = GTM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.220 8.48 73.2 -0.026 0.9996
## ml_subtype0 - ml_subtype2 -6.650 9.89 73.2 -0.673 0.7801
## ml_subtype1 - ml_subtype2 -6.430 11.38 73.2 -0.565 0.8391
##
## Region = Hip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 11.072 8.50 73.8 1.303 0.3981
## ml_subtype0 - ml_subtype2 11.978 10.05 77.7 1.192 0.4613
## ml_subtype1 - ml_subtype2 0.906 11.50 76.2 0.079 0.9966
##
## Region = OC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -11.087 8.48 73.2 -1.308 0.3954
## ml_subtype0 - ml_subtype2 -13.083 9.89 73.2 -1.323 0.3870
## ml_subtype1 - ml_subtype2 -1.996 11.38 73.2 -0.175 0.9832
##
## Region = ParaHip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 10.409 8.60 77.3 1.210 0.4509
## ml_subtype0 - ml_subtype2 6.288 10.08 78.6 0.624 0.8074
## ml_subtype1 - ml_subtype2 -4.121 11.55 77.5 -0.357 0.9324
##
## Region = PCC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 3.987 8.48 73.2 0.470 0.8854
## ml_subtype0 - ml_subtype2 -2.171 9.89 73.2 -0.220 0.9738
## ml_subtype1 - ml_subtype2 -6.158 11.38 73.2 -0.541 0.8513
##
## Region = Precun:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -7.147 8.48 73.2 -0.843 0.6776
## ml_subtype0 - ml_subtype2 -10.371 9.89 73.2 -1.049 0.5487
## ml_subtype1 - ml_subtype2 -3.224 11.38 73.2 -0.283 0.9568
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
# Realizar el análisis de los contrastes
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes <- as.data.frame(resultados$contrasts)
# Filtrar los contrastes significativos (p < 0.05)
contrasttau<-contrastes
contrasttau$stain<-"pTau"
modelo<-lmer(NfL ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo) # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: NfL ~ as.factor(ml_subtype) * Region + (1 | i_id)
## Data: data
##
## REML criterion at convergence: 2104.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6833 -0.6146 0.0399 0.6263 2.4318
##
## Random effects:
## Groups Name Variance Std.Dev.
## i_id (Intercept) 8.971 2.995
## Residual 7.448 2.729
## Number of obs: 424, groups: i_id, 48
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 9.0297 0.7729 122.9667 11.683
## as.factor(ml_subtype)1 1.1617 1.4505 134.6848 0.801
## as.factor(ml_subtype)2 0.2591 1.8095 167.9034 0.143
## RegionGFM 1.0690 0.7369 352.2398 1.451
## RegionGPS 2.3087 0.7369 352.2398 3.133
## RegionGTM 0.9855 0.7369 352.2398 1.337
## RegionHip 1.9798 0.7369 352.2398 2.687
## RegionOC 2.2398 0.7369 352.2398 3.040
## RegionParaHip -0.1888 0.7369 352.2398 -0.256
## RegionPCC 1.7966 0.7369 352.2398 2.438
## RegionPrecun 2.6092 0.7369 352.2398 3.541
## as.factor(ml_subtype)1:RegionGFM -2.0040 1.3866 352.6609 -1.445
## as.factor(ml_subtype)2:RegionGFM -2.1051 1.7406 353.7534 -1.209
## as.factor(ml_subtype)1:RegionGPS -1.5797 1.3866 352.6609 -1.139
## as.factor(ml_subtype)2:RegionGPS -2.2250 1.7406 353.7534 -1.278
## as.factor(ml_subtype)1:RegionGTM -1.4176 1.3866 352.6609 -1.022
## as.factor(ml_subtype)2:RegionGTM -1.4253 1.7406 353.7534 -0.819
## as.factor(ml_subtype)1:RegionHip -0.7280 1.3866 352.6609 -0.525
## as.factor(ml_subtype)2:RegionHip -0.5372 1.7406 353.7534 -0.309
## as.factor(ml_subtype)1:RegionOC -1.8297 1.3866 352.6609 -1.320
## as.factor(ml_subtype)2:RegionOC -1.4218 1.7406 353.7534 -0.817
## as.factor(ml_subtype)1:RegionParaHip -1.8677 1.3866 352.6609 -1.347
## as.factor(ml_subtype)2:RegionParaHip -0.8132 1.7735 353.2396 -0.459
## as.factor(ml_subtype)1:RegionPCC -1.1741 1.3866 352.6609 -0.847
## as.factor(ml_subtype)2:RegionPCC -0.7223 1.7885 354.5384 -0.404
## as.factor(ml_subtype)1:RegionPrecun 0.3765 1.3866 352.6609 0.272
## as.factor(ml_subtype)2:RegionPrecun -0.1201 1.7406 353.7534 -0.069
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.factor(ml_subtype)1 0.424597
## as.factor(ml_subtype)2 0.886309
## RegionGFM 0.147761
## RegionGPS 0.001875 **
## RegionGTM 0.181960
## RegionHip 0.007557 **
## RegionOC 0.002546 **
## RegionParaHip 0.797973
## RegionPCC 0.015257 *
## RegionPrecun 0.000452 ***
## as.factor(ml_subtype)1:RegionGFM 0.149266
## as.factor(ml_subtype)2:RegionGFM 0.227302
## as.factor(ml_subtype)1:RegionGPS 0.255358
## as.factor(ml_subtype)2:RegionGPS 0.201976
## as.factor(ml_subtype)1:RegionGTM 0.307317
## as.factor(ml_subtype)2:RegionGTM 0.413407
## as.factor(ml_subtype)1:RegionHip 0.599896
## as.factor(ml_subtype)2:RegionHip 0.757776
## as.factor(ml_subtype)1:RegionOC 0.187823
## as.factor(ml_subtype)2:RegionOC 0.414546
## as.factor(ml_subtype)1:RegionParaHip 0.178843
## as.factor(ml_subtype)2:RegionParaHip 0.646854
## as.factor(ml_subtype)1:RegionPCC 0.397718
## as.factor(ml_subtype)2:RegionPCC 0.686555
## as.factor(ml_subtype)1:RegionPrecun 0.786160
## as.factor(ml_subtype)2:RegionPrecun 0.945014
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## as.factor(ml_subtype) 3.14 1.570 2 45.34 0.2107 0.8108
## Region 366.09 45.761 8 352.50 6.1438 1.996e-07 ***
## as.factor(ml_subtype):Region 65.98 4.124 16 352.42 0.5537 0.9166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 9.03 0.773 123 7.50 10.6
## 1 10.19 1.227 140 7.76 12.6
## 2 9.29 1.636 180 6.06 12.5
##
## Region = GFM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 10.10 0.766 119 8.58 11.6
## 1 9.26 1.170 119 6.94 11.6
## 2 8.25 1.433 119 5.42 11.1
##
## Region = GPS:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 11.34 0.766 119 9.82 12.9
## 1 10.92 1.170 119 8.60 13.2
## 2 9.37 1.433 119 6.54 12.2
##
## Region = GTM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 10.02 0.766 119 8.50 11.5
## 1 9.76 1.170 119 7.44 12.1
## 2 8.85 1.433 119 6.01 11.7
##
## Region = Hip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 11.01 0.766 119 9.49 12.5
## 1 11.44 1.170 119 9.13 13.8
## 2 10.73 1.433 119 7.89 13.6
##
## Region = OC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 11.27 0.766 119 9.75 12.8
## 1 10.60 1.170 119 8.29 12.9
## 2 10.11 1.433 119 7.27 12.9
##
## Region = ParaHip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 8.84 0.766 119 7.32 10.4
## 1 8.13 1.170 119 5.82 10.5
## 2 8.29 1.484 134 5.35 11.2
##
## Region = PCC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 10.83 0.766 119 9.31 12.3
## 1 10.81 1.170 119 8.50 13.1
## 2 10.36 1.484 134 7.43 13.3
##
## Region = Precun:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 11.64 0.766 119 10.12 13.2
## 1 13.18 1.170 119 10.86 15.5
## 2 11.78 1.433 119 8.94 14.6
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## Region = Fusiform:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -1.1617 1.45 134 -0.801 0.7031
## ml_subtype0 - ml_subtype2 -0.2591 1.81 168 -0.143 0.9888
## ml_subtype1 - ml_subtype2 0.9026 2.05 164 0.441 0.8983
##
## Region = GFM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.8423 1.40 119 0.602 0.8191
## ml_subtype0 - ml_subtype2 1.8460 1.62 119 1.136 0.4937
## ml_subtype1 - ml_subtype2 1.0037 1.85 119 0.543 0.8504
##
## Region = GPS:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.4180 1.40 119 0.299 0.9519
## ml_subtype0 - ml_subtype2 1.9659 1.62 119 1.210 0.4495
## ml_subtype1 - ml_subtype2 1.5479 1.85 119 0.837 0.6809
##
## Region = GTM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.2559 1.40 119 0.183 0.9817
## ml_subtype0 - ml_subtype2 1.1662 1.62 119 0.718 0.7534
## ml_subtype1 - ml_subtype2 0.9103 1.85 119 0.492 0.8752
##
## Region = Hip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.4337 1.40 119 -0.310 0.9484
## ml_subtype0 - ml_subtype2 0.2781 1.62 119 0.171 0.9840
## ml_subtype1 - ml_subtype2 0.7118 1.85 119 0.385 0.9217
##
## Region = OC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.6680 1.40 119 0.478 0.8819
## ml_subtype0 - ml_subtype2 1.1627 1.62 119 0.716 0.7547
## ml_subtype1 - ml_subtype2 0.4947 1.85 119 0.267 0.9613
##
## Region = ParaHip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.7060 1.40 119 0.505 0.8691
## ml_subtype0 - ml_subtype2 0.5541 1.67 130 0.332 0.9412
## ml_subtype1 - ml_subtype2 -0.1519 1.89 128 -0.080 0.9964
##
## Region = PCC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.0124 1.40 119 0.009 1.0000
## ml_subtype0 - ml_subtype2 0.4632 1.67 130 0.277 0.9585
## ml_subtype1 - ml_subtype2 0.4508 1.89 128 0.239 0.9691
##
## Region = Precun:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -1.5382 1.40 119 -1.100 0.5159
## ml_subtype0 - ml_subtype2 -0.1390 1.62 119 -0.086 0.9960
## ml_subtype1 - ml_subtype2 1.3992 1.85 119 0.757 0.7303
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_NfL <- as.data.frame(resultados$contrasts)
contrastes_NfL$stain<-"NfL"
modelo<-lmer(Myelin ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
summary(modelo) # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Myelin ~ as.factor(ml_subtype) * Region + (1 | i_id)
## Data: data
##
## REML criterion at convergence: 2028.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.1139 -0.4088 0.0530 0.5048 2.2711
##
## Random effects:
## Groups Name Variance Std.Dev.
## i_id (Intercept) 5.343 2.311
## Residual 18.276 4.275
## Number of obs: 360, groups: i_id, 50
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 26.3491 1.5306 332.9959 17.215
## as.factor(ml_subtype)1 3.6185 2.3472 326.0474 1.542
## as.factor(ml_subtype)2 3.9309 3.0887 330.3045 1.273
## RegionGFM 3.6327 1.6709 300.0860 2.174
## RegionGPS 5.8277 1.6721 298.3059 3.485
## RegionGTM 3.4172 1.6667 298.7404 2.050
## RegionHip 1.9984 1.6842 297.3685 1.187
## RegionOC 5.6778 1.6667 298.7691 3.407
## RegionParaHip 2.7591 1.7067 296.2327 1.617
## RegionPCC 5.6114 2.0189 299.7713 2.779
## RegionPrecun 8.0171 1.6762 299.5069 4.783
## as.factor(ml_subtype)1:RegionGFM -3.3803 2.7023 293.4649 -1.251
## as.factor(ml_subtype)2:RegionGFM -2.5849 3.5251 299.4442 -0.733
## as.factor(ml_subtype)1:RegionGPS -1.4576 2.6753 293.5912 -0.545
## as.factor(ml_subtype)2:RegionGPS -0.4631 3.4411 296.2714 -0.135
## as.factor(ml_subtype)1:RegionGTM -1.4173 2.6499 295.2407 -0.535
## as.factor(ml_subtype)2:RegionGTM -3.1001 3.4384 296.3724 -0.902
## as.factor(ml_subtype)1:RegionHip -2.5797 2.6829 293.2351 -0.962
## as.factor(ml_subtype)2:RegionHip -2.6075 3.5028 294.5604 -0.744
## as.factor(ml_subtype)1:RegionOC 0.5786 2.6499 295.2525 0.218
## as.factor(ml_subtype)2:RegionOC -3.2706 3.4948 295.0017 -0.936
## as.factor(ml_subtype)1:RegionParaHip -1.7978 2.6970 292.8102 -0.667
## as.factor(ml_subtype)2:RegionParaHip -4.1558 3.5136 294.3046 -1.183
## as.factor(ml_subtype)1:RegionPCC -1.3664 3.2768 300.4034 -0.417
## as.factor(ml_subtype)2:RegionPCC -5.1096 4.1310 301.6974 -1.237
## as.factor(ml_subtype)1:RegionPrecun -0.9095 2.6559 295.5724 -0.342
## as.factor(ml_subtype)2:RegionPrecun -0.8459 3.4430 296.5650 -0.246
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## as.factor(ml_subtype)1 0.124132
## as.factor(ml_subtype)2 0.204029
## RegionGFM 0.030478 *
## RegionGPS 0.000566 ***
## RegionGTM 0.041208 *
## RegionHip 0.236363
## RegionOC 0.000748 ***
## RegionParaHip 0.107013
## RegionPCC 0.005790 **
## RegionPrecun 2.71e-06 ***
## as.factor(ml_subtype)1:RegionGFM 0.211963
## as.factor(ml_subtype)2:RegionGFM 0.463947
## as.factor(ml_subtype)1:RegionGPS 0.586291
## as.factor(ml_subtype)2:RegionGPS 0.893028
## as.factor(ml_subtype)1:RegionGTM 0.593149
## as.factor(ml_subtype)2:RegionGTM 0.368003
## as.factor(ml_subtype)1:RegionHip 0.337074
## as.factor(ml_subtype)2:RegionHip 0.457219
## as.factor(ml_subtype)1:RegionOC 0.827318
## as.factor(ml_subtype)2:RegionOC 0.350123
## as.factor(ml_subtype)1:RegionParaHip 0.505569
## as.factor(ml_subtype)2:RegionParaHip 0.237862
## as.factor(ml_subtype)1:RegionPCC 0.676989
## as.factor(ml_subtype)2:RegionPCC 0.217090
## as.factor(ml_subtype)1:RegionPrecun 0.732258
## as.factor(ml_subtype)2:RegionPrecun 0.806092
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 27 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
anova(modelo)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## as.factor(ml_subtype) 105.39 52.695 2 48.041 2.8833 0.06568 .
## Region 1527.28 190.910 8 290.172 10.4459 7.2e-13 ***
## as.factor(ml_subtype):Region 171.22 10.701 16 290.104 0.5855 0.89414
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 26.3 1.533 333 23.3 29.4
## 1 30.0 1.781 313 26.5 33.5
## 2 30.3 2.686 328 25.0 35.6
##
## Region = GFM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 30.0 0.885 257 28.2 31.7
## 1 30.2 1.521 276 27.2 33.2
## 2 31.3 1.962 275 27.5 35.2
##
## Region = GPS:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 32.2 0.899 262 30.4 33.9
## 1 34.3 1.459 263 31.5 37.2
## 2 35.6 1.837 252 32.0 39.3
##
## Region = GTM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 29.8 0.885 257 28.0 31.5
## 1 32.0 1.403 252 29.2 34.7
## 2 30.6 1.837 252 27.0 34.2
##
## Region = Hip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 28.3 0.928 271 26.5 30.2
## 1 29.4 1.459 263 26.5 32.3
## 2 29.7 1.965 274 25.8 33.5
##
## Region = OC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 32.0 0.885 257 30.3 33.8
## 1 36.2 1.403 252 33.5 39.0
## 2 32.7 1.964 274 28.8 36.6
##
## Region = ParaHip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 29.1 0.977 286 27.2 31.0
## 1 30.9 1.459 263 28.1 33.8
## 2 28.9 1.965 274 25.0 32.8
##
## Region = PCC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 32.0 1.459 333 29.1 34.8
## 1 34.2 2.078 329 30.1 38.3
## 2 30.8 2.688 328 25.5 36.1
##
## Region = Precun:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 34.4 0.899 262 32.6 36.1
## 1 37.1 1.403 252 34.3 39.8
## 2 37.5 1.837 252 33.8 41.1
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## Region = Fusiform:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -3.619 2.35 326 -1.540 0.2737
## ml_subtype0 - ml_subtype2 -3.931 3.09 330 -1.271 0.4125
## ml_subtype1 - ml_subtype2 -0.312 3.22 325 -0.097 0.9948
##
## Region = GFM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.238 1.76 272 -0.135 0.9899
## ml_subtype0 - ml_subtype2 -1.346 2.15 272 -0.625 0.8064
## ml_subtype1 - ml_subtype2 -1.108 2.48 276 -0.446 0.8961
##
## Region = GPS:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -2.161 1.71 263 -1.261 0.4186
## ml_subtype0 - ml_subtype2 -3.468 2.04 254 -1.696 0.2088
## ml_subtype1 - ml_subtype2 -1.307 2.35 257 -0.557 0.8429
##
## Region = GTM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -2.201 1.66 254 -1.327 0.3816
## ml_subtype0 - ml_subtype2 -0.831 2.04 253 -0.407 0.9126
## ml_subtype1 - ml_subtype2 1.370 2.31 252 0.593 0.8241
##
## Region = Hip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -1.039 1.73 266 -0.601 0.8198
## ml_subtype0 - ml_subtype2 -1.323 2.17 274 -0.609 0.8153
## ml_subtype1 - ml_subtype2 -0.285 2.45 270 -0.116 0.9926
##
## Region = OC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -4.197 1.66 254 -2.530 0.0321
## ml_subtype0 - ml_subtype2 -0.660 2.15 271 -0.307 0.9495
## ml_subtype1 - ml_subtype2 3.537 2.41 267 1.465 0.3094
##
## Region = ParaHip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -1.821 1.76 270 -1.037 0.5543
## ml_subtype0 - ml_subtype2 0.225 2.19 277 0.102 0.9942
## ml_subtype1 - ml_subtype2 2.046 2.45 270 0.836 0.6811
##
## Region = PCC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -2.252 2.54 331 -0.887 0.6489
## ml_subtype0 - ml_subtype2 1.179 3.06 330 0.385 0.9214
## ml_subtype1 - ml_subtype2 3.431 3.40 328 1.010 0.5711
##
## Region = Precun:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -2.709 1.67 255 -1.626 0.2365
## ml_subtype0 - ml_subtype2 -3.085 2.04 254 -1.509 0.2885
## ml_subtype1 - ml_subtype2 -0.376 2.31 252 -0.163 0.9855
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_myelin <- as.data.frame(resultados$contrasts)
contrastes_myelin$stain<-"Myelin"
modelo<-lmer(Microglia ~ as.factor(ml_subtype)* Region + (1 | i_id), data=data)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
summary(modelo) # Revisa los términos de interacción
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Microglia ~ as.factor(ml_subtype) * Region + (1 | i_id)
## Data: data
##
## REML criterion at convergence: 1988.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.4082 -0.5583 -0.0470 0.5327 4.6251
##
## Random effects:
## Groups Name Variance Std.Dev.
## i_id (Intercept) 11.277 3.358
## Residual 9.401 3.066
## Number of obs: 384, groups: i_id, 49
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 7.21751 0.84541 125.63844 8.537
## as.factor(ml_subtype)1 -1.40662 1.65299 144.45862 -0.851
## as.factor(ml_subtype)2 -2.77295 2.06994 152.64047 -1.340
## RegionGFM -1.00093 0.81514 312.47136 -1.228
## RegionGPS -1.00351 0.80757 312.32373 -1.243
## RegionGTM -1.82584 0.81326 312.11639 -2.245
## RegionHip 2.76047 0.80757 312.32373 3.418
## RegionOC -5.08161 0.81522 312.50027 -6.233
## RegionParaHip 1.23768 0.80757 312.32373 1.533
## RegionPCC -2.73298 1.42991 316.52026 -1.911
## RegionPrecun -4.59886 0.81522 312.50027 -5.641
## as.factor(ml_subtype)1:RegionGFM 1.62015 1.58925 313.05679 1.019
## as.factor(ml_subtype)2:RegionGFM 3.59163 1.99155 313.71084 1.803
## as.factor(ml_subtype)1:RegionGPS 0.60467 1.58538 313.02149 0.381
## as.factor(ml_subtype)2:RegionGPS 2.20444 1.98847 313.69053 1.109
## as.factor(ml_subtype)1:RegionGTM 3.66547 1.58828 312.96484 2.308
## as.factor(ml_subtype)2:RegionGTM 1.85386 1.99079 313.65307 0.931
## as.factor(ml_subtype)1:RegionHip 0.98722 1.58538 313.02149 0.623
## as.factor(ml_subtype)2:RegionHip 1.07980 1.98847 313.69053 0.543
## as.factor(ml_subtype)1:RegionOC 0.92599 1.58929 313.06433 0.583
## as.factor(ml_subtype)2:RegionOC 2.72006 2.03909 312.98961 1.334
## as.factor(ml_subtype)1:RegionParaHip 0.08371 1.58538 313.02149 0.053
## as.factor(ml_subtype)2:RegionParaHip -0.19211 2.03604 312.96340 -0.094
## as.factor(ml_subtype)2:RegionPCC 1.90213 3.76725 316.61669 0.505
## as.factor(ml_subtype)1:RegionPrecun 0.60444 1.58929 313.06433 0.380
## as.factor(ml_subtype)2:RegionPrecun 4.10425 2.15524 315.98873 1.904
## Pr(>|t|)
## (Intercept) 3.82e-14 ***
## as.factor(ml_subtype)1 0.396202
## as.factor(ml_subtype)2 0.182357
## RegionGFM 0.220397
## RegionGPS 0.214933
## RegionGTM 0.025461 *
## RegionHip 0.000714 ***
## RegionOC 1.48e-09 ***
## RegionParaHip 0.126386
## RegionPCC 0.056870 .
## RegionPrecun 3.78e-08 ***
## as.factor(ml_subtype)1:RegionGFM 0.308780
## as.factor(ml_subtype)2:RegionGFM 0.072280 .
## as.factor(ml_subtype)1:RegionGPS 0.703163
## as.factor(ml_subtype)2:RegionGPS 0.268447
## as.factor(ml_subtype)1:RegionGTM 0.021661 *
## as.factor(ml_subtype)2:RegionGTM 0.352456
## as.factor(ml_subtype)1:RegionHip 0.533934
## as.factor(ml_subtype)2:RegionHip 0.587494
## as.factor(ml_subtype)1:RegionOC 0.560555
## as.factor(ml_subtype)2:RegionOC 0.183189
## as.factor(ml_subtype)1:RegionParaHip 0.957925
## as.factor(ml_subtype)2:RegionParaHip 0.924889
## as.factor(ml_subtype)2:RegionPCC 0.613972
## as.factor(ml_subtype)1:RegionPrecun 0.703966
## as.factor(ml_subtype)2:RegionPrecun 0.057777 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 26 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## fit warnings:
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
anova(modelo)
## Missing cells for: as.factor(ml_subtype)1:RegionPCC.
## Interpret type III hypotheses with care.
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## as.factor(ml_subtype) 3.15 1.575 2 48.951 0.1675 0.8462
## Region 1204.11 150.513 8 313.398 16.0108 <2e-16 ***
## as.factor(ml_subtype):Region 157.28 10.486 15 312.975 1.1154 0.3412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(modelo, pairwise ~ ml_subtype | Region)
## $emmeans
## Region = Fusiform:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 7.22 0.845 126 5.544 8.89
## 1 5.81 1.421 152 3.004 8.62
## 2 4.44 1.890 159 0.712 8.18
##
## Region = GFM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 6.22 0.837 122 4.559 7.87
## 1 6.43 1.313 118 3.831 9.03
## 2 7.04 1.719 118 3.632 10.44
##
## Region = GPS:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 6.21 0.830 118 4.570 7.86
## 1 5.41 1.313 118 2.813 8.01
## 2 5.65 1.719 118 2.242 9.05
##
## Region = GTM:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 5.39 0.838 122 3.733 7.05
## 1 7.65 1.313 118 5.051 10.25
## 2 4.47 1.719 118 1.069 7.88
##
## Region = Hip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 9.98 0.830 118 8.334 11.62
## 1 9.56 1.313 118 6.959 12.16
## 2 8.28 1.719 118 4.881 11.69
##
## Region = OC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 2.14 0.838 122 0.478 3.79
## 1 1.66 1.313 118 -0.944 4.25
## 2 2.08 1.792 136 -1.461 5.63
##
## Region = ParaHip:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 8.46 0.830 118 6.811 10.10
## 1 7.13 1.313 118 4.533 9.73
## 2 5.49 1.792 136 1.946 9.03
##
## Region = PCC:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 4.48 1.446 339 1.641 7.33
## 1 nonEst NA NA NA NA
## 2 3.61 3.458 355 -3.187 10.41
##
## Region = Precun:
## ml_subtype emmean SE df lower.CL upper.CL
## 0 2.62 0.838 122 0.961 4.28
## 1 1.82 1.313 118 -0.783 4.42
## 2 3.95 1.888 159 0.221 7.68
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## Region = Fusiform:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 1.4066 1.65 145 0.851 0.6720
## ml_subtype0 - ml_subtype2 2.7730 2.07 153 1.339 0.3756
## ml_subtype1 - ml_subtype2 1.3663 2.36 156 0.578 0.8321
##
## Region = GFM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -0.2135 1.56 119 -0.137 0.9897
## ml_subtype0 - ml_subtype2 -0.8187 1.91 119 -0.428 0.9040
## ml_subtype1 - ml_subtype2 -0.6051 2.16 118 -0.280 0.9578
##
## Region = GPS:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.8020 1.55 118 0.516 0.8635
## ml_subtype0 - ml_subtype2 0.5685 1.91 118 0.298 0.9523
## ml_subtype1 - ml_subtype2 -0.2334 2.16 118 -0.108 0.9936
##
## Region = GTM:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 -2.2588 1.56 120 -1.451 0.3185
## ml_subtype0 - ml_subtype2 0.9191 1.91 119 0.481 0.8806
## ml_subtype1 - ml_subtype2 3.1779 2.16 118 1.469 0.3093
##
## Region = Hip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.4194 1.55 118 0.270 0.9606
## ml_subtype0 - ml_subtype2 1.6932 1.91 118 0.887 0.6495
## ml_subtype1 - ml_subtype2 1.2738 2.16 118 0.589 0.8263
##
## Region = OC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.4806 1.56 119 0.309 0.9489
## ml_subtype0 - ml_subtype2 0.0529 1.98 133 0.027 0.9996
## ml_subtype1 - ml_subtype2 -0.4277 2.22 129 -0.193 0.9798
##
## Region = ParaHip:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 1.3229 1.55 118 0.852 0.6717
## ml_subtype0 - ml_subtype2 2.9651 1.98 132 1.501 0.2936
## ml_subtype1 - ml_subtype2 1.6422 2.22 129 0.739 0.7406
##
## Region = PCC:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 nonEst NA NA NA NA
## ml_subtype0 - ml_subtype2 0.8708 3.75 354 0.232 0.8164
## ml_subtype1 - ml_subtype2 nonEst NA NA NA NA
##
## Region = Precun:
## contrast estimate SE df t.ratio p.value
## ml_subtype0 - ml_subtype1 0.8022 1.56 119 0.515 0.8641
## ml_subtype0 - ml_subtype2 -1.3313 2.07 152 -0.645 0.7957
## ml_subtype1 - ml_subtype2 -2.1335 2.30 144 -0.928 0.6236
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for varying family sizes
resultados <- emmeans(modelo, pairwise ~ ml_subtype | Region)
# Extraer la tabla de contrastes y convertirla a un data frame
contrastes_micro <- as.data.frame(resultados$contrasts)
contrastes_micro$stain<-"microglia"
contraste<-rbind(contrasttau,contrastes_NfL, contrastes_myelin, contrastes_micro)
neuropath_4G8_AT8_NFL <- read_excel("data_validation/neuropath_4G8_AT8_NFL.xlsx")
subtypes_postmortem <- read_csv("data_validation/subtypes_postmortem.csv")
## Rows: 60 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): i_id
## dbl (4): ml_subtype, prob_ml_subtype, ml_stage, prob_ml_stage
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
clinical_data <- read_excel("data_validation/clinical data.xlsx")
## New names:
## • `` -> `...22`
clinical_data$i_id<- ifelse(grepl("^AD", clinical_data$SubjectANW), paste0("PAGE-", clinical_data$SubjectANW), clinical_data$SubjectANW)
# data1<-data1 %>% drop_na(data1$Cohort)
# data1$dx_pre<-ifelse(data1$Cohort=="Control",1,0)
# data1$dx_post<-ifelse(data1$ml_subtype==0,1,0)
#
# confusionMatrix(table(data1$dx_pre, data1$dx_post))
data1<-left_join(subtypes_postmortem,clinical_data, by='i_id')
data1$Subject <- ifelse(grepl("^[0-9]{4}-[0-9]{3}$", data1$Subject),
paste0(substr(data1$Subject, 3, 4), "-", substr(data1$Subject, 6, 8)),
data1$Subject)
# Mostrar el nuevo vector
print(data1$Subject)
## [1] "ANW019" NA "ANW082" "ANW122" NA "ANW232" "ANW534"
## [8] "ANW560" "ANW569" NA NA NA NA NA
## [15] "ANW777" "ANW782" "ANW797" "ANW880" "ANW898" NA "ANW945"
## [22] "ANW989" "ANW999" "16-035" "16-105" "16-131" "17-018" "17-024"
## [29] "17-110" "ANW-078" "17-133" "17-147" "17-154" "18-016" "18-032"
## [36] "18-097" "18-109" "18-111" "18-126" "19-013" "19-020" "19-025"
## [43] "19-058" "19-091" "19-095" "19-112" "19-115" "19-119" "20-104"
## [50] "21-006" "21-066" "22-036" "22-049" "22-063" "22-078" "22-081"
## [57] "22-086" NA NA NA
data<-left_join(data1,neuropath_4G8_AT8_NFL, by="Subject" )
table(data$Cohort.x, data$ml_subtype)
##
## 0 1 2
## AD 274 91 65
## Control 91 65 39
data %>% filter(Region=="Hmed")%>%
ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
geom_jitter()+
xlim(0,35)+
geom_smooth(method="lm")+
ggtitle("Hippocampus-ptau")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 19 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
data %>% filter(Region=="GPS")%>%
ggplot( aes(x=ml_stage, y=pTau,color=as.factor(ml_subtype)))+
geom_jitter()+
xlim(0,35)+
geom_smooth(method="lm")+
ggtitle("Superior parietal gyrus-ptau")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 18 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 23 rows containing missing values or values outside the scale range
## (`geom_point()`).
library(ggseg)
library(readr)
contraste <- read_csv("contraste.csv")
## New names:
## Rows: 27 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (3): contrast, Region, stain dbl (6): ...1, estimate, SE, df, t.ratio, p.value
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
knitr::kable(contraste)
| …1 | contrast | Region | estimate | SE | df | t.ratio | p.value | stain |
|---|---|---|---|---|---|---|---|---|
| 5 | ml_subtype1 - ml_subtype3 | Fusiform | 45.982537 | 12.554691 | 98.34538 | 3.662578 | 0.0022538 | pTau |
| 6 | ml_subtype2 - ml_subtype3 | Fusiform | 49.935292 | 13.568797 | 96.28489 | 3.680156 | 0.0021440 | pTau |
| 7 | ml_subtype0 - ml_subtype1 | GFM | -24.148178 | 7.031639 | 82.41822 | -3.434218 | 0.0050681 | pTau |
| 8 | ml_subtype0 - ml_subtype2 | GFM | -26.947992 | 8.687179 | 82.41822 | -3.102042 | 0.0137518 | pTau |
| 11 | ml_subtype1 - ml_subtype3 | GFM | 38.436630 | 11.963579 | 82.41822 | 3.212804 | 0.0099469 | pTau |
| 12 | ml_subtype2 - ml_subtype3 | GFM | 41.236444 | 13.005781 | 82.41822 | 3.170624 | 0.0112649 | pTau |
| 13 | ml_subtype0 - ml_subtype1 | GPS | -22.869909 | 7.031639 | 82.41822 | -3.252429 | 0.0088390 | pTau |
| 14 | ml_subtype0 - ml_subtype2 | GPS | -32.332661 | 8.805493 | 86.67601 | -3.671874 | 0.0023110 | pTau |
| 17 | ml_subtype1 - ml_subtype3 | GPS | 39.364537 | 11.963579 | 82.41822 | 3.290365 | 0.0078856 | pTau |
| 18 | ml_subtype2 - ml_subtype3 | GPS | 48.827289 | 13.085105 | 84.31006 | 3.731517 | 0.0019220 | pTau |
| 19 | ml_subtype0 - ml_subtype1 | GTM | -22.305273 | 7.031639 | 82.41822 | -3.172130 | 0.0112152 | pTau |
| 20 | ml_subtype0 - ml_subtype2 | GTM | -29.303011 | 8.687179 | 82.41822 | -3.373133 | 0.0061255 | pTau |
| 23 | ml_subtype1 - ml_subtype3 | GTM | 41.886905 | 11.963579 | 82.41822 | 3.501202 | 0.0041051 | pTau |
| 24 | ml_subtype2 - ml_subtype3 | GTM | 48.884643 | 13.005781 | 82.41822 | 3.758686 | 0.0017764 | pTau |
| 32 | ml_subtype0 - ml_subtype2 | OC | -24.491860 | 8.687179 | 82.41822 | -2.819311 | 0.0301074 | pTau |
| 41 | ml_subtype1 - ml_subtype3 | ParaHip | 42.151949 | 12.529962 | 97.64571 | 3.364092 | 0.0059559 | pTau |
| 42 | ml_subtype2 - ml_subtype3 | ParaHip | 42.533188 | 13.568797 | 96.28489 | 3.134632 | 0.0120295 | pTau |
| 47 | ml_subtype1 - ml_subtype3 | PCC | 39.710577 | 11.963579 | 82.41822 | 3.319289 | 0.0072234 | pTau |
| 48 | ml_subtype2 - ml_subtype3 | PCC | 40.541571 | 13.005781 | 82.41822 | 3.117196 | 0.0131628 | pTau |
| 49 | ml_subtype0 - ml_subtype1 | Precun | -21.362250 | 7.031639 | 82.41822 | -3.038019 | 0.0165130 | pTau |
| 50 | ml_subtype0 - ml_subtype2 | Precun | -32.165973 | 8.687179 | 82.41822 | -3.702695 | 0.0021390 | pTau |
| 53 | ml_subtype1 - ml_subtype3 | Precun | 34.764165 | 11.963579 | 82.41822 | 2.905833 | 0.0238497 | pTau |
| 54 | ml_subtype2 - ml_subtype3 | Precun | 45.567887 | 13.005781 | 82.41822 | 3.503664 | 0.0040732 | pTau |
| 1 | ml_subtype0 - ml_subtype1 | Fusiform | -3.599330 | 1.307782 | 147.55607 | -2.752239 | 0.0333480 | NfL |
| 171 | ml_subtype1 - ml_subtype3 | GPS | 6.068956 | 2.119376 | 135.62871 | 2.863558 | 0.0247653 | NfL |
| 491 | ml_subtype0 - ml_subtype1 | Precun | -4.089309 | 1.273218 | 135.62871 | -3.211790 | 0.0088449 | NfL |
| 25 | ml_subtype0 - ml_subtype1 | Hip | 4.047705 | 1.463237 | 120.20155 | 2.766267 | 0.0328230 | Myelin |