require(knitr)
require(ggplot2)
require(dplyr)
require(tidyr)
require(rlang)
require(ggpubr)
require(gridExtra)
require(devtools)
require(matrixStats)
require(ConsensusTME)
require(pheatmap)
require(caret)
require(class)
require(limma)
require(CMScaller)
require(openxlsx)
require(survival)
require(readxl)
require(xfun)
require(survminer)
setwd(params$wd)
load("tcga.coad.prim.RData")
Clasificación y exploración inicial de los datos
clin <- clin.tcga.coad
# Resumen variables clínicas
for (var in names(clin)[-c(1, 14, 15)]) {
cat("\n###", var, "###\n")
if (is.numeric(clin.tcga.coad[[var]])) {
print(summary(clin.tcga.coad[[var]]))
} else {
print(table(clin.tcga.coad[[var]], useNA = "ifany"))
}
}
### additional_studies ###
TCGAFPPP
443 10
### tumor_tissue_site ###
Colon
1 452
### histological_type ###
Colon Adenocarcinoma
5 386
Colon Mucinous Adenocarcinoma
62
### other_dx ###
No
391
Yes
61
Yes, History of Synchronous/Bilateral Malignancy
1
### gender ###
FEMALE MALE
214 239
### vital_status ###
<NA>
453
### days_to_birth ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
-32873 -28472 -25243 -24660 -21526 -11391 2
### days_to_last_known_alive ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 429.8 1974.0 1928.1 3424.0 3920.0 443
### days_to_death ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 75.5 290.0 573.5 750.5 3042.0 398
### days_to_last_followup ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 457.0 761.0 946.4 1117.5 4270.0 106
### race_list ###
AMERICAN INDIAN OR ALASKA NATIVE
172 1
ASIAN BLACK OR AFRICAN AMERICAN
11 59
WHITE
210
### tissue_source_site ###
3L 4N 4T 5M A6 AA AD AM AU AY AZ CA CK CM D5 DM F4 G4 NH QG
1 1 1 3 51 172 13 2 2 10 20 10 12 37 31 25 16 27 9 5
QL RU SS T9 WS
1 1 1 1 1
### history_of_neoadjuvant_treatment ###
No Yes
450 3
### informed_consent_verified ###
YES
453
### icd_o_3_site ###
C18.0 C18.2 C18.3 C18.4 C18.5 C18.6 C18.7 C18.9 C19.9
85 103 10 21 5 17 110 96 6
### icd_o_3_histology ###
8010/3 8140/3 8255/3 8260/3 8480/3 8560/3 8574/3
1 384 1 2 63 1 1
### icd_10 ###
C18.0 C18.2 C18.3 C18.4 C18.5 C18.6 C18.7 C18.9 C19
85 103 10 21 5 17 110 96 6
### tissue_prospective_collection_indicator ###
NO YES
12 279 162
### tissue_retrospective_collection_indicator ###
NO YES
12 162 279
### days_to_initial_pathologic_diagnosis ###
Min. 1st Qu. Median Mean 3rd Qu. Max.
0 0 0 0 0 0
### age_at_initial_pathologic_diagnosis ###
Min. 1st Qu. Median Mean 3rd Qu. Max.
31.00 58.00 69.00 67.05 77.00 90.00
### year_of_initial_pathologic_diagnosis ###
Min. 1st Qu. Median Mean 3rd Qu. Max.
1998 2007 2009 2008 2010 2013
### person_neoplasm_cancer_status ###
TUMOR FREE WITH TUMOR
52 197 204
### ethnicity ###
HISPANIC OR LATINO NOT HISPANIC OR LATINO
182 4 267
### weight ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
34.00 65.00 80.00 80.86 93.00 175.30 204
### height ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
80.3 162.0 170.0 168.4 176.0 193.0 221
### day_of_form_completion ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
1.00 15.00 18.00 16.85 19.00 31.00 25
### month_of_form_completion ###
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 5.000 6.000 5.852 7.000 12.000
### year_of_form_completion ###
Min. 1st Qu. Median Mean 3rd Qu. Max.
2010 2010 2011 2011 2011 2015
### residual_tumor ###
R0 R1 R2 RX
73 327 4 25 24
### anatomic_neoplasm_subdivision ###
Ascending Colon Cecum
20 87 107
Descending Colon Hepatic Flexure Rectosigmoid Junction
20 26 1
Sigmoid Colon Splenic Flexure Transverse Colon
147 7 38
### primary_lymph_node_presentation_assessment ###
NO YES
10 15 428
### lymph_node_examined_count ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 14.00 20.00 22.46 27.50 109.00 22
### number_of_lymphnodes_positive_by_he ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 0.000 0.000 2.068 2.000 50.000 26
### number_of_lymphnodes_positive_by_ihc ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0000 0.0000 0.7083 0.0000 12.0000 405
### preoperative_pretreatment_cea_level ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 1.70 3.00 37.69 7.55 2614.00 168
### non_nodal_tumor_deposits ###
NO YES
230 186 37
### circumferential_resection_margin ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.00 4.00 20.00 29.72 40.00 165.00 384
### venous_invasion ###
NO YES
59 298 96
### lymphatic_invasion ###
NO YES
42 247 164
### perineural_invasion_present ###
NO YES
274 133 46
### microsatellite_instability ###
NO YES
362 80 11
### number_of_loci_tested ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 5.000 5.000 4.925 5.000 10.000 400
### number_of_abnormal_loci ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0000 0.0000 0.7924 0.0000 9.0000 400
### kras_gene_analysis_performed ###
NO YES
36 369 48
### kras_mutation_found ###
NO YES
407 24 22
### kras_mutation_codon ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
12.00 12.00 12.00 12.19 12.00 13.00 432
### braf_gene_analysis_performed ###
NO YES
49 376 28
### braf_gene_analysis_result ###
Abnormal Normal
425 3 25
### synchronous_colon_cancer_present ###
NO YES
44 385 24
### history_of_colon_polyps ###
NO YES
68 248 137
### colon_polyps_present ###
NO YES
229 144 80
### loss_expression_of_mismatch_repair_proteins_by_ihc ###
NO YES
114 283 56
### loss_expression_of_mismatch_repair_proteins_by_ihc_results ###
395
MLH1-ExpressedMSH2-Expressed
1
MLH1-ExpressedMSH2-ExpressedMSH6-Expressed
1
MLH1-ExpressedMSH2-ExpressedPMS2-ExpressedMSH6-Expressed
27
MLH1-ExpressedMSH2-ExpressedPMS2-Not ExpressedMSH6-Expressed
3
MLH1-ExpressedMSH2-ExpressedPMS2-Not ExpressedMSH6-Not Expressed
1
MLH1-ExpressedMSH2-Not ExpressedMSH6-Not Expressed
1
MLH1-ExpressedMSH2-Not ExpressedPMS2-ExpressedMSH6-Not Expressed
1
MLH1-ExpressedPMS2-Expressed
1
MLH1-Not ExpressedMSH2-ExpressedPMS2-Not ExpressedMSH6-Expressed
6
MLH1-Not ExpressedMSH2-Not ExpressedMSH6-Not Expressed
1
MLH1-Not ExpressedMSH2-Not ExpressedPMS2-Not Expressed
1
MLH1-Not ExpressedMSH2-Not ExpressedPMS2-Not ExpressedMSH6-Not Expressed
14
### number_of_first_degree_relatives_with_cancer_diagnosis ###
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0000 0.0000 0.1825 0.0000 3.0000 64
### radiation_therapy ###
NO
419 34
### postoperative_rx_tx ###
NO YES
419 16 18
### primary_therapy_outcome_success ###
Complete Remission/Response
426 17
Partial Remission/Response Progressive Disease
3 6
Stable Disease
1
### has_new_tumor_events_information ###
NO YES
364 89
### has_drugs_information ###
NO YES
302 151
### has_radiations_information ###
NO YES
438 15
### has_follow_ups_information ###
NO YES
36 417
### project ###
TCGA-COAD
453
### stage_event_system_version ###
2nd 5th 6th 7th
53 1 63 204 132
### stage_event_clinical_stage ###
<NA>
453
### stage_event_pathologic_stage ###
Stage I Stage IA Stage II Stage IIA Stage IIB Stage IIC
11 74 1 30 136 10 1
Stage III Stage IIIA Stage IIIB Stage IIIC Stage IV Stage IVA Stage IVB
19 8 58 41 46 16 2
### stage_event_tnm_categories ###
T1N0 T1N0M0 T1N0MX T1N1bM0 T2N0 T2N0M0 T2N0MX T2N1aM0
1 8 1 1 1 59 7 1
T2N1M0 T2N1MX T2N2aM0 T3N0 T3N0M0 T3N0M1 T3N0M1a T3N0M1b
6 2 1 2 151 4 1 1
T3N0MX T3N1 T3N1aM0 T3N1aM1 T3N1aM1a T3N1aMX T3N1bM0 T3N1bMX
15 1 6 1 2 2 7 2
T3N1cM0 T3N1M0 T3N1M1 T3N1MX T3N2aM0 T3N2aM1 T3N2aMX T3N2b
1 34 12 5 3 1 1 1
T3N2bM0 T3N2bM1b T3N2bMX T3N2M0 T3N2M1 T3N2MX T4aN0M0 T4aN0M1a
3 1 3 29 17 3 1 1
T4aN0MX T4aN1aM0 T4aN1aM1a T4aN1bM0 T4aN1bM1a T4aN1cM1a T4aN1M0 T4aN1M1
1 1 1 1 1 1 1 1
T4aN1MX T4aN2aMX T4aN2b T4aN2bM0 T4aN2bM1 T4aN2bM1a T4aN2bMX T4aN2M1
1 2 1 1 1 1 1 1
T4bN0M0 T4bN0MX T4bN1aMX T4bN1bM1b T4bN1bMX T4bN1M1 T4bN2M0 T4N0M0
2 1 1 1 1 1 2 8
T4N0M1 T4N1bM1a T4N1M0 T4N1M1 T4N2M0 T4N2M1 TisN0MX
1 1 4 3 1 9 1
### stage_event_psa ###
<NA>
453
### stage_event_gleason_grading ###
<NA>
453
### stage_event_ann_arbor ###
<NA>
453
### stage_event_serum_markers ###
<NA>
453
### stage_event_igcccg_stage ###
<NA>
453
### stage_event_masaoka_stage ###
<NA>
453
# Modelo de clasificación de pacientes según el estado MSI/MSS: "up"
# Filtrar pacientes con estado MSI/MSS conocido
clin_known <- clin %>%
filter(microsatellite_instability %in% c("YES", "NO"))
# Submatriz de expresión para esos pacientes
expr_known <-
ex.tcga.coad[, colnames(ex.tcga.coad) %in% clin_known$bcr_patient_barcode]
expr_known <-
expr_known[, match(clin_known$bcr_patient_barcode, colnames(expr_known))]
# Seleccionar los 50 genes mas significativos, menor p-valor
group <- factor(clin_known$microsatellite_instability, levels = c("NO", "YES"))
design <- model.matrix(~ group)
fit <- lmFit(expr_known, design)
fit <- eBayes(fit)
top_genes <- topTable(fit, number = 50, sort.by = "p") %>% rownames()
X_train <- t(expr_known[rownames(expr_known) %in% top_genes, ])
y_train <- group
# Entrenar modelo kNN sampling = "up"
set.seed(2025)
ctrl <- trainControl(method = "cv", number = 10, sampling = "up")
modelo_knn <- train(
X_train, y_train,
method = "knn",
tuneGrid = data.frame(k = 5),
trControl = ctrl
)
modelo_knn
k-Nearest Neighbors
91 samples
50 predictors
2 classes: 'NO', 'YES'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 81, 82, 82, 82, 82, 82, ...
Addtional sampling using up-sampling
Resampling results:
Accuracy Kappa
0.9666667 0.84
Tuning parameter 'k' was held constant at a value of 5
# Aplicar modelo
clin_missing <- clin %>%
filter(!(microsatellite_instability %in% c("YES", "NO")))
expr_missing <-
ex.tcga.coad[, colnames(ex.tcga.coad) %in% clin_missing$bcr_patient_barcode]
expr_missing <- expr_missing[rownames(expr_missing) %in% top_genes, ]
X_missing <- t(expr_missing)
# Sensibilidad y especificidad
predicciones_up <- predict(modelo_knn, newdata = X_train)
cm_up <- confusionMatrix(predicciones_up, y_train, positive = "YES")
cm_up
Confusion Matrix and Statistics
Reference
Prediction NO YES
NO 78 0
YES 2 11
Accuracy : 0.978
95% CI : (0.9229, 0.9973)
No Information Rate : 0.8791
P-Value [Acc > NIR] : 0.0007365
Kappa : 0.9041
Mcnemar's Test P-Value : 0.4795001
Sensitivity : 1.0000
Specificity : 0.9750
Pos Pred Value : 0.8462
Neg Pred Value : 1.0000
Prevalence : 0.1209
Detection Rate : 0.1209
Detection Prevalence : 0.1429
Balanced Accuracy : 0.9875
'Positive' Class : YES
# Predicción
predicted_labels_up <- as.character(predict(modelo_knn, X_missing))
# Modelo de clasificación de pacientes según el estado MSI/MSS: "down"
# Figura 3A
# Entrenar modelo kNN sampling = "down"
set.seed(2025)
ctrl <- trainControl(method = "cv", number = 10, sampling = "down")
modelo_knn <- train(
X_train, y_train,
method = "knn",
tuneGrid = data.frame(k = 5),
trControl = ctrl
)
modelo_knn
k-Nearest Neighbors
91 samples
50 predictors
2 classes: 'NO', 'YES'
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 81, 82, 82, 82, 82, 82, ...
Addtional sampling using down-sampling
Resampling results:
Accuracy Kappa
0.9666667 0.7608696
Tuning parameter 'k' was held constant at a value of 5
# Sensibilidad y especificidad
predicciones_down <- predict(modelo_knn, newdata = X_train)
cm_down <- confusionMatrix(predicciones_down, y_train, positive = "YES")
cm_down
Confusion Matrix and Statistics
Reference
Prediction NO YES
NO 80 1
YES 0 10
Accuracy : 0.989
95% CI : (0.9403, 0.9997)
No Information Rate : 0.8791
P-Value [Acc > NIR] : 0.0001094
Kappa : 0.9462
Mcnemar's Test P-Value : 1.0000000
Sensitivity : 0.9091
Specificity : 1.0000
Pos Pred Value : 1.0000
Neg Pred Value : 0.9877
Prevalence : 0.1209
Detection Rate : 0.1099
Detection Prevalence : 0.1099
Balanced Accuracy : 0.9545
'Positive' Class : YES
# Predicción
predicted_labels_down <- as.character(predict(modelo_knn, X_missing))
# Figura 3C
# sampling = "up"
# Combinar datos de expresión
X_all <- rbind(X_train, X_missing)
# Obtener etiquetas combinadas
labels_all <- c(as.character(y_train), predicted_labels_up)
# Indicar si es conocido o predicho
known_status <- c(rep("Conocido", length(y_train)),
rep("Predecido", length(predicted_labels_up)))
# PCA
pca <- prcomp(X_all, scale. = TRUE)
pca_df <- data.frame(
PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
MSI_MSS = labels_all,
Valor = known_status
)
# Convertir a factores con niveles bien definidos
pca_df$MSI_MSS <- factor(pca_df$MSI_MSS, levels = c("NO", "YES"))
pca_df$Valor <- factor(pca_df$Valor, levels = c("Conocido", "Predecido"))
# Visualización
library(ggplot2)
ggplot(pca_df, aes(x = PC1, y = PC2, color = MSI_MSS, shape = Valor)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("NO" = "lightblue", "YES" = "lightpink")) +
scale_shape_manual(values = c("Conocido" = 17, "Predecido" = 16)) +
theme_minimal() +
labs(
title = "Sampling = up",
x = "PC1",
y = "PC2",
color = "MSI/MSS",
shape = "Valor MSI/MSS"
)

# Figura 3B
# sampling = "down"
# Combinar datos de expresión
X_all <- rbind(X_train, X_missing)
# Obtener etiquetas combinadas
labels_all <- c(as.character(y_train), predicted_labels_down)
# Indicar si es conocido o predicho
known_status <- c(rep("Conocido", length(y_train)),
rep("Predecido", length(predicted_labels_down)))
# PCA
pca <- prcomp(X_all, scale. = TRUE)
pca_df <- data.frame(
PC1 = pca$x[, 1],
PC2 = pca$x[, 2],
MSI_MSS = labels_all,
Valor = known_status
)
# Convertir a factores con niveles bien definidos
pca_df$MSI_MSS <- factor(pca_df$MSI_MSS, levels = c("NO", "YES"))
pca_df$Valor <- factor(pca_df$Valor, levels = c("Conocido", "Predecido"))
# Visualización
library(ggplot2)
ggplot(pca_df, aes(x = PC1, y = PC2, color = MSI_MSS, shape = Valor)) +
geom_point(size = 3, alpha = 0.8) +
scale_color_manual(values = c("NO" = "lightblue", "YES" = "lightpink")) +
scale_shape_manual(values = c("Conocido" = 17, "Predecido" = 16)) +
theme_minimal() +
labs(
title = "Sampling = down",
x = "PC1",
y = "PC2",
color = "MSI/MSS",
shape = "Valor MSI/MSS"
)

# Guardar predicciones del modelo con sampling = "down"
clin$microsatellite_instability_pred<- ifelse(
clin$bcr_patient_barcode %in% rownames(X_missing), predicted_labels_down,
clin$microsatellite_instability
)
clin$microsatellite_instability_pred[clin$microsatellite_instability_pred == 2] <- "NO"
clin$microsatellite_instability_pred[clin$microsatellite_instability_pred == 3] <- "YES"
# Creación variable Estadío (I, II, II, IV)
clin <- clin %>%
mutate(Estadío = case_when(
stage_event_pathologic_stage %in% c("Stage I", "Stage IA") ~ "I",
stage_event_pathologic_stage %in% c("Stage II", "Stage IIA", "Stage IIB", "Stage IIC") ~ "II",
stage_event_pathologic_stage %in% c("Stage III", "Stage IIIA", "Stage IIIB", "Stage IIIC") ~ "III",
stage_event_pathologic_stage %in% c("Stage IV", "Stage IVA", "Stage IVB") ~ "IV",
stage_event_pathologic_stage == "" ~ NA_character_))
# Creación variable Grupo
clin <- clin %>%
mutate(Grupo = case_when(
Estadío != "" & microsatellite_instability_pred == "YES" ~ paste(Estadío, "MSI"),
Estadío != "" & microsatellite_instability_pred == "NO" ~ paste(Estadío, "MSS")))
# Figura 4
# Inferencia subtipos CMS
# https://github.com/peterawe/CMScaller
# devtools::install_github("Lothelab/CMScaller")
ex.tcga.coad2 <-
replaceGeneId(ex.tcga.coad, id.in = "symbol", id.out = "entrez")
cms_result <- CMScaller(ex.tcga.coad2, RNAseq = TRUE)

CMS1 CMS2 CMS3 CMS4 <NA>
78 121 66 139 49
clin$CMS <-
cms_result$prediction[match(clin$bcr_patient_barcode, rownames(cms_result))]
# Figura 5A
# Gráfico quesitos género
genero_table <- table(clin$gender)
percent_labels <-
paste0(c("Femenino", "Masculino"), "\n",
round(100 * genero_table / sum(genero_table), 1), "%")
pie(genero_table,
main = "Género",
labels = percent_labels,
col = rainbow(2),
cex = 0.9,
cex.main = 0.9)

# Figura 5B
# Gráfico quesitos raza
raza_table <- table(clin$race_list)
percent_labels <-
paste0(c("America, India, Alaska", "Asia", "Negro o África", "Blanco"), "\n",
round(100 * raza_table / sum(raza_table), 1), "%")
pie(table(clin$race_list),
main = "Raza",
labels = percent_labels,
col = rainbow(5),
cex = 0.9,
cex.main = 0.9)

# Figura 5D
# Gráfico quesitos edad diagnóstico
edad_cut <- cut(clin$age_at_initial_pathologic_diagnosis,
breaks = 5,
include.lowest = TRUE)
edad_table <- table(edad_cut)
percent_labels <- paste0(names(edad_table),
"\n", round(100 * edad_table / sum(edad_table), 1),
"%")
pie(edad_table,
labels = percent_labels,
main = "Edad de diagnóstico",
col = rainbow(5),
cex = 0.9,
cex.main = 0.9)

# Figura 5C
# Gráfico quesitos edad
edad <- -clin$days_to_birth / 365
edad_cut <- cut(edad,
breaks = 5,
include.lowest = TRUE)
edad_table <- table(edad_cut)
percent_labels <-
paste0(names(edad_table), "\n",
round(100 * edad_table / sum(edad_table), 1), "%")
pie(edad_table,
labels = percent_labels,
main = "Edad",
col = rainbow(5),
cex = 0.9,
cex.main = 0.9)

Análisis descriptivo y comparación de HLA-I y HLA-II
# Filtrar la matriz de expresión por los genes que contienen "HLA"
hla <-
as.data.frame(t(ex.tcga.coad)[, rownames(ex.tcga.coad) %in% c("HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G",
"HLA-DPA1", "HLA-DPB1", "HLA-DPB2", "HLA-DQA1", "HLA-DQA2",
"HLA-DQB1", "HLA-DRA", "HLA-DRB1", "HLA-DRB5", "HLA-DOA",
"HLA-DOB", "HLA-DMA", "HLA-DMB")])
colnames(hla) <- gsub("-", "_", colnames(hla))
# Añadir la columna ID al dataframe
colnames(clin)[colnames(clin) == "bcr_patient_barcode"] <- "ID"
hla$ID <- rownames(hla)
# Unir dataframes de datos clínicos y expresión de HLA por la columna ID
aux <- merge(hla, clin, by = "ID")
# Cambiar nombres columnas
colnames(aux)[colnames(aux) == "microsatellite_instability_pred"] <- "MSI_MSS"
aux$MSI_MSS[aux$MSI_MSS == "NO"] <- "MSS"
aux$MSI_MSS[aux$MSI_MSS == "YES"] <- "MSI"
# Figura 2B
# Gráfico de barras para la distribución de las muestras en los distintos grupos
aux %>%
select(MSI_MSS, CMS, Estadío, Grupo) %>%
gather(key = "variable", value = "value") %>%
bind_rows(tibble(variable = "Dataset", value = "TCGA")) %>%
# Agrupación por variable y valor para contar frecuencias
group_by(variable, value) %>%
# Resumen de los datos / conteo
summarise(count = n(), .groups = "drop") %>%
# Agrupación por variable para calcular porcentajes dentro de cada grupo
group_by(variable) %>%
# Calcular porcentaje y ordenar factores
mutate(percent = count / sum(count) * 100,
variable = factor(variable, levels = c("MSI_MSS", "CMS", "Estadío", "Grupo", "Dataset")),
value = factor(value, levels = unique(value))) %>%
# Gráfico de barras
ggplot(aes(x = variable, y = percent, fill = value)) +
geom_bar(stat = "identity") +
# Personalizar colores, leyenda, ejes y título
scale_fill_manual(values = c("#FFFACD", "#FFD700", "#FFA07A", "#FF6347", "#FFB6C1","#FF6281", "#FF69B4", "#FF1493","#DDA0DD", "#191970", "#4169E1", "#1E90FF",
"#6495ED", "#4682B4", "#87CEEB", "#87CEFA", "#00BFFF", "#006400", "#9ACD32" ),
guide = guide_legend(keyheight = 0.8, keywidth = 0.8)) +
scale_x_discrete(labels = c("MSI_MSS" = "MSI/MSS",
"Dataset" = "Conjunto de Datos")) +
labs(title = "Distribución porcentual de las muestras",
x = "",
y = "Porcentaje (%)",
fill = "") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Medias HLA para cada gen/grupo
# Listas de genes y grupos con los que trabajaré
genes <- c("HLA_A", "HLA_B", "HLA_C", "HLA_E", "HLA_F", "HLA_G", "HLA_DPA1",
"HLA_DPB1", "HLA_DPB2", "HLA_DQA1", "HLA_DQB1", "HLA_DRA",
"HLA_DRB1", "HLA_DRB5", "HLA_DOA", "HLA_DOB", "HLA_DMA", "HLA_DMB")
grupos <- c("MSI", "MSS", "CMS1", "CMS2", "CMS3", "CMS4", "CMS_NA", "I", "II",
"III", "IV","Estadío_NA", "I MSI", "I MSS", "II MSI", "II MSS",
"III MSI", "III MSS", "IV MSI", "IV MSS", "Grupo_NA")
# Crear dataframe vacío para almacenar las medias
medias <- data.frame(matrix(ncol = length(genes), nrow = length(grupos)))
colnames(medias) <- genes
rownames(medias) <- grupos
# Función para calcular la media de cada gen en los diferentes grupos
summary_groups <- function(dataset, gen) {
result <- list(
MSI_MSS = dataset %>%
group_by(MSI_MSS) %>%
summarise(mean = mean(!!sym(gen))),
# Gestionar NAs en CMS
CMS = dataset %>%
group_by(CMS) %>%
summarise(mean = mean(!!sym(gen), na.rm = TRUE)),
Estadío = dataset %>%
group_by(Estadío) %>%
summarise(mean = mean(!!sym(gen), na.rm = TRUE)),
Grupo = dataset %>%
group_by(Grupo) %>%
summarise(mean = mean(!!sym(gen), na.rm = TRUE))
)
means_values <- unlist(lapply(result, function(x) x$mean))
}
# Aplicar la función en bucle a cada gen HLA
for (i in 1:length(genes)) {
gen <- genes[i]
means_values <- summary_groups(aux, gen)
medias[, i] <- means_values
}
# write.csv(round(medias,2), "medias.csv", row.names = TRUE)
# Tabla 2
# Kruskal para cada gen/grupo
# Grupos de interés
grupos_ag <- c("MSI_MSS", "CMS", "Estadío", "Grupo")
# Dataframe para guardar resultados
kruskal_sig <- data.frame(matrix(ncol = length(genes), nrow = 4))
colnames(kruskal_sig) <- genes
rownames(kruskal_sig) <- grupos_ag
# Función kruskal
kruskal <- function(dataset, gen){
result <- list(
# Aplicar el test de kruskal a can gen ~ grupo
# Guardar el p-valor
kruskal.test(as.formula(paste(gen, "~ MSI_MSS")), dataset)$p.value,
kruskal.test(as.formula(paste(gen, "~ CMS")), dataset)$p.value,
kruskal.test(as.formula(paste(gen, "~ Estadío")), dataset)$p.value,
kruskal.test(as.formula(paste(gen, "~ Grupo")), dataset)$p.value
)
# Convertir el resultado en vector
result <- unlist(result)
}
# Aplicar la función en bucle a cada gen
# Ir añadiendo los resultados a la tabla
for (i in 1:length(genes)) {
gen <- genes[i]
p_values <- kruskal(aux, gen)
kruskal_sig[, i] <- p_values
}
# Sustituir los valores de la tabla por NS, *, ** o ***
kruskal_sig <- apply(kruskal_sig, c(1, 2), function(p) {
if (p > 0.05) {
return("NS")
} else if (p <= 0.05 && p > 0.01) {
return("*")
} else if (p <= 0.01 && p > 0.001) {
return("**")
} else {
return("***")
}
})
as.data.frame(kruskal_sig)
HLA_A HLA_B HLA_C HLA_E HLA_F HLA_G HLA_DPA1 HLA_DPB1 HLA_DPB2 HLA_DQA1
MSI_MSS NS NS NS NS NS NS * NS NS NS
CMS * *** *** *** *** NS *** *** *** ***
Estadío NS NS NS NS NS NS NS NS NS NS
Grupo NS NS NS NS NS NS * * NS *
HLA_DQB1 HLA_DRA HLA_DRB1 HLA_DRB5 HLA_DOA HLA_DOB HLA_DMA HLA_DMB
MSI_MSS NS * ** * NS NS * NS
CMS *** *** *** *** *** *** *** ***
Estadío NS ** * * NS NS * *
Grupo * *** *** ** NS NS ** **
# write.csv(t(as.data.frame(kruskal_sig)), "kruskal.csv", row.names = TRUE)
# Función comparación medias 2 a 2
comparaciones_significativas <- function(dataset, gen, grupo) {
# Obtener los niveles del grupo
dataset <- dataset %>% filter(!is.na(.data[[grupo]]), !is.na(.data[[gen]]))
niveles_grupo <- unique(dataset[[grupo]])
# Generar todas las combinaciones posibles de pares de niveles
pares_niveles <- combn(niveles_grupo, 2, simplify = FALSE)
# Inicializar la lista para almacenar comparaciones significativas
comparisons_ <- list()
# Realizar la comparación para cada par de niveles
for (par in pares_niveles) {
# Filtrar el dataset para solo considerar los dos niveles comparados
dataset_filtrado <- dataset %>% filter(.data[[grupo]] %in% par)
# Realizar la comparación
resultado <- compare_means(as.formula(paste(gen, "~", grupo)),
dataset_filtrado)
# Verificar si la comparación es significativa
if (any(resultado$p.adj < 0.05)) {
# Si es significativa, añadir a la lista
comparisons_[[length(comparisons_) + 1]] <- par
}
}
# Crear el dataset comparisons_ que se añade al Enviroment
nombre_variable <- paste0("comparisons_", gen, "_", grupo)
assign(nombre_variable, comparisons_, envir = .GlobalEnv)
}
# Bucle para generar las comparaciones significativos para cada gen/grupo
for (gen in genes){
for (g in grupos_ag){
comparaciones_significativas(aux, gen, g)
}
}
# Los pares de compraciones significativas que se han generado los uso en el boxplot para indicar para que parejas de valores quiero mostrar el p-valor
# Figura 8 Y 9
# Gráficas boxplot con comparación de medias
# Función boxplot
box_plot <- function(dataset, gen, grupo, my_comparisons) {
# Filtrar los datos nulos
ggplot(dataset %>% filter(!is.na(.data[[gen]]), !is.na(.data[[grupo]])),
# Definir función y tipo de gráfico
aes_string(x = grupo, y = gen)) +
geom_boxplot(color = "black", outlier.shape = NA) +
geom_jitter(position = position_jitter(0.35), size = 1,
aes_string(color = grupo, shape = grupo)) +
# Definir etiquetas de los ejes
labs(y = paste0("log2(", gen, ")"), x = grupo) +
# Diseño del gráfico
theme_classic() +
scale_color_manual(values = c("grey80", "black", "grey60", "grey30",
"grey80", "black", "grey60", "grey30"),
name = NULL) +
scale_shape_manual(values = c(15, 16, 17, 18, 19, 20, 21, 22), name = NULL) +
theme(plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
panel.spacing = unit(1, "lines"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "white", fill = NA,
size = 0.3),
strip.background = element_rect(colour = "white", fill = "white")) +
# Muestro la comparación de medias para las parejas preestablecidas
stat_compare_means(comparisons = my_comparisons, size = 3.2) +
stat_compare_means(method = "kruskal.test", label = "p.format", size = 3.5,
label.x = Inf, label.y = Inf, hjust = 1, vjust = 1)
}
# Bucle para generar el gráfico para cada gen/grupo
for (gen in genes) {
for (g in grupos_ag){
# comparisons_ depende del gen
comparisons_variable <- paste0("comparisons_", gen, "_", g)
print(box_plot(aux, gen, g, get(comparisons_variable)))
}
}








































































Inferencia de infiltración inmune
# Inferencia de la infiltración
# install.packages("devtools")
# devtools::install_github("cansysbio/ConsensusTME", force = TRUE)
consensus <- consensusTMEAnalysis(as.matrix(ex.tcga.coad), cancer = "COAD",
statMethod = "gsva")
Producing ConsensusTME Estimates Using The Following Parameters:
Statistical Framework: "gsva"
Gene Sets For Cancer Type: "COAD"
Sample Size: 453
consensus <- t(consensus)
consensus <- as.data.frame(consensus)
head(consensus)
B_cells Cytotoxic_cells Dendritic_cells Eosinophils Macrophages
TCGA-AA-3854 -0.4803717 -0.5349437 -0.5303677 -0.5495252 -0.5019391
TCGA-QG-A5YW -0.3260441 -0.5005492 -0.3621965 -0.4450782 -0.4844821
TCGA-AA-3966 0.1942019 0.1978167 0.5275622 0.4570357 0.5420140
TCGA-G4-6626 -0.6913118 -0.7068806 -0.7105103 -0.7488931 -0.7322518
TCGA-CM-5868 -0.7431985 -0.7839818 -0.6869500 -0.7214234 -0.5813385
TCGA-AA-3973 -0.5550090 -0.5680799 -0.6612286 -0.7171094 -0.6431824
Mast_cells NK_cells Neutrophils T_cells_CD4 T_cells_CD8
TCGA-AA-3854 -0.6313005 -0.4848965 -0.3555158 -0.5997161 -0.5869838
TCGA-QG-A5YW -0.1522690 -0.3438767 -0.5377830 -0.2895701 -0.2996378
TCGA-AA-3966 0.4806968 0.2144912 0.6500641 0.3058948 0.2905591
TCGA-G4-6626 -0.6512359 -0.5892358 -0.7614573 -0.7293006 -0.6862090
TCGA-CM-5868 -0.6452313 -0.7665153 -0.7517783 -0.7225120 -0.7403492
TCGA-AA-3973 -0.5458429 -0.5851976 -0.7099543 -0.6548409 -0.6271639
T_cells_gamma_delta T_regulatory_cells Macrophages_M1
TCGA-AA-3854 -0.5718952 -0.5460151 -0.4425823
TCGA-QG-A5YW -0.1880826 -0.2298948 -0.3375183
TCGA-AA-3966 0.4239998 0.4604758 0.5720187
TCGA-G4-6626 -0.7090368 -0.7413858 -0.7003253
TCGA-CM-5868 -0.7823149 -0.8340382 -0.6285408
TCGA-AA-3973 -0.6164323 -0.7002066 -0.6732833
Macrophages_M2 Endothelial Fibroblasts Monocytes Plasma_cells
TCGA-AA-3854 -0.4736311 -0.71668517 -0.71237113 -0.4892901 -0.580307575
TCGA-QG-A5YW -0.4408496 -0.14728208 -0.51466407 -0.5180534 -0.330513404
TCGA-AA-3966 0.5833930 0.01898697 -0.02941950 0.5634607 -0.009847754
TCGA-G4-6626 -0.7395050 -0.67590699 -0.85830728 -0.7476222 -0.678981009
TCGA-CM-5868 -0.6108191 -0.70355192 -0.07720652 -0.7016416 -0.650268303
TCGA-AA-3973 -0.5669731 -0.57851843 -0.46387035 -0.6760106 -0.564966974
Immune_Score
TCGA-AA-3854 -0.5210926
TCGA-QG-A5YW -0.4000719
TCGA-AA-3966 0.3972211
TCGA-G4-6626 -0.6967276
TCGA-CM-5868 -0.6749153
TCGA-AA-3973 -0.6314876
# Nuevo conjunto de datos uniendo esta información al anterior
consensus <- consensus %>%
tibble::rownames_to_column(var = "ID")
merged_data <- aux %>%
inner_join(consensus, by = "ID")
write.xlsx(merged_data, file = "merged_data.xlsx")
# Figura 11
# Boxplots para infiltración
grupos_ag <- c("MSI_MSS", "CMS", "Estadío", "Grupo")
variables <- c("B_cells", "Cytotoxic_cells", "Dendritic_cells", "Eosinophils",
"Macrophages", "Mast_cells", "NK_cells", "Neutrophils",
"T_cells_CD4", "T_cells_CD8", "T_cells_gamma_delta",
"T_regulatory_cells", "Macrophages_M1", "Macrophages_M2",
"Endothelial", "Fibroblasts", "Monocytes", "Plasma_cells")
# Bucle para generar los gráficos para cada grupo
for (g in grupos_ag) {
data <- merged_data
# Gestionar los valores faltantes de CMS
if (g %in% c("CMS", "Estadío", "MSI_MSS", "Grupo")) {
data <- data %>% filter(.data[[g]] != "") %>% drop_na(.data[[g]])
}
# Función del gráfico
plots <- lapply(variables, function(var) {
ggplot(data, aes(x = .data[[g]], y = .data[[var]])) +
# Tipo de gráfico y diseño
geom_boxplot(color = "black", outlier.shape = NA) +
labs(y = paste(var), x = "") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5, size = 6, face = "bold"),
axis.text.x = element_text(size = 5),
axis.text.y = element_text(size = 6),
axis.title.y = element_text(size = 8),
panel.spacing = unit(1, "lines"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(colour = "white",
fill = NA, size = 0.3),
strip.background = element_rect(colour = "white", fill = "white")) +
# Añadir comparaciones de medias
stat_compare_means(method = "kruskal.test", label = "p.format", size = 3,
label.x = Inf, label.y = Inf, hjust = 1, vjust = 1)
})
grid.arrange(grobs = plots[1:9], ncol = 3, heights = rep(3, 3))
grid.arrange(grobs = plots[10:18], ncol = 3, heights = rep(3, 3))
}








# Figura 13
# Heatmap
# Genes y células de interés
genes <- c("HLA_A", "HLA_B", "HLA_C", "HLA_E", "HLA_F", "HLA_G", "HLA_DPA1",
"HLA_DPB1", "HLA_DPB2", "HLA_DQA1", "HLA_DQB1", "HLA_DRA",
"HLA_DRB1", "HLA_DRB5", "HLA_DOA", "HLA_DOB", "HLA_DMA", "HLA_DMB")
cell_types <- c("B_cells", "Cytotoxic_cells", "Dendritic_cells", "Eosinophils",
"Macrophages", "Mast_cells", "NK_cells", "Neutrophils",
"T_cells_CD4", "T_cells_CD8", "T_cells_gamma_delta",
"T_regulatory_cells", "Macrophages_M1","Macrophages_M2",
"Endothelial", "Fibroblasts", "Monocytes", "Plasma_cells")
# Crear las anotaciones
merged_data_filt <-
merged_data[!is.na(merged_data$Grupo) & merged_data$Grupo != "IV MSI",]
genes_data <- merged_data_filt[, genes]
cells_data <- merged_data_filt[, cell_types]
group_annotation <-
data.frame(Grupo = rep(c("I MSI", "I MSS", "II MSI", "II MSS", "III MSI", "III MSS", "IV MSS"),
each = length(genes)))
group_annotation$Gen <- rep(genes, times=7)
rownames(group_annotation) <- paste(group_annotation$Grupo,
group_annotation$Gen, sep = "_")
labels <- group_annotation$Gen
group_annotation <- subset(group_annotation, select = -Gen)
# Matriz de correlación por grupos
cor_list <- list()
for (group in unique(merged_data_filt$Grupo)){
group_data <- merged_data_filt[merged_data_filt$Grupo == group,]
genes_data_group <- group_data[, genes]
cells_data_group <- group_data[, cell_types]
cor_matrix_group <- cor(cells_data_group, genes_data_group, method="spearman")
cor_list[[group]] <- cor_matrix_group
}
cor_matrix_all <- do.call(cbind, cor_list)
cor_matrix_all[is.na(cor_matrix_all)] <- 0
colnames(cor_matrix_all) <- rownames(group_annotation)
# Definir colores para los grupos
group_colors <- list(Grupo = c("II MSS" = "#9E0142",
"II MSI" = "#5E4FA2",
"III MSS"= "#3288BD",
"III MSI"= "#CEB944",
"I MSS" = "#F46D43",
"I MSI" = "#67C2A4",
"IV MSS"= "#E6F598"))
# Crear el heatmap
pheatmap(cor_matrix_all,
annotation_col = group_annotation,
annotation_colors = group_colors,
labels_col = labels,
main = "Heatmap HLA e infiltracion",
color = colorRampPalette(c("blue", "white", "red"))(100),
show_colnames = T, show_rownames = T, fontsize = 8,
fontsize_legend = 4, border_color = NA)

# Nuevo conjunto de datos incluyendo la expresión de NCR1
# Divido la expresión en "Baja" y "Alta" dependiendo si es mayor o menor que la media
merged_dataNK <- merged_data %>%
select(ID, NK_cells, MSI_MSS, CMS, Estadío, Grupo, HLA_A, HLA_B, HLA_C, HLA_E,
HLA_F, HLA_G, HLA_DPA1, HLA_DPB1, HLA_DPB2, HLA_DQA1, HLA_DQB1, HLA_DRA,
HLA_DRB1, HLA_DRB5, HLA_DOA, HLA_DOB, HLA_DMA, HLA_DMB)
merged_dataNK <- merged_dataNK %>%
filter(!is.na(Grupo))
merged_dataNK <- merged_dataNK %>%
mutate(
HLA_Ae = ifelse(HLA_A > mean(HLA_A, na.rm = TRUE), "Alta", "Baja"),
HLA_Be = ifelse(HLA_B > mean(HLA_B, na.rm = TRUE), "Alta", "Baja"),
HLA_Ce = ifelse(HLA_C > mean(HLA_C, na.rm = TRUE), "Alta", "Baja"),
HLA_Ee = ifelse(HLA_E > mean(HLA_E, na.rm = TRUE), "Alta", "Baja"),
HLA_Fe = ifelse(HLA_F > mean(HLA_F, na.rm = TRUE), "Alta", "Baja"),
HLA_Ge = ifelse(HLA_G > mean(HLA_G, na.rm = TRUE), "Alta", "Baja"),
HLA_DPA1e = ifelse(HLA_DPA1 > mean(HLA_DPA1, na.rm = TRUE), "Alta", "Baja"),
HLA_DPB1e = ifelse(HLA_DPB1 > mean(HLA_DPB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DPB2e = ifelse(HLA_DPB2 > mean(HLA_DPB2, na.rm = TRUE), "Alta", "Baja"),
HLA_DQA1e = ifelse(HLA_DQA1 > mean(HLA_DQA1, na.rm = TRUE), "Alta", "Baja"),
HLA_DQB1e = ifelse(HLA_DQB1 > mean(HLA_DQB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DRAe = ifelse(HLA_DRA > mean(HLA_DRA, na.rm = TRUE), "Alta", "Baja"),
HLA_DRB1e = ifelse(HLA_DRB1 > mean(HLA_DRB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DRB5e = ifelse(HLA_DRB5 > mean(HLA_DRB5, na.rm = TRUE), "Alta", "Baja"),
HLA_DOAe = ifelse(HLA_DOA > mean(HLA_DOA, na.rm = TRUE), "Alta", "Baja"),
HLA_DOBe = ifelse(HLA_DOB > mean(HLA_DOB, na.rm = TRUE), "Alta", "Baja"),
HLA_DMAe = ifelse(HLA_DMA > mean(HLA_DMA, na.rm = TRUE), "Alta", "Baja"),
HLA_DMBe = ifelse(HLA_DMB > mean(HLA_DMB, na.rm = TRUE), "Alta", "Baja")
)
NK <- as.data.frame(t(ex.tcga.coad["NCR1", , drop = FALSE]))
NK$ID <- rownames(NK)
merged_dataNK <- merge(merged_dataNK, NK, by = "ID")
head(merged_dataNK)
ID NK_cells MSI_MSS CMS Estadío Grupo HLA_A HLA_B
1 TCGA-3L-AA1B -0.6359357 MSS CMS2 I I MSS 9.422095 9.423582
2 TCGA-4N-A93T -0.6773970 MSS <NA> III III MSS 10.087300 10.467855
3 TCGA-4T-AA8H -0.6143092 MSS CMS3 II II MSS 9.515499 9.616729
4 TCGA-5M-AAT4 -0.7421081 MSS <NA> IV IV MSS 9.738799 9.741454
5 TCGA-5M-AAT6 0.3561739 MSS CMS4 IV IV MSS 11.092478 11.579548
6 TCGA-5M-AATE -0.6622639 MSS CMS2 II II MSS 9.898062 9.582778
HLA_C HLA_E HLA_F HLA_G HLA_DPA1 HLA_DPB1 HLA_DPB2 HLA_DQA1
1 9.276550 8.952226 4.825786 2.128095 5.720875 5.421493 0.3545650 4.680516
2 10.042796 9.472137 6.607422 2.737795 3.511506 3.611692 0.2461040 2.505180
3 8.800078 8.621431 4.894027 2.380009 2.540424 3.134747 0.4168397 3.026942
4 9.339448 8.983008 4.889235 2.767273 3.658440 3.322952 0.1180939 2.941989
5 10.896130 9.903491 7.151282 4.115008 7.628497 7.211774 2.3664201 5.949196
6 9.335188 8.616671 5.209192 3.176099 4.565591 3.974722 0.2412300 2.868667
HLA_DQB1 HLA_DRA HLA_DRB1 HLA_DRB5 HLA_DOA HLA_DOB HLA_DMA HLA_DMB
1 5.665441 9.022051 8.437177 5.927275 3.316638 1.7563402 4.740857 3.289510
2 4.438073 8.056338 8.498138 6.859332 1.465295 1.2936059 5.042167 2.043240
3 2.896872 7.404167 6.608737 4.248558 1.256467 0.6273267 4.425493 1.646209
4 2.304365 6.963469 6.344530 3.971838 1.658737 0.7304008 5.028591 2.781360
5 7.789211 11.311419 10.803370 7.820975 4.998687 2.7762725 7.712894 5.156441
6 3.955024 7.663813 6.797785 4.956647 1.971442 0.1991226 4.486515 2.160920
HLA_Ae HLA_Be HLA_Ce HLA_Ee HLA_Fe HLA_Ge HLA_DPA1e HLA_DPB1e HLA_DPB2e
1 Baja Baja Baja Baja Baja Baja Baja Baja Baja
2 Baja Baja Baja Baja Alta Baja Baja Baja Baja
3 Baja Baja Baja Baja Baja Baja Baja Baja Baja
4 Baja Baja Baja Baja Baja Baja Baja Baja Baja
5 Alta Alta Alta Alta Alta Alta Alta Alta Alta
6 Baja Baja Baja Baja Baja Baja Baja Baja Baja
HLA_DQA1e HLA_DQB1e HLA_DRAe HLA_DRB1e HLA_DRB5e HLA_DOAe HLA_DOBe HLA_DMAe
1 Alta Alta Baja Baja Baja Alta Alta Baja
2 Baja Baja Baja Baja Baja Baja Baja Baja
3 Baja Baja Baja Baja Baja Baja Baja Baja
4 Baja Baja Baja Baja Baja Baja Baja Baja
5 Alta Alta Alta Alta Alta Alta Alta Alta
6 Baja Baja Baja Baja Baja Baja Baja Baja
HLA_DMBe NCR1
1 Baja 0.1604039
2 Baja 0.0000000
3 Baja 0.1908045
4 Baja 0.1003049
5 Alta 0.6564506
6 Baja 0.0000000
# Figura 15A, C y E
# Función para comparar la expresión de NCR1 para un gen específico en función de las categorías "Baja" y "Alta" mediante boxplot para cada Grupo
NK_HLA <- function(dataset, gen){
ggplot(dataset, aes(x = .data[[gen]], y = NCR1)) +
geom_boxplot(outlier.shape = NA, alpha = 0.6) +
geom_jitter(width = 0.2, height = 0, alpha = 0.5) +
facet_wrap(~ Grupo) +
stat_compare_means(method = "wilcox.test", label = "p.format", size = 3,
label.x = Inf, label.y = Inf, hjust = 1, vjust = 1) +
theme_minimal() +
labs(
title = paste("Comparación de la expresión de NCR1 entre las categorías de",
substr(gen, 1, nchar(gen) - 1)),
x = NULL,
y = "log2(NCR1)"
) +
theme(legend.position = "none")
}
exp_HLA <- c("HLA_Ae", "HLA_Be", "HLA_Ce", "HLA_Ee", "HLA_Fe", "HLA_Ge",
"HLA_DPA1e", "HLA_DPB1e", "HLA_DPB2e", "HLA_DQA1e",
"HLA_DQB1e", "HLA_DRAe", "HLA_DRB1e", "HLA_DRB5e", "HLA_DOAe",
"HLA_DOBe", "HLA_DMAe", "HLA_DMBe")
for (gen in exp_HLA){
print(NK_HLA(merged_dataNK, gen))
}


















# Figura 15B, D y F
# Función para comparar la infiltración de NK para un gen específico en función de las categorías "Baja" y "Alta" mediante boxplot para cada Grupo
NK_HLA <- function(dataset, gen){
ggplot(dataset, aes(x = .data[[gen]], y = NK_cells)) +
geom_boxplot(outlier.shape = NA, alpha = 0.6) +
geom_jitter(width = 0.2, height = 0, alpha = 0.5) +
facet_wrap(~ Grupo) +
stat_compare_means(method = "wilcox.test", label = "p.format", size = 3,
label.x = Inf, label.y = Inf, hjust = 1, vjust = 1) +
theme_minimal() +
labs(
title = paste("Comparación de la expresión de NK_cells
entre las categorías de",
substr(gen, 1, nchar(gen) - 1)),
x = NULL,
y = "NK_cells"
) +
theme(legend.position = "none")
}
for (gen in exp_HLA){
print(NK_HLA(merged_dataNK, gen))
}


















Análisis de supervivencia
merged_data <- read_excel("merged_data.xlsx")
# Variables edad, evento, tiempo
clin <- merged_data %>%
mutate(
# Edad
edad = abs(days_to_birth) / 365.25,
# Evento: 1 si hay una fecha de muerte, 0 si no
evento = ifelse(!is.na(days_to_death), 1, 0),
# Tiempo: usar días hasta la muerte si hay, si no usar seguimiento
tiempo = ifelse(!is.na(days_to_death), days_to_death,
ifelse(!is.na(days_to_last_followup), days_to_last_followup,
days_to_last_known_alive))
)
clin <- clin %>% drop_na(tiempo)
# Modelos de Cox simples
genes <- c("HLA_A", "HLA_B", "HLA_C", "HLA_E", "HLA_F", "HLA_G",
"HLA_DPA1", "HLA_DPB1", "HLA_DPB2", "HLA_DQA1",
"HLA_DQB1", "HLA_DRA", "HLA_DRB1", "HLA_DRB5", "HLA_DOA",
"HLA_DOB", "HLA_DMA", "HLA_DMB")
for (gene in genes) {
# Modelo de Cox para cada gen como variable continua
formula <- as.formula(paste("Surv(tiempo, evento) ~", gene))
cox_model <- coxph(formula, data = clin)
# Mostrar resumen del modelo
cat("Modelo de Cox para", gene, "\n")
print(summary(cox_model))
cat("\n--------------------------------\n")
}
Modelo de Cox para HLA_A
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_A -0.1453 0.8648 0.1533 -0.948 0.343
exp(coef) exp(-coef) lower .95 upper .95
HLA_A 0.8648 1.156 0.6404 1.168
Concordance= 0.536 (se = 0.038 )
Likelihood ratio test= 0.88 on 1 df, p=0.3
Wald test = 0.9 on 1 df, p=0.3
Score (logrank) test = 0.9 on 1 df, p=0.3
--------------------------------
Modelo de Cox para HLA_B
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_B -0.1734 0.8408 0.1299 -1.335 0.182
exp(coef) exp(-coef) lower .95 upper .95
HLA_B 0.8408 1.189 0.6518 1.085
Concordance= 0.566 (se = 0.041 )
Likelihood ratio test= 1.69 on 1 df, p=0.2
Wald test = 1.78 on 1 df, p=0.2
Score (logrank) test = 1.78 on 1 df, p=0.2
--------------------------------
Modelo de Cox para HLA_C
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_C -0.3310 0.7182 0.1531 -2.162 0.0306 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_C 0.7182 1.392 0.532 0.9695
Concordance= 0.566 (se = 0.04 )
Likelihood ratio test= 4.61 on 1 df, p=0.03
Wald test = 4.67 on 1 df, p=0.03
Score (logrank) test = 4.68 on 1 df, p=0.03
--------------------------------
Modelo de Cox para HLA_E
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_E -0.2731 0.7610 0.2025 -1.349 0.177
exp(coef) exp(-coef) lower .95 upper .95
HLA_E 0.761 1.314 0.5118 1.132
Concordance= 0.565 (se = 0.04 )
Likelihood ratio test= 1.8 on 1 df, p=0.2
Wald test = 1.82 on 1 df, p=0.2
Score (logrank) test = 1.82 on 1 df, p=0.2
--------------------------------
Modelo de Cox para HLA_F
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_F -0.1124 0.8937 0.1114 -1.009 0.313
exp(coef) exp(-coef) lower .95 upper .95
HLA_F 0.8937 1.119 0.7185 1.112
Concordance= 0.542 (se = 0.042 )
Likelihood ratio test= 0.99 on 1 df, p=0.3
Wald test = 1.02 on 1 df, p=0.3
Score (logrank) test = 1.02 on 1 df, p=0.3
--------------------------------
Modelo de Cox para HLA_G
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_G -0.0530 0.9484 0.1181 -0.449 0.654
exp(coef) exp(-coef) lower .95 upper .95
HLA_G 0.9484 1.054 0.7524 1.195
Concordance= 0.49 (se = 0.039 )
Likelihood ratio test= 0.2 on 1 df, p=0.7
Wald test = 0.2 on 1 df, p=0.7
Score (logrank) test = 0.2 on 1 df, p=0.7
--------------------------------
Modelo de Cox para HLA_DPA1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPA1 -0.14713 0.86318 0.08806 -1.671 0.0948 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPA1 0.8632 1.159 0.7263 1.026
Concordance= 0.586 (se = 0.044 )
Likelihood ratio test= 2.78 on 1 df, p=0.1
Wald test = 2.79 on 1 df, p=0.09
Score (logrank) test = 2.8 on 1 df, p=0.09
--------------------------------
Modelo de Cox para HLA_DPB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPB1 -0.2182 0.8040 0.1007 -2.167 0.0302 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPB1 0.804 1.244 0.66 0.9793
Concordance= 0.604 (se = 0.042 )
Likelihood ratio test= 4.71 on 1 df, p=0.03
Wald test = 4.7 on 1 df, p=0.03
Score (logrank) test = 4.71 on 1 df, p=0.03
--------------------------------
Modelo de Cox para HLA_DPB2
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPB2 -0.4841 0.6162 0.2206 -2.195 0.0282 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPB2 0.6162 1.623 0.3999 0.9495
Concordance= 0.628 (se = 0.041 )
Likelihood ratio test= 5.92 on 1 df, p=0.01
Wald test = 4.82 on 1 df, p=0.03
Score (logrank) test = 4.94 on 1 df, p=0.03
--------------------------------
Modelo de Cox para HLA_DQA1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DQA1 -0.13665 0.87227 0.09303 -1.469 0.142
exp(coef) exp(-coef) lower .95 upper .95
HLA_DQA1 0.8723 1.146 0.7269 1.047
Concordance= 0.584 (se = 0.046 )
Likelihood ratio test= 2.18 on 1 df, p=0.1
Wald test = 2.16 on 1 df, p=0.1
Score (logrank) test = 2.16 on 1 df, p=0.1
--------------------------------
Modelo de Cox para HLA_DQB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DQB1 -0.07489 0.92784 0.08618 -0.869 0.385
exp(coef) exp(-coef) lower .95 upper .95
HLA_DQB1 0.9278 1.078 0.7836 1.099
Concordance= 0.537 (se = 0.045 )
Likelihood ratio test= 0.76 on 1 df, p=0.4
Wald test = 0.76 on 1 df, p=0.4
Score (logrank) test = 0.76 on 1 df, p=0.4
--------------------------------
Modelo de Cox para HLA_DRA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRA -0.19709 0.82112 0.08723 -2.259 0.0239 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRA 0.8211 1.218 0.6921 0.9742
Concordance= 0.607 (se = 0.042 )
Likelihood ratio test= 5.07 on 1 df, p=0.02
Wald test = 5.11 on 1 df, p=0.02
Score (logrank) test = 5.13 on 1 df, p=0.02
--------------------------------
Modelo de Cox para HLA_DRB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRB1 -0.17850 0.83653 0.09225 -1.935 0.053 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRB1 0.8365 1.195 0.6982 1.002
Concordance= 0.591 (se = 0.043 )
Likelihood ratio test= 3.7 on 1 df, p=0.05
Wald test = 3.74 on 1 df, p=0.05
Score (logrank) test = 3.74 on 1 df, p=0.05
--------------------------------
Modelo de Cox para HLA_DRB5
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRB5 -0.20288 0.81637 0.07945 -2.553 0.0107 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRB5 0.8164 1.225 0.6986 0.9539
Concordance= 0.627 (se = 0.04 )
Likelihood ratio test= 6.66 on 1 df, p=0.01
Wald test = 6.52 on 1 df, p=0.01
Score (logrank) test = 6.52 on 1 df, p=0.01
--------------------------------
Modelo de Cox para HLA_DOA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DOA -0.1393 0.8699 0.1052 -1.325 0.185
exp(coef) exp(-coef) lower .95 upper .95
HLA_DOA 0.8699 1.15 0.7079 1.069
Concordance= 0.58 (se = 0.046 )
Likelihood ratio test= 1.83 on 1 df, p=0.2
Wald test = 1.75 on 1 df, p=0.2
Score (logrank) test = 1.75 on 1 df, p=0.2
--------------------------------
Modelo de Cox para HLA_DOB
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DOB 0.1198 1.1273 0.1817 0.659 0.51
exp(coef) exp(-coef) lower .95 upper .95
HLA_DOB 1.127 0.8871 0.7896 1.609
Concordance= 0.491 (se = 0.045 )
Likelihood ratio test= 0.43 on 1 df, p=0.5
Wald test = 0.43 on 1 df, p=0.5
Score (logrank) test = 0.43 on 1 df, p=0.5
--------------------------------
Modelo de Cox para HLA_DMA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DMA -0.1590 0.8530 0.1222 -1.301 0.193
exp(coef) exp(-coef) lower .95 upper .95
HLA_DMA 0.853 1.172 0.6714 1.084
Concordance= 0.578 (se = 0.042 )
Likelihood ratio test= 1.74 on 1 df, p=0.2
Wald test = 1.69 on 1 df, p=0.2
Score (logrank) test = 1.69 on 1 df, p=0.2
--------------------------------
Modelo de Cox para HLA_DMB
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DMB -0.1994 0.8192 0.1146 -1.74 0.0818 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DMB 0.8192 1.221 0.6544 1.025
Concordance= 0.581 (se = 0.042 )
Likelihood ratio test= 3.09 on 1 df, p=0.08
Wald test = 3.03 on 1 df, p=0.08
Score (logrank) test = 3.04 on 1 df, p=0.08
--------------------------------
# Modelos de Cox con interacción
for (gene in genes) {
# Modelo de Cox para cada gen como variable continua
formula <- as.formula(paste("Surv(tiempo, evento) ~", gene,"*NK_cells"))
cox_model <- coxph(formula, data = clin)
# Mostrar resumen del modelo
cat("Modelo de Cox para", gene, "\n")
print(summary(cox_model))
cat("\n--------------------------------\n")
}
Modelo de Cox para HLA_A
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_A -0.2869 0.7506 0.2543 -1.128 0.259
NK_cells 4.1974 66.5158 5.2173 0.805 0.421
HLA_A:NK_cells -0.4364 0.6464 0.5045 -0.865 0.387
exp(coef) exp(-coef) lower .95 upper .95
HLA_A 0.7506 1.33223 0.45604 1.235e+00
NK_cells 66.5158 0.01503 0.00241 1.836e+06
HLA_A:NK_cells 0.6464 1.54712 0.24046 1.737e+00
Concordance= 0.537 (se = 0.041 )
Likelihood ratio test= 2.2 on 3 df, p=0.5
Wald test = 1.89 on 3 df, p=0.6
Score (logrank) test = 1.9 on 3 df, p=0.6
--------------------------------
Modelo de Cox para HLA_B
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_B -0.18770 0.82886 0.24273 -0.773 0.439
NK_cells 0.67969 1.97326 4.55260 0.149 0.881
HLA_B:NK_cells -0.08643 0.91720 0.42107 -0.205 0.837
exp(coef) exp(-coef) lower .95 upper .95
HLA_B 0.8289 1.2065 0.515079 1.334
NK_cells 1.9733 0.5068 0.000263 14803.563
HLA_B:NK_cells 0.9172 1.0903 0.401837 2.094
Concordance= 0.562 (se = 0.044 )
Likelihood ratio test= 2.04 on 3 df, p=0.6
Wald test = 2.01 on 3 df, p=0.6
Score (logrank) test = 2.04 on 3 df, p=0.6
--------------------------------
Modelo de Cox para HLA_C
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_C -0.3357 0.7148 0.2710 -1.239 0.215
NK_cells 0.1653 1.1798 4.9792 0.033 0.974
HLA_C:NK_cells -0.0175 0.9827 0.4653 -0.038 0.970
exp(coef) exp(-coef) lower .95 upper .95
HLA_C 0.7148 1.3989 4.202e-01 1.216
NK_cells 1.1798 0.8476 6.816e-05 20421.270
HLA_C:NK_cells 0.9827 1.0177 3.948e-01 2.446
Concordance= 0.566 (se = 0.04 )
Likelihood ratio test= 4.61 on 3 df, p=0.2
Wald test = 4.65 on 3 df, p=0.2
Score (logrank) test = 4.72 on 3 df, p=0.2
--------------------------------
Modelo de Cox para HLA_E
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_E -0.12700 0.88073 0.38501 -0.330 0.741
NK_cells -2.33390 0.09692 6.07498 -0.384 0.701
HLA_E:NK_cells 0.22079 1.24706 0.61137 0.361 0.718
exp(coef) exp(-coef) lower .95 upper .95
HLA_E 0.88073 1.1354 4.141e-01 1.873
NK_cells 0.09692 10.3181 6.537e-07 14369.173
HLA_E:NK_cells 1.24706 0.8019 3.763e-01 4.133
Concordance= 0.575 (se = 0.043 )
Likelihood ratio test= 2.01 on 3 df, p=0.6
Wald test = 2.1 on 3 df, p=0.6
Score (logrank) test = 2.11 on 3 df, p=0.5
--------------------------------
Modelo de Cox para HLA_F
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_F -0.12185 0.88528 0.22274 -0.547 0.584
NK_cells 0.22179 1.24831 2.40061 0.092 0.926
HLA_F:NK_cells -0.08198 0.92129 0.37743 -0.217 0.828
exp(coef) exp(-coef) lower .95 upper .95
HLA_F 0.8853 1.1296 0.5721 1.37
NK_cells 1.2483 0.8011 0.0113 137.95
HLA_F:NK_cells 0.9213 1.0854 0.4397 1.93
Concordance= 0.547 (se = 0.045 )
Likelihood ratio test= 1.42 on 3 df, p=0.7
Wald test = 1.35 on 3 df, p=0.7
Score (logrank) test = 1.37 on 3 df, p=0.7
--------------------------------
Modelo de Cox para HLA_G
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_G -0.1246 0.8828 0.2002 -0.623 0.534
NK_cells 0.5024 1.6527 1.4401 0.349 0.727
HLA_G:NK_cells -0.2557 0.7744 0.3958 -0.646 0.518
exp(coef) exp(-coef) lower .95 upper .95
HLA_G 0.8828 1.1327 0.59631 1.307
NK_cells 1.6527 0.6051 0.09827 27.795
HLA_G:NK_cells 0.7744 1.2914 0.35651 1.682
Concordance= 0.587 (se = 0.046 )
Likelihood ratio test= 1.42 on 3 df, p=0.7
Wald test = 1.21 on 3 df, p=0.8
Score (logrank) test = 1.23 on 3 df, p=0.7
--------------------------------
Modelo de Cox para HLA_DPA1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPA1 0.08895 1.09303 0.19099 0.466 0.6414
NK_cells -3.12032 0.04414 1.87412 -1.665 0.0959 .
HLA_DPA1:NK_cells 0.46347 1.58959 0.23154 2.002 0.0453 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPA1 1.09303 0.9149 0.751726 1.589
NK_cells 0.04414 22.6537 0.001121 1.738
HLA_DPA1:NK_cells 1.58959 0.6291 1.009713 2.502
Concordance= 0.617 (se = 0.043 )
Likelihood ratio test= 6.54 on 3 df, p=0.09
Wald test = 7.11 on 3 df, p=0.07
Score (logrank) test = 7.29 on 3 df, p=0.06
--------------------------------
Modelo de Cox para HLA_DPB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPB1 -0.1126 0.8935 0.2179 -0.517 0.605
NK_cells -1.8247 0.1613 1.9328 -0.944 0.345
HLA_DPB1:NK_cells 0.3724 1.4512 0.2647 1.407 0.160
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPB1 0.8935 1.1192 0.58294 1.370
NK_cells 0.1613 6.2010 0.00365 7.124
HLA_DPB1:NK_cells 1.4512 0.6891 0.86376 2.438
Concordance= 0.629 (se = 0.04 )
Likelihood ratio test= 7.49 on 3 df, p=0.06
Wald test = 8.08 on 3 df, p=0.04
Score (logrank) test = 8.24 on 3 df, p=0.04
--------------------------------
Modelo de Cox para HLA_DPB2
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DPB2 -0.4428 0.6422 0.2558 -1.731 0.08349 .
NK_cells -0.7127 0.4903 0.6418 -1.110 0.26681
HLA_DPB2:NK_cells 1.1805 3.2561 0.4570 2.583 0.00979 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DPB2 0.6422 1.5571 0.3890 1.060
NK_cells 0.4903 2.0395 0.1394 1.725
HLA_DPB2:NK_cells 3.2561 0.3071 1.3295 7.975
Concordance= 0.65 (se = 0.038 )
Likelihood ratio test= 12.13 on 3 df, p=0.007
Wald test = 9.84 on 3 df, p=0.02
Score (logrank) test = 10.05 on 3 df, p=0.02
--------------------------------
Modelo de Cox para HLA_DQA1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DQA1 0.08199 1.08544 0.17615 0.465 0.6416
NK_cells -2.46253 0.08522 1.41026 -1.746 0.0808 .
HLA_DQA1:NK_cells 0.45773 1.58048 0.20637 2.218 0.0266 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DQA1 1.08544 0.9213 0.768542 1.533
NK_cells 0.08522 11.7345 0.005372 1.352
HLA_DQA1:NK_cells 1.58048 0.6327 1.054702 2.368
Concordance= 0.633 (se = 0.042 )
Likelihood ratio test= 6.36 on 3 df, p=0.1
Wald test = 6.9 on 3 df, p=0.08
Score (logrank) test = 7.07 on 3 df, p=0.07
--------------------------------
Modelo de Cox para HLA_DQB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DQB1 0.21953 1.24549 0.16832 1.304 0.1922
NK_cells -3.33412 0.03565 1.59110 -2.095 0.0361 *
HLA_DQB1:NK_cells 0.48711 1.62761 0.22258 2.188 0.0286 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DQB1 1.24549 0.8029 0.895494 1.732
NK_cells 0.03565 28.0537 0.001576 0.806
HLA_DQB1:NK_cells 1.62761 0.6144 1.052175 2.518
Concordance= 0.598 (se = 0.044 )
Likelihood ratio test= 5.46 on 3 df, p=0.1
Wald test = 5.58 on 3 df, p=0.1
Score (logrank) test = 5.66 on 3 df, p=0.1
--------------------------------
Modelo de Cox para HLA_DRA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRA -0.1741 0.8402 0.2070 -0.841 0.400
NK_cells -1.6298 0.1960 2.8682 -0.568 0.570
HLA_DRA:NK_cells 0.2172 1.2426 0.2521 0.861 0.389
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRA 0.8402 1.1902 0.5600076 1.261
NK_cells 0.1960 5.1030 0.0007091 54.153
HLA_DRA:NK_cells 1.2426 0.8048 0.7580497 2.037
Concordance= 0.635 (se = 0.04 )
Likelihood ratio test= 7.03 on 3 df, p=0.07
Wald test = 7.61 on 3 df, p=0.05
Score (logrank) test = 7.74 on 3 df, p=0.05
--------------------------------
Modelo de Cox para HLA_DRB1
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRB1 -0.09347 0.91077 0.20267 -0.461 0.645
NK_cells -1.98856 0.13689 2.66458 -0.746 0.455
HLA_DRB1:NK_cells 0.23644 1.26673 0.25615 0.923 0.356
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRB1 0.9108 1.0980 0.6121977 1.355
NK_cells 0.1369 7.3050 0.0007384 25.379
HLA_DRB1:NK_cells 1.2667 0.7894 0.7667461 2.093
Concordance= 0.609 (se = 0.042 )
Likelihood ratio test= 4.84 on 3 df, p=0.2
Wald test = 5.3 on 3 df, p=0.2
Score (logrank) test = 5.38 on 3 df, p=0.1
--------------------------------
Modelo de Cox para HLA_DRB5
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DRB5 -0.267407 0.765362 0.150752 -1.774 0.0761 .
NK_cells 0.540251 1.716437 1.752262 0.308 0.7578
HLA_DRB5:NK_cells 0.002244 1.002246 0.224416 0.010 0.9920
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DRB5 0.7654 1.3066 0.56957 1.028
NK_cells 1.7164 0.5826 0.05535 53.230
HLA_DRB5:NK_cells 1.0022 0.9978 0.64558 1.556
Concordance= 0.631 (se = 0.039 )
Likelihood ratio test= 7.59 on 3 df, p=0.06
Wald test = 7.52 on 3 df, p=0.06
Score (logrank) test = 7.63 on 3 df, p=0.05
--------------------------------
Modelo de Cox para HLA_DOA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DOA -0.00804 0.99199 0.15570 -0.052 0.9588
NK_cells -1.58218 0.20553 0.96877 -1.633 0.1024
HLA_DOA:NK_cells 0.43095 1.53871 0.18104 2.380 0.0173 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DOA 0.9920 1.0081 0.73110 1.346
NK_cells 0.2055 4.8656 0.03078 1.372
HLA_DOA:NK_cells 1.5387 0.6499 1.07909 2.194
Concordance= 0.604 (se = 0.047 )
Likelihood ratio test= 6.21 on 3 df, p=0.1
Wald test = 6.7 on 3 df, p=0.08
Score (logrank) test = 6.88 on 3 df, p=0.08
--------------------------------
Modelo de Cox para HLA_DOB
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DOB 0.76873 2.15701 0.26284 2.925 0.00345 **
NK_cells -3.05793 0.04699 0.99866 -3.062 0.00220 **
HLA_DOB:NK_cells 1.17788 3.24750 0.38317 3.074 0.00211 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DOB 2.15701 0.4636 1.288627 3.6106
NK_cells 0.04699 21.2834 0.006636 0.3327
HLA_DOB:NK_cells 3.24750 0.3079 1.532478 6.8818
Concordance= 0.659 (se = 0.041 )
Likelihood ratio test= 10.41 on 3 df, p=0.02
Wald test = 11.32 on 3 df, p=0.01
Score (logrank) test = 11.25 on 3 df, p=0.01
--------------------------------
Modelo de Cox para HLA_DMA
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DMA 0.03141 1.03191 0.20902 0.150 0.8805
NK_cells -2.71653 0.06610 1.81435 -1.497 0.1343
HLA_DMA:NK_cells 0.43871 1.55071 0.26634 1.647 0.0995 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DMA 1.0319 0.9691 0.685055 1.554
NK_cells 0.0661 15.1277 0.001887 2.315
HLA_DMA:NK_cells 1.5507 0.6449 0.920079 2.614
Concordance= 0.587 (se = 0.045 )
Likelihood ratio test= 4.11 on 3 df, p=0.3
Wald test = 4.32 on 3 df, p=0.2
Score (logrank) test = 4.38 on 3 df, p=0.2
--------------------------------
Modelo de Cox para HLA_DMB
Call:
coxph(formula = formula, data = clin)
n= 391, number of events= 55
coef exp(coef) se(coef) z Pr(>|z|)
HLA_DMB -0.01482 0.98529 0.24508 -0.060 0.9518
NK_cells -1.82214 0.16168 1.57340 -1.158 0.2468
HLA_DMB:NK_cells 0.47928 1.61490 0.29065 1.649 0.0991 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
HLA_DMB 0.9853 1.0149 0.609466 1.593
NK_cells 0.1617 6.1851 0.007402 3.531
HLA_DMB:NK_cells 1.6149 0.6192 0.913585 2.855
Concordance= 0.612 (se = 0.041 )
Likelihood ratio test= 5.92 on 3 df, p=0.1
Wald test = 6.3 on 3 df, p=0.1
Score (logrank) test = 6.43 on 3 df, p=0.09
--------------------------------
# Estratificar por mediana
clin <- clin %>%
mutate(
HLA_Ae = ifelse(HLA_A > median(HLA_A, na.rm = TRUE), "Alta", "Baja"),
HLA_Be = ifelse(HLA_B > median(HLA_B, na.rm = TRUE), "Alta", "Baja"),
HLA_Ce = ifelse(HLA_C > median(HLA_C, na.rm = TRUE), "Alta", "Baja"),
HLA_Ee = ifelse(HLA_E > median(HLA_E, na.rm = TRUE), "Alta", "Baja"),
HLA_Fe = ifelse(HLA_F > median(HLA_F, na.rm = TRUE), "Alta", "Baja"),
HLA_Ge = ifelse(HLA_G > median(HLA_G, na.rm = TRUE), "Alta", "Baja"),
HLA_DPA1e = ifelse(HLA_DPA1 > median(HLA_DPA1, na.rm = TRUE), "Alta", "Baja"),
HLA_DPB1e = ifelse(HLA_DPB1 > median(HLA_DPB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DPB2e = ifelse(HLA_DPB2 > median(HLA_DPB2, na.rm = TRUE), "Alta", "Baja"),
HLA_DQA1e = ifelse(HLA_DQA1 > median(HLA_DQA1, na.rm = TRUE), "Alta", "Baja"),
HLA_DQB1e = ifelse(HLA_DQB1 > median(HLA_DQB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DRAe = ifelse(HLA_DRA > median(HLA_DRA, na.rm = TRUE), "Alta", "Baja"),
HLA_DRB1e = ifelse(HLA_DRB1 > median(HLA_DRB1, na.rm = TRUE), "Alta", "Baja"),
HLA_DRB5e = ifelse(HLA_DRB5 > median(HLA_DRB5, na.rm = TRUE), "Alta", "Baja"),
HLA_DOAe = ifelse(HLA_DOA > median(HLA_DOA, na.rm = TRUE), "Alta", "Baja"),
HLA_DOBe = ifelse(HLA_DOB > median(HLA_DOB, na.rm = TRUE), "Alta", "Baja"),
HLA_DMAe = ifelse(HLA_DMA > median(HLA_DMA, na.rm = TRUE), "Alta", "Baja"),
HLA_DMBe = ifelse(HLA_DMB > median(HLA_DMB, na.rm = TRUE), "Alta", "Baja")
)
# Curvas Kaplan-Meier
genes <- c("HLA_Ae", "HLA_Be", "HLA_Ce", "HLA_Ee", "HLA_Fe", "HLA_Ge",
"HLA_DPA1e", "HLA_DPB1e", "HLA_DPB2e", "HLA_DQA1e",
"HLA_DQB1e", "HLA_DRAe", "HLA_DRB1e", "HLA_DRB5e", "HLA_DOAe",
"HLA_DOBe", "HLA_DMAe", "HLA_DMBe")
for (gene in genes) {
# Ajuste del modelo de supervivencia
fit <- survfit(as.formula(paste("Surv(tiempo, evento) ~", gene)), data = clin)
# Gráfico
plot <- ggsurvplot(fit,
data = clin,
pval = TRUE,
ggtheme = theme_minimal(),
title = "Curvas supervivencia estimador Kaplan-Meier",
subtitle = gene,
legend.labs = c("Alta", "Baja"),
risk.table.y.text.col = TRUE,
risk.table.y.text = FALSE)
print(plot)
}

















