Heatmap y PCA

Cargar paquetes

# Install pacman only when it is missing. requireNamespace() checks whether the
# package is available without attaching it and without the silent FALSE-on-
# failure loading semantics of require(); the library() call further down
# attaches it.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

## Loading required package: pacman

library("pacman")

p_load("pheatmap", #para crear los heatmaps
       "RColorBrewer",
       "ggplot2",
       "dplyr",
       "vroom",
       "FactoMineR", # para el PCA
       "factoextra",
       "tibble")

Llamar base de datos

# Download the qPCR Ct table (one row per gene, one column per sample) straight
# from the GitHub repository.
Datos_PCR <- read.csv(
  file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Heatmaps/refs/heads/main/miRNA_qPCR_Ct_Data_20g.csv"
)

# Quick look at the first rows to confirm the import worked.
head(Datos_PCR)

##    Gene Condition Control_1 Control_2 Control_3 Control_4 Tratamiento_1
## 1 Gen_1    Target  26.43952  26.76982  28.55871  27.07051      30.12929
## 2 Gen_2    Target  29.31315  29.55434  31.22408  30.35981      27.40077
## 3 Gen_3    Target  27.49785  25.03338  27.70136  26.52721      28.93218
## 4 Gen_4    Target  29.37496  28.31331  30.83779  30.15337      25.86186
## 5 Gen_5    Target  27.89513  27.87813  27.82158  27.68864      30.55392
## 6 Gen_6    Target  29.30529  29.79208  28.73460  32.16896      28.20796
##   Tratamiento_2 Tratamiento_3 Tratamiento_4
## 1      31.71506      30.46092      28.73494
## 2      27.11068      26.44416      28.78691
## 3      29.78203      28.97400      29.27111
## 4      28.25381      27.42646      26.70493
## 5      29.93809      29.69404      29.61953
## 6      25.87689      26.59712      26.53334

Sacar genes de referencia para usarlos como comparativos

library(dplyr)

# Average the Ct of the reference genes within each sample (column). The result
# is a one-row data frame holding one mean per control/treatment sample.
# The identifier columns are dropped by name rather than by position so the
# step does not silently break if the column order changes.
# na.rm = TRUE is supplied through an explicit function because passing extra
# arguments via `...` in across() is deprecated since dplyr 1.1.0.
Ref_gen_prom <- Datos_PCR %>%
  filter(Condition == "Reference") %>%
  select(-Gene, -Condition) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))

# Preview the per-sample reference-gene means (head() prints at most six rows).
head(Ref_gen_prom)

##   Control_1 Control_2 Control_3 Control_4 Tratamiento_1 Tratamiento_2
## 1  25.94169  24.48383   25.2429   24.9541      24.78032      25.31373
##   Tratamiento_3 Tratamiento_4
## 1      25.58023      24.40192

Calcular DCT (restarle a cada valor de Ct el promedio de los genes de referencia; el código guarda el resultado con el signo invertido, es decir, −ΔCt)

# Compute the sign-inverted delta-Ct for every target gene: for each sample
# column, subtract that sample's reference mean from the Ct and negate the
# result. Only the gene name and the new DCT_* columns are kept.
DCT <- Datos_PCR %>%
  filter(Condition == "Target") %>%
  select(-Condition) %>%
  mutate(
    across(
      .cols = -Gene,
      # cur_column() gives the name of the sample being processed, which is
      # used to look up the matching reference mean (a single value).
      .fns = function(ct) -(ct - Ref_gen_prom[[cur_column()]][[1]]),
      .names = "DCT_{.col}"
    )
  ) %>%
  select(Gene, starts_with("DCT_"))

head(DCT)

##    Gene DCT_Control_1 DCT_Control_2 DCT_Control_3 DCT_Control_4
## 1 Gen_1    -0.4978334    -2.2859901     -3.315805     -2.116405
## 2 Gen_2    -3.3714562    -5.0705056     -5.981179     -5.405711
## 3 Gen_3    -1.5561595    -0.5495505     -2.458453     -1.573106
## 4 Gen_4    -3.4332697    -3.8294743     -5.594884     -5.199270
## 5 Gen_5    -1.9534347    -3.3943011     -2.578678     -2.734537
## 6 Gen_6    -3.3636020    -5.3082503     -3.491701     -7.214853
##   DCT_Tratamiento_1 DCT_Tratamiento_2 DCT_Tratamiento_3 DCT_Tratamiento_4
## 1         -5.348970        -6.4013369        -4.8806912         -4.333016
## 2         -2.620453        -1.7969546        -0.8639338         -4.384990
## 3         -4.151858        -4.4682970        -3.3937705         -4.869186
## 4         -1.081545        -2.9400868        -1.8462392         -2.303005
## 5         -5.773600        -4.6243602        -4.1138123         -5.217606
## 6         -3.427644        -0.5631633        -1.0168901         -2.131421

Escalar los datos (expresar cada valor como el número de desviaciones estándar que se aleja de la media de su columna)

# Move the gene names into the row names so that only numeric columns remain,
# then z-score every column (subtract the column mean, divide by the column
# standard deviation). scale() returns a matrix, so it is converted back to a
# data frame.
miRNA_escalado <- as.data.frame(
  scale(
    column_to_rownames(DCT, var = "Gene"),
    center = TRUE,
    scale = TRUE
  )
)

head(miRNA_escalado)

##       DCT_Control_1 DCT_Control_2 DCT_Control_3 DCT_Control_4 DCT_Tratamiento_1
## Gen_1     1.0273615    0.97484827    0.12356681     0.8860403       -0.81362577
## Gen_2    -0.6633067   -0.71178341   -1.42820012    -1.0075153        0.81618925
## Gen_3     0.4047056    2.02664135    0.62271304     1.1988019       -0.09855941
## Gen_4    -0.6996741    0.03993184   -1.20330127    -0.8886735        1.73541997
## Gen_5     0.1709725    0.30352416    0.55271855     0.5302002       -1.06726846
## Gen_6    -0.6586858   -0.85578969    0.02116144    -2.0489848        0.33403296
##       DCT_Tratamiento_2 DCT_Tratamiento_3 DCT_Tratamiento_4
## Gen_1        -1.5553515        -1.2834728        -0.2403860
## Gen_2         0.6936251         1.1523470        -0.2686596
## Gen_3        -0.6111724        -0.3817826        -0.5320576
## Gen_4         0.1352706         0.5566628         0.8639210
## Gen_5        -0.6874003        -0.8184264        -0.7215950
## Gen_6         1.2962613         1.0595921         0.9572610

Definir los colores del heatmap

# Three-anchor palette (dark blue -> near white -> magenta) interpolated into
# 100 colour steps for the heatmap.
paleta_colores <- colorRampPalette(
  colors = c("#333352", "#F7F5F5", "#821E50")
)(100)

Construir el heatmap

# Draw the heatmap of the scaled values with hierarchical clustering on both
# rows (genes, taken from the row names) and columns (samples).
# TRUE is spelled out instead of T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
# NOTE(review): `fontface_row` is not among the commonly documented pheatmap
# arguments; confirm the installed version honours it.
heatmap <- pheatmap(miRNA_escalado,
                    color = paleta_colores,
                    cluster_rows = TRUE,
                    cluster_cols = TRUE,
                    show_rownames = TRUE,
                    show_colnames = TRUE,
                    fontsize_row = 8,
                    fontsize_col = 8,
                    border_color = "black",
                    main = "Heatmap de expresión de miRNAs",
                    fontface_row = "bold")
# Display the stored pheatmap object.
heatmap

Análisis de Componentes Principales

Calcular los PC

# PCA across samples: the table is transposed so that every sample is an
# observation (row) and every gene is a variable (column).
# TRUE is spelled out instead of T, which is a reassignable variable.
# NOTE(review): miRNA_escalado was already column-scaled with scale(); here the
# transposed data are centred and scaled again. Confirm this double
# standardisation is intended.
PCA_resultados <- prcomp(t(miRNA_escalado),
                         center = TRUE,
                         scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(PCA_resultados)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.9273 1.0981 1.06384 0.93134 0.86565 0.63804 0.46315
## Proportion of Variance 0.7712 0.0603 0.05659 0.04337 0.03747 0.02035 0.01073
## Cumulative Proportion  0.7712 0.8315 0.88808 0.93145 0.96892 0.98927 1.00000
##                              PC8
## Standard deviation     2.064e-16
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00

#Se dividió en 8 PCs

Scree plot: varianza explicada por cada componente

# Scree plot of the sample-level PCA: one bar per component, labelled with the
# percentage of variance it explains (addlabels). TRUE replaces the
# reassignable shorthand T.
fviz_eig(PCA_resultados,
         addlabels = TRUE,
         barfill = "#6A5D87",
         barcolor = "#3B2F66")

La mayoría de la varianza está explicada por el PC #1

Gráfica de PCA (muestras sobre PC1 y PC2)

# Take the matrix of component scores (one row per sample) as a data frame and
# copy the row names into a regular column so ggplot2 can map them.
PCA_df <- as.data.frame(PCA_resultados[["x"]])
PCA_df[["Sample"]] <- rownames(PCA_df)

Graficar

# Scatter plot of the samples on the first two principal components, one colour
# per sample, with grey reference lines through the origin.
PCA_plot <- ggplot(
  data = PCA_df,
  mapping = aes(x = PC1, y = PC2, color = Sample)
) +
  # Sample scores
  geom_point(alpha = 0.9, size = 4) +
  # Axes through zero
  geom_hline(
    yintercept = 0, color = "grey50", linetype = "solid", linewidth = 0.7
  ) +
  geom_vline(
    xintercept = 0, color = "grey50", linetype = "solid", linewidth = 0.7
  ) +
  # Qualitative ColorBrewer palette for the sample colours
  scale_color_brewer(palette = "Dark2") +
  labs(
    color = "Muestra",
    x = "Componente Principal 1 (PC1)",
    y = "Componente Principal 2 (PC2)",
    title = "PCA de expresión de miRNAs"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey85"),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )

# Render the plot.
PCA_plot

Analizar el comportamiento de los genes

# PCA across genes: here the table is NOT transposed, so every gene is an
# observation (row) and every sample is a variable (column).
# TRUE is spelled out instead of T, which is a reassignable variable.
PCA_resultados_genes <- prcomp(miRNA_escalado,
                               center = TRUE,
                               scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance per component.
summary(PCA_resultados_genes)

## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5    PC6    PC7
## Standard deviation     2.4551 0.73337 0.68254 0.57125 0.50573 0.4317 0.3359
## Proportion of Variance 0.7534 0.06723 0.05823 0.04079 0.03197 0.0233 0.0141
## Cumulative Proportion  0.7534 0.82066 0.87890 0.91969 0.95166 0.9750 0.9891
##                            PC8
## Standard deviation     0.29582
## Proportion of Variance 0.01094
## Cumulative Proportion  1.00000

# Scree plot of the gene-level PCA, with the explained-variance percentage
# printed on each bar. TRUE replaces the reassignable shorthand T.
fviz_eig(PCA_resultados_genes,
         addlabels = TRUE,
         barfill = "#6A5D87",
         barcolor = "#3B2F66")

Hacer la gráfica de componentes principales

# Component scores per gene as a data frame; the gene names held in the row
# names are copied into a `Gene` column for use as a plotting aesthetic.
PCA_df_genes <- as.data.frame(PCA_resultados_genes[["x"]])
PCA_df_genes[["Gene"]] <- row.names(PCA_df_genes)

Por genes

# Scatter plot of the genes on PC1 vs PC2. Each gene gets its own colour and a
# black text label; the legend is hidden because the labels already identify
# the points.
PCA_plot_Genes <- ggplot(
  data = PCA_df_genes,
  mapping = aes(x = PC1, y = PC2, color = Gene, label = Gene)
) +
  geom_point(alpha = 0.9, size = 1.8) +
  # Labels sit slightly above each point and ignore the colour mapping.
  geom_text(color = "black", size = 2.8, vjust = -0.7) +
  # Reference lines through the origin
  geom_hline(yintercept = 0, linewidth = 0.6, color = "grey60") +
  geom_vline(xintercept = 0, linewidth = 0.6, color = "grey60") +
  scale_color_viridis_d(option = "plasma") +
  labs(
    x = "Componente Principal 1 (PC1)",
    y = "Componente Principal 2 (PC2)",
    title = "PCA de expresión de miRNAs (genes)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "none",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey88"),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )

# Render the plot.
PCA_plot_Genes

Por clústers

# Fix the random seed so the k-means starting centres (and therefore the
# resulting groups) are reproducible, then partition the genes into three
# clusters using only their PC1 and PC2 coordinates.
set.seed(123)
clusters <- kmeans(x = PCA_df_genes[, c("PC1", "PC2")], centers = 3)

# Keep the cluster membership as a factor so it is handled as a discrete group.
PCA_df_genes[["Cluster"]] <- as.factor(clusters[["cluster"]])

# Same PC1 vs PC2 gene plot, now coloured by k-means cluster. Points and gene
# labels share the cluster colour; the labels are kept out of the legend.
PCA_plot_Genes_clustered <- ggplot(
  data = PCA_df_genes,
  mapping = aes(x = PC1, y = PC2, color = Cluster)
) +
  geom_point(alpha = 0.9, size = 2.5) +
  geom_text(
    mapping = aes(label = Gene),
    size = 2.5,
    vjust = -0.6,
    show.legend = FALSE
  ) +
  # Reference lines through the origin
  geom_hline(yintercept = 0, linewidth = 0.6, color = "grey60") +
  geom_vline(xintercept = 0, linewidth = 0.6, color = "grey60") +
  # One manually chosen colour per cluster level, in level order
  scale_color_manual(values = c("#6D597A", "#7A9E7E", "#B5838D")) +
  labs(
    color = "Cluster",
    x = "Componente Principal 1 (PC1)",
    y = "Componente Principal 2 (PC2)",
    title = "PCA de expresión de miRNAs (genes)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey88"),
    axis.title = element_text(face = "bold"),
    plot.title = element_text(face = "bold", size = 14, hjust = 0.5)
  )

# Render the plot.
PCA_plot_Genes_clustered

Heatmap y PCA

Natalia Oropeza

2026-04-22