Cargar paquetes
if(!require("pacman"))
install.packages("pacman")
## Loading required package: pacman
library("pacman")
p_load("pheatmap", #para crear los heatmaps
"RColorBrewer",
"ggplot2",
"dplyr",
"vroom",
"FactoMineR", # para el PCA
"factoextra",
"tibble")
Llamar base de datos
Datos_PCR <- read.csv("https://raw.githubusercontent.com/ManuelLaraMVZ/Heatmaps/refs/heads/main/miRNA_qPCR_Ct_Data_20g.csv")
head(Datos_PCR)
## Gene Condition Control_1 Control_2 Control_3 Control_4 Tratamiento_1
## 1 Gen_1 Target 26.43952 26.76982 28.55871 27.07051 30.12929
## 2 Gen_2 Target 29.31315 29.55434 31.22408 30.35981 27.40077
## 3 Gen_3 Target 27.49785 25.03338 27.70136 26.52721 28.93218
## 4 Gen_4 Target 29.37496 28.31331 30.83779 30.15337 25.86186
## 5 Gen_5 Target 27.89513 27.87813 27.82158 27.68864 30.55392
## 6 Gen_6 Target 29.30529 29.79208 28.73460 32.16896 28.20796
## Tratamiento_2 Tratamiento_3 Tratamiento_4
## 1 31.71506 30.46092 28.73494
## 2 27.11068 26.44416 28.78691
## 3 29.78203 28.97400 29.27111
## 4 28.25381 27.42646 26.70493
## 5 29.93809 29.69404 29.61953
## 6 25.87689 26.59712 26.53334
Sacar genes de referencia para usarlos como comparativos
library(dplyr)
Ref_gen_prom <- Datos_PCR %>%
filter(Condition == "Reference") %>%
select(-1, -2) %>%
summarise(across(everything(), mean, na.rm = TRUE)) #para tener solamente los promedios de cada gen de ref en controles y tratamientos
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(everything(), mean, na.rm = TRUE)`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
head(Ref_gen_prom)
## Control_1 Control_2 Control_3 Control_4 Tratamiento_1 Tratamiento_2
## 1 25.94169 24.48383 25.2429 24.9541 24.78032 25.31373
## Tratamiento_3 Tratamiento_4
## 1 25.58023 24.40192
Calcualr DCT (restarle a cada valor el valor del promedio de los genes de referencia)
DCT <- Datos_PCR %>%
filter(Condition == "Target") %>%
select(-2) %>%
mutate(across(-1, ~ -(. -Ref_gen_prom[[cur_column()]][[1]]),
.names = "DCT_{.col}")) %>%
select(Gene, starts_with("DCT_"))
head(DCT)
## Gene DCT_Control_1 DCT_Control_2 DCT_Control_3 DCT_Control_4
## 1 Gen_1 -0.4978334 -2.2859901 -3.315805 -2.116405
## 2 Gen_2 -3.3714562 -5.0705056 -5.981179 -5.405711
## 3 Gen_3 -1.5561595 -0.5495505 -2.458453 -1.573106
## 4 Gen_4 -3.4332697 -3.8294743 -5.594884 -5.199270
## 5 Gen_5 -1.9534347 -3.3943011 -2.578678 -2.734537
## 6 Gen_6 -3.3636020 -5.3082503 -3.491701 -7.214853
## DCT_Tratamiento_1 DCT_Tratamiento_2 DCT_Tratamiento_3 DCT_Tratamiento_4
## 1 -5.348970 -6.4013369 -4.8806912 -4.333016
## 2 -2.620453 -1.7969546 -0.8639338 -4.384990
## 3 -4.151858 -4.4682970 -3.3937705 -4.869186
## 4 -1.081545 -2.9400868 -1.8462392 -2.303005
## 5 -5.773600 -4.6243602 -4.1138123 -5.217606
## 6 -3.427644 -0.5631633 -1.0168901 -2.131421
Escalar los datos (ver qué tanto se desvió cada gen en cada columna de la desviación estándar)
miRNA_escalado <- DCT %>%
column_to_rownames(var = "Gene") %>%
scale(center = TRUE,
scale = TRUE) %>%
as.data.frame()
head(miRNA_escalado)
## DCT_Control_1 DCT_Control_2 DCT_Control_3 DCT_Control_4 DCT_Tratamiento_1
## Gen_1 1.0273615 0.97484827 0.12356681 0.8860403 -0.81362577
## Gen_2 -0.6633067 -0.71178341 -1.42820012 -1.0075153 0.81618925
## Gen_3 0.4047056 2.02664135 0.62271304 1.1988019 -0.09855941
## Gen_4 -0.6996741 0.03993184 -1.20330127 -0.8886735 1.73541997
## Gen_5 0.1709725 0.30352416 0.55271855 0.5302002 -1.06726846
## Gen_6 -0.6586858 -0.85578969 0.02116144 -2.0489848 0.33403296
## DCT_Tratamiento_2 DCT_Tratamiento_3 DCT_Tratamiento_4
## Gen_1 -1.5553515 -1.2834728 -0.2403860
## Gen_2 0.6936251 1.1523470 -0.2686596
## Gen_3 -0.6111724 -0.3817826 -0.5320576
## Gen_4 0.1352706 0.5566628 0.8639210
## Gen_5 -0.6874003 -0.8184264 -0.7215950
## Gen_6 1.2962613 1.0595921 0.9572610
Definir los colores del hetamap
paleta_colores <- colorRampPalette(c("#333352", "#F7F5F5", "#821E50"))(100)
Construir el heatmap
heatmap <- pheatmap(miRNA_escalado,
color = paleta_colores,
cluster_rows = T,
cluster_cols = T,
show_rownames = T,
show_colnames = T,
fontsize_row = 8,
fontsize_col = 8,
border_color = "black",
main = "Heatmap de expresión de miRNAs",
fontface_row = "bold")
heatmap
Análisis de Componentes Principales
PCA_resultados <- prcomp(t(miRNA_escalado),
center = T,
scale. = T)
summary(PCA_resultados)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.9273 1.0981 1.06384 0.93134 0.86565 0.63804 0.46315
## Proportion of Variance 0.7712 0.0603 0.05659 0.04337 0.03747 0.02035 0.01073
## Cumulative Proportion 0.7712 0.8315 0.88808 0.93145 0.96892 0.98927 1.00000
## PC8
## Standard deviation 2.064e-16
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
#Se dividió en 8 PCs
Screenplot: varianza explicada por cada componente
fviz_eig(PCA_resultados,
addlabels = T,
barfill = "#6A5D87",
barcolor = "#3B2F66")
La mayorÃa de la varianza está explicada por el PC #1
Gráfica de PCA (biplot)
PCA_df <- as.data.frame(PCA_resultados$x)
PCA_df$Sample <- rownames(PCA_df)
Graficar
PCA_plot <- ggplot(PCA_df,
aes(x = PC1,
y = PC2,
color = Sample)) +
geom_point(size = 4, alpha = 0.9) +
geom_hline(yintercept = 0, linetype = "solid", color = "grey50", linewidth = 0.7) +
geom_vline(xintercept = 0, linetype = "solid", color = "grey50", linewidth = 0.7) +
scale_color_brewer(palette = "Dark2") +
labs(
title = "PCA de expresión de miRNAs",
x = "Componente Principal 1 (PC1)",
y = "Componente Principal 2 (PC2)",
color = "Muestra"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
axis.title = element_text(face = "bold"),
panel.grid.major = element_line(color = "grey85"),
panel.grid.minor = element_blank(),
legend.position = "right"
)
PCA_plot
Analizar el comportamiento de los genes
PCA_resultados_genes <- prcomp(miRNA_escalado,
center = T,
scale. = T)
summary(PCA_resultados_genes)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4551 0.73337 0.68254 0.57125 0.50573 0.4317 0.3359
## Proportion of Variance 0.7534 0.06723 0.05823 0.04079 0.03197 0.0233 0.0141
## Cumulative Proportion 0.7534 0.82066 0.87890 0.91969 0.95166 0.9750 0.9891
## PC8
## Standard deviation 0.29582
## Proportion of Variance 0.01094
## Cumulative Proportion 1.00000
fviz_eig(PCA_resultados_genes,
addlabels = T,
barfill = "#6A5D87",
barcolor = "#3B2F66")
Hacer la gráfica de componentes principales
PCA_df_genes <- as.data.frame(PCA_resultados_genes$x)
PCA_df_genes$Gene <- row.names(PCA_df_genes)
Por genes
PCA_plot_Genes <- ggplot(PCA_df_genes,
aes(x = PC1,
y = PC2,
color = Gene,
label = Gene)) +
geom_point(size = 1.8, alpha = 0.9) +
geom_text(vjust = -0.7, size = 2.8, color = "black") +
geom_hline(yintercept = 0, color = "grey60", linewidth = 0.6) +
geom_vline(xintercept = 0, color = "grey60", linewidth = 0.6) +
scale_color_viridis_d(option = "plasma") +
labs(
title = "PCA de expresión de miRNAs (genes)",
x = "Componente Principal 1 (PC1)",
y = "Componente Principal 2 (PC2)"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
axis.title = element_text(face = "bold"),
panel.grid.major = element_line(color = "grey88"),
panel.grid.minor = element_blank(),
legend.position = "none"
)
PCA_plot_Genes
Por clústers
set.seed(123)
clusters <- kmeans(PCA_df_genes[, c("PC1", "PC2")], centers = 3)
PCA_df_genes$Cluster <- as.factor(clusters$cluster)
PCA_plot_Genes_clustered <- ggplot(PCA_df_genes,
aes(x = PC1,
y = PC2,
color = Cluster)) +
geom_point(size = 2.5, alpha = 0.9) +
geom_text(aes(label = Gene),
vjust = -0.6,
size = 2.5,
show.legend = FALSE) +
geom_hline(yintercept = 0, color = "grey60", linewidth = 0.6) +
geom_vline(xintercept = 0, color = "grey60", linewidth = 0.6) +
scale_color_manual(values = c(
"#6D597A",
"#7A9E7E",
"#B5838D"
)) +
labs(
title = "PCA de expresión de miRNAs (genes)",
x = "Componente Principal 1 (PC1)",
y = "Componente Principal 2 (PC2)",
color = "Cluster"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
axis.title = element_text(face = "bold"),
panel.grid.major = element_line(color = "grey88"),
panel.grid.minor = element_blank(),
legend.position = "right"
)
PCA_plot_Genes_clustered