if(!require("pacman"))
  install.packages("pacman")

library(pacman)

p_load("pheatmap", #Para crear los heatmaps
       "RColorBrewer",
       "ggplot2",
       "dplyr",
       "vroom",
       "FactoMineR",
       "factoextra",
       "tibble")

Llamar la base de datos

Datos_PCR <- vroom(
  "https://raw.githubusercontent.com/ManuelLaraMVZ/Heatmaps/refs/heads/main/miRNA_qPCR_Ct_Data_20g.csv",
  show_col_types = FALSE
)

head(Datos_PCR)
## # A tibble: 6 × 10
##   Gene  Condition Control_1 Control_2 Control_3 Control_4 Tratamiento_1
##   <chr> <chr>         <dbl>     <dbl>     <dbl>     <dbl>         <dbl>
## 1 Gen_1 Target         26.4      26.8      28.6      27.1          30.1
## 2 Gen_2 Target         29.3      29.6      31.2      30.4          27.4
## 3 Gen_3 Target         27.5      25.0      27.7      26.5          28.9
## 4 Gen_4 Target         29.4      28.3      30.8      30.2          25.9
## 5 Gen_5 Target         27.9      27.9      27.8      27.7          30.6
## 6 Gen_6 Target         29.3      29.8      28.7      32.2          28.2
## # ℹ 3 more variables: Tratamiento_2 <dbl>, Tratamiento_3 <dbl>,
## #   Tratamiento_4 <dbl>

Sacar los genes de referencia

Ref_gen_prom <- Datos_PCR %>% 
  filter(Condition == "Reference") %>%
  select(-1,-2) %>% 
  summarise(across(everything(), \(x) mean(x, na.rm = TRUE)))

head(Ref_gen_prom)
## # A tibble: 1 × 8
##   Control_1 Control_2 Control_3 Control_4 Tratamiento_1 Tratamiento_2
##       <dbl>     <dbl>     <dbl>     <dbl>         <dbl>         <dbl>
## 1      25.9      24.5      25.2      25.0          24.8          25.3
## # ℹ 2 more variables: Tratamiento_3 <dbl>, Tratamiento_4 <dbl>

Obtener el valor de Delta Ct

DCt <- Datos_PCR %>% 
  filter(Condition =="Target") %>% 
  select(-2) %>% 
  mutate(across(-1,
                ~ -(. - Ref_gen_prom[[cur_column()]][[1]]),
                .names = "DCt_{.col}")) %>% 
  select(Gene, starts_with("DCt_"))

head(DCt)
## # A tibble: 6 × 9
##   Gene  DCt_Control_1 DCt_Control_2 DCt_Control_3 DCt_Control_4
##   <chr>         <dbl>         <dbl>         <dbl>         <dbl>
## 1 Gen_1        -0.498        -2.29          -3.32         -2.12
## 2 Gen_2        -3.37         -5.07          -5.98         -5.41
## 3 Gen_3        -1.56         -0.550         -2.46         -1.57
## 4 Gen_4        -3.43         -3.83          -5.59         -5.20
## 5 Gen_5        -1.95         -3.39          -2.58         -2.73
## 6 Gen_6        -3.36         -5.31          -3.49         -7.21
## # ℹ 4 more variables: DCt_Tratamiento_1 <dbl>, DCt_Tratamiento_2 <dbl>,
## #   DCt_Tratamiento_3 <dbl>, DCt_Tratamiento_4 <dbl>

Escalar los datos

miRNA_escalado <- DCt %>% 
  column_to_rownames(var = "Gene") %>% 
  scale(center = TRUE,
        scale = TRUE) %>%
  as.data.frame()

head(miRNA_escalado)
##       DCt_Control_1 DCt_Control_2 DCt_Control_3 DCt_Control_4 DCt_Tratamiento_1
## Gen_1     1.0273615    0.97484827    0.12356681     0.8860403       -0.81362577
## Gen_2    -0.6633067   -0.71178341   -1.42820012    -1.0075153        0.81618925
## Gen_3     0.4047056    2.02664135    0.62271304     1.1988019       -0.09855941
## Gen_4    -0.6996741    0.03993184   -1.20330127    -0.8886735        1.73541997
## Gen_5     0.1709725    0.30352416    0.55271855     0.5302002       -1.06726846
## Gen_6    -0.6586858   -0.85578969    0.02116144    -2.0489848        0.33403296
##       DCt_Tratamiento_2 DCt_Tratamiento_3 DCt_Tratamiento_4
## Gen_1        -1.5553515        -1.2834728        -0.2403860
## Gen_2         0.6936251         1.1523470        -0.2686596
## Gen_3        -0.6111724        -0.3817826        -0.5320576
## Gen_4         0.1352706         0.5566628         0.8639210
## Gen_5        -0.6874003        -0.8184264        -0.7215950
## Gen_6         1.2962613         1.0595921         0.9572610

Heatmap

paleta_colores <- colorRampPalette(
  c("#001219","#0A9396","#94D2BD","#E9D8A6","#EE9B00","#CA6702","#BB3E03")
)(100)

pheatmap(miRNA_escalado,
         color = paleta_colores,
         cluster_rows = TRUE,
         cluster_cols = TRUE,
         show_rownames = FALSE,
         show_colnames = TRUE,
         fontsize_row = 5,
         fontsize_col = 8,
         border_color = "grey30",
         main = "Heatmap de expresión de miRNAs")

Análisis de PCA

PCA muestras

PCA_resultados <- prcomp(t(miRNA_escalado),
                         center = TRUE,
                         scale. = TRUE)

summary(PCA_resultados)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.9273 1.0981 1.06384 0.93134 0.86565 0.63804 0.46315
## Proportion of Variance 0.7712 0.0603 0.05659 0.04337 0.03747 0.02035 0.01073
## Cumulative Proportion  0.7712 0.8315 0.88808 0.93145 0.96892 0.98927 1.00000
##                              PC8
## Standard deviation     1.734e-16
## Proportion of Variance 0.000e+00
## Cumulative Proportion  1.000e+00

Screen Plot

fviz_eig(PCA_resultados,
         addlabels = T,
         barfill = "#2668BF",
         barcolor = "#4176BA")

fviz_eig
## function (X, choice = c("variance", "eigenvalue"), geom = c("bar", 
##     "line"), barfill = "steelblue", barcolor = "steelblue", linecolor = "black", 
##     ncp = 10, addlabels = FALSE, hjust = 0, main = NULL, xlab = NULL, 
##     ylab = NULL, ggtheme = theme_minimal(), parallel = FALSE, 
##     parallel.color = "red", parallel.lty = "dashed", parallel.iter = 100, 
##     parallel.seed = NULL, ...) 
## {
##     eig <- get_eigenvalue(X)
##     eig <- eig[seq_len(min(ncp, nrow(eig))), , drop = FALSE]
##     choice <- choice[1]
##     if (choice == "eigenvalue") {
##         eig <- eig[, 1]
##         text_labels <- round(eig, 1)
##         if (is.null(ylab)) 
##             ylab <- "Eigenvalue"
##     }
##     else if (choice == "variance") {
##         eig <- eig[, 2]
##         text_labels <- paste0(round(eig, 1), "%")
##     }
##     else stop("Allowed values for the argument choice are : 'variance' or 'eigenvalue'")
##     if (length(intersect(geom, c("bar", "line"))) == 0) 
##         stop("The specified value(s) for the argument geom are not allowed ")
##     df.eig <- data.frame(dim = factor(seq_along(eig)), eig = eig)
##     extra_args <- list(...)
##     bar_width <- extra_args$bar_width
##     linetype <- extra_args$linetype
##     if (is.null(linetype)) 
##         linetype <- "solid"
##     p <- ggplot(df.eig, aes(dim, eig, group = 1))
##     if ("bar" %in% geom) {
##         bar_args <- list(stat = "identity", fill = barfill, color = barcolor)
##         if (!is.null(bar_width)) 
##             bar_args$width <- bar_width
##         p <- p + do.call(geom_bar, bar_args)
##     }
##     if ("line" %in% geom) 
##         p <- p + geom_line(color = linecolor, linetype = linetype) + 
##             geom_point(shape = 19, color = linecolor)
##     if (addlabels) 
##         p <- p + geom_text(label = text_labels, vjust = -0.4, 
##             hjust = hjust)
##     if (!is.null(parallel.seed)) {
##         if (!is.numeric(parallel.seed) || length(parallel.seed) != 
##             1L || is.na(parallel.seed) || !is.finite(parallel.seed) || 
##             parallel.seed%%1 != 0 || parallel.seed < 0 || parallel.seed > 
##             .Machine$integer.max) 
##             stop("parallel.seed must be NULL or a single integer value in [0, ", 
##                 .Machine$integer.max, "].")
##         parallel.seed <- as.integer(parallel.seed)
##     }
##     if (parallel && choice == "eigenvalue") {
##         compute_parallel_threshold <- function(n_obs, n_var, 
##             fit_fn) {
##             sim_eigs <- matrix(NA_real_, nrow = parallel.iter, 
##                 ncol = n_var)
##             for (i in seq_len(parallel.iter)) {
##                 random_data <- matrix(stats::rnorm(n_obs * n_var), 
##                   nrow = n_obs, ncol = n_var)
##                 sim_eigs[i, ] <- fit_fn(random_data)
##             }
##             apply(sim_eigs, 2, function(x) stats::quantile(x, 
##                 0.95))
##         }
##         if (inherits(X, "prcomp") && !is.null(X$x)) {
##             n_obs <- nrow(X$x)
##             n_var <- ncol(X$x)
##             if (is.null(parallel.seed)) {
##                 parallel_threshold <- compute_parallel_threshold(n_obs, 
##                   n_var, fit_fn = function(random_data) stats::prcomp(random_data, 
##                     scale. = TRUE)$sdev^2)
##             }
##             else {
##                 parallel_threshold <- .with_preserved_seed(parallel.seed, 
##                   {
##                     compute_parallel_threshold(n_obs, n_var, 
##                       fit_fn = function(random_data) stats::prcomp(random_data, 
##                         scale. = TRUE)$sdev^2)
##                   })
##             }
##             parallel_threshold <- parallel_threshold[seq_len(min(ncp, 
##                 length(parallel_threshold)))]
##             df.parallel <- data.frame(dim = factor(seq_along(parallel_threshold)), 
##                 threshold = parallel_threshold)
##             p <- p + geom_line(data = df.parallel, aes(x = .data[["dim"]], 
##                 y = .data[["threshold"]]), color = parallel.color, 
##                 linetype = parallel.lty, linewidth = 0.8) + geom_point(data = df.parallel, 
##                 aes(x = .data[["dim"]], y = .data[["threshold"]]), 
##                 color = parallel.color, shape = 4, size = 2)
##         }
##         else if (inherits(X, "princomp") && !is.null(X$scores)) {
##             n_obs <- nrow(X$scores)
##             n_var <- ncol(X$scores)
##             if (is.null(parallel.seed)) {
##                 parallel_threshold <- compute_parallel_threshold(n_obs, 
##                   n_var, fit_fn = function(random_data) stats::princomp(random_data, 
##                     cor = TRUE)$sdev^2)
##             }
##             else {
##                 parallel_threshold <- .with_preserved_seed(parallel.seed, 
##                   {
##                     compute_parallel_threshold(n_obs, n_var, 
##                       fit_fn = function(random_data) stats::princomp(random_data, 
##                         cor = TRUE)$sdev^2)
##                   })
##             }
##             parallel_threshold <- parallel_threshold[seq_len(min(ncp, 
##                 length(parallel_threshold)))]
##             df.parallel <- data.frame(dim = factor(seq_along(parallel_threshold)), 
##                 threshold = parallel_threshold)
##             p <- p + geom_line(data = df.parallel, aes(x = .data[["dim"]], 
##                 y = .data[["threshold"]]), color = parallel.color, 
##                 linetype = parallel.lty, linewidth = 0.8) + geom_point(data = df.parallel, 
##                 aes(x = .data[["dim"]], y = .data[["threshold"]]), 
##                 color = parallel.color, shape = 4, size = 2)
##         }
##         else {
##             warning("Parallel analysis requires prcomp or princomp objects with score data. Skipping.")
##         }
##     }
##     else if (parallel && choice != "eigenvalue") {
##         warning("Parallel analysis only works with choice = 'eigenvalue'. Skipping.")
##     }
##     if (is.null(main)) 
##         main <- "Scree plot"
##     if (is.null(xlab)) 
##         xlab <- "Dimensions"
##     if (is.null(ylab)) 
##         ylab <- "Percentage of explained variances"
##     p <- p + labs(title = main, x = xlab, y = ylab)
##     ggpubr::ggpar(p, ggtheme = ggtheme, ...)
## }
## <bytecode: 0x5d7554512460>
## <environment: namespace:factoextra>

PCA gráfico muestras

PCA_df <- as.data.frame(PCA_resultados$x)
PCA_df$Sample <- rownames(PCA_df)

ggplot(PCA_df,
       aes(x= PC1,
           y= PC2,
           color = Sample))+
  geom_point(size = 4)+
  geom_hline(yintercept = 0, linewidth= 1.2, color ="grey20")+ 
  geom_vline(xintercept = 0, linewidth= 1.2, color ="grey20")+
  scale_color_brewer(palette = "Set1")+
  labs(title = "PCA de expresión de miRNAs",
       x= "PC1",
       y= "PC2")+
  theme_classic()

PCA Genes

PCA genes

PCA_resultados_genes <- prcomp(miRNA_escalado,
                               center = TRUE,
                               scale. = TRUE)

summary(PCA_resultados_genes)
## Importance of components:
##                           PC1     PC2     PC3     PC4     PC5    PC6    PC7
## Standard deviation     2.4551 0.73337 0.68254 0.57125 0.50573 0.4317 0.3359
## Proportion of Variance 0.7534 0.06723 0.05823 0.04079 0.03197 0.0233 0.0141
## Cumulative Proportion  0.7534 0.82066 0.87890 0.91969 0.95166 0.9750 0.9891
##                            PC8
## Standard deviation     0.29582
## Proportion of Variance 0.01094
## Cumulative Proportion  1.00000

Scree Plot genes

fviz_eig(PCA_resultados_genes,
         addlabels = TRUE,
         barfill = "#D95E45",
         barcolor = "#D4472A",
         title = "Varianza explicada genes")

PCA gráfico genes

PCA_df_genes <- as.data.frame(PCA_resultados_genes$x)
PCA_df_genes$Gene <- row.names(PCA_df_genes)

ggplot(PCA_df_genes,
       aes(x = PC1,
           y= PC2,
           label = Gene,
           color = Gene))+
  geom_point(size= 4, show.legend = FALSE)+
  geom_text(vjust=-0.2, size = 3)+ 
  geom_hline(yintercept = 0, linewidth= 1.2, color ="grey20")+ 
  geom_vline(xintercept = 0, linewidth= 1.2, color ="grey20")+
  scale_color_brewer(palette = "Dark2")+
  labs(title = "PCA de expresión de miRNAs (Genes) - Manuel Corrales",
       x= "PC1",
       y= "PC2")+
  theme_classic()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).