if(!require("pacman"))
install.packages("pacman")
library(pacman)
p_load("pheatmap", #Para crear los heatmaps
"RColorBrewer",
"ggplot2",
"dplyr",
"vroom",
"FactoMineR",
"factoextra",
"tibble")
Llamar la base de datos
Datos_PCR <- vroom(
"https://raw.githubusercontent.com/ManuelLaraMVZ/Heatmaps/refs/heads/main/miRNA_qPCR_Ct_Data_20g.csv",
show_col_types = FALSE
)
head(Datos_PCR)
## # A tibble: 6 × 10
## Gene Condition Control_1 Control_2 Control_3 Control_4 Tratamiento_1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen_1 Target 26.4 26.8 28.6 27.1 30.1
## 2 Gen_2 Target 29.3 29.6 31.2 30.4 27.4
## 3 Gen_3 Target 27.5 25.0 27.7 26.5 28.9
## 4 Gen_4 Target 29.4 28.3 30.8 30.2 25.9
## 5 Gen_5 Target 27.9 27.9 27.8 27.7 30.6
## 6 Gen_6 Target 29.3 29.8 28.7 32.2 28.2
## # ℹ 3 more variables: Tratamiento_2 <dbl>, Tratamiento_3 <dbl>,
## # Tratamiento_4 <dbl>
Sacar los genes de referencia
Ref_gen_prom <- Datos_PCR %>%
filter(Condition == "Reference") %>%
select(-1,-2) %>%
summarise(across(everything(), \(x) mean(x, na.rm = TRUE)))
head(Ref_gen_prom)
## # A tibble: 1 × 8
## Control_1 Control_2 Control_3 Control_4 Tratamiento_1 Tratamiento_2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 25.9 24.5 25.2 25.0 24.8 25.3
## # ℹ 2 more variables: Tratamiento_3 <dbl>, Tratamiento_4 <dbl>
Obtener el valor de Delta Ct
DCt <- Datos_PCR %>%
filter(Condition =="Target") %>%
select(-2) %>%
mutate(across(-1,
~ -(. - Ref_gen_prom[[cur_column()]][[1]]),
.names = "DCt_{.col}")) %>%
select(Gene, starts_with("DCt_"))
head(DCt)
## # A tibble: 6 × 9
## Gene DCt_Control_1 DCt_Control_2 DCt_Control_3 DCt_Control_4
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen_1 -0.498 -2.29 -3.32 -2.12
## 2 Gen_2 -3.37 -5.07 -5.98 -5.41
## 3 Gen_3 -1.56 -0.550 -2.46 -1.57
## 4 Gen_4 -3.43 -3.83 -5.59 -5.20
## 5 Gen_5 -1.95 -3.39 -2.58 -2.73
## 6 Gen_6 -3.36 -5.31 -3.49 -7.21
## # ℹ 4 more variables: DCt_Tratamiento_1 <dbl>, DCt_Tratamiento_2 <dbl>,
## # DCt_Tratamiento_3 <dbl>, DCt_Tratamiento_4 <dbl>
Escalar los datos
miRNA_escalado <- DCt %>%
column_to_rownames(var = "Gene") %>%
scale(center = TRUE,
scale = TRUE) %>%
as.data.frame()
head(miRNA_escalado)
## DCt_Control_1 DCt_Control_2 DCt_Control_3 DCt_Control_4 DCt_Tratamiento_1
## Gen_1 1.0273615 0.97484827 0.12356681 0.8860403 -0.81362577
## Gen_2 -0.6633067 -0.71178341 -1.42820012 -1.0075153 0.81618925
## Gen_3 0.4047056 2.02664135 0.62271304 1.1988019 -0.09855941
## Gen_4 -0.6996741 0.03993184 -1.20330127 -0.8886735 1.73541997
## Gen_5 0.1709725 0.30352416 0.55271855 0.5302002 -1.06726846
## Gen_6 -0.6586858 -0.85578969 0.02116144 -2.0489848 0.33403296
## DCt_Tratamiento_2 DCt_Tratamiento_3 DCt_Tratamiento_4
## Gen_1 -1.5553515 -1.2834728 -0.2403860
## Gen_2 0.6936251 1.1523470 -0.2686596
## Gen_3 -0.6111724 -0.3817826 -0.5320576
## Gen_4 0.1352706 0.5566628 0.8639210
## Gen_5 -0.6874003 -0.8184264 -0.7215950
## Gen_6 1.2962613 1.0595921 0.9572610
Heatmap
paleta_colores <- colorRampPalette(
c("#001219","#0A9396","#94D2BD","#E9D8A6","#EE9B00","#CA6702","#BB3E03")
)(100)
pheatmap(miRNA_escalado,
color = paleta_colores,
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = FALSE,
show_colnames = TRUE,
fontsize_row = 5,
fontsize_col = 8,
border_color = "grey30",
main = "Heatmap de expresión de miRNAs")
Análisis de PCA
PCA muestras
PCA_resultados <- prcomp(t(miRNA_escalado),
center = TRUE,
scale. = TRUE)
summary(PCA_resultados)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.9273 1.0981 1.06384 0.93134 0.86565 0.63804 0.46315
## Proportion of Variance 0.7712 0.0603 0.05659 0.04337 0.03747 0.02035 0.01073
## Cumulative Proportion 0.7712 0.8315 0.88808 0.93145 0.96892 0.98927 1.00000
## PC8
## Standard deviation 1.734e-16
## Proportion of Variance 0.000e+00
## Cumulative Proportion 1.000e+00
Screen Plot
fviz_eig(PCA_resultados,
addlabels = T,
barfill = "#2668BF",
barcolor = "#4176BA")
fviz_eig
## function (X, choice = c("variance", "eigenvalue"), geom = c("bar",
## "line"), barfill = "steelblue", barcolor = "steelblue", linecolor = "black",
## ncp = 10, addlabels = FALSE, hjust = 0, main = NULL, xlab = NULL,
## ylab = NULL, ggtheme = theme_minimal(), parallel = FALSE,
## parallel.color = "red", parallel.lty = "dashed", parallel.iter = 100,
## parallel.seed = NULL, ...)
## {
## eig <- get_eigenvalue(X)
## eig <- eig[seq_len(min(ncp, nrow(eig))), , drop = FALSE]
## choice <- choice[1]
## if (choice == "eigenvalue") {
## eig <- eig[, 1]
## text_labels <- round(eig, 1)
## if (is.null(ylab))
## ylab <- "Eigenvalue"
## }
## else if (choice == "variance") {
## eig <- eig[, 2]
## text_labels <- paste0(round(eig, 1), "%")
## }
## else stop("Allowed values for the argument choice are : 'variance' or 'eigenvalue'")
## if (length(intersect(geom, c("bar", "line"))) == 0)
## stop("The specified value(s) for the argument geom are not allowed ")
## df.eig <- data.frame(dim = factor(seq_along(eig)), eig = eig)
## extra_args <- list(...)
## bar_width <- extra_args$bar_width
## linetype <- extra_args$linetype
## if (is.null(linetype))
## linetype <- "solid"
## p <- ggplot(df.eig, aes(dim, eig, group = 1))
## if ("bar" %in% geom) {
## bar_args <- list(stat = "identity", fill = barfill, color = barcolor)
## if (!is.null(bar_width))
## bar_args$width <- bar_width
## p <- p + do.call(geom_bar, bar_args)
## }
## if ("line" %in% geom)
## p <- p + geom_line(color = linecolor, linetype = linetype) +
## geom_point(shape = 19, color = linecolor)
## if (addlabels)
## p <- p + geom_text(label = text_labels, vjust = -0.4,
## hjust = hjust)
## if (!is.null(parallel.seed)) {
## if (!is.numeric(parallel.seed) || length(parallel.seed) !=
## 1L || is.na(parallel.seed) || !is.finite(parallel.seed) ||
## parallel.seed%%1 != 0 || parallel.seed < 0 || parallel.seed >
## .Machine$integer.max)
## stop("parallel.seed must be NULL or a single integer value in [0, ",
## .Machine$integer.max, "].")
## parallel.seed <- as.integer(parallel.seed)
## }
## if (parallel && choice == "eigenvalue") {
## compute_parallel_threshold <- function(n_obs, n_var,
## fit_fn) {
## sim_eigs <- matrix(NA_real_, nrow = parallel.iter,
## ncol = n_var)
## for (i in seq_len(parallel.iter)) {
## random_data <- matrix(stats::rnorm(n_obs * n_var),
## nrow = n_obs, ncol = n_var)
## sim_eigs[i, ] <- fit_fn(random_data)
## }
## apply(sim_eigs, 2, function(x) stats::quantile(x,
## 0.95))
## }
## if (inherits(X, "prcomp") && !is.null(X$x)) {
## n_obs <- nrow(X$x)
## n_var <- ncol(X$x)
## if (is.null(parallel.seed)) {
## parallel_threshold <- compute_parallel_threshold(n_obs,
## n_var, fit_fn = function(random_data) stats::prcomp(random_data,
## scale. = TRUE)$sdev^2)
## }
## else {
## parallel_threshold <- .with_preserved_seed(parallel.seed,
## {
## compute_parallel_threshold(n_obs, n_var,
## fit_fn = function(random_data) stats::prcomp(random_data,
## scale. = TRUE)$sdev^2)
## })
## }
## parallel_threshold <- parallel_threshold[seq_len(min(ncp,
## length(parallel_threshold)))]
## df.parallel <- data.frame(dim = factor(seq_along(parallel_threshold)),
## threshold = parallel_threshold)
## p <- p + geom_line(data = df.parallel, aes(x = .data[["dim"]],
## y = .data[["threshold"]]), color = parallel.color,
## linetype = parallel.lty, linewidth = 0.8) + geom_point(data = df.parallel,
## aes(x = .data[["dim"]], y = .data[["threshold"]]),
## color = parallel.color, shape = 4, size = 2)
## }
## else if (inherits(X, "princomp") && !is.null(X$scores)) {
## n_obs <- nrow(X$scores)
## n_var <- ncol(X$scores)
## if (is.null(parallel.seed)) {
## parallel_threshold <- compute_parallel_threshold(n_obs,
## n_var, fit_fn = function(random_data) stats::princomp(random_data,
## cor = TRUE)$sdev^2)
## }
## else {
## parallel_threshold <- .with_preserved_seed(parallel.seed,
## {
## compute_parallel_threshold(n_obs, n_var,
## fit_fn = function(random_data) stats::princomp(random_data,
## cor = TRUE)$sdev^2)
## })
## }
## parallel_threshold <- parallel_threshold[seq_len(min(ncp,
## length(parallel_threshold)))]
## df.parallel <- data.frame(dim = factor(seq_along(parallel_threshold)),
## threshold = parallel_threshold)
## p <- p + geom_line(data = df.parallel, aes(x = .data[["dim"]],
## y = .data[["threshold"]]), color = parallel.color,
## linetype = parallel.lty, linewidth = 0.8) + geom_point(data = df.parallel,
## aes(x = .data[["dim"]], y = .data[["threshold"]]),
## color = parallel.color, shape = 4, size = 2)
## }
## else {
## warning("Parallel analysis requires prcomp or princomp objects with score data. Skipping.")
## }
## }
## else if (parallel && choice != "eigenvalue") {
## warning("Parallel analysis only works with choice = 'eigenvalue'. Skipping.")
## }
## if (is.null(main))
## main <- "Scree plot"
## if (is.null(xlab))
## xlab <- "Dimensions"
## if (is.null(ylab))
## ylab <- "Percentage of explained variances"
## p <- p + labs(title = main, x = xlab, y = ylab)
## ggpubr::ggpar(p, ggtheme = ggtheme, ...)
## }
## <bytecode: 0x5d7554512460>
## <environment: namespace:factoextra>
PCA gráfico muestras
PCA_df <- as.data.frame(PCA_resultados$x)
PCA_df$Sample <- rownames(PCA_df)
ggplot(PCA_df,
aes(x= PC1,
y= PC2,
color = Sample))+
geom_point(size = 4)+
geom_hline(yintercept = 0, linewidth= 1.2, color ="grey20")+
geom_vline(xintercept = 0, linewidth= 1.2, color ="grey20")+
scale_color_brewer(palette = "Set1")+
labs(title = "PCA de expresión de miRNAs",
x= "PC1",
y= "PC2")+
theme_classic()
PCA Genes
PCA genes
PCA_resultados_genes <- prcomp(miRNA_escalado,
center = TRUE,
scale. = TRUE)
summary(PCA_resultados_genes)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4551 0.73337 0.68254 0.57125 0.50573 0.4317 0.3359
## Proportion of Variance 0.7534 0.06723 0.05823 0.04079 0.03197 0.0233 0.0141
## Cumulative Proportion 0.7534 0.82066 0.87890 0.91969 0.95166 0.9750 0.9891
## PC8
## Standard deviation 0.29582
## Proportion of Variance 0.01094
## Cumulative Proportion 1.00000
fviz_eig(PCA_resultados_genes,
addlabels = TRUE,
barfill = "#D95E45",
barcolor = "#D4472A",
title = "Varianza explicada genes")
PCA_df_genes <- as.data.frame(PCA_resultados_genes$x)
PCA_df_genes$Gene <- row.names(PCA_df_genes)
ggplot(PCA_df_genes,
aes(x = PC1,
y= PC2,
label = Gene,
color = Gene))+
geom_point(size= 4, show.legend = FALSE)+
geom_text(vjust=-0.2, size = 3)+
geom_hline(yintercept = 0, linewidth= 1.2, color ="grey20")+
geom_vline(xintercept = 0, linewidth= 1.2, color ="grey20")+
scale_color_brewer(palette = "Dark2")+
labs(title = "PCA de expresión de miRNAs (Genes) - Manuel Corrales",
x= "PC1",
y= "PC2")+
theme_classic()
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Dark2 is 8
## Returning the palette you asked for with that many colors
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).