if (!require(pacman)) install.packages("pacman")
## Loading required package: pacman
pacman::p_load(
ggplot2,
dplyr,
ggrepel,
matrixTests,
readr,
grid
)
datos <- read_csv("https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv")
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 8
## Gen Condicion Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control 13.8 11.7 11.9 13.2 13.0 12.6
## 2 ath-miR159a-000338 Target 35 35 35 35 35 35
## 3 hsa-let-7a-000377 Target 20.5 21.0 21.0 20.4 19.6 20.9
## 4 hsa-let-7b-002619 Target 18.4 19.0 19.1 18.3 17.4 19.0
## 5 hsa-let-7c-000379 Target 22.2 23.7 23.8 22.9 22.0 24.0
## 6 hsa-let-7d-002283 Target 22.2 22.7 21.9 22.0 21.2 21.9
Controles <- datos %>%
filter(Condicion == "Control")
promedio_controles <- Controles %>%
summarise(Mean_C1 = mean(Cx1),
Mean_C2 = mean(Cx2),
Mean_C3 = mean(Cx3),
Mean_T1 = mean(T1),
Mean_T2 = mean(T2),
Mean_T3 = mean(T3)) %>%
mutate(Gen = "Promedio_controles") %>%
select(7,1,2,3,4,5,6)
promedio_controles
## # A tibble: 1 × 7
## Gen Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Promedio_controles 15.4 15.0 15.3 15.6 15.1 15.3
genes <- datos %>%
filter(Condicion == "Target") %>%
select(-2)
head(genes)
## # A tibble: 6 × 7
## Gen Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338 35 35 35 35 35 35
## 2 hsa-let-7a-000377 20.5 21.0 21.0 20.4 19.6 20.9
## 3 hsa-let-7b-002619 18.4 19.0 19.1 18.3 17.4 19.0
## 4 hsa-let-7c-000379 22.2 23.7 23.8 22.9 22.0 24.0
## 5 hsa-let-7d-002283 22.2 22.7 21.9 22.0 21.2 21.9
## 6 hsa-let-7e-002406 18.0 18.4 18.5 18.0 17.3 18.6
DCT <- genes %>%
mutate(DCT_C1 = 2^-(Cx1 - promedio_controles$Mean_C1),
DCT_C2 = 2^-(Cx2 - promedio_controles$Mean_C2),
DCT_C3 = 2^-(Cx3 - promedio_controles$Mean_C3),
DCT_T1 = 2^-(T1 - promedio_controles$Mean_T1),
DCT_T2 = 2^-(T2 - promedio_controles$Mean_T2),
DCT_T3 = 2^-(T3 - promedio_controles$Mean_T3)) %>%
select(-2,-3,-4,-5,-6,-7)
promedio_genes <- DCT %>%
mutate(Mean_DCT_Cx = (DCT_C1 + DCT_C2 + DCT_C3)/3,
Mean_DCT_Tx = (DCT_T1 + DCT_T2 + DCT_T3)/3)
promedio_genes
## # A tibble: 360 × 9
## Gen DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-… 1.24e-6 9.60e-7 1.14e-6 1.47e-6 1.03e-6 1.16e-6 0.00000111 0.00000122
## 2 hsa-… 2.89e-2 1.57e-2 1.85e-2 3.70e-2 4.49e-2 1.98e-2 0.0210 0.0339
## 3 hsa-… 1.27e-1 6.15e-2 6.77e-2 1.62e-1 1.98e-1 7.75e-2 0.0856 0.146
## 4 hsa-… 9.00e-3 2.43e-3 2.72e-3 6.40e-3 8.67e-3 2.39e-3 0.00472 0.00582
## 5 hsa-… 8.93e-3 4.92e-3 1.02e-2 1.23e-2 1.46e-2 9.87e-3 0.00802 0.0123
## 6 hsa-… 1.67e-1 9.60e-2 1.08e-1 1.91e-1 2.21e-1 1.03e-1 0.123 0.172
## 7 hsa-… 1.90e-3 8.82e-4 1.46e-3 3.58e-3 4.25e-3 1.24e-3 0.00142 0.00302
## 8 hsa-… 9.63e-3 7.20e-3 6.59e-3 9.86e-3 8.59e-3 5.50e-3 0.00781 0.00798
## 9 hsa-… 1.50e-5 4.04e-6 6.61e-6 5.99e-6 9.45e-6 3.04e-6 0.00000856 0.00000616
## 10 hsa-… 2.87e-1 3.90e-1 4.98e-1 4.10e-1 2.69e-1 6.25e-1 0.392 0.434
## # ℹ 350 more rows
tvalue_gen <- row_t_welch(
x = promedio_genes[, c("DCT_C1", "DCT_C2", "DCT_C3")],
y = promedio_genes[, c("DCT_T1", "DCT_T2", "DCT_T3")]
)
FCyPV <- promedio_genes %>%
select(1,8,9) %>%
mutate(p_value = tvalue_gen$pvalue,
Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>%
select(1,4,5)
Logs <- FCyPV %>%
mutate(LPV = -log10(p_value),
LFC = log2(Fold_change)) %>%
select(1,4,5)
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.272 0.132
## 2 hsa-let-7a-000377 0.655 0.688
## 3 hsa-let-7b-002619 0.627 0.768
## 4 hsa-let-7c-000379 0.145 0.303
## 5 hsa-let-7d-002283 0.938 0.613
## 6 hsa-let-7e-002406 0.491 0.476
## 7 hsa-let-7f-000382 0.669 1.09
## 8 hsa-let-7g-002282 0.0370 0.0320
## 9 hsa-miR-1-002222 0.244 -0.475
## 10 hsa-miR-100-000437 0.129 0.150
## # ℹ 350 more rows
vulcano <- ggplot(Logs, aes(x = LFC, y = LPV)) +
geom_point(size = 2, color = "gray") +
theme_classic() +
labs(title = "Análisis comparativo de miRNAs",
caption = "Creador: Abril Nava",
x = "Log2 (2^-ΔΔCt)",
y = "-Log10 (valor de p)")
vulcano

limite_p <- 0.05
limite_FC <- 1.5
down_regulated <- Logs %>%
filter(LFC < -log2(limite_FC),
LPV > -log10(limite_p))
up_regulated <- Logs %>%
filter(LFC > log2(limite_FC),
LPV > -log10(limite_p))
top_down_regulated <- down_regulated %>%
arrange(desc(LPV)) %>%
head(5)
top_up_regulated <- up_regulated %>%
arrange(desc(LPV)) %>%
head(5)
vulcano2 <- vulcano +
geom_hline(yintercept = -log10(limite_p),
linetype = "dashed") +
geom_vline(xintercept = c(-log2(limite_FC),
log2(limite_FC)),
linetype = "dashed")
vulcano2

vulcano3 <- vulcano2 +
geom_point(data = up_regulated,
aes(LFC, LPV),
color = "#E64B35B2",
size = 3) +
geom_point(data = down_regulated,
aes(LFC, LPV),
color = "#4DBBD5B2",
size = 3)
vulcano3

vulcano4 <- vulcano3 +
geom_label_repel(data = top_up_regulated,
aes(LFC, LPV, label = Gen),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = "grey50",
max.overlaps = 50,
show.legend = FALSE) +
geom_label_repel(data = top_down_regulated,
aes(LFC, LPV, label = Gen),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = "grey50",
max.overlaps = 50,
show.legend = FALSE)
vulcano4

ggsave("vulcano4.jpeg",
plot = vulcano4,
height = 5,
width = 6,
dpi = 300)