if(!require(pacman))
install.packages("pacman")
## Loading required package: pacman
library("pacman")
p_load("vroom",
"dplyr",
"ggplot2",
"matrixTests",
"readr",
"ggrepel")
Datos <- vroom (file="https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv")
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Datos)
## # A tibble: 6 × 8
## Gen Condicion Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control 13.8 11.7 11.9 13.2 13.0 12.6
## 2 ath-miR159a-000338 Target 35 35 35 35 35 35
## 3 hsa-let-7a-000377 Target 20.5 21.0 21.0 20.4 19.6 20.9
## 4 hsa-let-7b-002619 Target 18.4 19.0 19.1 18.3 17.4 19.0
## 5 hsa-let-7c-000379 Target 22.2 23.7 23.8 22.9 22.0 24.0
## 6 hsa-let-7d-002283 Target 22.2 22.7 21.9 22.0 21.2 21.9
Controles <- Datos %>%
filter(Condicion=="Control")
head(Controles)
## # A tibble: 3 × 8
## Gen Condicion Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control 13.8 11.7 11.9 13.2 13.0 12.6
## 2 RNU44-001094 Control 17.3 16.9 16.8 17.7 17.3 16.8
## 3 RNU48-001006 Control 15.1 16.4 17.0 16.0 15.1 16.4
promedio_controles <- Controles %>%
summarise(Mean_C1=mean(Cx1),
Mean_C2=mean(Cx2),
Mean_C3=mean(Cx3),
Mean_T1=mean(T1),
Mean_T2=mean(T2),
Mean_T3=mean(T3)) %>%
mutate(Gen="Promedio_controles") %>%
select(7,1,2,3,4,5,6)
promedio_controles
## # A tibble: 1 × 7
## Gen Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Promedio_controles 15.4 15.0 15.3 15.6 15.1 15.3
genes <- Datos %>%
filter(Condicion=="Target") %>%
select(-2)
head(genes)
## # A tibble: 6 × 7
## Gen Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338 35 35 35 35 35 35
## 2 hsa-let-7a-000377 20.5 21.0 21.0 20.4 19.6 20.9
## 3 hsa-let-7b-002619 18.4 19.0 19.1 18.3 17.4 19.0
## 4 hsa-let-7c-000379 22.2 23.7 23.8 22.9 22.0 24.0
## 5 hsa-let-7d-002283 22.2 22.7 21.9 22.0 21.2 21.9
## 6 hsa-let-7e-002406 18.0 18.4 18.5 18.0 17.3 18.6
DCT <- genes %>%
mutate(DCT_C1=2^(Cx1- promedio_controles$Mean_C1),
DCT_C2=2^(Cx2- promedio_controles$Mean_C2),
DCT_C3=2^(Cx3- promedio_controles$Mean_C3),
DCT_T1=2^(T1- promedio_controles$Mean_T1),
DCT_T2=2^(T2- promedio_controles$Mean_T2),
DCT_T3=2^(T3- promedio_controles$Mean_T3)) %>%
select(-2,-3,-4,-5,-6,-7)
DCT
## # A tibble: 360 × 7
## Gen DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338 804073. 1041189. 880400. 679572. 968818. 8.65e5
## 2 hsa-let-7a-000377 34.6 63.7 54.0 27.0 22.3 5.04e1
## 3 hsa-let-7b-002619 7.84 16.2 14.8 6.19 5.05 1.29e1
## 4 hsa-let-7c-000379 111. 411. 368. 156. 115. 4.19e2
## 5 hsa-let-7d-002283 112. 203. 97.9 81.3 68.3 1.01e2
## 6 hsa-let-7e-002406 6.00 10.4 9.29 5.23 4.53 9.72e0
## 7 hsa-let-7f-000382 525. 1134. 684. 280. 235. 8.07e2
## 8 hsa-let-7g-002282 104. 139. 152. 101. 116. 1.82e2
## 9 hsa-miR-1-002222 66495. 247300. 151274. 166835. 105792. 3.29e5
## 10 hsa-miR-100-000437 3.49 2.57 2.01 2.44 3.72 1.60e0
## # ℹ 350 more rows
promedio_genes <- DCT %>%
mutate(Mean_DCT_Cx=(DCT_C1+DCT_C2+DCT_C3)/3,
Mean_DCT_Tx=(DCT_T1+DCT_T2+DCT_T3)/3)
promedio_genes
## # A tibble: 360 × 9
## Gen DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159… 8.04e5 1.04e6 8.80e5 6.80e5 9.69e5 8.65e5 908554. 837675.
## 2 hsa-let-7a… 3.46e1 6.37e1 5.40e1 2.70e1 2.23e1 5.04e1 50.8 33.2
## 3 hsa-let-7b… 7.84e0 1.62e1 1.48e1 6.19e0 5.05e0 1.29e1 13.0 8.05
## 4 hsa-let-7c… 1.11e2 4.11e2 3.68e2 1.56e2 1.15e2 4.19e2 297. 230.
## 5 hsa-let-7d… 1.12e2 2.03e2 9.79e1 8.13e1 6.83e1 1.01e2 138. 83.6
## 6 hsa-let-7e… 6.00e0 1.04e1 9.29e0 5.23e0 4.53e0 9.72e0 8.57 6.49
## 7 hsa-let-7f… 5.25e2 1.13e3 6.84e2 2.80e2 2.35e2 8.07e2 781. 441.
## 8 hsa-let-7g… 1.04e2 1.39e2 1.52e2 1.01e2 1.16e2 1.82e2 131. 133.
## 9 hsa-miR-1-… 6.65e4 2.47e5 1.51e5 1.67e5 1.06e5 3.29e5 155023. 200588.
## 10 hsa-miR-10… 3.49e0 2.57e0 2.01e0 2.44e0 3.72e0 1.60e0 2.69 2.59
## # ℹ 350 more rows
tvalue_gen <- row_t_welch(promedio_genes[,c("DCT_C1",
"DCT_C2",
"DCT_C3")],
promedio_genes[,c("DCT_T1",
"DCT_T2",
"DCT_T3")])
head(tvalue_gen)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x
## 1 3 3 6 9.085539e+05 8.376754e+05 70878.485134 1.465044e+10
## 2 3 3 6 5.075313e+01 3.323744e+01 17.515689 2.199415e+02
## 3 3 3 6 1.295420e+01 8.045165e+00 4.909031 2.013772e+01
## 4 3 3 6 2.966509e+02 2.300374e+02 66.613527 2.626696e+04
## 5 3 3 6 1.377747e+02 8.364488e+01 54.129843 3.282233e+03
## 6 3 3 6 8.568666e+00 6.490691e+00 2.077974 5.271469e+00
## var.y stderr df statistic pvalue conf.low
## 1 2.146103e+10 1.097140e+05 3.862609 0.6460296 0.5546323 -2.380574e+05
## 2 2.264139e+02 1.219775e+01 3.999159 1.4359770 0.2243662 -1.635350e+01
## 3 1.799711e+01 3.565335e+00 3.987436 1.3768781 0.2407979 -5.002237e+00
## 4 2.708403e+04 1.333554e+02 3.999062 0.4995188 0.6436460 -3.036747e+02
## 5 2.764996e+02 3.444189e+01 2.334591 1.5716279 0.2388783 -7.545904e+01
## 6 7.923184e+00 2.097193e+00 3.844718 0.9908361 0.3799539 -3.838696e+00
## conf.high mean.null alternative conf.level
## 1 3.798144e+05 0 two.sided 0.95
## 2 5.138488e+01 0 two.sided 0.95
## 3 1.482030e+01 0 two.sided 0.95
## 4 4.369017e+02 0 two.sided 0.95
## 5 1.837187e+02 0 two.sided 0.95
## 6 7.994645e+00 0 two.sided 0.95
FCyPV <- promedio_genes %>%
select(1,8,9) %>%
mutate(p_value=tvalue_gen$pvalue,
fold_change=Mean_DCT_Tx/Mean_DCT_Cx) %>%
select(1,4,5)
FCyPV
## # A tibble: 360 × 3
## Gen p_value fold_change
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.555 0.922
## 2 hsa-let-7a-000377 0.224 0.655
## 3 hsa-let-7b-002619 0.241 0.621
## 4 hsa-let-7c-000379 0.644 0.775
## 5 hsa-let-7d-002283 0.239 0.607
## 6 hsa-let-7e-002406 0.380 0.757
## 7 hsa-let-7f-000382 0.259 0.564
## 8 hsa-let-7g-002282 0.955 1.01
## 9 hsa-miR-1-002222 0.621 1.29
## 10 hsa-miR-100-000437 0.902 0.963
## # ℹ 350 more rows
Logs <- FCyPV %>%
mutate(LPV=-log10(p_value),
LFC=log2(fold_change)) %>%
select(1,4,5)
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.256 -0.117
## 2 hsa-let-7a-000377 0.649 -0.611
## 3 hsa-let-7b-002619 0.618 -0.687
## 4 hsa-let-7c-000379 0.191 -0.367
## 5 hsa-let-7d-002283 0.622 -0.720
## 6 hsa-let-7e-002406 0.420 -0.401
## 7 hsa-let-7f-000382 0.587 -0.825
## 8 hsa-let-7g-002282 0.0201 0.0191
## 9 hsa-miR-1-002222 0.207 0.372
## 10 hsa-miR-100-000437 0.0449 -0.0545
## # ℹ 350 more rows
vulcano <- ggplot(Logs, aes(x = LFC, y = LPV)) +
geom_point(size = 2, color = "gray") +
theme_classic() +
labs(title = "Análisis comparativo de miRNAs",
caption = "Creador: Paola López",
x = "log2(2-DDCT)",
y = "-Log10(valor de p)")
vulcano

limite_p <- 0.05
limite_FC <- 1.5
down_regulated <- Logs %>%
filter(LFC < -log2(limite_FC),
LPV> -log10(limite_p))
down_regulated
## # A tibble: 1 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 hsa-miR-361-000554 1.32 -0.650
up_regulated <- Logs %>%
filter(LFC>log2(limite_FC),
LPV>-log10(limite_p))
up_regulated
## # A tibble: 0 × 3
## # ℹ 3 variables: Gen <chr>, LPV <dbl>, LFC <dbl>
top_down_regulated <- down_regulated %>%
arrange(desc(LPV)) %>%
head(5)
top_down_regulated
## # A tibble: 1 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 hsa-miR-361-000554 1.32 -0.650
top_up_regulated <- up_regulated %>%
arrange(desc(LPV)) %>%
head(5)
top_up_regulated
## # A tibble: 0 × 3
## # ℹ 3 variables: Gen <chr>, LPV <dbl>, LFC <dbl>
vulcano2 <- vulcano+
geom_hline(yintercept=-log10(limite_p),
linetype="dashed",
xintercept=c(-log2(limite_FC),log2(limite_FC)),
linetype="dashed")
## Warning: Duplicated aesthetics after name standardisation: linetype
## Warning in geom_hline(yintercept = -log10(limite_p), linetype = "dashed", :
## Ignoring unknown parameters: `xintercept`
vulcano2

vulcano3 <- vulcano2+
geom_point(data=up_regulated,
x=up_regulated$LFC,
y=up_regulated$LPV,
alpha=1,
size=3,
color="pink")+
geom_point(data=down_regulated,
x=down_regulated$LFC,
y=down_regulated$LPV,
alpha=1,
size=3,
color="purple")
vulcano3

vulcano4 <- vulcano3+
geom_label_repel(data=top_up_regulated,
mapping=aes(x=top_up_regulated$LFC,
y=top_up_regulated$LPV),
label=top_up_regulated$Gen,
max.overlaps = 10)+
geom_label_repel(data=top_down_regulated,
mapping=aes(x=top_down_regulated$LFC,
y=top_down_regulated$LPV),
label=top_down_regulated$Gen,
max.overlaps = 10)+
geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
geom_vline(xintercept = 1.5, linetype = "dashed", color = "black") +
geom_vline(xintercept = -1.5, linetype = "dashed", color = "black")
vulcano4
## Warning: Use of `top_down_regulated$LFC` is discouraged.
## ℹ Use `LFC` instead.
## Warning: Use of `top_down_regulated$LPV` is discouraged.
## ℹ Use `LPV` instead.

ggsave(filename = "Gráfica V plot.png",
plot = vulcano4,
dpi = 300)
## Saving 7 x 5 in image
## Warning: Use of `top_down_regulated$LFC` is discouraged.
## ℹ Use `LFC` instead.
## Warning: Use of `top_down_regulated$LPV` is discouraged.
## ℹ Use `LPV` instead.