#Diseñado por: Zyanya Garduño #Gráfica de Volcano
#install.packages("pacman")
library(pacman)
p_load("readr", #para llamar la base de datos
"ggplot2", #para graficar
"ggrepel2", #para etiquetar datos en una gráfica
"dplyr", #para facilitar el manejo de datos
"matrixTests") #para realizar la prueba estadística
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
## Warning: package 'ggrepel2' is not available for this version of R
##
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: 'BiocManager' not available. Could not check Bioconductor.
##
## Please use `install.packages('BiocManager')` and then retry.
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ggrepel2'
## Warning in p_load("readr", "ggplot2", "ggrepel2", "dplyr", "matrixTests"): Failed to install/load:
## ggrepel2
datos <- read_csv(file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv")
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 8
## Gen Condicion Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control 13.8 11.7 11.9 13.2 13.0 12.6
## 2 ath-miR159a-000338 Target 35 35 35 35 35 35
## 3 hsa-let-7a-000377 Target 20.5 21.0 21.0 20.4 19.6 20.9
## 4 hsa-let-7b-002619 Target 18.4 19.0 19.1 18.3 17.4 19.0
## 5 hsa-let-7c-000379 Target 22.2 23.7 23.8 22.9 22.0 24.0
## 6 hsa-let-7d-002283 Target 22.2 22.7 21.9 22.0 21.2 21.9
Extracción de controles y cálculo del 2^-DCT
Controles <- datos %>%
filter(Condicion == "Control")
head(Controles)
## # A tibble: 3 × 8
## Gen Condicion Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control 13.8 11.7 11.9 13.2 13.0 12.6
## 2 RNU44-001094 Control 17.3 16.9 16.8 17.7 17.3 16.8
## 3 RNU48-001006 Control 15.1 16.4 17.0 16.0 15.1 16.4
Promedio de controles
promedio_controles <- Controles %>%
summarise(Mean_C1 = mean(Cx1),
Mean_C2 = mean(Cx2),
Mean_C3 = mean(Cx3),
Mean_T1 = mean(T1),
Mean_T2 = mean(T2),
Mean_T3 = mean(T3)) %>%
mutate(Gen = "Promedio_controles") %>%
select(7,1,2,3,4,5,6)
promedio_controles
## # A tibble: 1 × 7
## Gen Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Promedio_controles 15.4 15.0 15.3 15.6 15.1 15.3
Extraer los genes de la tabla “datos”
genes <- datos %>%
filter(Condicion == "Target") %>%
select(-2)
head(genes)
## # A tibble: 6 × 7
## Gen Cx1 Cx2 Cx3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338 35 35 35 35 35 35
## 2 hsa-let-7a-000377 20.5 21.0 21.0 20.4 19.6 20.9
## 3 hsa-let-7b-002619 18.4 19.0 19.1 18.3 17.4 19.0
## 4 hsa-let-7c-000379 22.2 23.7 23.8 22.9 22.0 24.0
## 5 hsa-let-7d-002283 22.2 22.7 21.9 22.0 21.2 21.9
## 6 hsa-let-7e-002406 18.0 18.4 18.5 18.0 17.3 18.6
Calcular el 2^-DCT
DCT <- genes %>%
mutate(DCT_C1=2^-(Cx1 - promedio_controles$Mean_C1),
DCT_C2=2^-(Cx2 - promedio_controles$Mean_C2),
DCT_C3=2^-(Cx3 - promedio_controles$Mean_C3),
DCT_T1=2^-(T1 - promedio_controles$Mean_T1),
DCT_T2=2^-(T2 - promedio_controles$Mean_T2),
DCT_T3=2^-(T3 - promedio_controles$Mean_T3)) %>%
select(-2,-3,-4,-5,-6,-7)
DCT
## # A tibble: 360 × 7
## Gen DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338 0.00000124 0.000000960 0.00000114 1.47e-6 1.03e-6 1.16e-6
## 2 hsa-let-7a-000377 0.0289 0.0157 0.0185 3.70e-2 4.49e-2 1.98e-2
## 3 hsa-let-7b-002619 0.127 0.0615 0.0677 1.62e-1 1.98e-1 7.75e-2
## 4 hsa-let-7c-000379 0.00900 0.00243 0.00272 6.40e-3 8.67e-3 2.39e-3
## 5 hsa-let-7d-002283 0.00893 0.00492 0.0102 1.23e-2 1.46e-2 9.87e-3
## 6 hsa-let-7e-002406 0.167 0.0960 0.108 1.91e-1 2.21e-1 1.03e-1
## 7 hsa-let-7f-000382 0.00190 0.000882 0.00146 3.58e-3 4.25e-3 1.24e-3
## 8 hsa-let-7g-002282 0.00963 0.00720 0.00659 9.86e-3 8.59e-3 5.50e-3
## 9 hsa-miR-1-002222 0.0000150 0.00000404 0.00000661 5.99e-6 9.45e-6 3.04e-6
## 10 hsa-miR-100-000437 0.287 0.390 0.498 4.10e-1 2.69e-1 6.25e-1
## # ℹ 350 more rows
#Promedios por gen y prueba estadística T-Student (Welch)
promedio_genes <- DCT %>%
mutate(Mean_DCT_Cx = (DCT_C1+DCT_C2+DCT_C3)/3,
Mean_DCT_Tx = (DCT_T1+DCT_T2+DCT_T3)/3)
promedio_genes
## # A tibble: 360 × 9
## Gen DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-… 1.24e-6 9.60e-7 1.14e-6 1.47e-6 1.03e-6 1.16e-6 0.00000111 0.00000122
## 2 hsa-… 2.89e-2 1.57e-2 1.85e-2 3.70e-2 4.49e-2 1.98e-2 0.0210 0.0339
## 3 hsa-… 1.27e-1 6.15e-2 6.77e-2 1.62e-1 1.98e-1 7.75e-2 0.0856 0.146
## 4 hsa-… 9.00e-3 2.43e-3 2.72e-3 6.40e-3 8.67e-3 2.39e-3 0.00472 0.00582
## 5 hsa-… 8.93e-3 4.92e-3 1.02e-2 1.23e-2 1.46e-2 9.87e-3 0.00802 0.0123
## 6 hsa-… 1.67e-1 9.60e-2 1.08e-1 1.91e-1 2.21e-1 1.03e-1 0.123 0.172
## 7 hsa-… 1.90e-3 8.82e-4 1.46e-3 3.58e-3 4.25e-3 1.24e-3 0.00142 0.00302
## 8 hsa-… 9.63e-3 7.20e-3 6.59e-3 9.86e-3 8.59e-3 5.50e-3 0.00781 0.00798
## 9 hsa-… 1.50e-5 4.04e-6 6.61e-6 5.99e-6 9.45e-6 3.04e-6 0.00000856 0.00000616
## 10 hsa-… 2.87e-1 3.90e-1 4.98e-1 4.10e-1 2.69e-1 6.25e-1 0.392 0.434
## # ℹ 350 more rows
Definir prueba estadístuca: t-Student
tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1",
"DCT_C2",
"DCT_C3")],
promedio_genes[, c("DCT_T1",
"DCT_T2",
"DCT_T3")])
head(tvalue_gen)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x
## 1 3 3 6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2 3 3 6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3 3 3 6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4 3 3 6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5 3 3 6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6 3 3 6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
## var.y stderr df statistic pvalue conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
## conf.high mean.null alternative conf.level
## 1 3.557117e-07 0 two.sided 0.95
## 2 1.347725e-02 0 two.sided 0.95
## 3 6.634885e-02 0 two.sided 0.95
## 4 6.803589e-03 0 two.sided 0.95
## 5 1.650329e-03 0 two.sided 0.95
## 6 7.708100e-02 0 two.sided 0.95
FcYPV <- promedio_genes %>%
select(1, 8, 9) %>%
mutate(p_value = tvalue_gen$pvalue,
Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>%
select(1, 4, 5)
FcYPV
## # A tibble: 360 × 3
## Gen p_value Fold_change
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.534 1.10
## 2 hsa-let-7a-000377 0.221 1.61
## 3 hsa-let-7b-002619 0.236 1.70
## 4 hsa-let-7c-000379 0.716 1.23
## 5 hsa-let-7d-002283 0.115 1.53
## 6 hsa-let-7e-002406 0.323 1.39
## 7 hsa-let-7f-000382 0.214 2.13
## 8 hsa-let-7g-002282 0.918 1.02
## 9 hsa-miR-1-002222 0.571 0.719
## 10 hsa-miR-100-000437 0.743 1.11
## # ℹ 350 more rows
Logs <- FcYPV %>%
mutate(LPV = -log10(p_value),
LFC = log2(Fold_change)) %>%
select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.272 0.132
## 2 hsa-let-7a-000377 0.655 0.688
## 3 hsa-let-7b-002619 0.627 0.768
## 4 hsa-let-7c-000379 0.145 0.303
## 5 hsa-let-7d-002283 0.938 0.613
## 6 hsa-let-7e-002406 0.491 0.476
## 7 hsa-let-7f-000382 0.669 1.09
## 8 hsa-let-7g-002282 0.0370 0.0320
## 9 hsa-miR-1-002222 0.244 -0.475
## 10 hsa-miR-100-000437 0.129 0.150
## # ℹ 350 more rows
tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1",
"DCT_C2",
"DCT_C3")],
promedio_genes[, c("DCT_T1",
"DCT_T2",
"DCT_T3")])
head(tvalue_gen)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x
## 1 3 3 6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2 3 3 6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3 3 3 6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4 3 3 6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5 3 3 6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6 3 3 6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
## var.y stderr df statistic pvalue conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
## conf.high mean.null alternative conf.level
## 1 3.557117e-07 0 two.sided 0.95
## 2 1.347725e-02 0 two.sided 0.95
## 3 6.634885e-02 0 two.sided 0.95
## 4 6.803589e-03 0 two.sided 0.95
## 5 1.650329e-03 0 two.sided 0.95
## 6 7.708100e-02 0 two.sided 0.95
FcYPV <- promedio_genes %>%
select(1, 8, 9) %>%
mutate(p_value = tvalue_gen$pvalue,
Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>%
select(1, 4, 5)
FcYPV
## # A tibble: 360 × 3
## Gen p_value Fold_change
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.534 1.10
## 2 hsa-let-7a-000377 0.221 1.61
## 3 hsa-let-7b-002619 0.236 1.70
## 4 hsa-let-7c-000379 0.716 1.23
## 5 hsa-let-7d-002283 0.115 1.53
## 6 hsa-let-7e-002406 0.323 1.39
## 7 hsa-let-7f-000382 0.214 2.13
## 8 hsa-let-7g-002282 0.918 1.02
## 9 hsa-miR-1-002222 0.571 0.719
## 10 hsa-miR-100-000437 0.743 1.11
## # ℹ 350 more rows
Logs <- FcYPV %>%
mutate(LPV = -log10(p_value),
LFC = log2(Fold_change)) %>%
select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.272 0.132
## 2 hsa-let-7a-000377 0.655 0.688
## 3 hsa-let-7b-002619 0.627 0.768
## 4 hsa-let-7c-000379 0.145 0.303
## 5 hsa-let-7d-002283 0.938 0.613
## 6 hsa-let-7e-002406 0.491 0.476
## 7 hsa-let-7f-000382 0.669 1.09
## 8 hsa-let-7g-002282 0.0370 0.0320
## 9 hsa-miR-1-002222 0.244 -0.475
## 10 hsa-miR-100-000437 0.129 0.150
## # ℹ 350 more rows
Límites como filtrado de miRNAs significativos y extracción de los Top 5 con la parte de top
tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1",
"DCT_C2",
"DCT_C3")],
promedio_genes[, c("DCT_T1",
"DCT_T2",
"DCT_T3")])
head(tvalue_gen) # muestra las primeras filas
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x
## 1 3 3 6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2 3 3 6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3 3 3 6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4 3 3 6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5 3 3 6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6 3 3 6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
## var.y stderr df statistic pvalue conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
## conf.high mean.null alternative conf.level
## 1 3.557117e-07 0 two.sided 0.95
## 2 1.347725e-02 0 two.sided 0.95
## 3 6.634885e-02 0 two.sided 0.95
## 4 6.803589e-03 0 two.sided 0.95
## 5 1.650329e-03 0 two.sided 0.95
## 6 7.708100e-02 0 two.sided 0.95
FcYPV <- promedio_genes %>%
select(1, 8, 9) %>%
mutate(p_value = tvalue_gen$pvalue,
Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>%
select(1, 4, 5)
FcYPV
## # A tibble: 360 × 3
## Gen p_value Fold_change
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.534 1.10
## 2 hsa-let-7a-000377 0.221 1.61
## 3 hsa-let-7b-002619 0.236 1.70
## 4 hsa-let-7c-000379 0.716 1.23
## 5 hsa-let-7d-002283 0.115 1.53
## 6 hsa-let-7e-002406 0.323 1.39
## 7 hsa-let-7f-000382 0.214 2.13
## 8 hsa-let-7g-002282 0.918 1.02
## 9 hsa-miR-1-002222 0.571 0.719
## 10 hsa-miR-100-000437 0.743 1.11
## # ℹ 350 more rows
Logs <- FcYPV %>%
mutate(LPV = -log10(p_value),
LFC = log2(Fold_change)) %>%
select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.272 0.132
## 2 hsa-let-7a-000377 0.655 0.688
## 3 hsa-let-7b-002619 0.627 0.768
## 4 hsa-let-7c-000379 0.145 0.303
## 5 hsa-let-7d-002283 0.938 0.613
## 6 hsa-let-7e-002406 0.491 0.476
## 7 hsa-let-7f-000382 0.669 1.09
## 8 hsa-let-7g-002282 0.0370 0.0320
## 9 hsa-miR-1-002222 0.244 -0.475
## 10 hsa-miR-100-000437 0.129 0.150
## # ℹ 350 more rows
Mejorar la gráfica al añadir líneas horizontales y verticales de referencia
tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1",
"DCT_C2",
"DCT_C3")],
promedio_genes[, c("DCT_T1",
"DCT_T2",
"DCT_T3")])
head(tvalue_gen) # muestra las primeras filas en el documento
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x
## 1 3 3 6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2 3 3 6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3 3 3 6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4 3 3 6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5 3 3 6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6 3 3 6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
## var.y stderr df statistic pvalue conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
## conf.high mean.null alternative conf.level
## 1 3.557117e-07 0 two.sided 0.95
## 2 1.347725e-02 0 two.sided 0.95
## 3 6.634885e-02 0 two.sided 0.95
## 4 6.803589e-03 0 two.sided 0.95
## 5 1.650329e-03 0 two.sided 0.95
## 6 7.708100e-02 0 two.sided 0.95
summary(tvalue_gen) # resumen estadístico
## obs.x obs.y obs.tot mean.x mean.y
## Min. :3 Min. :3 Min. :6 Min. :1.110e-06 Min. :1.220e-06
## 1st Qu.:3 1st Qu.:3 1st Qu.:6 1st Qu.:2.100e-06 1st Qu.:1.950e-06
## Median :3 Median :3 Median :6 Median :8.123e-05 Median :6.753e-05
## Mean :3 Mean :3 Mean :6 Mean :1.327e-02 Mean :1.511e-02
## 3rd Qu.:3 3rd Qu.:3 3rd Qu.:6 3rd Qu.:1.891e-03 3rd Qu.:2.199e-03
## Max. :3 Max. :3 Max. :6 Max. :5.497e-01 Max. :1.056e+00
## mean.diff var.x var.y
## Min. :-5.183e-01 Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:-1.112e-04 1st Qu.:0.000e+00 1st Qu.:0.000e+00
## Median :-1.100e-07 Median :0.000e+00 Median :0.000e+00
## Mean :-1.844e-03 Mean :2.052e-03 Mean :2.578e-03
## 3rd Qu.: 5.780e-06 3rd Qu.:9.100e-07 3rd Qu.:1.530e-06
## Max. : 2.290e-01 Max. :4.587e-01 Max. :6.438e-01
## stderr df statistic pvalue
## Min. :1.300e-07 Min. :2.000 Min. :-3.6706 Min. :0.03272
## 1st Qu.:2.130e-06 1st Qu.:2.937 1st Qu.:-0.6905 1st Qu.:0.39479
## Median :4.067e-05 Median :3.376 Median :-0.5101 Median :0.53436
## Mean :7.216e-03 Mean :3.286 Mean :-0.2439 Mean :0.52969
## 3rd Qu.:9.849e-04 3rd Qu.:3.853 3rd Qu.: 0.4739 3rd Qu.:0.65655
## Max. :6.062e-01 Max. :4.000 Max. : 3.2876 Max. :0.99797
## conf.low conf.high mean.null alternative
## Min. :-2.220e+00 Min. :-3.181e-04 Min. :0 Length:360
## 1st Qu.:-3.148e-03 1st Qu.: 6.620e-06 1st Qu.:0 Class :character
## Median :-1.173e-04 Median : 1.178e-04 Median :0 Mode :character
## Mean :-2.289e-02 Mean : 1.920e-02 Mean :0
## 3rd Qu.:-6.310e-06 3rd Qu.: 2.366e-03 3rd Qu.:0
## Max. : 4.220e-06 Max. : 1.184e+00 Max. :0
## conf.level
## Min. :0.95
## 1st Qu.:0.95
## Median :0.95
## Mean :0.95
## 3rd Qu.:0.95
## Max. :0.95
FcYPV <- promedio_genes %>%
select(1, 8, 9) %>%
mutate(p_value = tvalue_gen$pvalue,
Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>%
select(1, 4, 5)
FcYPV
## # A tibble: 360 × 3
## Gen p_value Fold_change
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.534 1.10
## 2 hsa-let-7a-000377 0.221 1.61
## 3 hsa-let-7b-002619 0.236 1.70
## 4 hsa-let-7c-000379 0.716 1.23
## 5 hsa-let-7d-002283 0.115 1.53
## 6 hsa-let-7e-002406 0.323 1.39
## 7 hsa-let-7f-000382 0.214 2.13
## 8 hsa-let-7g-002282 0.918 1.02
## 9 hsa-miR-1-002222 0.571 0.719
## 10 hsa-miR-100-000437 0.743 1.11
## # ℹ 350 more rows
Logs <- FcYPV %>%
mutate(LPV = -log10(p_value),
LFC = log2(Fold_change)) %>%
select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue
Logs
## # A tibble: 360 × 3
## Gen LPV LFC
## <chr> <dbl> <dbl>
## 1 ath-miR159a-000338 0.272 0.132
## 2 hsa-let-7a-000377 0.655 0.688
## 3 hsa-let-7b-002619 0.627 0.768
## 4 hsa-let-7c-000379 0.145 0.303
## 5 hsa-let-7d-002283 0.938 0.613
## 6 hsa-let-7e-002406 0.491 0.476
## 7 hsa-let-7f-000382 0.669 1.09
## 8 hsa-let-7g-002282 0.0370 0.0320
## 9 hsa-miR-1-002222 0.244 -0.475
## 10 hsa-miR-100-000437 0.129 0.150
## # ℹ 350 more rows
vulcano <- ggplot(Logs, mapping = aes(x = LFC,
y = LPV)) +
geom_point(size = 2,
color = "#546E7A")+
theme_classic()+
labs(title = "Análisis comparativo de miRNAs",
caption = "Diseñado: Zyanya Garduño",
x = "Log2 (2-DDCT)",
y = "-Log10 (valor de p)")
vulcano
# Definir límites de significancia y fold change
limite_p <- 0.05
limite_FC <- 1.5
# Filtrar genes down-regulated
down_regulated <- Logs %>%
filter(LFC < -log2(limite_FC),
LPV > -log10(limite_p))
# Filtrar genes up-regulated
up_regulated <- Logs %>%
filter(LFC > log2(limite_FC),
LPV > -log10(limite_p))
# Top 5 más significativos
top_down_regulated <- down_regulated %>%
arrange(desc(LPV)) %>%
head(5)
top_up_regulated <- up_regulated %>%
arrange(desc(LPV)) %>%
head(5)
# Añadir líneas horizontales y verticales de referencia
vulcano2 <- vulcano +
geom_hline(yintercept = -log10(limite_p),
linetype = "dashed", color = "black") +
geom_vline(xintercept = log2(limite_FC),
linetype = "dashed", color = "black") +
geom_vline(xintercept = -log2(limite_FC),
linetype = "dashed", color = "black")
vulcano2
#Añadir puntos de los genes up-regulated (rojo)
vulcano3 <- vulcano2 +
geom_point(data = up_regulated,
aes(x = LFC, y = LPV),
alpha = 1, size = 3, color = "#E64B35") +
#Añadir puntos de los genes down-regulated (azul)
geom_point(data = down_regulated,
aes(x = LFC, y = LPV),
alpha = 1, size = 3, color = "#8FBAFA")
vulcano3
library(ggrepel)
library(grid) # necesario para usar unit()
vulcano4 <- vulcano3 +
geom_label_repel(data = top_up_regulated,
aes(x = LFC, y = LPV, label = Gen),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = "grey50",
segment.size = 0.2,
max.overlaps = 50,
nudge_x = 0.5,
nudge_y = 0.5,
show.legend = FALSE) +
geom_label_repel(data = top_down_regulated,
aes(x = LFC, y = LPV, label = Gen),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.5, "lines"),
segment.color = "grey50",
segment.size = 0.2,
max.overlaps = 50,
nudge_x = -0.5,
nudge_y = -0.5,
show.legend = FALSE)
vulcano4
Guardar la gráfica
ggsave("vulcano4.jpg",
plot = vulcano4,
height = 5, width = 6,
dpi = 300)