#Diseñado por: Zyanya Garduño #Gráfica de Volcano

#install.packages("pacman")
library(pacman)

p_load("readr", #para llamar la base de datos
       "ggplot2", #para graficar
       "ggrepel2", #para etiquetar datos en una gráfica
       "dplyr", #para facilitar el manejo de datos
       "matrixTests") #para realizar la prueba estadística 
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
## Warning: package 'ggrepel2' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: 'BiocManager' not available.  Could not check Bioconductor.
## 
## Please use `install.packages('BiocManager')` and then retry.
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'ggrepel2'
## Warning in p_load("readr", "ggplot2", "ggrepel2", "dplyr", "matrixTests"): Failed to install/load:
## ggrepel2
datos <- read_csv(file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv") 
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 8
##   Gen                Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973    Control    13.8  11.7  11.9  13.2  13.0  12.6
## 2 ath-miR159a-000338 Target     35    35    35    35    35    35  
## 3 hsa-let-7a-000377  Target     20.5  21.0  21.0  20.4  19.6  20.9
## 4 hsa-let-7b-002619  Target     18.4  19.0  19.1  18.3  17.4  19.0
## 5 hsa-let-7c-000379  Target     22.2  23.7  23.8  22.9  22.0  24.0
## 6 hsa-let-7d-002283  Target     22.2  22.7  21.9  22.0  21.2  21.9

Extracción de controles y cálculo del 2^-DCT

Controles <- datos %>% 
  filter(Condicion == "Control")

head(Controles)
## # A tibble: 3 × 8
##   Gen             Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>           <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control    13.8  11.7  11.9  13.2  13.0  12.6
## 2 RNU44-001094    Control    17.3  16.9  16.8  17.7  17.3  16.8
## 3 RNU48-001006    Control    15.1  16.4  17.0  16.0  15.1  16.4

Promedio de controles

promedio_controles <- Controles %>% 
  summarise(Mean_C1 = mean(Cx1), 
            Mean_C2 = mean(Cx2), 
            Mean_C3 = mean(Cx3),
            Mean_T1 = mean(T1), 
            Mean_T2 = mean(T2), 
            Mean_T3 = mean(T3)) %>% 
  mutate(Gen = "Promedio_controles") %>% 
  select(7,1,2,3,4,5,6)
promedio_controles
## # A tibble: 1 × 7
##   Gen                Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
##   <chr>                <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 Promedio_controles    15.4    15.0    15.3    15.6    15.1    15.3

Extraer los genes de la tabla “datos”

genes <- datos %>% 
  filter(Condicion == "Target") %>% 
  select(-2)
head(genes)
## # A tibble: 6 × 7
##   Gen                  Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338  35    35    35    35    35    35  
## 2 hsa-let-7a-000377   20.5  21.0  21.0  20.4  19.6  20.9
## 3 hsa-let-7b-002619   18.4  19.0  19.1  18.3  17.4  19.0
## 4 hsa-let-7c-000379   22.2  23.7  23.8  22.9  22.0  24.0
## 5 hsa-let-7d-002283   22.2  22.7  21.9  22.0  21.2  21.9
## 6 hsa-let-7e-002406   18.0  18.4  18.5  18.0  17.3  18.6

Calcular el 2^-DCT

DCT <- genes %>% 
  mutate(DCT_C1=2^-(Cx1 - promedio_controles$Mean_C1),
         DCT_C2=2^-(Cx2 - promedio_controles$Mean_C2),
         DCT_C3=2^-(Cx3 - promedio_controles$Mean_C3),
         DCT_T1=2^-(T1 - promedio_controles$Mean_T1),
         DCT_T2=2^-(T2 - promedio_controles$Mean_T2),
         DCT_T3=2^-(T3 - promedio_controles$Mean_T3)) %>% 
  select(-2,-3,-4,-5,-6,-7) 

DCT 
## # A tibble: 360 × 7
##    Gen                    DCT_C1      DCT_C2     DCT_C3   DCT_T1  DCT_T2  DCT_T3
##    <chr>                   <dbl>       <dbl>      <dbl>    <dbl>   <dbl>   <dbl>
##  1 ath-miR159a-000338 0.00000124 0.000000960 0.00000114  1.47e-6 1.03e-6 1.16e-6
##  2 hsa-let-7a-000377  0.0289     0.0157      0.0185      3.70e-2 4.49e-2 1.98e-2
##  3 hsa-let-7b-002619  0.127      0.0615      0.0677      1.62e-1 1.98e-1 7.75e-2
##  4 hsa-let-7c-000379  0.00900    0.00243     0.00272     6.40e-3 8.67e-3 2.39e-3
##  5 hsa-let-7d-002283  0.00893    0.00492     0.0102      1.23e-2 1.46e-2 9.87e-3
##  6 hsa-let-7e-002406  0.167      0.0960      0.108       1.91e-1 2.21e-1 1.03e-1
##  7 hsa-let-7f-000382  0.00190    0.000882    0.00146     3.58e-3 4.25e-3 1.24e-3
##  8 hsa-let-7g-002282  0.00963    0.00720     0.00659     9.86e-3 8.59e-3 5.50e-3
##  9 hsa-miR-1-002222   0.0000150  0.00000404  0.00000661  5.99e-6 9.45e-6 3.04e-6
## 10 hsa-miR-100-000437 0.287      0.390       0.498       4.10e-1 2.69e-1 6.25e-1
## # ℹ 350 more rows
#Promedios por gen y prueba estadística T-Student (Welch)
promedio_genes <- DCT %>% 
  mutate(Mean_DCT_Cx = (DCT_C1+DCT_C2+DCT_C3)/3,
         Mean_DCT_Tx = (DCT_T1+DCT_T2+DCT_T3)/3)

promedio_genes
## # A tibble: 360 × 9
##    Gen    DCT_C1  DCT_C2  DCT_C3  DCT_T1  DCT_T2  DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
##    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>       <dbl>       <dbl>
##  1 ath-… 1.24e-6 9.60e-7 1.14e-6 1.47e-6 1.03e-6 1.16e-6  0.00000111  0.00000122
##  2 hsa-… 2.89e-2 1.57e-2 1.85e-2 3.70e-2 4.49e-2 1.98e-2  0.0210      0.0339    
##  3 hsa-… 1.27e-1 6.15e-2 6.77e-2 1.62e-1 1.98e-1 7.75e-2  0.0856      0.146     
##  4 hsa-… 9.00e-3 2.43e-3 2.72e-3 6.40e-3 8.67e-3 2.39e-3  0.00472     0.00582   
##  5 hsa-… 8.93e-3 4.92e-3 1.02e-2 1.23e-2 1.46e-2 9.87e-3  0.00802     0.0123    
##  6 hsa-… 1.67e-1 9.60e-2 1.08e-1 1.91e-1 2.21e-1 1.03e-1  0.123       0.172     
##  7 hsa-… 1.90e-3 8.82e-4 1.46e-3 3.58e-3 4.25e-3 1.24e-3  0.00142     0.00302   
##  8 hsa-… 9.63e-3 7.20e-3 6.59e-3 9.86e-3 8.59e-3 5.50e-3  0.00781     0.00798   
##  9 hsa-… 1.50e-5 4.04e-6 6.61e-6 5.99e-6 9.45e-6 3.04e-6  0.00000856  0.00000616
## 10 hsa-… 2.87e-1 3.90e-1 4.98e-1 4.10e-1 2.69e-1 6.25e-1  0.392       0.434     
## # ℹ 350 more rows

Definir prueba estadístuca: t-Student

tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1", 
                                             "DCT_C2", 
                                             "DCT_C3")],
                          promedio_genes[, c("DCT_T1", 
                                             "DCT_T2", 
                                             "DCT_T3")])
head(tvalue_gen)
##   obs.x obs.y obs.tot       mean.x       mean.y     mean.diff        var.x
## 1     3     3       6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2     3     3       6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3     3     3       6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4     3     3       6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5     3     3       6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6     3     3       6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
##          var.y       stderr       df  statistic    pvalue      conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
##      conf.high mean.null alternative conf.level
## 1 3.557117e-07         0   two.sided       0.95
## 2 1.347725e-02         0   two.sided       0.95
## 3 6.634885e-02         0   two.sided       0.95
## 4 6.803589e-03         0   two.sided       0.95
## 5 1.650329e-03         0   two.sided       0.95
## 6 7.708100e-02         0   two.sided       0.95
FcYPV <- promedio_genes %>% 
  select(1, 8, 9) %>% 
  mutate(p_value = tvalue_gen$pvalue,
         Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>% 
  select(1, 4, 5)

FcYPV
## # A tibble: 360 × 3
##    Gen                p_value Fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.534       1.10 
##  2 hsa-let-7a-000377    0.221       1.61 
##  3 hsa-let-7b-002619    0.236       1.70 
##  4 hsa-let-7c-000379    0.716       1.23 
##  5 hsa-let-7d-002283    0.115       1.53 
##  6 hsa-let-7e-002406    0.323       1.39 
##  7 hsa-let-7f-000382    0.214       2.13 
##  8 hsa-let-7g-002282    0.918       1.02 
##  9 hsa-miR-1-002222     0.571       0.719
## 10 hsa-miR-100-000437   0.743       1.11 
## # ℹ 350 more rows
Logs <- FcYPV %>% 
  mutate(LPV = -log10(p_value),
         LFC = log2(Fold_change)) %>% 
  select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue

Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.272   0.132 
##  2 hsa-let-7a-000377  0.655   0.688 
##  3 hsa-let-7b-002619  0.627   0.768 
##  4 hsa-let-7c-000379  0.145   0.303 
##  5 hsa-let-7d-002283  0.938   0.613 
##  6 hsa-let-7e-002406  0.491   0.476 
##  7 hsa-let-7f-000382  0.669   1.09  
##  8 hsa-let-7g-002282  0.0370  0.0320
##  9 hsa-miR-1-002222   0.244  -0.475 
## 10 hsa-miR-100-000437 0.129   0.150 
## # ℹ 350 more rows
tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1", 
                                             "DCT_C2", 
                                             "DCT_C3")],
                          promedio_genes[, c("DCT_T1", 
                                             "DCT_T2", 
                                             "DCT_T3")])
head(tvalue_gen)
##   obs.x obs.y obs.tot       mean.x       mean.y     mean.diff        var.x
## 1     3     3       6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2     3     3       6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3     3     3       6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4     3     3       6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5     3     3       6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6     3     3       6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
##          var.y       stderr       df  statistic    pvalue      conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
##      conf.high mean.null alternative conf.level
## 1 3.557117e-07         0   two.sided       0.95
## 2 1.347725e-02         0   two.sided       0.95
## 3 6.634885e-02         0   two.sided       0.95
## 4 6.803589e-03         0   two.sided       0.95
## 5 1.650329e-03         0   two.sided       0.95
## 6 7.708100e-02         0   two.sided       0.95
FcYPV <- promedio_genes %>% 
  select(1, 8, 9) %>% 
  mutate(p_value = tvalue_gen$pvalue,
         Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>% 
  select(1, 4, 5)

FcYPV
## # A tibble: 360 × 3
##    Gen                p_value Fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.534       1.10 
##  2 hsa-let-7a-000377    0.221       1.61 
##  3 hsa-let-7b-002619    0.236       1.70 
##  4 hsa-let-7c-000379    0.716       1.23 
##  5 hsa-let-7d-002283    0.115       1.53 
##  6 hsa-let-7e-002406    0.323       1.39 
##  7 hsa-let-7f-000382    0.214       2.13 
##  8 hsa-let-7g-002282    0.918       1.02 
##  9 hsa-miR-1-002222     0.571       0.719
## 10 hsa-miR-100-000437   0.743       1.11 
## # ℹ 350 more rows
Logs <- FcYPV %>% 
  mutate(LPV = -log10(p_value),
         LFC = log2(Fold_change)) %>% 
  select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue

Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.272   0.132 
##  2 hsa-let-7a-000377  0.655   0.688 
##  3 hsa-let-7b-002619  0.627   0.768 
##  4 hsa-let-7c-000379  0.145   0.303 
##  5 hsa-let-7d-002283  0.938   0.613 
##  6 hsa-let-7e-002406  0.491   0.476 
##  7 hsa-let-7f-000382  0.669   1.09  
##  8 hsa-let-7g-002282  0.0370  0.0320
##  9 hsa-miR-1-002222   0.244  -0.475 
## 10 hsa-miR-100-000437 0.129   0.150 
## # ℹ 350 more rows

Límites como filtrado de miRNAs significativos y extracción de los Top 5 con la parte de top

tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1", 
                                             "DCT_C2", 
                                             "DCT_C3")],
                          promedio_genes[, c("DCT_T1", 
                                             "DCT_T2", 
                                             "DCT_T3")])
head(tvalue_gen)  # muestra las primeras filas
##   obs.x obs.y obs.tot       mean.x       mean.y     mean.diff        var.x
## 1     3     3       6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2     3     3       6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3     3     3       6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4     3     3       6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5     3     3       6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6     3     3       6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
##          var.y       stderr       df  statistic    pvalue      conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
##      conf.high mean.null alternative conf.level
## 1 3.557117e-07         0   two.sided       0.95
## 2 1.347725e-02         0   two.sided       0.95
## 3 6.634885e-02         0   two.sided       0.95
## 4 6.803589e-03         0   two.sided       0.95
## 5 1.650329e-03         0   two.sided       0.95
## 6 7.708100e-02         0   two.sided       0.95
FcYPV <- promedio_genes %>% 
  select(1, 8, 9) %>% 
  mutate(p_value = tvalue_gen$pvalue,
         Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>% 
  select(1, 4, 5)

FcYPV
## # A tibble: 360 × 3
##    Gen                p_value Fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.534       1.10 
##  2 hsa-let-7a-000377    0.221       1.61 
##  3 hsa-let-7b-002619    0.236       1.70 
##  4 hsa-let-7c-000379    0.716       1.23 
##  5 hsa-let-7d-002283    0.115       1.53 
##  6 hsa-let-7e-002406    0.323       1.39 
##  7 hsa-let-7f-000382    0.214       2.13 
##  8 hsa-let-7g-002282    0.918       1.02 
##  9 hsa-miR-1-002222     0.571       0.719
## 10 hsa-miR-100-000437   0.743       1.11 
## # ℹ 350 more rows
Logs <- FcYPV %>% 
  mutate(LPV = -log10(p_value),
         LFC = log2(Fold_change)) %>% 
  select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue

Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.272   0.132 
##  2 hsa-let-7a-000377  0.655   0.688 
##  3 hsa-let-7b-002619  0.627   0.768 
##  4 hsa-let-7c-000379  0.145   0.303 
##  5 hsa-let-7d-002283  0.938   0.613 
##  6 hsa-let-7e-002406  0.491   0.476 
##  7 hsa-let-7f-000382  0.669   1.09  
##  8 hsa-let-7g-002282  0.0370  0.0320
##  9 hsa-miR-1-002222   0.244  -0.475 
## 10 hsa-miR-100-000437 0.129   0.150 
## # ℹ 350 more rows

Mejorar la gráfica al añadir líneas horizontales y verticales de referencia

tvalue_gen <- row_t_welch(promedio_genes[, c("DCT_C1", 
                                             "DCT_C2", 
                                             "DCT_C3")],
                          promedio_genes[, c("DCT_T1", 
                                             "DCT_T2", 
                                             "DCT_T3")])
head(tvalue_gen) # muestra las primeras filas en el documento
##   obs.x obs.y obs.tot       mean.x       mean.y     mean.diff        var.x
## 1     3     3       6 1.113319e-06 1.220085e-06 -1.067666e-07 2.043512e-14
## 2     3     3       6 2.104945e-02 3.390214e-02 -1.285269e-02 4.856207e-05
## 3     3     3       6 8.558097e-02 1.457383e-01 -6.015735e-02 1.327202e-03
## 4     3     3       6 4.716674e-03 5.820710e-03 -1.104036e-03 1.377166e-05
## 5     3     3       6 8.019463e-03 1.226943e-02 -4.249971e-03 7.633725e-06
## 6     3     3       6 1.234439e-01 1.716779e-01 -4.823397e-02 1.436199e-03
##          var.y       stderr       df  statistic    pvalue      conf.low
## 1 5.127956e-14 1.546121e-07 3.375569 -0.6905449 0.5343619 -5.692448e-07
## 2 1.636924e-04 8.411390e-03 3.090675 -1.5280102 0.2213573 -3.918263e-02
## 3 3.822375e-03 4.143097e-02 3.239447 -1.4519899 0.2359597 -1.866636e-01
## 4 1.011733e-05 2.821878e-03 3.908539 -0.3912417 0.7160073 -9.011661e-03
## 5 5.688217e-06 2.107284e-03 3.916473 -2.0168001 0.1154112 -1.015027e-02
## 6 3.763194e-03 4.163089e-02 3.332494 -1.1586103 0.3228507 -1.735489e-01
##      conf.high mean.null alternative conf.level
## 1 3.557117e-07         0   two.sided       0.95
## 2 1.347725e-02         0   two.sided       0.95
## 3 6.634885e-02         0   two.sided       0.95
## 4 6.803589e-03         0   two.sided       0.95
## 5 1.650329e-03         0   two.sided       0.95
## 6 7.708100e-02         0   two.sided       0.95
summary(tvalue_gen) # resumen estadístico
##      obs.x       obs.y      obs.tot      mean.x              mean.y         
##  Min.   :3   Min.   :3   Min.   :6   Min.   :1.110e-06   Min.   :1.220e-06  
##  1st Qu.:3   1st Qu.:3   1st Qu.:6   1st Qu.:2.100e-06   1st Qu.:1.950e-06  
##  Median :3   Median :3   Median :6   Median :8.123e-05   Median :6.753e-05  
##  Mean   :3   Mean   :3   Mean   :6   Mean   :1.327e-02   Mean   :1.511e-02  
##  3rd Qu.:3   3rd Qu.:3   3rd Qu.:6   3rd Qu.:1.891e-03   3rd Qu.:2.199e-03  
##  Max.   :3   Max.   :3   Max.   :6   Max.   :5.497e-01   Max.   :1.056e+00  
##    mean.diff              var.x               var.y          
##  Min.   :-5.183e-01   Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:-1.112e-04   1st Qu.:0.000e+00   1st Qu.:0.000e+00  
##  Median :-1.100e-07   Median :0.000e+00   Median :0.000e+00  
##  Mean   :-1.844e-03   Mean   :2.052e-03   Mean   :2.578e-03  
##  3rd Qu.: 5.780e-06   3rd Qu.:9.100e-07   3rd Qu.:1.530e-06  
##  Max.   : 2.290e-01   Max.   :4.587e-01   Max.   :6.438e-01  
##      stderr                df          statistic           pvalue       
##  Min.   :1.300e-07   Min.   :2.000   Min.   :-3.6706   Min.   :0.03272  
##  1st Qu.:2.130e-06   1st Qu.:2.937   1st Qu.:-0.6905   1st Qu.:0.39479  
##  Median :4.067e-05   Median :3.376   Median :-0.5101   Median :0.53436  
##  Mean   :7.216e-03   Mean   :3.286   Mean   :-0.2439   Mean   :0.52969  
##  3rd Qu.:9.849e-04   3rd Qu.:3.853   3rd Qu.: 0.4739   3rd Qu.:0.65655  
##  Max.   :6.062e-01   Max.   :4.000   Max.   : 3.2876   Max.   :0.99797  
##     conf.low            conf.high            mean.null alternative       
##  Min.   :-2.220e+00   Min.   :-3.181e-04   Min.   :0   Length:360        
##  1st Qu.:-3.148e-03   1st Qu.: 6.620e-06   1st Qu.:0   Class :character  
##  Median :-1.173e-04   Median : 1.178e-04   Median :0   Mode  :character  
##  Mean   :-2.289e-02   Mean   : 1.920e-02   Mean   :0                     
##  3rd Qu.:-6.310e-06   3rd Qu.: 2.366e-03   3rd Qu.:0                     
##  Max.   : 4.220e-06   Max.   : 1.184e+00   Max.   :0                     
##    conf.level  
##  Min.   :0.95  
##  1st Qu.:0.95  
##  Median :0.95  
##  Mean   :0.95  
##  3rd Qu.:0.95  
##  Max.   :0.95
FcYPV <- promedio_genes %>% 
  select(1, 8, 9) %>% 
  mutate(p_value = tvalue_gen$pvalue,
         Fold_change = Mean_DCT_Tx / Mean_DCT_Cx) %>% 
  select(1, 4, 5)

FcYPV
## # A tibble: 360 × 3
##    Gen                p_value Fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.534       1.10 
##  2 hsa-let-7a-000377    0.221       1.61 
##  3 hsa-let-7b-002619    0.236       1.70 
##  4 hsa-let-7c-000379    0.716       1.23 
##  5 hsa-let-7d-002283    0.115       1.53 
##  6 hsa-let-7e-002406    0.323       1.39 
##  7 hsa-let-7f-000382    0.214       2.13 
##  8 hsa-let-7g-002282    0.918       1.02 
##  9 hsa-miR-1-002222     0.571       0.719
## 10 hsa-miR-100-000437   0.743       1.11 
## # ℹ 350 more rows
Logs <- FcYPV %>% 
  mutate(LPV = -log10(p_value),
         LFC = log2(Fold_change)) %>% 
  select(1, 4, 5)
# eje de las x es el valor de Fc y el valor de y es -logpvalue

Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.272   0.132 
##  2 hsa-let-7a-000377  0.655   0.688 
##  3 hsa-let-7b-002619  0.627   0.768 
##  4 hsa-let-7c-000379  0.145   0.303 
##  5 hsa-let-7d-002283  0.938   0.613 
##  6 hsa-let-7e-002406  0.491   0.476 
##  7 hsa-let-7f-000382  0.669   1.09  
##  8 hsa-let-7g-002282  0.0370  0.0320
##  9 hsa-miR-1-002222   0.244  -0.475 
## 10 hsa-miR-100-000437 0.129   0.150 
## # ℹ 350 more rows
vulcano <- ggplot(Logs, mapping = aes(x = LFC, 
                                       y = LPV)) +
  geom_point(size = 2, 
             color = "#546E7A")+
  theme_classic()+
  labs(title = "Análisis comparativo de miRNAs",
       caption = "Diseñado: Zyanya Garduño",
       x = "Log2 (2-DDCT)",
       y = "-Log10 (valor de p)")

 
vulcano

# Definir límites de significancia y fold change
limite_p <- 0.05
limite_FC <- 1.5

# Filtrar genes down-regulated
down_regulated <- Logs %>% 
  filter(LFC < -log2(limite_FC), 
         LPV > -log10(limite_p))

# Filtrar genes up-regulated
up_regulated <- Logs %>% 
  filter(LFC > log2(limite_FC), 
         LPV > -log10(limite_p))

# Top 5 más significativos
top_down_regulated <- down_regulated %>% 
  arrange(desc(LPV)) %>% 
  head(5)

top_up_regulated <- up_regulated %>% 
  arrange(desc(LPV)) %>% 
  head(5)
# Añadir líneas horizontales y verticales de referencia
vulcano2 <- vulcano +
  geom_hline(yintercept = -log10(limite_p), 
             linetype = "dashed", color = "black") +
  geom_vline(xintercept = log2(limite_FC), 
             linetype = "dashed", color = "black") +
  geom_vline(xintercept = -log2(limite_FC), 
             linetype = "dashed", color = "black")

vulcano2

#Añadir puntos de los genes up-regulated (rojo)
vulcano3 <- vulcano2 +
  geom_point(data = up_regulated, 
             aes(x = LFC, y = LPV), 
             alpha = 1, size = 3, color = "#E64B35") +
 
  #Añadir puntos de los genes down-regulated (azul) 
   geom_point(data = down_regulated, 
             aes(x = LFC, y = LPV), 
             alpha = 1, size = 3, color = "#8FBAFA")

vulcano3

library(ggrepel)
library(grid)  # necesario para usar unit()

vulcano4 <- vulcano3 +
  geom_label_repel(data = top_up_regulated,
                   aes(x = LFC, y = LPV, label = Gen),
                   box.padding = unit(0.35, "lines"),
                   point.padding = unit(0.5, "lines"),
                   segment.color = "grey50",
                   segment.size = 0.2,
                   max.overlaps = 50,
                   nudge_x = 0.5, 
                   nudge_y = 0.5,
                   show.legend = FALSE) +
  geom_label_repel(data = top_down_regulated,
                   aes(x = LFC, y = LPV, label = Gen),
                   box.padding = unit(0.35, "lines"),
                   point.padding = unit(0.5, "lines"),
                   segment.color = "grey50",
                   segment.size = 0.2,
                   max.overlaps = 50,
                   nudge_x = -0.5, 
                   nudge_y = -0.5,
                   show.legend = FALSE)
vulcano4                   

Guardar la gráfica

ggsave("vulcano4.jpg",
       plot = vulcano4,
       height = 5, width = 6,
       dpi = 300)