if(!require(pacman))
  install.packages("pacman")
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "dplyr",
       "ggplot2",
       "matrixTests",
       "readr",
       "ggrepel")
Datos <- vroom (file="https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv")
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Datos)
## # A tibble: 6 × 8
##   Gen                Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973    Control    13.8  11.7  11.9  13.2  13.0  12.6
## 2 ath-miR159a-000338 Target     35    35    35    35    35    35  
## 3 hsa-let-7a-000377  Target     20.5  21.0  21.0  20.4  19.6  20.9
## 4 hsa-let-7b-002619  Target     18.4  19.0  19.1  18.3  17.4  19.0
## 5 hsa-let-7c-000379  Target     22.2  23.7  23.8  22.9  22.0  24.0
## 6 hsa-let-7d-002283  Target     22.2  22.7  21.9  22.0  21.2  21.9
Controles <- Datos %>% 
  filter(Condicion=="Control")

head(Controles)
## # A tibble: 3 × 8
##   Gen             Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>           <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973 Control    13.8  11.7  11.9  13.2  13.0  12.6
## 2 RNU44-001094    Control    17.3  16.9  16.8  17.7  17.3  16.8
## 3 RNU48-001006    Control    15.1  16.4  17.0  16.0  15.1  16.4
promedio_controles <- Controles %>% 
  summarise(Mean_C1=mean(Cx1),
            Mean_C2=mean(Cx2),
            Mean_C3=mean(Cx3),
            Mean_T1=mean(T1),
            Mean_T2=mean(T2),
            Mean_T3=mean(T3)) %>% 
  mutate(Gen="Promedio_controles") %>% 
  select(7,1,2,3,4,5,6)

promedio_controles
## # A tibble: 1 × 7
##   Gen                Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
##   <chr>                <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 Promedio_controles    15.4    15.0    15.3    15.6    15.1    15.3
genes <- Datos %>% 
  filter(Condicion=="Target") %>% 
  select(-2)
head(genes)
## # A tibble: 6 × 7
##   Gen                  Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338  35    35    35    35    35    35  
## 2 hsa-let-7a-000377   20.5  21.0  21.0  20.4  19.6  20.9
## 3 hsa-let-7b-002619   18.4  19.0  19.1  18.3  17.4  19.0
## 4 hsa-let-7c-000379   22.2  23.7  23.8  22.9  22.0  24.0
## 5 hsa-let-7d-002283   22.2  22.7  21.9  22.0  21.2  21.9
## 6 hsa-let-7e-002406   18.0  18.4  18.5  18.0  17.3  18.6
DCT <- genes %>%
  mutate(DCT_C1=2^(Cx1- promedio_controles$Mean_C1),
         DCT_C2=2^(Cx2- promedio_controles$Mean_C2),
         DCT_C3=2^(Cx3- promedio_controles$Mean_C3),
         DCT_T1=2^(T1- promedio_controles$Mean_T1),
         DCT_T2=2^(T2- promedio_controles$Mean_T2),
         DCT_T3=2^(T3- promedio_controles$Mean_T3)) %>%
  select(-2,-3,-4,-5,-6,-7)

DCT
## # A tibble: 360 × 7
##    Gen                   DCT_C1     DCT_C2    DCT_C3    DCT_T1    DCT_T2  DCT_T3
##    <chr>                  <dbl>      <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
##  1 ath-miR159a-000338 804073.   1041189.   880400.   679572.   968818.    8.65e5
##  2 hsa-let-7a-000377      34.6       63.7      54.0      27.0      22.3   5.04e1
##  3 hsa-let-7b-002619       7.84      16.2      14.8       6.19      5.05  1.29e1
##  4 hsa-let-7c-000379     111.       411.      368.      156.      115.    4.19e2
##  5 hsa-let-7d-002283     112.       203.       97.9      81.3      68.3   1.01e2
##  6 hsa-let-7e-002406       6.00      10.4       9.29      5.23      4.53  9.72e0
##  7 hsa-let-7f-000382     525.      1134.      684.      280.      235.    8.07e2
##  8 hsa-let-7g-002282     104.       139.      152.      101.      116.    1.82e2
##  9 hsa-miR-1-002222    66495.    247300.   151274.   166835.   105792.    3.29e5
## 10 hsa-miR-100-000437      3.49       2.57      2.01      2.44      3.72  1.60e0
## # ℹ 350 more rows
promedio_genes <- DCT %>% 
  mutate(Mean_DCT_Cx=(DCT_C1+DCT_C2+DCT_C3)/3,
         Mean_DCT_Tx=(DCT_T1+DCT_T2+DCT_T3)/3)

promedio_genes
## # A tibble: 360 × 9
##    Gen         DCT_C1 DCT_C2 DCT_C3 DCT_T1 DCT_T2 DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
##    <chr>        <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>       <dbl>       <dbl>
##  1 ath-miR159… 8.04e5 1.04e6 8.80e5 6.80e5 9.69e5 8.65e5   908554.     837675.  
##  2 hsa-let-7a… 3.46e1 6.37e1 5.40e1 2.70e1 2.23e1 5.04e1       50.8        33.2 
##  3 hsa-let-7b… 7.84e0 1.62e1 1.48e1 6.19e0 5.05e0 1.29e1       13.0         8.05
##  4 hsa-let-7c… 1.11e2 4.11e2 3.68e2 1.56e2 1.15e2 4.19e2      297.        230.  
##  5 hsa-let-7d… 1.12e2 2.03e2 9.79e1 8.13e1 6.83e1 1.01e2      138.         83.6 
##  6 hsa-let-7e… 6.00e0 1.04e1 9.29e0 5.23e0 4.53e0 9.72e0        8.57        6.49
##  7 hsa-let-7f… 5.25e2 1.13e3 6.84e2 2.80e2 2.35e2 8.07e2      781.        441.  
##  8 hsa-let-7g… 1.04e2 1.39e2 1.52e2 1.01e2 1.16e2 1.82e2      131.        133.  
##  9 hsa-miR-1-… 6.65e4 2.47e5 1.51e5 1.67e5 1.06e5 3.29e5   155023.     200588.  
## 10 hsa-miR-10… 3.49e0 2.57e0 2.01e0 2.44e0 3.72e0 1.60e0        2.69        2.59
## # ℹ 350 more rows
tvalue_gen <- row_t_welch(promedio_genes[,c("DCT_C1",
                                            "DCT_C2",
                                            "DCT_C3")],
                          promedio_genes[,c("DCT_T1",
                                            "DCT_T2",
                                            "DCT_T3")])

head(tvalue_gen)
##   obs.x obs.y obs.tot       mean.x       mean.y    mean.diff        var.x
## 1     3     3       6 9.085539e+05 8.376754e+05 70878.485134 1.465044e+10
## 2     3     3       6 5.075313e+01 3.323744e+01    17.515689 2.199415e+02
## 3     3     3       6 1.295420e+01 8.045165e+00     4.909031 2.013772e+01
## 4     3     3       6 2.966509e+02 2.300374e+02    66.613527 2.626696e+04
## 5     3     3       6 1.377747e+02 8.364488e+01    54.129843 3.282233e+03
## 6     3     3       6 8.568666e+00 6.490691e+00     2.077974 5.271469e+00
##          var.y       stderr       df statistic    pvalue      conf.low
## 1 2.146103e+10 1.097140e+05 3.862609 0.6460296 0.5546323 -2.380574e+05
## 2 2.264139e+02 1.219775e+01 3.999159 1.4359770 0.2243662 -1.635350e+01
## 3 1.799711e+01 3.565335e+00 3.987436 1.3768781 0.2407979 -5.002237e+00
## 4 2.708403e+04 1.333554e+02 3.999062 0.4995188 0.6436460 -3.036747e+02
## 5 2.764996e+02 3.444189e+01 2.334591 1.5716279 0.2388783 -7.545904e+01
## 6 7.923184e+00 2.097193e+00 3.844718 0.9908361 0.3799539 -3.838696e+00
##      conf.high mean.null alternative conf.level
## 1 3.798144e+05         0   two.sided       0.95
## 2 5.138488e+01         0   two.sided       0.95
## 3 1.482030e+01         0   two.sided       0.95
## 4 4.369017e+02         0   two.sided       0.95
## 5 1.837187e+02         0   two.sided       0.95
## 6 7.994645e+00         0   two.sided       0.95
FCyPV <- promedio_genes %>% 
  select(1,8,9) %>% 
  mutate(p_value=tvalue_gen$pvalue,
         fold_change=Mean_DCT_Tx/Mean_DCT_Cx) %>% 
  select(1,4,5)

FCyPV
## # A tibble: 360 × 3
##    Gen                p_value fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.555       0.922
##  2 hsa-let-7a-000377    0.224       0.655
##  3 hsa-let-7b-002619    0.241       0.621
##  4 hsa-let-7c-000379    0.644       0.775
##  5 hsa-let-7d-002283    0.239       0.607
##  6 hsa-let-7e-002406    0.380       0.757
##  7 hsa-let-7f-000382    0.259       0.564
##  8 hsa-let-7g-002282    0.955       1.01 
##  9 hsa-miR-1-002222     0.621       1.29 
## 10 hsa-miR-100-000437   0.902       0.963
## # ℹ 350 more rows
Logs <- FCyPV %>% 
  mutate(LPV=-log10(p_value),
         LFC=log2(fold_change)) %>% 
  select(1,4,5)
Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.256  -0.117 
##  2 hsa-let-7a-000377  0.649  -0.611 
##  3 hsa-let-7b-002619  0.618  -0.687 
##  4 hsa-let-7c-000379  0.191  -0.367 
##  5 hsa-let-7d-002283  0.622  -0.720 
##  6 hsa-let-7e-002406  0.420  -0.401 
##  7 hsa-let-7f-000382  0.587  -0.825 
##  8 hsa-let-7g-002282  0.0201  0.0191
##  9 hsa-miR-1-002222   0.207   0.372 
## 10 hsa-miR-100-000437 0.0449 -0.0545
## # ℹ 350 more rows
vulcano <- ggplot(Logs, aes(x = LFC, y = LPV)) +
  geom_point(size = 2, color = "gray") +
  theme_classic() +
  labs(title = "Análisis comparativo de miRNAs",
       caption = "Creador: Paola López",
       x = "log2(2-DDCT)",
       y = "-Log10(valor de p)")

vulcano

limite_p <- 0.05
limite_FC <- 1.5
down_regulated <- Logs %>% 
  filter(LFC < -log2(limite_FC),
         LPV> -log10(limite_p))
down_regulated
## # A tibble: 1 × 3
##   Gen                  LPV    LFC
##   <chr>              <dbl>  <dbl>
## 1 hsa-miR-361-000554  1.32 -0.650
up_regulated <- Logs %>% 
  filter(LFC>log2(limite_FC),
         LPV>-log10(limite_p))
up_regulated
## # A tibble: 0 × 3
## # ℹ 3 variables: Gen <chr>, LPV <dbl>, LFC <dbl>
top_down_regulated <- down_regulated %>% 
  arrange(desc(LPV)) %>% 
  head(5)
top_down_regulated
## # A tibble: 1 × 3
##   Gen                  LPV    LFC
##   <chr>              <dbl>  <dbl>
## 1 hsa-miR-361-000554  1.32 -0.650
top_up_regulated <- up_regulated %>% 
  arrange(desc(LPV)) %>% 
  head(5)
top_up_regulated
## # A tibble: 0 × 3
## # ℹ 3 variables: Gen <chr>, LPV <dbl>, LFC <dbl>
vulcano2 <- vulcano+
  geom_hline(yintercept=-log10(limite_p),
             linetype="dashed",
             xintercept=c(-log2(limite_FC),log2(limite_FC)),
             linetype="dashed")
## Warning: Duplicated aesthetics after name standardisation: linetype
## Warning in geom_hline(yintercept = -log10(limite_p), linetype = "dashed", :
## Ignoring unknown parameters: `xintercept`
vulcano2

vulcano3 <- vulcano2+
  geom_point(data=up_regulated,
             x=up_regulated$LFC,
             y=up_regulated$LPV,
             alpha=1,
             size=3,
             color="pink")+
 geom_point(data=down_regulated,
             x=down_regulated$LFC,
             y=down_regulated$LPV,
             alpha=1,
             size=3,
             color="purple")
vulcano3

vulcano4 <- vulcano3+
  geom_label_repel(data=top_up_regulated,
                   mapping=aes(x=top_up_regulated$LFC,
                               y=top_up_regulated$LPV),
                   label=top_up_regulated$Gen,
                   max.overlaps = 10)+
  geom_label_repel(data=top_down_regulated,
                   mapping=aes(x=top_down_regulated$LFC,
                               y=top_down_regulated$LPV),
                   label=top_down_regulated$Gen,
                   max.overlaps = 10)+
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black") +
  geom_vline(xintercept = 1.5, linetype = "dashed", color = "black") +
  geom_vline(xintercept = -1.5, linetype = "dashed", color = "black")

vulcano4
## Warning: Use of `top_down_regulated$LFC` is discouraged.
## ℹ Use `LFC` instead.
## Warning: Use of `top_down_regulated$LPV` is discouraged.
## ℹ Use `LPV` instead.

ggsave(filename = "Gráfica V plot.png",
       plot = vulcano4,
       dpi = 300)
## Saving 7 x 5 in image
## Warning: Use of `top_down_regulated$LFC` is discouraged.
## ℹ Use `LFC` instead.
## Warning: Use of `top_down_regulated$LPV` is discouraged.
## ℹ Use `LPV` instead.