library("pacman") #esta función llama al paquete instalado
## Warning: package 'pacman' was built under R version 4.5.3
p_load("ggplot2", #para graficar
       "dplyr", #para facilitar el manejo de datos
       "vroom",
       "readr",
       "ggrepel",
       "matrixTexts") #llamar repositorios
## Installing package into 'C:/Users/baryo/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## Warning: package 'matrixTexts' is not available for this version of R
## 
## A version of this package for your version of R might be available elsewhere,
## see the ideas at
## https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
## Warning: unable to access index for repository http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5:
##   no fue posible abrir la URL 'http://www.stats.ox.ac.uk/pub/RWin/bin/windows/contrib/4.5/PACKAGES'
## Warning: 'BiocManager' not available.  Could not check Bioconductor.
## 
## Please use `install.packages('BiocManager')` and then retry.
## Warning in p_install(package, character.only = TRUE, ...):
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : no hay paquete llamado 'matrixTexts'
## Warning in p_load("ggplot2", "dplyr", "vroom", "readr", "ggrepel", "matrixTexts"): Failed to install/load:
## matrixTexts
Datos_ <- vroom("https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv")
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Datos_
## # A tibble: 363 × 8
##    Gen                Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##    <chr>              <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 U6 snRNA-001973    Control    13.8  11.7  11.9  13.2  13.0  12.6
##  2 ath-miR159a-000338 Target     35    35    35    35    35    35  
##  3 hsa-let-7a-000377  Target     20.5  21.0  21.0  20.4  19.6  20.9
##  4 hsa-let-7b-002619  Target     18.4  19.0  19.1  18.3  17.4  19.0
##  5 hsa-let-7c-000379  Target     22.2  23.7  23.8  22.9  22.0  24.0
##  6 hsa-let-7d-002283  Target     22.2  22.7  21.9  22.0  21.2  21.9
##  7 hsa-let-7e-002406  Target     18.0  18.4  18.5  18.0  17.3  18.6
##  8 hsa-let-7f-000382  Target     24.4  25.2  24.7  23.8  23.0  24.9
##  9 hsa-let-7g-002282  Target     22.1  22.1  22.5  22.3  22.0  22.8
## 10 hsa-miR-1-002222   Target     31.4  32.9  32.5  33.0  31.8  33.6
## # ℹ 353 more rows
Controles <- Datos_ %>%
  filter(Condicion == "Control")
promedio_controles <- Controles %>%
  summarise(
    Mean_C1 = mean(Cx1),
    Mean_C2 = mean(Cx2),
    Mean_C3 = mean(Cx3),
    Mean_T1 = mean(T1),
    Mean_T2 = mean(T2),
    Mean_T3 = mean(T3)
  ) %>%
  mutate(Gen = "Promedio_controles") %>%
  select(7, 1, 2, 3, 4, 5, 6)

promedio_controles
## # A tibble: 1 × 7
##   Gen                Mean_C1 Mean_C2 Mean_C3 Mean_T1 Mean_T2 Mean_T3
##   <chr>                <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 Promedio_controles    15.4    15.0    15.3    15.6    15.1    15.3
# extraer los genes de la tabla "datos"

genes <- Datos_ %>%
  filter(Condicion == "Target") %>%
  select(-2)

head(genes)
## # A tibble: 6 × 7
##   Gen                  Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ath-miR159a-000338  35    35    35    35    35    35  
## 2 hsa-let-7a-000377   20.5  21.0  21.0  20.4  19.6  20.9
## 3 hsa-let-7b-002619   18.4  19.0  19.1  18.3  17.4  19.0
## 4 hsa-let-7c-000379   22.2  23.7  23.8  22.9  22.0  24.0
## 5 hsa-let-7d-002283   22.2  22.7  21.9  22.0  21.2  21.9
## 6 hsa-let-7e-002406   18.0  18.4  18.5  18.0  17.3  18.6
#######

# Sacar el 2^-DCT

DCT <- genes %>%
  mutate(
    DCT_C1 = 2^-(Cx1 - promedio_controles$Mean_C1),
    DCT_C2 = 2^-(Cx2 - promedio_controles$Mean_C2),
    DCT_C3 = 2^-(Cx3 - promedio_controles$Mean_C3),
    DCT_T1 = 2^-(T1 - promedio_controles$Mean_T1),
    DCT_T2 = 2^-(T2 - promedio_controles$Mean_T2),
    DCT_T3 = 2^-(T3 - promedio_controles$Mean_T3)
  ) %>%
  select(-2, -3, -4, -5, -6, -7)

DCT
## # A tibble: 360 × 7
##    Gen                    DCT_C1      DCT_C2     DCT_C3   DCT_T1  DCT_T2  DCT_T3
##    <chr>                   <dbl>       <dbl>      <dbl>    <dbl>   <dbl>   <dbl>
##  1 ath-miR159a-000338 0.00000124 0.000000960 0.00000114  1.47e-6 1.03e-6 1.16e-6
##  2 hsa-let-7a-000377  0.0289     0.0157      0.0185      3.70e-2 4.49e-2 1.98e-2
##  3 hsa-let-7b-002619  0.127      0.0615      0.0677      1.62e-1 1.98e-1 7.75e-2
##  4 hsa-let-7c-000379  0.00900    0.00243     0.00272     6.40e-3 8.67e-3 2.39e-3
##  5 hsa-let-7d-002283  0.00893    0.00492     0.0102      1.23e-2 1.46e-2 9.87e-3
##  6 hsa-let-7e-002406  0.167      0.0960      0.108       1.91e-1 2.21e-1 1.03e-1
##  7 hsa-let-7f-000382  0.00190    0.000882    0.00146     3.58e-3 4.25e-3 1.24e-3
##  8 hsa-let-7g-002282  0.00963    0.00720     0.00659     9.86e-3 8.59e-3 5.50e-3
##  9 hsa-miR-1-002222   0.0000150  0.00000404  0.00000661  5.99e-6 9.45e-6 3.04e-6
## 10 hsa-miR-100-000437 0.287      0.390       0.498       4.10e-1 2.69e-1 6.25e-1
## # ℹ 350 more rows
promedio_genes <- DCT %>%
  mutate(
    Mean_DCT_Cx = (DCT_C1 + DCT_C2 + DCT_C3) / 3,
    Mean_DCT_Tx = (DCT_T1 + DCT_T2 + DCT_T3) / 3
  )

promedio_genes
## # A tibble: 360 × 9
##    Gen    DCT_C1  DCT_C2  DCT_C3  DCT_T1  DCT_T2  DCT_T3 Mean_DCT_Cx Mean_DCT_Tx
##    <chr>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>       <dbl>       <dbl>
##  1 ath-… 1.24e-6 9.60e-7 1.14e-6 1.47e-6 1.03e-6 1.16e-6  0.00000111  0.00000122
##  2 hsa-… 2.89e-2 1.57e-2 1.85e-2 3.70e-2 4.49e-2 1.98e-2  0.0210      0.0339    
##  3 hsa-… 1.27e-1 6.15e-2 6.77e-2 1.62e-1 1.98e-1 7.75e-2  0.0856      0.146     
##  4 hsa-… 9.00e-3 2.43e-3 2.72e-3 6.40e-3 8.67e-3 2.39e-3  0.00472     0.00582   
##  5 hsa-… 8.93e-3 4.92e-3 1.02e-2 1.23e-2 1.46e-2 9.87e-3  0.00802     0.0123    
##  6 hsa-… 1.67e-1 9.60e-2 1.08e-1 1.91e-1 2.21e-1 1.03e-1  0.123       0.172     
##  7 hsa-… 1.90e-3 8.82e-4 1.46e-3 3.58e-3 4.25e-3 1.24e-3  0.00142     0.00302   
##  8 hsa-… 9.63e-3 7.20e-3 6.59e-3 9.86e-3 8.59e-3 5.50e-3  0.00781     0.00798   
##  9 hsa-… 1.50e-5 4.04e-6 6.61e-6 5.99e-6 9.45e-6 3.04e-6  0.00000856  0.00000616
## 10 hsa-… 2.87e-1 3.90e-1 4.98e-1 4.10e-1 2.69e-1 6.25e-1  0.392       0.434     
## # ℹ 350 more rows
#La función t_welch rompía el kint to HTML entonces le pedí a Copilot un workaround. Al parecer no se reconocía la función, entonces la definió internamente

########################################################
# Definir una función para aplicar t.test fila por fila
row_t_welch <- function(mat_control, mat_target) {
  # mat_control y mat_target son data.frames o matrices con las réplicas
  apply(
    cbind(mat_control, mat_target), 
    1, 
    function(row) {
      # separar controles y targets
      controles <- row[1:ncol(mat_control)]
      targets   <- row[(ncol(mat_control)+1):(ncol(mat_control)+ncol(mat_target))]
      
      # prueba t de Welch
      test <- t.test(controles, targets, var.equal = FALSE)
      c(statistic = test$statistic, pvalue = test$p.value)
    }
  ) |> t() |> as.data.frame()
}
tvalue_gen <- row_t_welch(
  promedio_genes[, c("DCT_C1", "DCT_C2", "DCT_C3")],
  promedio_genes[, c("DCT_T1", "DCT_T2", "DCT_T3")]
)

View(tvalue_gen)

FCyPV <- promedio_genes %>%
  select(1, 8, 9) %>%
  mutate(
    p_value = tvalue_gen$pvalue,
    Fold_change = Mean_DCT_Tx / Mean_DCT_Cx
  ) %>%
  select(1, 4, 5)

FCyPV
## # A tibble: 360 × 3
##    Gen                p_value Fold_change
##    <chr>                <dbl>       <dbl>
##  1 ath-miR159a-000338   0.534       1.10 
##  2 hsa-let-7a-000377    0.221       1.61 
##  3 hsa-let-7b-002619    0.236       1.70 
##  4 hsa-let-7c-000379    0.716       1.23 
##  5 hsa-let-7d-002283    0.115       1.53 
##  6 hsa-let-7e-002406    0.323       1.39 
##  7 hsa-let-7f-000382    0.214       2.13 
##  8 hsa-let-7g-002282    0.918       1.02 
##  9 hsa-miR-1-002222     0.571       0.719
## 10 hsa-miR-100-000437   0.743       1.11 
## # ℹ 350 more rows
Logs <- FCyPV %>%
  mutate(
    LPV = -log10(p_value),
    LFC = log2(Fold_change)
  ) %>%
  select(1, 4, 5)

Logs
## # A tibble: 360 × 3
##    Gen                   LPV     LFC
##    <chr>               <dbl>   <dbl>
##  1 ath-miR159a-000338 0.272   0.132 
##  2 hsa-let-7a-000377  0.655   0.688 
##  3 hsa-let-7b-002619  0.627   0.768 
##  4 hsa-let-7c-000379  0.145   0.303 
##  5 hsa-let-7d-002283  0.938   0.613 
##  6 hsa-let-7e-002406  0.491   0.476 
##  7 hsa-let-7f-000382  0.669   1.09  
##  8 hsa-let-7g-002282  0.0370  0.0320
##  9 hsa-miR-1-002222   0.244  -0.475 
## 10 hsa-miR-100-000437 0.129   0.150 
## # ℹ 350 more rows
##############################

vulcano <- ggplot(Logs, mapping = aes(x = LFC, y = LPV)) +
  geom_point(size = 2, color = "gray") +
  theme_classic() +
  labs(
    title = "Portafolio: V.Plot",
    caption = "Creador: Yaniv Bar Yosef",
    x = "Log2 (2-DDCT)",
    y = "-Log10 (valor de p)"
  )

vulcano

##############################

# límites

limite_p <- 0.05

limite_FC <- 1.5

down_regulated <- Logs %>%
  filter(
    LFC < -log2(limite_FC),
    LPV > -log10(limite_p)
  )

down_regulated
## # A tibble: 1 × 3
##   Gen                     LPV    LFC
##   <chr>                 <dbl>  <dbl>
## 1 hsa-miR-502-3p-002083  1.49 -0.658
up_regulated <- Logs %>%
  filter(
    LFC > log2(limite_FC),
    LPV > -log10(limite_p)
  )

up_regulated
## # A tibble: 2 × 3
##   Gen                   LPV   LFC
##   <chr>               <dbl> <dbl>
## 1 hsa-miR-148a-000470  1.45  2.46
## 2 hsa-miR-429-001024   1.34  1.63
top_down_regulated <- down_regulated %>%
  arrange(desc(LPV)) %>%
  head(5)

top_down_regulated
## # A tibble: 1 × 3
##   Gen                     LPV    LFC
##   <chr>                 <dbl>  <dbl>
## 1 hsa-miR-502-3p-002083  1.49 -0.658
top_up_regulated <- up_regulated %>%
  arrange(desc(LPV)) %>%
  head(5)

top_up_regulated
## # A tibble: 2 × 3
##   Gen                   LPV   LFC
##   <chr>               <dbl> <dbl>
## 1 hsa-miR-148a-000470  1.45  2.46
## 2 hsa-miR-429-001024   1.34  1.63
########################################
# Mejorar la gráfica

vulcano2 <- vulcano +
  geom_hline(
    yintercept = -log10(limite_p),
    linetype = "dashed"
  ) +
  geom_vline(
    xintercept = c(-log2(limite_FC), log2(limite_FC)),
    linetype = "dashed"
  )

vulcano2

vulcano3 <- vulcano2 +
  geom_point(
    data = up_regulated,
    aes(x = LFC, y = LPV),
    alpha = 1,
    size = 3,
    color = "#E64B35B2"
  ) +
  geom_point(
    data = down_regulated,
    aes(x = LFC, y = LPV),
    alpha = 1,
    size = 3,
    color = "#3C5488B2"
  )

vulcano3

vulcano4 <- vulcano3 +
  geom_label_repel(
    data = top_up_regulated,
    aes(x = LFC, y = LPV, label = Gen),
    max.overlaps = 100
  ) +
  geom_label_repel(
    data = top_down_regulated,
    aes(x = LFC, y = LPV, label = Gen),
    max.overlaps = 100
  )

vulcano4

ggsave(
  "Vulcano4.jpeg",
  plot = vulcano4,
  height = 5,
  width = 6,
  dpi = 300
)