library(pacman)
p_load("readr", 
       "ggplot2", 
       "ggrepel", 
       "dplyr", 
       "matrixTests") 

Cargar datos

datos <- read_csv(file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Transcript-mica/refs/heads/main/datos_miRNAs.csv") 
## Rows: 363 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Condicion
## dbl (6): Cx1, Cx2, Cx3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 8
##   Gen                Condicion   Cx1   Cx2   Cx3    T1    T2    T3
##   <chr>              <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 U6 snRNA-001973    Control    13.8  11.7  11.9  13.2  13.0  12.6
## 2 ath-miR159a-000338 Target     35    35    35    35    35    35  
## 3 hsa-let-7a-000377  Target     20.5  21.0  21.0  20.4  19.6  20.9
## 4 hsa-let-7b-002619  Target     18.4  19.0  19.1  18.3  17.4  19.0
## 5 hsa-let-7c-000379  Target     22.2  23.7  23.8  22.9  22.0  24.0
## 6 hsa-let-7d-002283  Target     22.2  22.7  21.9  22.0  21.2  21.9

CONTROLES

Controles <- datos %>% 
  filter(Condicion == "Control")

Promedio de controles

promedio_controles <- Controles %>% 
  summarise(Mean_C1 = mean(Cx1), 
            Mean_C2 = mean(Cx2), 
            Mean_C3 = mean(Cx3),
            Mean_T1 = mean(T1), 
            Mean_T2 = mean(T2), 
            Mean_T3 = mean(T3)) %>% 
  mutate(Gen = "Promedio_controles") %>% 
  select(Gen, everything())

==============================

GENES TARGET

==============================

genes <- datos %>% 
  filter(Condicion == "Target") %>% 
  select(-Condicion)

==============================

2^-ΔCT

==============================

DCT <- genes %>% 
  mutate(
    DCT_C1 = 2^-(Cx1 - promedio_controles$Mean_C1),
    DCT_C2 = 2^-(Cx2 - promedio_controles$Mean_C2),
    DCT_C3 = 2^-(Cx3 - promedio_controles$Mean_C3),
    DCT_T1 = 2^-(T1 - promedio_controles$Mean_T1),
    DCT_T2 = 2^-(T2 - promedio_controles$Mean_T2),
    DCT_T3 = 2^-(T3 - promedio_controles$Mean_T3)
  ) %>% 
  select(Gen, starts_with("DCT"))

==============================

PROMEDIOS + T TEST

==============================

promedio_genes <- DCT %>% 
  mutate(
    Mean_DCT_Cx = (DCT_C1 + DCT_C2 + DCT_C3) / 3,
    Mean_DCT_Tx = (DCT_T1 + DCT_T2 + DCT_T3) / 3
  )

tvalue_gen <- row_t_welch(
  promedio_genes[, c("DCT_C1", "DCT_C2", "DCT_C3")],
  promedio_genes[, c("DCT_T1", "DCT_T2", "DCT_T3")]
)

==============================

FOLD CHANGE Y P-VALUE

==============================

FCyPV <- promedio_genes %>%
  mutate(
    p_value = tvalue_gen$pvalue,
    Fold_change = Mean_DCT_Tx / Mean_DCT_Cx
  ) %>%
  select(Gen, p_value, Fold_change)

==============================

LOGS

==============================

Logs <- FCyPV %>%
  mutate(
    LPV = -log10(p_value),
    LFC = log2(Fold_change)
  ) %>%
  select(Gen, LPV, LFC)

==============================

VOLCANO BASE

==============================

vulcano <- ggplot(Logs, aes(x = LFC, y = LPV)) +
  geom_point(size = 2, color = "gray") +
  theme_classic() +
  labs(
    title = "Analisis comparativo de miRNAs",
    caption = "Creador: ANA SOFIA RODRIGUEZ",
    x = "Log2 (Fold Change)",
    y = "-Log10(p-value)"
  )

==============================

UMBRALES

==============================

limite_p <- 0.05
limite_FC <- 1.5

==============================

FILTROS CORREGIDOS

==============================

down_regulated <- Logs %>%
  filter(
    LFC < -log2(limite_FC),
    LPV > -log10(limite_p)
  )

up_regulated <- Logs %>%
  filter(
    LFC > log2(limite_FC),
    LPV > -log10(limite_p)
  )

==============================

TOP GENES

==============================

top_down_regulated <- down_regulated %>%
  arrange(desc(LPV)) %>%
  head(5)

top_up_regulated <- up_regulated %>%
  arrange(desc(LPV)) %>%
  head(5)

==============================

VOLCANO MEJORADO

==============================

vulcano2 <- vulcano +
  geom_hline(
    yintercept = -log10(limite_p),
    linetype = "dashed"
  ) +
  geom_vline(
    xintercept = c(-log2(limite_FC), log2(limite_FC)),
    linetype = "dashed"
  )

==============================

PUNTOS COLOREADOS (CORREGIDO)

==============================

vulcano3 <- vulcano2 +
  geom_point(
    data = up_regulated,
    aes(x = LFC, y = LPV),
    color = "blue",
    size = 3
  ) +
  geom_point(
    data = down_regulated,
    aes(x = LFC, y = LPV),
    color = "red",
    size = 3
  )


vulcano3

vulcano4 <- vulcano3 +
  geom_label_repel(
    data = Logs %>% 
      filter(LPV > 1.3 & abs(LFC) > 0.5),
    aes(x = LFC, y = LPV, label = Gen),
    size = 3,
    max.overlaps = 50
  )

vulcano4

vulcano4 <- vulcano3 +
  geom_label_repel(
    data = top_down_regulated,
    aes(x = LFC, y = LPV, label = Gen),
    color = "red",
    size = 3,
    max.overlaps = 50
  )

vulcano4

ggsave("Vulcano4.jpeg",
       plot = vulcano4,
       height = 5,
       width = 6,
       dpi = 300)