#Evaluacion medio termino #Creado por Zulia Soto

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar1.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
##   Gen   Type      C1    C2    C3    T1    T2    T3
##   <chr> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target  19.5  23.8  27.4  26.3  27.9  23.0
## 2 Gen-2 Target  20.9  27.4  28.8  33.9  20.0  28.2
## 3 Gen-3 Target  31.6  22.6  24.5  19.7  33.7  34.0
## 4 Gen-4 Target  19.1  23.7  23.9  24.6  33.8  22.6
## 5 Gen-5 Target  33.8  23.6  34.3  31.1  34.0  23.5
## 6 Gen-6 Target  30.8  25.2  20.3  32.8  31.8  20.4
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  18.3  18.1  18.0  18.2  19.8  21.6
## 2 Control-2  20.1  18.2  18.3  19.5  19.6  21.8
## 3 Control-3  22.0  20.1  20.9  18.9  21.7  21.7
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  19.5  23.8  27.4  26.3  27.9  23.0
## 2 Gen-2  20.9  27.4  28.8  33.9  20.0  28.2
## 3 Gen-3  31.6  22.6  24.5  19.7  33.7  34.0
## 4 Gen-4  19.1  23.7  23.9  24.6  33.8  22.6
## 5 Gen-5  33.8  23.6  34.3  31.1  34.0  23.5
## 6 Gen-6  30.8  25.2  20.3  32.8  31.8  20.4

#obtención de promedios

promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen    dosDCT   dosDCC     FC  LogFC
##   <chr>   <dbl>    <dbl>  <dbl>  <dbl>
## 1 Gen-1 0.0228  0.0532   0.429  -1.22 
## 2 Gen-2 0.00749 0.0122   0.616  -0.700
## 3 Gen-3 0.00222 0.00851  0.261  -1.94 
## 4 Gen-4 0.00984 0.136    0.0725 -3.79 
## 5 Gen-5 0.00169 0.000415 4.06    2.02 
## 6 Gen-6 0.00387 0.0148   0.262  -1.93
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen     DCT1   DCT2   DCT3   DCC1  DCC2  DCC3
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 Gen-1  7.45   7.55   1.35  -0.636  4.99  8.34
## 2 Gen-2 15.1   -0.352  6.48   0.747  8.58  9.75
## 3 Gen-3  0.821 13.4   12.3   11.5    3.76  5.40
## 4 Gen-4  5.71  13.4    0.893 -1.03   4.85  4.82
## 5 Gen-5 12.3   13.6    1.76  13.7    4.77 15.2 
## 6 Gen-6 14.0   11.4   -1.33  10.6    6.39  1.23
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot    mean.x   mean.y  mean.diff    var.x    var.y   stderr
## 1     3     3       6  4.231599 5.453882 -1.2222833 20.58890 12.60560 3.326384
## 2     3     3       6  6.360058 7.059935 -0.6998764 23.97374 59.59841 5.278010
## 3     3     3       6  6.876199 8.816669 -1.9404708 16.50954 48.23197 4.645482
## 4     3     3       6  2.881586 6.667691 -3.7861057 11.49311 39.75311 4.133047
## 5     3     3       6 11.233682 9.212339  2.0213427 31.94080 42.09802 4.967857
## 6     3     3       6  6.082040 8.014915 -1.9328747 22.08988 67.05555 5.451160
##         df  statistic    pvalue  conf.low conf.high mean.null alternative
## 1 3.781288 -0.3674511 0.7329175 -10.67234  8.227776         0   two.sided
## 2 3.384926 -0.1326023 0.9020062 -16.46631 15.066556         0   two.sided
## 3 3.225582 -0.4177114 0.7024214 -16.15642 12.275483         0   two.sided
## 4 3.067243 -0.9160568 0.4258409 -16.77767  9.205460         0   two.sided
## 5 3.926109  0.4068842 0.7052957 -11.87462 15.917308         0   two.sided
## 6 3.188706 -0.3545805 0.7450693 -18.71432 14.848571         0   two.sided
##   conf.level
## 1       0.95
## 2       0.95
## 3       0.95
## 4       0.95
## 5       0.95
## 6       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.7329175
## 2 0.9020062
## 3 0.7024214
## 4 0.4258409
## 5 0.7052957
## 6 0.7450693
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen       FC    PV    LFC    LPV
##   <chr>  <dbl> <dbl>  <dbl>  <dbl>
## 1 Gen-1 0.429  0.733 -1.22  0.135 
## 2 Gen-2 0.616  0.902 -0.700 0.0448
## 3 Gen-3 0.261  0.702 -1.94  0.153 
## 4 Gen-4 0.0725 0.426 -3.79  0.371 
## 5 Gen-5 4.06   0.705  2.02  0.152 
## 6 Gen-6 0.262  0.745 -1.93  0.128

#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-29  0.0396  0.0480  -4.66  1.32
## 2 Gen-88  0.00942 0.0369  -6.73  1.43
## 3 Gen-190 0.00201 0.0417  -8.96  1.38
## 4 Gen-195 0.00272 0.0284  -8.52  1.55
## 5 Gen-406 0.00212 0.00527 -8.88  2.28
## 6 Gen-408 0.0146  0.0471  -6.10  1.33
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen         FC      PV   LFC   LPV
##   <chr>    <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-14   284.  0.0184   8.15  1.73
## 2 Gen-38   170.  0.00287  7.41  2.54
## 3 Gen-87    99.3 0.0337   6.63  1.47
## 4 Gen-92   977.  0.0187   9.93  1.73
## 5 Gen-105 5987.  0.00252 12.5   2.60
## 6 Gen-110  145.  0.0196   7.18  1.71
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-406 0.00212 0.00527 -8.88  2.28
## 2 Gen-446 0.00174 0.00731 -9.17  2.14
## 3 Gen-683 0.00255 0.0170  -8.62  1.77
## 4 Gen-630 0.00110 0.0176  -9.82  1.76
## 5 Gen-195 0.00272 0.0284  -8.52  1.55
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-406 0.00212 0.00527 -8.88  2.28
## 2 Gen-446 0.00174 0.00731 -9.17  2.14
## 3 Gen-683 0.00255 0.0170  -8.62  1.77
## 4 Gen-630 0.00110 0.0176  -9.82  1.76
## 5 Gen-195 0.00272 0.0284  -8.52  1.55

#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "gray") +
  theme_classic() +
  labs(title = "Titulo", subtitle = "Subtitulo",
       caption = "Creado por: Zulia Soto") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#1a5276",
                   fill = "#5499c7") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#7b241c",
                   fill = "#f1948a") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código

# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)