# Evaluación Medio Término
# Creado por:Paulina Gallegos

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")

Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar2.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
##   Gen   Type      C1    C2    C3    T1    T2    T3
##   <chr> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target  31.0  21.8  25.7  29.8  23.6  20.2
## 2 Gen-2 Target  20.3  29.3  30.2  27.4  30.6  28.1
## 3 Gen-3 Target  18.3  26.7  31.6  31.3  27.3  20.9
## 4 Gen-4 Target  27.2  20.1  26.0  28.6  29.8  19.9
## 5 Gen-5 Target  30.8  24.4  18.4  23.9  26.8  34.2
## 6 Gen-6 Target  21.5  24.6  23.2  19.8  34.4  32.5
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  21.5  21.9  18.2  20.8  20.0  21.9
## 2 Control-2  21.9  19.8  20.1  20.8  19.0  20.9
## 3 Control-3  20.9  19.9  21.7  19.6  19.5  21.2
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  31.0  21.8  25.7  29.8  23.6  20.2
## 2 Gen-2  20.3  29.3  30.2  27.4  30.6  28.1
## 3 Gen-3  18.3  26.7  31.6  31.3  27.3  20.9
## 4 Gen-4  27.2  20.1  26.0  28.6  29.8  19.9
## 5 Gen-5  30.8  24.4  18.4  23.9  26.8  34.2
## 6 Gen-6  21.5  24.6  23.2  19.8  34.4  32.5
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen    dosDCT dosDCC     FC LogFC
##   <chr>   <dbl>  <dbl>  <dbl> <dbl>
## 1 Gen-1 0.0571  0.0218 2.61    1.39
## 2 Gen-2 0.00319 0.0160 0.199  -2.33
## 3 Gen-3 0.0145  0.0331 0.439  -1.19
## 4 Gen-4 0.0196  0.0713 0.274  -1.87
## 5 Gen-5 0.00416 0.0680 0.0612 -4.03
## 6 Gen-6 0.00272 0.187  0.0146 -6.10
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen     DCT1  DCT2   DCT3    DCC1   DCC2  DCC3
##   <chr>  <dbl> <dbl>  <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-1  9.38   4.12 -1.11   9.61    1.27   5.67
## 2 Gen-2  6.99  11.1   6.81  -1.10    8.80  10.2 
## 3 Gen-3 10.9    7.78 -0.379 -3.07    6.18  11.6 
## 4 Gen-4  8.19  10.3  -1.46   5.81   -0.418  6.03
## 5 Gen-5  3.55   7.30 12.9    9.41    3.88  -1.65
## 6 Gen-6 -0.572 14.9  11.2    0.0790  4.01   3.17
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot   mean.x   mean.y mean.diff     var.x     var.y   stderr
## 1     3     3       6 5.516362 4.130165  1.386197 17.418258 27.527223 3.870637
## 2     3     3       6 5.962701 8.290610 -2.327908 37.833173  5.835606 3.815267
## 3     3     3       6 4.915654 6.104556 -1.188902 55.270082 33.981370 5.454400
## 4     3     3       6 3.809957 5.676679 -1.866721 13.417871 39.344895 4.193756
## 5     3     3       6 3.878387 7.908000 -4.029613 30.556033 22.038044 4.187047
## 6     3     3       6 2.421839 8.523376 -6.101538  4.293486 65.463304 4.822060
##         df  statistic    pvalue   conf.low conf.high mean.null alternative
## 1 3.807394  0.3581315 0.7392117  -9.578247 12.350641         0   two.sided
## 2 2.602645 -0.6101562 0.5908524 -15.588936 10.933119         0   two.sided
## 3 3.784674 -0.2179712 0.8386951 -16.678670 14.300866         0   two.sided
## 4 3.222006 -0.4451191 0.6844553 -14.707669 10.974227         0   two.sided
## 5 3.897761 -0.9623997 0.3916769 -15.775968  7.716743         0   two.sided
## 6 2.261221 -1.2653383 0.3204668 -24.713424 12.510349         0   two.sided
##   conf.level
## 1       0.95
## 2       0.95
## 3       0.95
## 4       0.95
## 5       0.95
## 6       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.7392117
## 2 0.5908524
## 3 0.8386951
## 4 0.6844553
## 5 0.3916769
## 6 0.3204668
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen       FC    PV   LFC    LPV
##   <chr>  <dbl> <dbl> <dbl>  <dbl>
## 1 Gen-1 2.61   0.739  1.39 0.131 
## 2 Gen-2 0.199  0.591 -2.33 0.229 
## 3 Gen-3 0.439  0.839 -1.19 0.0764
## 4 Gen-4 0.274  0.684 -1.87 0.165 
## 5 Gen-5 0.0612 0.392 -4.03 0.407 
## 6 Gen-6 0.0146 0.320 -6.10 0.494
#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-64  0.00533  0.0343   -7.55  1.46
## 2 Gen-111 0.000631 0.00962 -10.6   2.02
## 3 Gen-134 0.00224  0.0317   -8.80  1.50
## 4 Gen-285 0.00122  0.0442   -9.68  1.35
## 5 Gen-330 0.00225  0.0143   -8.79  1.84
## 6 Gen-342 0.00135  0.0317   -9.53  1.50
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen        FC     PV   LFC   LPV
##   <chr>   <dbl>  <dbl> <dbl> <dbl>
## 1 Gen-28   67.3 0.0270  6.07  1.57
## 2 Gen-44   42.1 0.0482  5.40  1.32
## 3 Gen-60  143.  0.0371  7.16  1.43
## 4 Gen-99  798.  0.0345  9.64  1.46
## 5 Gen-114  28.3 0.0384  4.82  1.42
## 6 Gen-118 732.  0.0267  9.52  1.57
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-647 0.000597 0.00797 -10.7   2.10
## 2 Gen-405 0.00113  0.00888  -9.79  2.05
## 3 Gen-111 0.000631 0.00962 -10.6   2.02
## 4 Gen-511 0.0136   0.0130   -6.20  1.88
## 5 Gen-330 0.00225  0.0143   -8.79  1.84
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-647 0.000597 0.00797 -10.7   2.10
## 2 Gen-405 0.00113  0.00888  -9.79  2.05
## 3 Gen-111 0.000631 0.00962 -10.6   2.02
## 4 Gen-511 0.0136   0.0130   -6.20  1.88
## 5 Gen-330 0.00225  0.0143   -8.79  1.84
#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "gray") +
  theme_classic() +
  labs(title = "Paulina Gallegos",subtitle = "Subtitulo",
       caption = "Creado por:Paulina Gallegos") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#1a5276",
                   fill = "#5499c7") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#7b241c",
                   fill = "#f1948a") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código

# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)