Evaluación Medio Término

Creado por:Ricardo Palos Sánchez

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")

Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar7.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
##    Gen    Type      C1    C2    C3    T1    T2    T3
##    <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Gen-1  Target  25.7  33.4  23.9  19.3  28.8  23.2
##  2 Gen-2  Target  28.7  25.5  20.0  20.9  28.6  32.0
##  3 Gen-3  Target  25.9  24.5  24.3  19.4  19.4  28.3
##  4 Gen-4  Target  31.5  33.1  31.5  31.8  22.5  23.4
##  5 Gen-5  Target  23.8  32.8  24.5  31.6  20.8  33.1
##  6 Gen-6  Target  26.7  20.3  33.4  24.2  31.5  29.1
##  7 Gen-7  Target  33.1  19.8  21.9  34.3  24.4  18.8
##  8 Gen-8  Target  30.5  34.0  31.3  27.3  25.0  21.7
##  9 Gen-9  Target  33.3  24.0  21.9  20.5  20.6  24.7
## 10 Gen-10 Target  25.1  19.6  34.4  20.3  18.9  27.1
## # ℹ 693 more rows
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  21.8  21.8  19.3  18.8  19.5  21.7
## 2 Control-2  19.0  19.2  20.5  18.9  21.1  19.9
## 3 Control-3  21.1  20.8  20.2  21.9  19.2  19.4
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  25.7  33.4  23.9  19.3  28.8  23.2
## 2 Gen-2  28.7  25.5  20.0  20.9  28.6  32.0
## 3 Gen-3  25.9  24.5  24.3  19.4  19.4  28.3
## 4 Gen-4  31.5  33.1  31.5  31.8  22.5  23.4
## 5 Gen-5  23.8  32.8  24.5  31.6  20.8  33.1
## 6 Gen-6  26.7  20.3  33.4  24.2  31.5  29.1

#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen    dosDCT   dosDCC     FC LogFC
##   <chr>   <dbl>    <dbl>  <dbl> <dbl>
## 1 Gen-1 0.0744  0.00650  11.4    3.52
## 2 Gen-2 0.00705 0.0508    0.139 -2.85
## 3 Gen-3 0.200   0.0444    4.51   2.17
## 4 Gen-4 0.0170  0.000311 54.5    5.77
## 5 Gen-5 0.00286 0.0105    0.273 -1.87
## 6 Gen-6 0.00338 0.0120    0.282 -1.83
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen     DCT1   DCT2  DCT3  DCC1   DCC2    DCC3
##   <chr>  <dbl>  <dbl> <dbl> <dbl>  <dbl>   <dbl>
## 1 Gen-1 -0.520  8.89   2.88  5.11 12.8    3.85  
## 2 Gen-2  1.06   8.67  11.7   8.04  4.90  -0.0418
## 3 Gen-3 -0.483 -0.492  7.93  5.26  3.92   4.30  
## 4 Gen-4 12.0    2.58   3.10 10.9  12.6   11.5   
## 5 Gen-5 11.7    0.868 12.8   3.13 12.2    4.43  
## 6 Gen-6  4.33  11.6    8.75  6.03 -0.293 13.4
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot    mean.x   mean.y mean.diff      var.x    var.y   stderr
## 1     3     3       6  7.266015 3.749103  3.516912 23.7046641 22.69160 3.932610
## 2     3     3       6  4.299615 7.149086 -2.849471 16.6034945 30.07486 3.944547
## 3     3     3       6  4.492467 2.318640  2.173827  0.4733169 23.63122 2.834580
## 4     3     3       6 11.650810 5.882352  5.768459  0.7101165 27.86037 3.086016
## 5     3     3       6  6.579474 8.450302 -1.870828 23.9485777 43.42238 4.738880
## 6     3     3       6  6.384861 8.210620 -1.825759 47.1247366 13.25811 4.486381
##         df  statistic    pvalue   conf.low conf.high mean.null alternative
## 1 3.998094  0.8942947 0.4217337  -7.403817 14.437641         0   two.sided
## 2 3.692456 -0.7223822 0.5131470 -14.170586  8.471645         0   two.sided
## 3 2.080085  0.7668955 0.5206115  -9.583276 13.930930         0   two.sided
## 4 2.101887  1.8692251 0.1963408  -6.911475 18.448392         0   two.sided
## 5 3.691563 -0.3947827 0.7147388 -15.473165 11.731509         0   two.sided
## 6 3.042821 -0.4069559 0.7109781 -15.990428 12.338910         0   two.sided
##   conf.level
## 1       0.95
## 2       0.95
## 3       0.95
## 4       0.95
## 5       0.95
## 6       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.4217337
## 2 0.5131470
## 3 0.5206115
## 4 0.1963408
## 5 0.7147388
## 6 0.7109781
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen       FC    PV   LFC   LPV
##   <chr>  <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 11.4   0.422  3.52 0.375
## 2 Gen-2  0.139 0.513 -2.85 0.290
## 3 Gen-3  4.51  0.521  2.17 0.283
## 4 Gen-4 54.5   0.196  5.77 0.707
## 5 Gen-5  0.273 0.715 -1.87 0.146
## 6 Gen-6  0.282 0.711 -1.83 0.148

########################################################################33

#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-164 0.0382   0.0435   -4.71  1.36
## 2 Gen-284 0.000958 0.0265  -10.0   1.58
## 3 Gen-408 0.000505 0.0231  -11.0   1.64
## 4 Gen-443 0.0657   0.0273   -3.93  1.56
## 5 Gen-512 0.000425 0.00609 -11.2   2.22
## 6 Gen-528 0.000205 0.00441 -12.3   2.36
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen        FC       PV   LFC   LPV
##   <chr>   <dbl>    <dbl> <dbl> <dbl>
## 1 Gen-8    119. 0.0382    6.89  1.42
## 2 Gen-63   159. 0.0233    7.32  1.63
## 3 Gen-87   627. 0.0500    9.29  1.30
## 4 Gen-321  238. 0.0403    7.89  1.39
## 5 Gen-322 4049. 0.00215  12.0   2.67
## 6 Gen-346  678. 0.000937  9.40  3.03
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV   LFC   LPV
##   <chr>      <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-528 0.000205 0.00441 -12.3  2.36
## 2 Gen-512 0.000425 0.00609 -11.2  2.22
## 3 Gen-587 0.000626 0.0226  -10.6  1.65
## 4 Gen-408 0.000505 0.0231  -11.0  1.64
## 5 Gen-284 0.000958 0.0265  -10.0  1.58
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV   LFC   LPV
##   <chr>      <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-528 0.000205 0.00441 -12.3  2.36
## 2 Gen-512 0.000425 0.00609 -11.2  2.22
## 3 Gen-587 0.000626 0.0226  -10.6  1.65
## 4 Gen-408 0.000505 0.0231  -11.0  1.64
## 5 Gen-284 0.000958 0.0265  -10.0  1.58

#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "#566573") +
  theme_classic() +
  labs(title = "Evaluación de cambio de la espresión",
       caption = "Modificado por:Ricardo Palos Sánchez") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "#FFC300", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "#C70039", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "#0b4eea", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#FF5733") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#2f4578") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#74da19") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#f29611") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano