Evaluación Medio Término

Creado por:Diego Arturo Campos Grimaldi

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")

Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar4.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
##    Gen    Type      C1    C2    C3    T1    T2    T3
##    <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Gen-1  Target  25.4  20.4  19.8  29.6  30.6  21.2
##  2 Gen-2  Target  25.6  23.0  31.9  33.9  33.9  30.9
##  3 Gen-3  Target  19.0  27.1  33.0  33.2  25.1  22.8
##  4 Gen-4  Target  20.5  27.8  20.6  30.1  18.3  34.4
##  5 Gen-5  Target  23.9  25.8  21.9  27.2  23.7  29.0
##  6 Gen-6  Target  28.2  23.1  30.1  32.0  33.9  18.5
##  7 Gen-7  Target  23.0  23.4  32.4  18.5  33.0  34.5
##  8 Gen-8  Target  25.4  31.9  28.0  31.7  34.6  20.1
##  9 Gen-9  Target  34.6  27.8  24.3  20.1  25.4  20.1
## 10 Gen-10 Target  25.2  20.8  26.4  28.6  33.8  32.7
## # ℹ 693 more rows
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  21.9  18.9  18.2  21.7  20.0  18.0
## 2 Control-2  21.1  19.3  19.6  19.0  18.5  19.4
## 3 Control-3  19.8  19.9  20.7  18.2  21.0  19.4
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  25.4  20.4  19.8  29.6  30.6  21.2
## 2 Gen-2  25.6  23.0  31.9  33.9  33.9  30.9
## 3 Gen-3  19.0  27.1  33.0  33.2  25.1  22.8
## 4 Gen-4  20.5  27.8  20.6  30.1  18.3  34.4
## 5 Gen-5  23.9  25.8  21.9  27.2  23.7  29.0
## 6 Gen-6  28.2  23.1  30.1  32.0  33.9  18.5

#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen      dosDCT  dosDCC     FC LogFC
##   <chr>     <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-1 0.00499   0.260   0.0192 -5.70
## 2 Gen-2 0.0000917 0.00849 0.0108 -6.53
## 3 Gen-3 0.00531   0.0114  0.466  -1.10
## 4 Gen-4 0.00362   0.123   0.0294 -5.09
## 5 Gen-5 0.00717   0.0665  0.108  -3.22
## 6 Gen-6 0.00251   0.00682 0.368  -1.44
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen    DCT1  DCT2   DCT3   DCC1  DCC2   DCC3
##   <chr> <dbl> <dbl>  <dbl>  <dbl> <dbl>  <dbl>
## 1 Gen-1  9.94 10.8   2.23   4.44   1.04  0.347
## 2 Gen-2 14.3  14.1  11.9    4.73   3.56 12.4  
## 3 Gen-3 13.6   5.30  3.78  -1.89   7.72 13.5  
## 4 Gen-4 10.4  -1.51 15.4   -0.412  8.38  1.10 
## 5 Gen-5  7.53  3.87  9.98   2.95   6.41  2.37 
## 6 Gen-6 12.3  14.1  -0.460  7.30   3.69 10.6
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot   mean.x    mean.y mean.diff     var.x     var.y   stderr
## 1     3     3       6 1.942524  7.646819 -5.704295  4.805709 22.189096 2.999711
## 2     3     3       6 6.880203 13.412633 -6.532430 22.825095  1.740435 2.861557
## 3     3     3       6 6.456014  7.556905 -1.100891 60.728559 27.859749 5.434099
## 4     3     3       6 3.022537  8.110084 -5.087547 22.099044 75.682194 5.709093
## 5     3     3       6 3.909575  7.124798 -3.215224  4.764264  9.453933 2.177016
## 6     3     3       6 7.195345  8.637140 -1.441795 11.932498 62.843431 4.992525
##         df  statistic    pvalue  conf.low conf.high mean.null alternative
## 1 2.827504 -1.9016145 0.1589723 -15.58880  4.180207         0   two.sided
## 2 2.303241 -2.2828235 0.1330744 -17.41463  4.349769         0   two.sided
## 3 3.515982 -0.2025894 0.8506275 -17.04387 14.842090         0   two.sided
## 4 3.076229 -0.8911305 0.4370281 -23.00436 12.829269         0   two.sided
## 5 3.607531 -1.4768948 0.2212092  -9.52797  3.097522         0   two.sided
## 6 2.733077 -0.2887907 0.7932398 -18.24520 15.361610         0   two.sided
##   conf.level
## 1       0.95
## 2       0.95
## 3       0.95
## 4       0.95
## 5       0.95
## 6       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.1589723
## 2 0.1330744
## 3 0.8506275
## 4 0.4370281
## 5 0.2212092
## 6 0.7932398
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen       FC    PV   LFC    LPV
##   <chr>  <dbl> <dbl> <dbl>  <dbl>
## 1 Gen-1 0.0192 0.159 -5.70 0.799 
## 2 Gen-2 0.0108 0.133 -6.53 0.876 
## 3 Gen-3 0.466  0.851 -1.10 0.0703
## 4 Gen-4 0.0294 0.437 -5.09 0.359 
## 5 Gen-5 0.108  0.221 -3.22 0.655 
## 6 Gen-6 0.368  0.793 -1.44 0.101

#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen            FC      PV    LFC   LPV
##   <chr>       <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-10  0.00379   0.0243   -8.05  1.62
## 2 Gen-153 0.0106    0.00798  -6.55  2.10
## 3 Gen-161 0.0117    0.0404   -6.41  1.39
## 4 Gen-195 0.00153   0.0260   -9.35  1.58
## 5 Gen-214 0.0131    0.00222  -6.25  2.65
## 6 Gen-223 0.0000898 0.0231  -13.4   1.64
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen        FC      PV   LFC   LPV
##   <chr>   <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-51   206. 0.0241   7.68  1.62
## 2 Gen-115 1292. 0.0130  10.3   1.89
## 3 Gen-198  223. 0.0194   7.80  1.71
## 4 Gen-238  302. 0.0479   8.24  1.32
## 5 Gen-251  460. 0.00196  8.85  2.71
## 6 Gen-273  204. 0.0429   7.67  1.37
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC       PV   LFC   LPV
##   <chr>     <dbl>    <dbl> <dbl> <dbl>
## 1 Gen-358 0.0111  0.000266 -6.50  3.58
## 2 Gen-214 0.0131  0.00222  -6.25  2.65
## 3 Gen-153 0.0106  0.00798  -6.55  2.10
## 4 Gen-289 0.00201 0.00896  -8.96  2.05
## 5 Gen-639 0.00136 0.0147   -9.52  1.83
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC       PV   LFC   LPV
##   <chr>     <dbl>    <dbl> <dbl> <dbl>
## 1 Gen-358 0.0111  0.000266 -6.50  3.58
## 2 Gen-214 0.0131  0.00222  -6.25  2.65
## 3 Gen-153 0.0106  0.00798  -6.55  2.10
## 4 Gen-289 0.00201 0.00896  -8.96  2.05
## 5 Gen-639 0.00136 0.0147   -9.52  1.83
#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "#566573") +
  theme_classic() +
  labs(title = "Evaluación de cambio de la espresión",
       caption = "Modificado por: Diego Arturo Campos Grimaldi") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "#FFC300", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "#C70039", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "#0b4eea", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#FF5733") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#2f4578") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#74da19") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#f29611") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano