Evaluación Medio Término

Creado por:Marian Islas

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")

Volcano_data <-vroom(file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar8.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
##    Gen    Type      C1    C2    C3    T1    T2    T3
##    <chr>  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Gen-1  Target  22.3  20.4  21.6  19.4  31.9  26.5
##  2 Gen-2  Target  28.4  33.4  23.0  28.3  23.8  25.6
##  3 Gen-3  Target  18.7  20.0  25.4  31.8  33.8  34.4
##  4 Gen-4  Target  24.4  25.2  18.3  32.6  31.7  32.0
##  5 Gen-5  Target  18.4  28.8  20.2  23.7  32.2  22.1
##  6 Gen-6  Target  19.2  31.7  25.3  28.7  29.4  34.0
##  7 Gen-7  Target  28.8  28.6  27.5  23.1  34.1  29.5
##  8 Gen-8  Target  31.1  32.1  20.1  34.4  28.4  34.2
##  9 Gen-9  Target  33.5  27.3  24.7  32.2  24.6  24.9
## 10 Gen-10 Target  32.3  24.7  19.1  33.2  30.5  29.9
## # ℹ 693 more rows
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  20.5  18.6  21.4  20.7  21.7  20.3
## 2 Control-2  19.4  19.0  21.6  20.3  19.5  20.1
## 3 Control-3  19.4  21.3  20.0  18.8  21.1  18.4
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  22.3  20.4  21.6  19.4  31.9  26.5
## 2 Gen-2  28.4  33.4  23.0  28.3  23.8  25.6
## 3 Gen-3  18.7  20.0  25.4  31.8  33.8  34.4
## 4 Gen-4  24.4  25.2  18.3  32.6  31.7  32.0
## 5 Gen-5  18.4  28.8  20.2  23.7  32.2  22.1
## 6 Gen-6  19.2  31.7  25.3  28.7  29.4  34.0
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen     dosDCT  dosDCC       FC  LogFC
##   <chr>    <dbl>   <dbl>    <dbl>  <dbl>
## 1 Gen-1 0.0177   0.401   0.0442    -4.50
## 2 Gen-2 0.0181   0.00365 4.95       2.31
## 3 Gen-3 0.000103 0.429   0.000240 -12.0 
## 4 Gen-4 0.000246 0.177   0.00139   -9.49
## 5 Gen-5 0.0169   0.198   0.0852    -3.55
## 6 Gen-6 0.000633 0.0263  0.0241    -5.37
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen     DCT1  DCT2  DCT3   DCC1   DCC2   DCC3
##   <chr>  <dbl> <dbl> <dbl>  <dbl>  <dbl>  <dbl>
## 1 Gen-1 -0.562 11.2   6.85  2.54   0.770  0.640
## 2 Gen-2  8.38   3.00  5.98  8.57  13.7    2.02 
## 3 Gen-3 11.9   13.1  14.8  -1.06   0.331  4.40 
## 4 Gen-4 12.6   11.0  12.4   4.57   5.59  -2.68 
## 5 Gen-5  3.80  11.4   2.45 -1.35   9.19  -0.837
## 6 Gen-6  8.82   8.66 14.4  -0.602 12.0    4.33
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot   mean.x    mean.y  mean.diff     var.x      var.y
## 1     3     3       6 1.317409  5.816412  -4.499002  1.129908 35.1761341
## 2     3     3       6 8.097411  5.788635   2.308776 34.215828  7.2576359
## 3     3     3       6 1.221129 13.245971 -12.024842  8.041381  2.1467615
## 4     3     3       6 2.495959 11.990186  -9.494227 20.314794  0.8242264
## 5     3     3       6 2.334200  5.887187  -3.552987 35.316933 23.3479278
## 6     3     3       6 5.250566 10.624953  -5.374386 40.473214 10.6573677
##     stderr       df  statistic      pvalue   conf.low conf.high mean.null
## 1 3.478795 2.128353 -1.2932646 0.318469276 -18.634451  9.636446         0
## 2 3.718130 2.811924  0.6209509 0.581308428  -9.984553 14.602104         0
## 3 1.842837 2.996814 -6.5251801 0.007337661 -17.893099 -6.156584         0
## 4 2.654494 2.162024 -3.5766612 0.062387309 -20.133331  1.144877         0
## 5 4.422098 3.840152 -0.8034619 0.468488937 -16.034906  8.928931         0
## 6 4.128381 2.984981 -1.3018146 0.284334168 -18.550214  7.801441         0
##   alternative conf.level
## 1   two.sided       0.95
## 2   two.sided       0.95
## 3   two.sided       0.95
## 4   two.sided       0.95
## 5   two.sided       0.95
## 6   two.sided       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##        pvalue
## 1 0.318469276
## 2 0.581308428
## 3 0.007337661
## 4 0.062387309
## 5 0.468488937
## 6 0.284334168
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen         FC      PV    LFC   LPV
##   <chr>    <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-1 0.0442   0.318    -4.50 0.497
## 2 Gen-2 4.95     0.581     2.31 0.236
## 3 Gen-3 0.000240 0.00734 -12.0  2.13 
## 4 Gen-4 0.00139  0.0624   -9.49 1.20 
## 5 Gen-5 0.0852   0.468    -3.55 0.329
## 6 Gen-6 0.0241   0.284    -5.37 0.546
#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-3   0.000240 0.00734 -12.0   2.13
## 2 Gen-95  0.00167  0.0266   -9.22  1.57
## 3 Gen-184 0.0276   0.0470   -5.18  1.33
## 4 Gen-201 0.0105   0.00302  -6.58  2.52
## 5 Gen-272 0.0124   0.0352   -6.34  1.45
## 6 Gen-289 0.00129  0.0228   -9.60  1.64
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen         FC      PV   LFC   LPV
##   <chr>    <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-174  138.  0.0365   7.11  1.44
## 2 Gen-217  248.  0.0127   7.95  1.89
## 3 Gen-232  230.  0.00761  7.85  2.12
## 4 Gen-264   28.4 0.0461   4.83  1.34
## 5 Gen-317   14.1 0.0463   3.82  1.33
## 6 Gen-342 2023.  0.0248  11.0   1.61
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-201 0.0105   0.00302  -6.58  2.52
## 2 Gen-3   0.000240 0.00734 -12.0   2.13
## 3 Gen-352 0.000852 0.0139  -10.2   1.86
## 4 Gen-345 0.000417 0.0147  -11.2   1.83
## 5 Gen-297 0.0441   0.0200   -4.50  1.70
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-201 0.0105   0.00302  -6.58  2.52
## 2 Gen-3   0.000240 0.00734 -12.0   2.13
## 3 Gen-352 0.000852 0.0139  -10.2   1.86
## 4 Gen-345 0.000417 0.0147  -11.2   1.83
## 5 Gen-297 0.0441   0.0200   -4.50  1.70
#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "#566573") +
  theme_classic() +
  labs(title = "Evaluación de cambio de la espresión",
       caption = "Modificado por: __________") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "#FFC300", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "#C70039", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "#0b4eea", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#FF5733") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "#2f4578") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#74da19") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "black",
                   fill = "#f29611") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano