Evaluación Medio Término

Creado por:Ana Gabriela Bernardo

#if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar6.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
##   Gen   Type      C1    C2    C3    T1    T2    T3
##   <chr> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target  18.9  18.2  33.2  32.6  30.4  34.2
## 2 Gen-2 Target  23.3  18.0  19.2  19.0  19.2  19.4
## 3 Gen-3 Target  23.0  27.6  27.8  29.8  33.6  19.2
## 4 Gen-4 Target  19.9  27.8  27.9  29.6  28.7  29.6
## 5 Gen-5 Target  24.2  31.4  29.9  21.8  20.1  25.9
## 6 Gen-6 Target  33.7  20.2  18.2  25.7  23.6  24.6
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  19.3  20.2  21.2  18.3  19.6  18.6
## 2 Control-2  21.5  21.6  21.6  19.5  21.7  20.9
## 3 Control-3  19.4  20.3  18.9  21.8  20.2  20.3
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  18.9  18.2  33.2  32.6  30.4  34.2
## 2 Gen-2  23.3  18.0  19.2  19.0  19.2  19.4
## 3 Gen-3  23.0  27.6  27.8  29.8  33.6  19.2
## 4 Gen-4  19.9  27.8  27.9  29.6  28.7  29.6
## 5 Gen-5  24.2  31.4  29.9  21.8  20.1  25.9
## 6 Gen-6  33.7  20.2  18.2  25.7  23.6  24.6
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen     dosDCT  dosDCC       FC  LogFC
##   <chr>    <dbl>   <dbl>    <dbl>  <dbl>
## 1 Gen-1 0.000196 0.127    0.00154 -9.34 
## 2 Gen-2 1.83     1.23     1.49     0.579
## 3 Gen-3 0.00573  0.0196   0.292   -1.78 
## 4 Gen-4 0.00171  0.0381   0.0449  -4.48 
## 5 Gen-5 0.177    0.00378 46.8      5.55 
## 6 Gen-6 0.0433   0.0824   0.525   -0.929
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen     DCT1   DCT2   DCT3   DCC1   DCC2  DCC3
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 Gen-1 12.7    9.92  14.3   -1.18  -2.50  12.6 
## 2 Gen-2 -0.817 -1.29  -0.508  3.16  -2.68  -1.36
## 3 Gen-3  9.95  13.1   -0.670  2.90   6.88   7.23
## 4 Gen-4  9.69   8.17   9.71  -0.236  7.07   7.31
## 5 Gen-5  1.91  -0.420  6.01   4.15  10.7    9.31
## 6 Gen-6  5.82   3.10   4.67  13.6   -0.484 -2.36
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot     mean.x     mean.y  mean.diff     var.x      var.y
## 1     3     3       6  2.9780783 12.3190656 -9.3409873 70.061953  4.9488146
## 2     3     3       6 -0.2931447 -0.8718025  0.5786577  9.363845  0.1550182
## 3     3     3       6  5.6704455  7.4477496 -1.7773041  5.781972 51.8510294
## 4     3     3       6  4.7141998  9.1905274 -4.4763276 18.392609  0.7816483
## 5     3     3       6  8.0492955  2.4996546  5.5496409 11.873997 10.6042521
## 6     3     3       6  3.6015261  4.5305313 -0.9290052 76.509427  1.8679787
##     stderr       df  statistic    pvalue   conf.low conf.high mean.null
## 1 5.000359 2.281137 -1.8680634 0.1868241 -28.504309  9.822334         0
## 2 1.781279 2.066202  0.3248552 0.7752458  -6.854988  8.012303         0
## 3 4.383036 2.440567 -0.4054962 0.7179667 -17.721528 14.166920         0
## 4 2.528126 2.169685 -1.7706113 0.2087099 -14.578068  5.625413         0
## 5 2.737289 3.987277  2.0274225 0.1127664  -2.059864 13.159146         0
## 6 5.111341 2.097602 -0.1817537 0.8718345 -21.969229 20.111218         0
##   alternative conf.level
## 1   two.sided       0.95
## 2   two.sided       0.95
## 3   two.sided       0.95
## 4   two.sided       0.95
## 5   two.sided       0.95
## 6   two.sided       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.1868241
## 2 0.7752458
## 3 0.7179667
## 4 0.2087099
## 5 0.1127664
## 6 0.8718345
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen         FC    PV    LFC    LPV
##   <chr>    <dbl> <dbl>  <dbl>  <dbl>
## 1 Gen-1  0.00154 0.187 -9.34  0.729 
## 2 Gen-2  1.49    0.775  0.579 0.111 
## 3 Gen-3  0.292   0.718 -1.78  0.144 
## 4 Gen-4  0.0449  0.209 -4.48  0.680 
## 5 Gen-5 46.8     0.113  5.55  0.948 
## 6 Gen-6  0.525   0.872 -0.929 0.0596
#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-74  0.00175 0.0345  -9.16  1.46
## 2 Gen-80  0.0390  0.0381  -4.68  1.42
## 3 Gen-146 0.0180  0.00870 -5.80  2.06
## 4 Gen-240 0.00108 0.00184 -9.85  2.73
## 5 Gen-257 0.00683 0.0481  -7.19  1.32
## 6 Gen-269 0.0219  0.0473  -5.51  1.33
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen         FC       PV   LFC   LPV
##   <chr>    <dbl>    <dbl> <dbl> <dbl>
## 1 Gen-13    46.1 0.0438    5.53  1.36
## 2 Gen-86   535.  0.0110    9.06  1.96
## 3 Gen-114  686.  0.0273    9.42  1.56
## 4 Gen-243   91.3 0.0281    6.51  1.55
## 5 Gen-338  329.  0.00530   8.36  2.28
## 6 Gen-341 3284.  0.000331 11.7   3.48
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-240 0.00108 0.00184 -9.85  2.73
## 2 Gen-632 0.0161  0.00368 -5.95  2.43
## 3 Gen-499 0.0377  0.00641 -4.73  2.19
## 4 Gen-146 0.0180  0.00870 -5.80  2.06
## 5 Gen-639 0.00451 0.00920 -7.79  2.04
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen          FC      PV   LFC   LPV
##   <chr>     <dbl>   <dbl> <dbl> <dbl>
## 1 Gen-240 0.00108 0.00184 -9.85  2.73
## 2 Gen-632 0.0161  0.00368 -5.95  2.43
## 3 Gen-499 0.0377  0.00641 -4.73  2.19
## 4 Gen-146 0.0180  0.00870 -5.80  2.06
## 5 Gen-639 0.00451 0.00920 -7.79  2.04
#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "gray") +
  theme_classic() +
  labs(title = "Titulo", subtitle = "Subtitulo",
       caption = "Creado por: Ana Gabriela Bernardo") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#1a5276",
                   fill = "#5499c7") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#7b241c",
                   fill = "#f1948a") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código

Guardar <- ggsave(Volcano, file = “Ejercicio_examen.jpeg”, width = 8, height = 6, dpi = 300)