Evaluación Medio Término

Creado por:Deni Uribe

if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
       "ggplot2",
       "ggrepel",
       "dplyr",
       "matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar5.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
## # A tibble: 3 × 7
##   Gen          C1    C2    C3    T1    T2    T3
##   <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1  21.1  19.8  20.3  21.3  20.9  21.8
## 2 Control-2  19.9  20.4  19.7  19.1  18.5  19.1
## 3 Control-3  20.7  19.1  20.0  19.0  19.9  19.9
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
##   Gen      C1    C2    C3    T1    T2    T3
##   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1  29.4  28.8  22.5  22.5  21.5  18.7
## 2 Gen-2  20.6  20.1  21.6  21.5  22.1  22.9
## 3 Gen-3  20.5  28.6  31.5  32.4  20.2  30.4
## 4 Gen-4  20.1  25.4  25.6  18.5  23.2  18.2
## 5 Gen-5  28.7  19.1  24.4  22.3  27.6  26.2
## 6 Gen-6  29.2  18.3  29.3  18.3  21.0  25.5
###############################################
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
## # A tibble: 6 × 5
##   Gen    dosDCT  dosDCC     FC  LogFC
##   <chr>   <dbl>   <dbl>  <dbl>  <dbl>
## 1 Gen-1 0.508   0.00890 57.1    5.83 
## 2 Gen-2 0.215   0.644    0.334 -1.58 
## 3 Gen-3 0.00468 0.00933  0.501 -0.996
## 4 Gen-4 0.999   0.0835  12.0    3.58 
## 5 Gen-5 0.0233  0.0636   0.367 -1.45 
## 6 Gen-6 0.316   0.0217  14.6    3.86
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
##   Gen    DCT1  DCT2  DCT3     DCC1   DCC2  DCC3
##   <chr> <dbl> <dbl> <dbl>    <dbl>  <dbl> <dbl>
## 1 Gen-1  2.70 1.74  -1.51  8.85     9.05   2.55
## 2 Gen-2  1.67 2.37   2.61 -0.00871  0.342  1.57
## 3 Gen-3 12.6  0.435 10.2  -0.104    8.88  11.5 
## 4 Gen-4 -1.32 3.40  -2.07 -0.472    5.65   5.57
## 5 Gen-5  2.49 7.82   5.96  8.15    -0.627  4.41
## 6 Gen-6 -1.46 1.21   5.23  8.67    -1.42   9.33
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                      Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
##   obs.x obs.y obs.tot    mean.x      mean.y  mean.diff      var.x      var.y
## 1     3     3       6 6.8122516 0.977979465  5.8342722 13.6621243  4.8605316
## 2     3     3       6 0.6351197 2.216305382 -1.5811857  0.6887799  0.2379247
## 3     3     3       6 6.7439178 7.740196110 -0.9962783 36.8419220 41.5165553
## 4     3     3       6 3.5823279 0.001853026  3.5804749 12.3299706  8.7838382
## 5     3     3       6 3.9758730 5.422252739 -1.4463797 19.3796271  7.3165479
## 6     3     3       6 5.5247001 1.660363769  3.8643363 36.3221045 11.3347804
##      stderr       df  statistic    pvalue   conf.low  conf.high mean.null
## 1 2.4847975 3.263186  2.3479870 0.0934535  -1.724618 13.3931623         0
## 2 0.5557891 3.234424 -2.8449383 0.0598433  -3.279597  0.1172258         0
## 3 5.1107233 3.985815 -0.1949388 0.8549741 -15.205859 13.2133029         0
## 4 2.6529109 3.890262  1.3496401 0.2503189  -3.867856 11.0288059         0
## 5 2.9830731 3.321756 -0.4848623 0.6579730 -10.440250  7.5474904         0
## 6 3.9856779 3.137480  0.9695556 0.4009324  -8.511164 16.2398362         0
##   alternative conf.level
## 1   two.sided       0.95
## 2   two.sided       0.95
## 3   two.sided       0.95
## 4   two.sided       0.95
## 5   two.sided       0.95
## 6   two.sided       0.95
Valor_p <- Tvalue %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.0934535
## 2 0.0598433
## 3 0.8549741
## 4 0.2503189
## 5 0.6579730
## 6 0.4009324
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
##   Gen       FC     PV    LFC    LPV
##   <chr>  <dbl>  <dbl>  <dbl>  <dbl>
## 1 Gen-1 57.1   0.0935  5.83  1.03  
## 2 Gen-2  0.334 0.0598 -1.58  1.22  
## 3 Gen-3  0.501 0.855  -0.996 0.0680
## 4 Gen-4 12.0   0.250   3.58  0.602 
## 5 Gen-5  0.367 0.658  -1.45  0.182 
## 6 Gen-6 14.6   0.401   3.86  0.397
#Selección de genes sobreexpresados y silenciados

p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-79  0.0497   0.0157   -4.33  1.81
## 2 Gen-80  0.00497  0.0226   -7.65  1.65
## 3 Gen-153 0.0391   0.0117   -4.68  1.93
## 4 Gen-235 0.00171  0.00923  -9.19  2.03
## 5 Gen-236 0.000667 0.0381  -10.6   1.42
## 6 Gen-269 0.000908 0.0399  -10.1   1.40
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
##   Gen        FC     PV   LFC   LPV
##   <chr>   <dbl>  <dbl> <dbl> <dbl>
## 1 Gen-32  237.  0.0401  7.89  1.40
## 2 Gen-54  125.  0.0166  6.97  1.78
## 3 Gen-132 200.  0.0225  7.65  1.65
## 4 Gen-159 162.  0.0388  7.34  1.41
## 5 Gen-163  82.9 0.0347  6.37  1.46
## 6 Gen-209 162.  0.0228  7.34  1.64
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-235 0.00171  0.00923  -9.19  2.03
## 2 Gen-153 0.0391   0.0117   -4.68  1.93
## 3 Gen-79  0.0497   0.0157   -4.33  1.81
## 4 Gen-595 0.00732  0.0161   -7.09  1.79
## 5 Gen-603 0.000354 0.0198  -11.5   1.70
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
## # A tibble: 5 × 5
##   Gen           FC      PV    LFC   LPV
##   <chr>      <dbl>   <dbl>  <dbl> <dbl>
## 1 Gen-235 0.00171  0.00923  -9.19  2.03
## 2 Gen-153 0.0391   0.0117   -4.68  1.93
## 3 Gen-79  0.0497   0.0157   -4.33  1.81
## 4 Gen-595 0.00732  0.0161   -7.09  1.79
## 5 Gen-603 0.000354 0.0198  -11.5   1.70
#Gráfica volcano

Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "gray") +
  theme_classic() +
  labs(title = "Titulo", subtitle = "Subtitulo",
       caption = "Creado por: Deni Uribe") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_label_repel(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#1a5276",
                   fill = "#5499c7") +
  geom_label_repel(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   max.overlaps = 50,
                   color = "#7b241c",
                   fill = "#f1948a") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código

# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)