Evaluación Medio Término

Creado por: Fernanda Hernández Rico

#install.packages("pacman")
library("pacman")

p_load("readr","dplyr","ggplot2")
Volcano_data <- read.csv(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar3.csv")
head(Volcano_data)
##     Gen   Type       C1       C2       C3       T1       T2       T3
## 1 Gen-1 Target 31.93885 24.91850 21.00850 30.40049 30.19301 34.90770
## 2 Gen-2 Target 28.94203 33.77550 24.87236 18.45608 22.27083 28.42274
## 3 Gen-3 Target 24.16816 25.36319 20.27432 20.96982 30.84685 23.34861
## 4 Gen-4 Target 32.15495 31.23681 20.19330 23.14822 28.32595 27.18610
## 5 Gen-5 Target 23.44227 28.32739 30.34306 18.73650 30.34595 22.33899
## 6 Gen-6 Target 24.65294 29.83189 30.57983 26.73833 34.81166 30.82818
controles <- Volcano_data %>% 
  filter(Type=="Selected Control") %>% 
  select(-2)
head(controles)
##         Gen       C1       C2       C3       T1       T2       T3
## 1 Control-1 21.34098 19.31926 19.42564 19.09324 20.65719 18.53680
## 2 Control-2 19.97911 20.31041 19.45576 20.07839 20.14674 18.08845
## 3 Control-3 20.29327 18.88736 20.65718 20.01232 21.03575 19.41697
Volcano_data2 <- Volcano_data %>% 
  filter(Type=="Target") %>% 
  select(-2)
head(Volcano_data2)
##     Gen       C1       C2       C3       T1       T2       T3
## 1 Gen-1 31.93885 24.91850 21.00850 30.40049 30.19301 34.90770
## 2 Gen-2 28.94203 33.77550 24.87236 18.45608 22.27083 28.42274
## 3 Gen-3 24.16816 25.36319 20.27432 20.96982 30.84685 23.34861
## 4 Gen-4 32.15495 31.23681 20.19330 23.14822 28.32595 27.18610
## 5 Gen-5 23.44227 28.32739 30.34306 18.73650 30.34595 22.33899
## 6 Gen-6 24.65294 29.83189 30.57983 26.73833 34.81166 30.82818
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)

Volcano_data3 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
         promedioDCC=(DCC1+DCC2+DCC3)/3) %>% 
  select(-2,-3,-4,-5,-6,-7) %>% 
  mutate(dosDCT=2^-promedioDCT,
         dosDCC=2^-promedioDCC,
         dosDDCT=dosDCT/dosDCC,
         FC=dosDDCT) %>% 
  mutate(LogFC=log2(FC)) %>% 
  select(1,4,5,7,8) 

head(Volcano_data3)
##     Gen       dosDCT      dosDCC         FC     LogFC
## 1 Gen-1 0.0002185498 0.015711137  0.0139105 -6.167682
## 2 Gen-2 0.0963281385 0.001661369 57.9811724  5.857513
## 3 Gen-3 0.0239951934 0.101156616  0.2372083 -2.075773
## 4 Gen-4 0.0107010315 0.004191053  2.5533040  1.352365
## 5 Gen-5 0.0569894702 0.005889298  9.6767854  3.274528
## 6 Gen-6 0.0004497031 0.002977527  0.1510324 -2.727070
Volcano_data4 <- Volcano_data2 %>% 
  mutate(DCT1=T1-promedioT1,
         DCT2=T2-promedioT2,
         DCT3=T3-promedioT3,
         DCC1=C1-promedioC1,
         DCC2=C2-promedioC2,
         DCC3=C3-promedioC3) %>% 
  select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
##     Gen       DCT1      DCT2      DCT3      DCC1      DCC2       DCC3
## 1 Gen-1 10.6725073  9.579783 16.226961 11.401067  5.412826  1.1623123
## 2 Gen-2 -1.2719024  1.657599  9.742000  8.404246 14.269820  5.0261681
## 3 Gen-3  1.2418401 10.233623  4.667869  3.630372  5.857515  0.4281248
## 4 Gen-4  3.4202394  7.712719  8.505361 11.617166 11.731137  0.3471121
## 5 Gen-5 -0.9914883  9.732722  3.658249  2.904482  8.821716 10.4968672
## 6 Gen-6  7.0103493 14.198431 12.147439  4.115151 10.326216 10.7336420
p_load("matrixTests")
tStudent <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
                        Volcano_data4[,c("DCT1","DCT2","DCT3")])

head(tStudent)
##   obs.x obs.y obs.tot   mean.x    mean.y mean.diff     var.x     var.y   stderr
## 1     3     3       6 5.992069 12.159750 -6.167682 26.459668 12.705160 3.613162
## 2     3     3       6 9.233412  3.375899  5.857513 21.876912 32.540929 4.259023
## 3     3     3       6 3.305337  5.381111 -2.075773  7.448805 20.594574 3.057416
## 4     3     3       6 7.898472  6.546106  1.352365 42.770520  7.485354 4.092916
## 5     3     3       6 7.407689  4.133161  3.274528 15.910683 28.921324 3.865747
## 6     3     3       6 8.391670 11.118740 -2.727070 13.757959 13.710795 3.025930
##         df  statistic    pvalue   conf.low conf.high mean.null alternative
## 1 3.560815 -1.7070038 0.1717690 -16.706752  4.371389         0   two.sided
## 2 3.852071  1.3753182 0.2435903  -6.148694 17.863719         0   two.sided
## 3 3.279384 -0.6789306 0.5420290 -11.353287  7.201740         0   two.sided
## 4 2.679243  0.3304161 0.7652076 -12.601323 15.306053         0   two.sided
## 5 3.689284  0.8470621 0.4484008  -7.824581 14.373637         0   two.sided
## 6 3.999988 -0.9012337 0.4184225 -11.128408  5.674268         0   two.sided
##   conf.level
## 1       0.95
## 2       0.95
## 3       0.95
## 4       0.95
## 5       0.95
## 6       0.95
Valor_p <- tStudent %>% 
  select(pvalue)
head(Valor_p)
##      pvalue
## 1 0.1717690
## 2 0.2435903
## 3 0.5420290
## 4 0.7652076
## 5 0.4484008
## 6 0.4184225
FC_y_PV <- Volcano_data3 %>% 
  select(1,4) %>% 
  mutate(PV = Valor_p$pvalue,
         LFC= log2(FC),
         LPV= -log10(PV))
head(FC_y_PV)
##     Gen         FC        PV       LFC       LPV
## 1 Gen-1  0.0139105 0.1717690 -6.167682 0.7650553
## 2 Gen-2 57.9811724 0.2435903  5.857513 0.6133400
## 3 Gen-3  0.2372083 0.5420290 -2.075773 0.2659774
## 4 Gen-4  2.5533040 0.7652076  1.352365 0.1162208
## 5 Gen-5  9.6767854 0.4484008  3.274528 0.3483336
## 6 Gen-6  0.1510324 0.4184225 -2.727070 0.3783850
p_Value=-log10(0.05)
FC_threshold=log2(2)

down.regulated <- FC_y_PV %>% 
  filter(LFC < -FC_threshold,
         LPV > p_Value)
head(down.regulated)
##       Gen           FC         PV        LFC      LPV
## 1  Gen-70 0.0001879043 0.01951304 -12.377714 1.709675
## 2  Gen-86 0.0110897486 0.02551192  -6.494630 1.593257
## 3 Gen-106 0.0131757460 0.04469294  -6.245972 1.349761
## 4 Gen-171 0.0457135593 0.04563165  -4.451234 1.340734
## 5 Gen-192 0.0098020830 0.01581243  -6.672696 1.801001
## 6 Gen-220 0.0027046754 0.03762206  -8.530329 1.424557
up.regulated <- FC_y_PV %>% 
  filter(LFC > FC_threshold,
         LPV > p_Value)
head(up.regulated)
##       Gen        FC          PV       LFC      LPV
## 1  Gen-16 6519.4909 0.005650204 12.670544 2.247936
## 2  Gen-21  106.0526 0.011216187  6.728636 1.950155
## 3  Gen-73  448.6262 0.023442608  8.809370 1.629994
## 4 Gen-165 1162.3161 0.007464178 10.182787 2.127018
## 5 Gen-208  240.5641 0.003872912  7.910278 2.411962
## 6 Gen-219  265.4468 0.034889477  8.052279 1.457306
top.down <- down.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
##       Gen           FC           PV        LFC      LPV
## 1 Gen-362 0.0013108701 0.0006197707  -9.575260 3.207769
## 2 Gen-314 0.0004638904 0.0059652330 -11.073929 2.224373
## 3 Gen-438 0.0009500693 0.0120003349 -10.039680 1.920807
## 4 Gen-192 0.0098020830 0.0158124277  -6.672696 1.801001
## 5  Gen-70 0.0001879043 0.0195130371 -12.377714 1.709675
top.up <- up.regulated %>% 
  arrange(PV) %>% 
  head(5)
head(top.down)
##       Gen           FC           PV        LFC      LPV
## 1 Gen-362 0.0013108701 0.0006197707  -9.575260 3.207769
## 2 Gen-314 0.0004638904 0.0059652330 -11.073929 2.224373
## 3 Gen-438 0.0009500693 0.0120003349 -10.039680 1.920807
## 4 Gen-192 0.0098020830 0.0158124277  -6.672696 1.801001
## 5  Gen-70 0.0001879043 0.0195130371 -12.377714 1.709675
Volcano <- ggplot(data = FC_y_PV,
                  mapping = aes(x = LFC, y = LPV)) +
  geom_point(alpha = 1.2, color = "gray") +
  theme_classic() +
  labs(title = "V_Plot", subtitle = "Examen medio término",
       caption = "Creado por: Fernanda Hernández Rico") +
  xlab(expression("Log"[2]~"(FC)")) +
  ylab(expression("-Log"[10]~"(Valor de p)")) +  # Subíndice en "10"
  geom_hline(yintercept = p_Value,
             linetype = "dashed",
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_vline(xintercept = -FC_threshold,
             linetype = "dashed", 
             color = "gray", 
             size = 1) +
  geom_point(data = up.regulated,
             x = up.regulated$LFC,
             y = up.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_point(data = down.regulated,
             x = down.regulated$LFC,
             y = down.regulated$LPV,
             alpha = 1, 
             size = 2.5,
             color = "gray") +
  geom_label(data = top.down,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   color = "#1a5276",
                   fill = "#5499c7") +
  geom_label(data = top.up,
                   mapping = aes(x = LFC, y = LPV, label = Gen),
                   color = "#7b241c",
                   fill = "#f1948a") +
  theme(    axis.line = element_line(size = 1.2), 
            axis.title.x = element_text(face = "bold"),  
            axis.title.y = element_text(face = "bold"),  
            axis.text.x = element_text(face = "bold"),  
            axis.text.y = element_text(face = "bold")) +
  scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
  scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano