Creado por: Fernanda Hernández Rico
#install.packages("pacman")
library("pacman")
p_load("readr","dplyr","ggplot2")
Volcano_data <- read.csv(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar3.csv")
head(Volcano_data)
## Gen Type C1 C2 C3 T1 T2 T3
## 1 Gen-1 Target 31.93885 24.91850 21.00850 30.40049 30.19301 34.90770
## 2 Gen-2 Target 28.94203 33.77550 24.87236 18.45608 22.27083 28.42274
## 3 Gen-3 Target 24.16816 25.36319 20.27432 20.96982 30.84685 23.34861
## 4 Gen-4 Target 32.15495 31.23681 20.19330 23.14822 28.32595 27.18610
## 5 Gen-5 Target 23.44227 28.32739 30.34306 18.73650 30.34595 22.33899
## 6 Gen-6 Target 24.65294 29.83189 30.57983 26.73833 34.81166 30.82818
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## Gen C1 C2 C3 T1 T2 T3
## 1 Control-1 21.34098 19.31926 19.42564 19.09324 20.65719 18.53680
## 2 Control-2 19.97911 20.31041 19.45576 20.07839 20.14674 18.08845
## 3 Control-3 20.29327 18.88736 20.65718 20.01232 21.03575 19.41697
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## Gen C1 C2 C3 T1 T2 T3
## 1 Gen-1 31.93885 24.91850 21.00850 30.40049 30.19301 34.90770
## 2 Gen-2 28.94203 33.77550 24.87236 18.45608 22.27083 28.42274
## 3 Gen-3 24.16816 25.36319 20.27432 20.96982 30.84685 23.34861
## 4 Gen-4 32.15495 31.23681 20.19330 23.14822 28.32595 27.18610
## 5 Gen-5 23.44227 28.32739 30.34306 18.73650 30.34595 22.33899
## 6 Gen-6 24.65294 29.83189 30.57983 26.73833 34.81166 30.82818
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## Gen dosDCT dosDCC FC LogFC
## 1 Gen-1 0.0002185498 0.015711137 0.0139105 -6.167682
## 2 Gen-2 0.0963281385 0.001661369 57.9811724 5.857513
## 3 Gen-3 0.0239951934 0.101156616 0.2372083 -2.075773
## 4 Gen-4 0.0107010315 0.004191053 2.5533040 1.352365
## 5 Gen-5 0.0569894702 0.005889298 9.6767854 3.274528
## 6 Gen-6 0.0004497031 0.002977527 0.1510324 -2.727070
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## 1 Gen-1 10.6725073 9.579783 16.226961 11.401067 5.412826 1.1623123
## 2 Gen-2 -1.2719024 1.657599 9.742000 8.404246 14.269820 5.0261681
## 3 Gen-3 1.2418401 10.233623 4.667869 3.630372 5.857515 0.4281248
## 4 Gen-4 3.4202394 7.712719 8.505361 11.617166 11.731137 0.3471121
## 5 Gen-5 -0.9914883 9.732722 3.658249 2.904482 8.821716 10.4968672
## 6 Gen-6 7.0103493 14.198431 12.147439 4.115151 10.326216 10.7336420
p_load("matrixTests")
tStudent <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(tStudent)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr
## 1 3 3 6 5.992069 12.159750 -6.167682 26.459668 12.705160 3.613162
## 2 3 3 6 9.233412 3.375899 5.857513 21.876912 32.540929 4.259023
## 3 3 3 6 3.305337 5.381111 -2.075773 7.448805 20.594574 3.057416
## 4 3 3 6 7.898472 6.546106 1.352365 42.770520 7.485354 4.092916
## 5 3 3 6 7.407689 4.133161 3.274528 15.910683 28.921324 3.865747
## 6 3 3 6 8.391670 11.118740 -2.727070 13.757959 13.710795 3.025930
## df statistic pvalue conf.low conf.high mean.null alternative
## 1 3.560815 -1.7070038 0.1717690 -16.706752 4.371389 0 two.sided
## 2 3.852071 1.3753182 0.2435903 -6.148694 17.863719 0 two.sided
## 3 3.279384 -0.6789306 0.5420290 -11.353287 7.201740 0 two.sided
## 4 2.679243 0.3304161 0.7652076 -12.601323 15.306053 0 two.sided
## 5 3.689284 0.8470621 0.4484008 -7.824581 14.373637 0 two.sided
## 6 3.999988 -0.9012337 0.4184225 -11.128408 5.674268 0 two.sided
## conf.level
## 1 0.95
## 2 0.95
## 3 0.95
## 4 0.95
## 5 0.95
## 6 0.95
Valor_p <- tStudent %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.1717690
## 2 0.2435903
## 3 0.5420290
## 4 0.7652076
## 5 0.4484008
## 6 0.4184225
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## Gen FC PV LFC LPV
## 1 Gen-1 0.0139105 0.1717690 -6.167682 0.7650553
## 2 Gen-2 57.9811724 0.2435903 5.857513 0.6133400
## 3 Gen-3 0.2372083 0.5420290 -2.075773 0.2659774
## 4 Gen-4 2.5533040 0.7652076 1.352365 0.1162208
## 5 Gen-5 9.6767854 0.4484008 3.274528 0.3483336
## 6 Gen-6 0.1510324 0.4184225 -2.727070 0.3783850
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## Gen FC PV LFC LPV
## 1 Gen-70 0.0001879043 0.01951304 -12.377714 1.709675
## 2 Gen-86 0.0110897486 0.02551192 -6.494630 1.593257
## 3 Gen-106 0.0131757460 0.04469294 -6.245972 1.349761
## 4 Gen-171 0.0457135593 0.04563165 -4.451234 1.340734
## 5 Gen-192 0.0098020830 0.01581243 -6.672696 1.801001
## 6 Gen-220 0.0027046754 0.03762206 -8.530329 1.424557
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## Gen FC PV LFC LPV
## 1 Gen-16 6519.4909 0.005650204 12.670544 2.247936
## 2 Gen-21 106.0526 0.011216187 6.728636 1.950155
## 3 Gen-73 448.6262 0.023442608 8.809370 1.629994
## 4 Gen-165 1162.3161 0.007464178 10.182787 2.127018
## 5 Gen-208 240.5641 0.003872912 7.910278 2.411962
## 6 Gen-219 265.4468 0.034889477 8.052279 1.457306
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## Gen FC PV LFC LPV
## 1 Gen-362 0.0013108701 0.0006197707 -9.575260 3.207769
## 2 Gen-314 0.0004638904 0.0059652330 -11.073929 2.224373
## 3 Gen-438 0.0009500693 0.0120003349 -10.039680 1.920807
## 4 Gen-192 0.0098020830 0.0158124277 -6.672696 1.801001
## 5 Gen-70 0.0001879043 0.0195130371 -12.377714 1.709675
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## Gen FC PV LFC LPV
## 1 Gen-362 0.0013108701 0.0006197707 -9.575260 3.207769
## 2 Gen-314 0.0004638904 0.0059652330 -11.073929 2.224373
## 3 Gen-438 0.0009500693 0.0120003349 -10.039680 1.920807
## 4 Gen-192 0.0098020830 0.0158124277 -6.672696 1.801001
## 5 Gen-70 0.0001879043 0.0195130371 -12.377714 1.709675
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "gray") +
theme_classic() +
labs(title = "V_Plot", subtitle = "Examen medio término",
caption = "Creado por: Fernanda Hernández Rico") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # SubÃndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_label(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
color = "#1a5276",
fill = "#5499c7") +
geom_label(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
color = "#7b241c",
fill = "#f1948a") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano
