if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar4.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 25.4 20.4 19.8 29.6 30.6 21.2
## 2 Gen-2 Target 25.6 23.0 31.9 33.9 33.9 30.9
## 3 Gen-3 Target 19.0 27.1 33.0 33.2 25.1 22.8
## 4 Gen-4 Target 20.5 27.8 20.6 30.1 18.3 34.4
## 5 Gen-5 Target 23.9 25.8 21.9 27.2 23.7 29.0
## 6 Gen-6 Target 28.2 23.1 30.1 32.0 33.9 18.5
## 7 Gen-7 Target 23.0 23.4 32.4 18.5 33.0 34.5
## 8 Gen-8 Target 25.4 31.9 28.0 31.7 34.6 20.1
## 9 Gen-9 Target 34.6 27.8 24.3 20.1 25.4 20.1
## 10 Gen-10 Target 25.2 20.8 26.4 28.6 33.8 32.7
## # ℹ 693 more rows
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 21.9 18.9 18.2 21.7 20.0 18.0
## 2 Control-2 21.1 19.3 19.6 19.0 18.5 19.4
## 3 Control-3 19.8 19.9 20.7 18.2 21.0 19.4
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 25.4 20.4 19.8 29.6 30.6 21.2
## 2 Gen-2 25.6 23.0 31.9 33.9 33.9 30.9
## 3 Gen-3 19.0 27.1 33.0 33.2 25.1 22.8
## 4 Gen-4 20.5 27.8 20.6 30.1 18.3 34.4
## 5 Gen-5 23.9 25.8 21.9 27.2 23.7 29.0
## 6 Gen-6 28.2 23.1 30.1 32.0 33.9 18.5
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.00499 0.260 0.0192 -5.70
## 2 Gen-2 0.0000917 0.00849 0.0108 -6.53
## 3 Gen-3 0.00531 0.0114 0.466 -1.10
## 4 Gen-4 0.00362 0.123 0.0294 -5.09
## 5 Gen-5 0.00717 0.0665 0.108 -3.22
## 6 Gen-6 0.00251 0.00682 0.368 -1.44
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 9.94 10.8 2.23 4.44 1.04 0.347
## 2 Gen-2 14.3 14.1 11.9 4.73 3.56 12.4
## 3 Gen-3 13.6 5.30 3.78 -1.89 7.72 13.5
## 4 Gen-4 10.4 -1.51 15.4 -0.412 8.38 1.10
## 5 Gen-5 7.53 3.87 9.98 2.95 6.41 2.37
## 6 Gen-6 12.3 14.1 -0.460 7.30 3.69 10.6
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr
## 1 3 3 6 1.942524 7.646819 -5.704295 4.805709 22.189096 2.999711
## 2 3 3 6 6.880203 13.412633 -6.532430 22.825095 1.740435 2.861557
## 3 3 3 6 6.456014 7.556905 -1.100891 60.728559 27.859749 5.434099
## 4 3 3 6 3.022537 8.110084 -5.087547 22.099044 75.682194 5.709093
## 5 3 3 6 3.909575 7.124798 -3.215224 4.764264 9.453933 2.177016
## 6 3 3 6 7.195345 8.637140 -1.441795 11.932498 62.843431 4.992525
## df statistic pvalue conf.low conf.high mean.null alternative
## 1 2.827504 -1.9016145 0.1589723 -15.58880 4.180207 0 two.sided
## 2 2.303241 -2.2828235 0.1330744 -17.41463 4.349769 0 two.sided
## 3 3.515982 -0.2025894 0.8506275 -17.04387 14.842090 0 two.sided
## 4 3.076229 -0.8911305 0.4370281 -23.00436 12.829269 0 two.sided
## 5 3.607531 -1.4768948 0.2212092 -9.52797 3.097522 0 two.sided
## 6 2.733077 -0.2887907 0.7932398 -18.24520 15.361610 0 two.sided
## conf.level
## 1 0.95
## 2 0.95
## 3 0.95
## 4 0.95
## 5 0.95
## 6 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.1589723
## 2 0.1330744
## 3 0.8506275
## 4 0.4370281
## 5 0.2212092
## 6 0.7932398
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0192 0.159 -5.70 0.799
## 2 Gen-2 0.0108 0.133 -6.53 0.876
## 3 Gen-3 0.466 0.851 -1.10 0.0703
## 4 Gen-4 0.0294 0.437 -5.09 0.359
## 5 Gen-5 0.108 0.221 -3.22 0.655
## 6 Gen-6 0.368 0.793 -1.44 0.101
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-10 0.00379 0.0243 -8.05 1.62
## 2 Gen-153 0.0106 0.00798 -6.55 2.10
## 3 Gen-161 0.0117 0.0404 -6.41 1.39
## 4 Gen-195 0.00153 0.0260 -9.35 1.58
## 5 Gen-214 0.0131 0.00222 -6.25 2.65
## 6 Gen-223 0.0000898 0.0231 -13.4 1.64
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-51 206. 0.0241 7.68 1.62
## 2 Gen-115 1292. 0.0130 10.3 1.89
## 3 Gen-198 223. 0.0194 7.80 1.71
## 4 Gen-238 302. 0.0479 8.24 1.32
## 5 Gen-251 460. 0.00196 8.85 2.71
## 6 Gen-273 204. 0.0429 7.67 1.37
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-358 0.0111 0.000266 -6.50 3.58
## 2 Gen-214 0.0131 0.00222 -6.25 2.65
## 3 Gen-153 0.0106 0.00798 -6.55 2.10
## 4 Gen-289 0.00201 0.00896 -8.96 2.05
## 5 Gen-639 0.00136 0.0147 -9.52 1.83
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-358 0.0111 0.000266 -6.50 3.58
## 2 Gen-214 0.0131 0.00222 -6.25 2.65
## 3 Gen-153 0.0106 0.00798 -6.55 2.10
## 4 Gen-289 0.00201 0.00896 -8.96 2.05
## 5 Gen-639 0.00136 0.0147 -9.52 1.83
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "#566573") +
theme_classic() +
labs(title = "Evaluación de cambio de la espresión",
caption = "Modificado por: Diego Arturo Campos Grimaldi") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # SubÃndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "#FFC300",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "#C70039",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "#0b4eea",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#FF5733") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#2f4578") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#74da19") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#f29611") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano