Creado por:Marian Islas
if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <-vroom(file = "https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar8.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 22.3 20.4 21.6 19.4 31.9 26.5
## 2 Gen-2 Target 28.4 33.4 23.0 28.3 23.8 25.6
## 3 Gen-3 Target 18.7 20.0 25.4 31.8 33.8 34.4
## 4 Gen-4 Target 24.4 25.2 18.3 32.6 31.7 32.0
## 5 Gen-5 Target 18.4 28.8 20.2 23.7 32.2 22.1
## 6 Gen-6 Target 19.2 31.7 25.3 28.7 29.4 34.0
## 7 Gen-7 Target 28.8 28.6 27.5 23.1 34.1 29.5
## 8 Gen-8 Target 31.1 32.1 20.1 34.4 28.4 34.2
## 9 Gen-9 Target 33.5 27.3 24.7 32.2 24.6 24.9
## 10 Gen-10 Target 32.3 24.7 19.1 33.2 30.5 29.9
## # ℹ 693 more rows
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 20.5 18.6 21.4 20.7 21.7 20.3
## 2 Control-2 19.4 19.0 21.6 20.3 19.5 20.1
## 3 Control-3 19.4 21.3 20.0 18.8 21.1 18.4
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 22.3 20.4 21.6 19.4 31.9 26.5
## 2 Gen-2 28.4 33.4 23.0 28.3 23.8 25.6
## 3 Gen-3 18.7 20.0 25.4 31.8 33.8 34.4
## 4 Gen-4 24.4 25.2 18.3 32.6 31.7 32.0
## 5 Gen-5 18.4 28.8 20.2 23.7 32.2 22.1
## 6 Gen-6 19.2 31.7 25.3 28.7 29.4 34.0
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0177 0.401 0.0442 -4.50
## 2 Gen-2 0.0181 0.00365 4.95 2.31
## 3 Gen-3 0.000103 0.429 0.000240 -12.0
## 4 Gen-4 0.000246 0.177 0.00139 -9.49
## 5 Gen-5 0.0169 0.198 0.0852 -3.55
## 6 Gen-6 0.000633 0.0263 0.0241 -5.37
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 -0.562 11.2 6.85 2.54 0.770 0.640
## 2 Gen-2 8.38 3.00 5.98 8.57 13.7 2.02
## 3 Gen-3 11.9 13.1 14.8 -1.06 0.331 4.40
## 4 Gen-4 12.6 11.0 12.4 4.57 5.59 -2.68
## 5 Gen-5 3.80 11.4 2.45 -1.35 9.19 -0.837
## 6 Gen-6 8.82 8.66 14.4 -0.602 12.0 4.33
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y
## 1 3 3 6 1.317409 5.816412 -4.499002 1.129908 35.1761341
## 2 3 3 6 8.097411 5.788635 2.308776 34.215828 7.2576359
## 3 3 3 6 1.221129 13.245971 -12.024842 8.041381 2.1467615
## 4 3 3 6 2.495959 11.990186 -9.494227 20.314794 0.8242264
## 5 3 3 6 2.334200 5.887187 -3.552987 35.316933 23.3479278
## 6 3 3 6 5.250566 10.624953 -5.374386 40.473214 10.6573677
## stderr df statistic pvalue conf.low conf.high mean.null
## 1 3.478795 2.128353 -1.2932646 0.318469276 -18.634451 9.636446 0
## 2 3.718130 2.811924 0.6209509 0.581308428 -9.984553 14.602104 0
## 3 1.842837 2.996814 -6.5251801 0.007337661 -17.893099 -6.156584 0
## 4 2.654494 2.162024 -3.5766612 0.062387309 -20.133331 1.144877 0
## 5 4.422098 3.840152 -0.8034619 0.468488937 -16.034906 8.928931 0
## 6 4.128381 2.984981 -1.3018146 0.284334168 -18.550214 7.801441 0
## alternative conf.level
## 1 two.sided 0.95
## 2 two.sided 0.95
## 3 two.sided 0.95
## 4 two.sided 0.95
## 5 two.sided 0.95
## 6 two.sided 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.318469276
## 2 0.581308428
## 3 0.007337661
## 4 0.062387309
## 5 0.468488937
## 6 0.284334168
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0442 0.318 -4.50 0.497
## 2 Gen-2 4.95 0.581 2.31 0.236
## 3 Gen-3 0.000240 0.00734 -12.0 2.13
## 4 Gen-4 0.00139 0.0624 -9.49 1.20
## 5 Gen-5 0.0852 0.468 -3.55 0.329
## 6 Gen-6 0.0241 0.284 -5.37 0.546
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-3 0.000240 0.00734 -12.0 2.13
## 2 Gen-95 0.00167 0.0266 -9.22 1.57
## 3 Gen-184 0.0276 0.0470 -5.18 1.33
## 4 Gen-201 0.0105 0.00302 -6.58 2.52
## 5 Gen-272 0.0124 0.0352 -6.34 1.45
## 6 Gen-289 0.00129 0.0228 -9.60 1.64
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-174 138. 0.0365 7.11 1.44
## 2 Gen-217 248. 0.0127 7.95 1.89
## 3 Gen-232 230. 0.00761 7.85 2.12
## 4 Gen-264 28.4 0.0461 4.83 1.34
## 5 Gen-317 14.1 0.0463 3.82 1.33
## 6 Gen-342 2023. 0.0248 11.0 1.61
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-201 0.0105 0.00302 -6.58 2.52
## 2 Gen-3 0.000240 0.00734 -12.0 2.13
## 3 Gen-352 0.000852 0.0139 -10.2 1.86
## 4 Gen-345 0.000417 0.0147 -11.2 1.83
## 5 Gen-297 0.0441 0.0200 -4.50 1.70
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-201 0.0105 0.00302 -6.58 2.52
## 2 Gen-3 0.000240 0.00734 -12.0 2.13
## 3 Gen-352 0.000852 0.0139 -10.2 1.86
## 4 Gen-345 0.000417 0.0147 -11.2 1.83
## 5 Gen-297 0.0441 0.0200 -4.50 1.70
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "#566573") +
theme_classic() +
labs(title = "Evaluación de cambio de la espresión",
caption = "Modificado por: __________") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # SubÃndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "#FFC300",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "#C70039",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "#0b4eea",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#FF5733") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#2f4578") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#74da19") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#f29611") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano
