if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar7.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Volcano_data
## # A tibble: 703 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 25.7 33.4 23.9 19.3 28.8 23.2
## 2 Gen-2 Target 28.7 25.5 20.0 20.9 28.6 32.0
## 3 Gen-3 Target 25.9 24.5 24.3 19.4 19.4 28.3
## 4 Gen-4 Target 31.5 33.1 31.5 31.8 22.5 23.4
## 5 Gen-5 Target 23.8 32.8 24.5 31.6 20.8 33.1
## 6 Gen-6 Target 26.7 20.3 33.4 24.2 31.5 29.1
## 7 Gen-7 Target 33.1 19.8 21.9 34.3 24.4 18.8
## 8 Gen-8 Target 30.5 34.0 31.3 27.3 25.0 21.7
## 9 Gen-9 Target 33.3 24.0 21.9 20.5 20.6 24.7
## 10 Gen-10 Target 25.1 19.6 34.4 20.3 18.9 27.1
## # ℹ 693 more rows
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 21.8 21.8 19.3 18.8 19.5 21.7
## 2 Control-2 19.0 19.2 20.5 18.9 21.1 19.9
## 3 Control-3 21.1 20.8 20.2 21.9 19.2 19.4
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 25.7 33.4 23.9 19.3 28.8 23.2
## 2 Gen-2 28.7 25.5 20.0 20.9 28.6 32.0
## 3 Gen-3 25.9 24.5 24.3 19.4 19.4 28.3
## 4 Gen-4 31.5 33.1 31.5 31.8 22.5 23.4
## 5 Gen-5 23.8 32.8 24.5 31.6 20.8 33.1
## 6 Gen-6 26.7 20.3 33.4 24.2 31.5 29.1
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0744 0.00650 11.4 3.52
## 2 Gen-2 0.00705 0.0508 0.139 -2.85
## 3 Gen-3 0.200 0.0444 4.51 2.17
## 4 Gen-4 0.0170 0.000311 54.5 5.77
## 5 Gen-5 0.00286 0.0105 0.273 -1.87
## 6 Gen-6 0.00338 0.0120 0.282 -1.83
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 -0.520 8.89 2.88 5.11 12.8 3.85
## 2 Gen-2 1.06 8.67 11.7 8.04 4.90 -0.0418
## 3 Gen-3 -0.483 -0.492 7.93 5.26 3.92 4.30
## 4 Gen-4 12.0 2.58 3.10 10.9 12.6 11.5
## 5 Gen-5 11.7 0.868 12.8 3.13 12.2 4.43
## 6 Gen-6 4.33 11.6 8.75 6.03 -0.293 13.4
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr
## 1 3 3 6 7.266015 3.749103 3.516912 23.7046641 22.69160 3.932610
## 2 3 3 6 4.299615 7.149086 -2.849471 16.6034945 30.07486 3.944547
## 3 3 3 6 4.492467 2.318640 2.173827 0.4733169 23.63122 2.834580
## 4 3 3 6 11.650810 5.882352 5.768459 0.7101165 27.86037 3.086016
## 5 3 3 6 6.579474 8.450302 -1.870828 23.9485777 43.42238 4.738880
## 6 3 3 6 6.384861 8.210620 -1.825759 47.1247366 13.25811 4.486381
## df statistic pvalue conf.low conf.high mean.null alternative
## 1 3.998094 0.8942947 0.4217337 -7.403817 14.437641 0 two.sided
## 2 3.692456 -0.7223822 0.5131470 -14.170586 8.471645 0 two.sided
## 3 2.080085 0.7668955 0.5206115 -9.583276 13.930930 0 two.sided
## 4 2.101887 1.8692251 0.1963408 -6.911475 18.448392 0 two.sided
## 5 3.691563 -0.3947827 0.7147388 -15.473165 11.731509 0 two.sided
## 6 3.042821 -0.4069559 0.7109781 -15.990428 12.338910 0 two.sided
## conf.level
## 1 0.95
## 2 0.95
## 3 0.95
## 4 0.95
## 5 0.95
## 6 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.4217337
## 2 0.5131470
## 3 0.5206115
## 4 0.1963408
## 5 0.7147388
## 6 0.7109781
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 11.4 0.422 3.52 0.375
## 2 Gen-2 0.139 0.513 -2.85 0.290
## 3 Gen-3 4.51 0.521 2.17 0.283
## 4 Gen-4 54.5 0.196 5.77 0.707
## 5 Gen-5 0.273 0.715 -1.87 0.146
## 6 Gen-6 0.282 0.711 -1.83 0.148
########################################################################33
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-164 0.0382 0.0435 -4.71 1.36
## 2 Gen-284 0.000958 0.0265 -10.0 1.58
## 3 Gen-408 0.000505 0.0231 -11.0 1.64
## 4 Gen-443 0.0657 0.0273 -3.93 1.56
## 5 Gen-512 0.000425 0.00609 -11.2 2.22
## 6 Gen-528 0.000205 0.00441 -12.3 2.36
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-8 119. 0.0382 6.89 1.42
## 2 Gen-63 159. 0.0233 7.32 1.63
## 3 Gen-87 627. 0.0500 9.29 1.30
## 4 Gen-321 238. 0.0403 7.89 1.39
## 5 Gen-322 4049. 0.00215 12.0 2.67
## 6 Gen-346 678. 0.000937 9.40 3.03
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-528 0.000205 0.00441 -12.3 2.36
## 2 Gen-512 0.000425 0.00609 -11.2 2.22
## 3 Gen-587 0.000626 0.0226 -10.6 1.65
## 4 Gen-408 0.000505 0.0231 -11.0 1.64
## 5 Gen-284 0.000958 0.0265 -10.0 1.58
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-528 0.000205 0.00441 -12.3 2.36
## 2 Gen-512 0.000425 0.00609 -11.2 2.22
## 3 Gen-587 0.000626 0.0226 -10.6 1.65
## 4 Gen-408 0.000505 0.0231 -11.0 1.64
## 5 Gen-284 0.000958 0.0265 -10.0 1.58
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "#566573") +
theme_classic() +
labs(title = "Evaluación de cambio de la espresión",
caption = "Modificado por:Ricardo Palos Sánchez") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # Subíndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "#FFC300",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "#C70039",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "#0b4eea",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#FF5733") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "#2f4578") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#74da19") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "black",
fill = "#f29611") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano