#if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar6.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 18.9 18.2 33.2 32.6 30.4 34.2
## 2 Gen-2 Target 23.3 18.0 19.2 19.0 19.2 19.4
## 3 Gen-3 Target 23.0 27.6 27.8 29.8 33.6 19.2
## 4 Gen-4 Target 19.9 27.8 27.9 29.6 28.7 29.6
## 5 Gen-5 Target 24.2 31.4 29.9 21.8 20.1 25.9
## 6 Gen-6 Target 33.7 20.2 18.2 25.7 23.6 24.6
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 19.3 20.2 21.2 18.3 19.6 18.6
## 2 Control-2 21.5 21.6 21.6 19.5 21.7 20.9
## 3 Control-3 19.4 20.3 18.9 21.8 20.2 20.3
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 18.9 18.2 33.2 32.6 30.4 34.2
## 2 Gen-2 23.3 18.0 19.2 19.0 19.2 19.4
## 3 Gen-3 23.0 27.6 27.8 29.8 33.6 19.2
## 4 Gen-4 19.9 27.8 27.9 29.6 28.7 29.6
## 5 Gen-5 24.2 31.4 29.9 21.8 20.1 25.9
## 6 Gen-6 33.7 20.2 18.2 25.7 23.6 24.6
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.000196 0.127 0.00154 -9.34
## 2 Gen-2 1.83 1.23 1.49 0.579
## 3 Gen-3 0.00573 0.0196 0.292 -1.78
## 4 Gen-4 0.00171 0.0381 0.0449 -4.48
## 5 Gen-5 0.177 0.00378 46.8 5.55
## 6 Gen-6 0.0433 0.0824 0.525 -0.929
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 12.7 9.92 14.3 -1.18 -2.50 12.6
## 2 Gen-2 -0.817 -1.29 -0.508 3.16 -2.68 -1.36
## 3 Gen-3 9.95 13.1 -0.670 2.90 6.88 7.23
## 4 Gen-4 9.69 8.17 9.71 -0.236 7.07 7.31
## 5 Gen-5 1.91 -0.420 6.01 4.15 10.7 9.31
## 6 Gen-6 5.82 3.10 4.67 13.6 -0.484 -2.36
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y
## 1 3 3 6 2.9780783 12.3190656 -9.3409873 70.061953 4.9488146
## 2 3 3 6 -0.2931447 -0.8718025 0.5786577 9.363845 0.1550182
## 3 3 3 6 5.6704455 7.4477496 -1.7773041 5.781972 51.8510294
## 4 3 3 6 4.7141998 9.1905274 -4.4763276 18.392609 0.7816483
## 5 3 3 6 8.0492955 2.4996546 5.5496409 11.873997 10.6042521
## 6 3 3 6 3.6015261 4.5305313 -0.9290052 76.509427 1.8679787
## stderr df statistic pvalue conf.low conf.high mean.null
## 1 5.000359 2.281137 -1.8680634 0.1868241 -28.504309 9.822334 0
## 2 1.781279 2.066202 0.3248552 0.7752458 -6.854988 8.012303 0
## 3 4.383036 2.440567 -0.4054962 0.7179667 -17.721528 14.166920 0
## 4 2.528126 2.169685 -1.7706113 0.2087099 -14.578068 5.625413 0
## 5 2.737289 3.987277 2.0274225 0.1127664 -2.059864 13.159146 0
## 6 5.111341 2.097602 -0.1817537 0.8718345 -21.969229 20.111218 0
## alternative conf.level
## 1 two.sided 0.95
## 2 two.sided 0.95
## 3 two.sided 0.95
## 4 two.sided 0.95
## 5 two.sided 0.95
## 6 two.sided 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.1868241
## 2 0.7752458
## 3 0.7179667
## 4 0.2087099
## 5 0.1127664
## 6 0.8718345
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.00154 0.187 -9.34 0.729
## 2 Gen-2 1.49 0.775 0.579 0.111
## 3 Gen-3 0.292 0.718 -1.78 0.144
## 4 Gen-4 0.0449 0.209 -4.48 0.680
## 5 Gen-5 46.8 0.113 5.55 0.948
## 6 Gen-6 0.525 0.872 -0.929 0.0596
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-74 0.00175 0.0345 -9.16 1.46
## 2 Gen-80 0.0390 0.0381 -4.68 1.42
## 3 Gen-146 0.0180 0.00870 -5.80 2.06
## 4 Gen-240 0.00108 0.00184 -9.85 2.73
## 5 Gen-257 0.00683 0.0481 -7.19 1.32
## 6 Gen-269 0.0219 0.0473 -5.51 1.33
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-13 46.1 0.0438 5.53 1.36
## 2 Gen-86 535. 0.0110 9.06 1.96
## 3 Gen-114 686. 0.0273 9.42 1.56
## 4 Gen-243 91.3 0.0281 6.51 1.55
## 5 Gen-338 329. 0.00530 8.36 2.28
## 6 Gen-341 3284. 0.000331 11.7 3.48
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-240 0.00108 0.00184 -9.85 2.73
## 2 Gen-632 0.0161 0.00368 -5.95 2.43
## 3 Gen-499 0.0377 0.00641 -4.73 2.19
## 4 Gen-146 0.0180 0.00870 -5.80 2.06
## 5 Gen-639 0.00451 0.00920 -7.79 2.04
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-240 0.00108 0.00184 -9.85 2.73
## 2 Gen-632 0.0161 0.00368 -5.95 2.43
## 3 Gen-499 0.0377 0.00641 -4.73 2.19
## 4 Gen-146 0.0180 0.00870 -5.80 2.06
## 5 Gen-639 0.00451 0.00920 -7.79 2.04
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "gray") +
theme_classic() +
labs(title = "Titulo", subtitle = "Subtitulo",
caption = "Creado por: Ana Gabriela Bernardo") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # Subíndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#1a5276",
fill = "#5499c7") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#7b241c",
fill = "#f1948a") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano
#Para guardar determine el directorio y activar el siguiente código
Guardar <- ggsave(Volcano, file = “Ejercicio_examen.jpeg”, width = 8, height = 6, dpi = 300)