#Evaluacion medio termino #Creado por Zulia Soto
if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar1.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 19.5 23.8 27.4 26.3 27.9 23.0
## 2 Gen-2 Target 20.9 27.4 28.8 33.9 20.0 28.2
## 3 Gen-3 Target 31.6 22.6 24.5 19.7 33.7 34.0
## 4 Gen-4 Target 19.1 23.7 23.9 24.6 33.8 22.6
## 5 Gen-5 Target 33.8 23.6 34.3 31.1 34.0 23.5
## 6 Gen-6 Target 30.8 25.2 20.3 32.8 31.8 20.4
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 18.3 18.1 18.0 18.2 19.8 21.6
## 2 Control-2 20.1 18.2 18.3 19.5 19.6 21.8
## 3 Control-3 22.0 20.1 20.9 18.9 21.7 21.7
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 19.5 23.8 27.4 26.3 27.9 23.0
## 2 Gen-2 20.9 27.4 28.8 33.9 20.0 28.2
## 3 Gen-3 31.6 22.6 24.5 19.7 33.7 34.0
## 4 Gen-4 19.1 23.7 23.9 24.6 33.8 22.6
## 5 Gen-5 33.8 23.6 34.3 31.1 34.0 23.5
## 6 Gen-6 30.8 25.2 20.3 32.8 31.8 20.4
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0228 0.0532 0.429 -1.22
## 2 Gen-2 0.00749 0.0122 0.616 -0.700
## 3 Gen-3 0.00222 0.00851 0.261 -1.94
## 4 Gen-4 0.00984 0.136 0.0725 -3.79
## 5 Gen-5 0.00169 0.000415 4.06 2.02
## 6 Gen-6 0.00387 0.0148 0.262 -1.93
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 7.45 7.55 1.35 -0.636 4.99 8.34
## 2 Gen-2 15.1 -0.352 6.48 0.747 8.58 9.75
## 3 Gen-3 0.821 13.4 12.3 11.5 3.76 5.40
## 4 Gen-4 5.71 13.4 0.893 -1.03 4.85 4.82
## 5 Gen-5 12.3 13.6 1.76 13.7 4.77 15.2
## 6 Gen-6 14.0 11.4 -1.33 10.6 6.39 1.23
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr
## 1 3 3 6 4.231599 5.453882 -1.2222833 20.58890 12.60560 3.326384
## 2 3 3 6 6.360058 7.059935 -0.6998764 23.97374 59.59841 5.278010
## 3 3 3 6 6.876199 8.816669 -1.9404708 16.50954 48.23197 4.645482
## 4 3 3 6 2.881586 6.667691 -3.7861057 11.49311 39.75311 4.133047
## 5 3 3 6 11.233682 9.212339 2.0213427 31.94080 42.09802 4.967857
## 6 3 3 6 6.082040 8.014915 -1.9328747 22.08988 67.05555 5.451160
## df statistic pvalue conf.low conf.high mean.null alternative
## 1 3.781288 -0.3674511 0.7329175 -10.67234 8.227776 0 two.sided
## 2 3.384926 -0.1326023 0.9020062 -16.46631 15.066556 0 two.sided
## 3 3.225582 -0.4177114 0.7024214 -16.15642 12.275483 0 two.sided
## 4 3.067243 -0.9160568 0.4258409 -16.77767 9.205460 0 two.sided
## 5 3.926109 0.4068842 0.7052957 -11.87462 15.917308 0 two.sided
## 6 3.188706 -0.3545805 0.7450693 -18.71432 14.848571 0 two.sided
## conf.level
## 1 0.95
## 2 0.95
## 3 0.95
## 4 0.95
## 5 0.95
## 6 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.7329175
## 2 0.9020062
## 3 0.7024214
## 4 0.4258409
## 5 0.7052957
## 6 0.7450693
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.429 0.733 -1.22 0.135
## 2 Gen-2 0.616 0.902 -0.700 0.0448
## 3 Gen-3 0.261 0.702 -1.94 0.153
## 4 Gen-4 0.0725 0.426 -3.79 0.371
## 5 Gen-5 4.06 0.705 2.02 0.152
## 6 Gen-6 0.262 0.745 -1.93 0.128
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-29 0.0396 0.0480 -4.66 1.32
## 2 Gen-88 0.00942 0.0369 -6.73 1.43
## 3 Gen-190 0.00201 0.0417 -8.96 1.38
## 4 Gen-195 0.00272 0.0284 -8.52 1.55
## 5 Gen-406 0.00212 0.00527 -8.88 2.28
## 6 Gen-408 0.0146 0.0471 -6.10 1.33
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-14 284. 0.0184 8.15 1.73
## 2 Gen-38 170. 0.00287 7.41 2.54
## 3 Gen-87 99.3 0.0337 6.63 1.47
## 4 Gen-92 977. 0.0187 9.93 1.73
## 5 Gen-105 5987. 0.00252 12.5 2.60
## 6 Gen-110 145. 0.0196 7.18 1.71
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-406 0.00212 0.00527 -8.88 2.28
## 2 Gen-446 0.00174 0.00731 -9.17 2.14
## 3 Gen-683 0.00255 0.0170 -8.62 1.77
## 4 Gen-630 0.00110 0.0176 -9.82 1.76
## 5 Gen-195 0.00272 0.0284 -8.52 1.55
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-406 0.00212 0.00527 -8.88 2.28
## 2 Gen-446 0.00174 0.00731 -9.17 2.14
## 3 Gen-683 0.00255 0.0170 -8.62 1.77
## 4 Gen-630 0.00110 0.0176 -9.82 1.76
## 5 Gen-195 0.00272 0.0284 -8.52 1.55
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "gray") +
theme_classic() +
labs(title = "Titulo", subtitle = "Subtitulo",
caption = "Creado por: Zulia Soto") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # Subíndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#1a5276",
fill = "#5499c7") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#7b241c",
fill = "#f1948a") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano
#Para guardar determine el directorio y activar el siguiente código
# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)