Creado por:Deni Uribe
if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar5.csv")
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 21.1 19.8 20.3 21.3 20.9 21.8
## 2 Control-2 19.9 20.4 19.7 19.1 18.5 19.1
## 3 Control-3 20.7 19.1 20.0 19.0 19.9 19.9
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 29.4 28.8 22.5 22.5 21.5 18.7
## 2 Gen-2 20.6 20.1 21.6 21.5 22.1 22.9
## 3 Gen-3 20.5 28.6 31.5 32.4 20.2 30.4
## 4 Gen-4 20.1 25.4 25.6 18.5 23.2 18.2
## 5 Gen-5 28.7 19.1 24.4 22.3 27.6 26.2
## 6 Gen-6 29.2 18.3 29.3 18.3 21.0 25.5
###############################################
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.508 0.00890 57.1 5.83
## 2 Gen-2 0.215 0.644 0.334 -1.58
## 3 Gen-3 0.00468 0.00933 0.501 -0.996
## 4 Gen-4 0.999 0.0835 12.0 3.58
## 5 Gen-5 0.0233 0.0636 0.367 -1.45
## 6 Gen-6 0.316 0.0217 14.6 3.86
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 2.70 1.74 -1.51 8.85 9.05 2.55
## 2 Gen-2 1.67 2.37 2.61 -0.00871 0.342 1.57
## 3 Gen-3 12.6 0.435 10.2 -0.104 8.88 11.5
## 4 Gen-4 -1.32 3.40 -2.07 -0.472 5.65 5.57
## 5 Gen-5 2.49 7.82 5.96 8.15 -0.627 4.41
## 6 Gen-6 -1.46 1.21 5.23 8.67 -1.42 9.33
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y
## 1 3 3 6 6.8122516 0.977979465 5.8342722 13.6621243 4.8605316
## 2 3 3 6 0.6351197 2.216305382 -1.5811857 0.6887799 0.2379247
## 3 3 3 6 6.7439178 7.740196110 -0.9962783 36.8419220 41.5165553
## 4 3 3 6 3.5823279 0.001853026 3.5804749 12.3299706 8.7838382
## 5 3 3 6 3.9758730 5.422252739 -1.4463797 19.3796271 7.3165479
## 6 3 3 6 5.5247001 1.660363769 3.8643363 36.3221045 11.3347804
## stderr df statistic pvalue conf.low conf.high mean.null
## 1 2.4847975 3.263186 2.3479870 0.0934535 -1.724618 13.3931623 0
## 2 0.5557891 3.234424 -2.8449383 0.0598433 -3.279597 0.1172258 0
## 3 5.1107233 3.985815 -0.1949388 0.8549741 -15.205859 13.2133029 0
## 4 2.6529109 3.890262 1.3496401 0.2503189 -3.867856 11.0288059 0
## 5 2.9830731 3.321756 -0.4848623 0.6579730 -10.440250 7.5474904 0
## 6 3.9856779 3.137480 0.9695556 0.4009324 -8.511164 16.2398362 0
## alternative conf.level
## 1 two.sided 0.95
## 2 two.sided 0.95
## 3 two.sided 0.95
## 4 two.sided 0.95
## 5 two.sided 0.95
## 6 two.sided 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.0934535
## 2 0.0598433
## 3 0.8549741
## 4 0.2503189
## 5 0.6579730
## 6 0.4009324
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 57.1 0.0935 5.83 1.03
## 2 Gen-2 0.334 0.0598 -1.58 1.22
## 3 Gen-3 0.501 0.855 -0.996 0.0680
## 4 Gen-4 12.0 0.250 3.58 0.602
## 5 Gen-5 0.367 0.658 -1.45 0.182
## 6 Gen-6 14.6 0.401 3.86 0.397
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-79 0.0497 0.0157 -4.33 1.81
## 2 Gen-80 0.00497 0.0226 -7.65 1.65
## 3 Gen-153 0.0391 0.0117 -4.68 1.93
## 4 Gen-235 0.00171 0.00923 -9.19 2.03
## 5 Gen-236 0.000667 0.0381 -10.6 1.42
## 6 Gen-269 0.000908 0.0399 -10.1 1.40
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-32 237. 0.0401 7.89 1.40
## 2 Gen-54 125. 0.0166 6.97 1.78
## 3 Gen-132 200. 0.0225 7.65 1.65
## 4 Gen-159 162. 0.0388 7.34 1.41
## 5 Gen-163 82.9 0.0347 6.37 1.46
## 6 Gen-209 162. 0.0228 7.34 1.64
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-235 0.00171 0.00923 -9.19 2.03
## 2 Gen-153 0.0391 0.0117 -4.68 1.93
## 3 Gen-79 0.0497 0.0157 -4.33 1.81
## 4 Gen-595 0.00732 0.0161 -7.09 1.79
## 5 Gen-603 0.000354 0.0198 -11.5 1.70
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-235 0.00171 0.00923 -9.19 2.03
## 2 Gen-153 0.0391 0.0117 -4.68 1.93
## 3 Gen-79 0.0497 0.0157 -4.33 1.81
## 4 Gen-595 0.00732 0.0161 -7.09 1.79
## 5 Gen-603 0.000354 0.0198 -11.5 1.70
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "gray") +
theme_classic() +
labs(title = "Titulo", subtitle = "Subtitulo",
caption = "Creado por: Deni Uribe") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # Subíndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#1a5276",
fill = "#5499c7") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#7b241c",
fill = "#f1948a") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código
# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)