# Evaluación Medio Término
# Creado por:Paulina Gallegos
if(!require(pacman)) install.packages("pacman", dependencies=TRUE)
## Loading required package: pacman
library("pacman")
p_load("vroom",
"ggplot2",
"ggrepel",
"dplyr",
"matrixTests")
Volcano_data <- vroom(file="https://raw.githubusercontent.com/ManuelLaraMVZ/Examen_R_transcriptomica/refs/heads/main/Datos_Genes_azar2.csv")
## `curl` package not installed, falling back to using `url()`
## Rows: 703 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gen, Type
## dbl (6): C1, C2, C3, T1, T2, T3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Volcano_data)
## # A tibble: 6 × 8
## Gen Type C1 C2 C3 T1 T2 T3
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 Target 31.0 21.8 25.7 29.8 23.6 20.2
## 2 Gen-2 Target 20.3 29.3 30.2 27.4 30.6 28.1
## 3 Gen-3 Target 18.3 26.7 31.6 31.3 27.3 20.9
## 4 Gen-4 Target 27.2 20.1 26.0 28.6 29.8 19.9
## 5 Gen-5 Target 30.8 24.4 18.4 23.9 26.8 34.2
## 6 Gen-6 Target 21.5 24.6 23.2 19.8 34.4 32.5
controles <- Volcano_data %>%
filter(Type=="Selected Control") %>%
select(-2)
head(controles)
## # A tibble: 3 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Control-1 21.5 21.9 18.2 20.8 20.0 21.9
## 2 Control-2 21.9 19.8 20.1 20.8 19.0 20.9
## 3 Control-3 20.9 19.9 21.7 19.6 19.5 21.2
Volcano_data2 <- Volcano_data %>%
filter(Type=="Target") %>%
select(-2)
head(Volcano_data2)
## # A tibble: 6 × 7
## Gen C1 C2 C3 T1 T2 T3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 31.0 21.8 25.7 29.8 23.6 20.2
## 2 Gen-2 20.3 29.3 30.2 27.4 30.6 28.1
## 3 Gen-3 18.3 26.7 31.6 31.3 27.3 20.9
## 4 Gen-4 27.2 20.1 26.0 28.6 29.8 19.9
## 5 Gen-5 30.8 24.4 18.4 23.9 26.8 34.2
## 6 Gen-6 21.5 24.6 23.2 19.8 34.4 32.5
#obtención de promedios
promedioT1 <- mean(controles$T1)
promedioT2 <- mean(controles$T2)
promedioT3 <- mean(controles$T3)
promedioC1 <- mean(controles$C1)
promedioC2 <- mean(controles$C2)
promedioC3 <- mean(controles$C3)
Volcano_data3 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(promedioDCT=(DCT1+DCT2+DCT3)/3,
promedioDCC=(DCC1+DCC2+DCC3)/3) %>%
select(-2,-3,-4,-5,-6,-7) %>%
mutate(dosDCT=2^-promedioDCT,
dosDCC=2^-promedioDCC,
dosDDCT=dosDCT/dosDCC,
FC=dosDDCT) %>%
mutate(LogFC=log2(FC)) %>%
select(1,4,5,7,8)
head(Volcano_data3)
## # A tibble: 6 × 5
## Gen dosDCT dosDCC FC LogFC
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 0.0571 0.0218 2.61 1.39
## 2 Gen-2 0.00319 0.0160 0.199 -2.33
## 3 Gen-3 0.0145 0.0331 0.439 -1.19
## 4 Gen-4 0.0196 0.0713 0.274 -1.87
## 5 Gen-5 0.00416 0.0680 0.0612 -4.03
## 6 Gen-6 0.00272 0.187 0.0146 -6.10
Volcano_data4 <- Volcano_data2 %>%
mutate(DCT1=T1-promedioT1,
DCT2=T2-promedioT2,
DCT3=T3-promedioT3,
DCC1=C1-promedioC1,
DCC2=C2-promedioC2,
DCC3=C3-promedioC3) %>%
select(-2,-3,-4,-5,-6,-7)
head(Volcano_data4)
## # A tibble: 6 × 7
## Gen DCT1 DCT2 DCT3 DCC1 DCC2 DCC3
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 9.38 4.12 -1.11 9.61 1.27 5.67
## 2 Gen-2 6.99 11.1 6.81 -1.10 8.80 10.2
## 3 Gen-3 10.9 7.78 -0.379 -3.07 6.18 11.6
## 4 Gen-4 8.19 10.3 -1.46 5.81 -0.418 6.03
## 5 Gen-5 3.55 7.30 12.9 9.41 3.88 -1.65
## 6 Gen-6 -0.572 14.9 11.2 0.0790 4.01 3.17
Tvalue <- row_t_welch(Volcano_data4[,c("DCC1","DCC2","DCC3")],
Volcano_data4[,c("DCT1","DCT2","DCT3")])
head(Tvalue)
## obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr
## 1 3 3 6 5.516362 4.130165 1.386197 17.418258 27.527223 3.870637
## 2 3 3 6 5.962701 8.290610 -2.327908 37.833173 5.835606 3.815267
## 3 3 3 6 4.915654 6.104556 -1.188902 55.270082 33.981370 5.454400
## 4 3 3 6 3.809957 5.676679 -1.866721 13.417871 39.344895 4.193756
## 5 3 3 6 3.878387 7.908000 -4.029613 30.556033 22.038044 4.187047
## 6 3 3 6 2.421839 8.523376 -6.101538 4.293486 65.463304 4.822060
## df statistic pvalue conf.low conf.high mean.null alternative
## 1 3.807394 0.3581315 0.7392117 -9.578247 12.350641 0 two.sided
## 2 2.602645 -0.6101562 0.5908524 -15.588936 10.933119 0 two.sided
## 3 3.784674 -0.2179712 0.8386951 -16.678670 14.300866 0 two.sided
## 4 3.222006 -0.4451191 0.6844553 -14.707669 10.974227 0 two.sided
## 5 3.897761 -0.9623997 0.3916769 -15.775968 7.716743 0 two.sided
## 6 2.261221 -1.2653383 0.3204668 -24.713424 12.510349 0 two.sided
## conf.level
## 1 0.95
## 2 0.95
## 3 0.95
## 4 0.95
## 5 0.95
## 6 0.95
Valor_p <- Tvalue %>%
select(pvalue)
head(Valor_p)
## pvalue
## 1 0.7392117
## 2 0.5908524
## 3 0.8386951
## 4 0.6844553
## 5 0.3916769
## 6 0.3204668
FC_y_PV <- Volcano_data3 %>%
select(1,4) %>%
mutate(PV = Valor_p$pvalue,
LFC= log2(FC),
LPV= -log10(PV))
head(FC_y_PV)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-1 2.61 0.739 1.39 0.131
## 2 Gen-2 0.199 0.591 -2.33 0.229
## 3 Gen-3 0.439 0.839 -1.19 0.0764
## 4 Gen-4 0.274 0.684 -1.87 0.165
## 5 Gen-5 0.0612 0.392 -4.03 0.407
## 6 Gen-6 0.0146 0.320 -6.10 0.494
#Selección de genes sobreexpresados y silenciados
p_Value=-log10(0.05)
FC_threshold=log2(2)
down.regulated <- FC_y_PV %>%
filter(LFC < -FC_threshold,
LPV > p_Value)
head(down.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-64 0.00533 0.0343 -7.55 1.46
## 2 Gen-111 0.000631 0.00962 -10.6 2.02
## 3 Gen-134 0.00224 0.0317 -8.80 1.50
## 4 Gen-285 0.00122 0.0442 -9.68 1.35
## 5 Gen-330 0.00225 0.0143 -8.79 1.84
## 6 Gen-342 0.00135 0.0317 -9.53 1.50
up.regulated <- FC_y_PV %>%
filter(LFC > FC_threshold,
LPV > p_Value)
head(up.regulated)
## # A tibble: 6 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-28 67.3 0.0270 6.07 1.57
## 2 Gen-44 42.1 0.0482 5.40 1.32
## 3 Gen-60 143. 0.0371 7.16 1.43
## 4 Gen-99 798. 0.0345 9.64 1.46
## 5 Gen-114 28.3 0.0384 4.82 1.42
## 6 Gen-118 732. 0.0267 9.52 1.57
top.down <- down.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-647 0.000597 0.00797 -10.7 2.10
## 2 Gen-405 0.00113 0.00888 -9.79 2.05
## 3 Gen-111 0.000631 0.00962 -10.6 2.02
## 4 Gen-511 0.0136 0.0130 -6.20 1.88
## 5 Gen-330 0.00225 0.0143 -8.79 1.84
top.up <- up.regulated %>%
arrange(PV) %>%
head(5)
head(top.down)
## # A tibble: 5 × 5
## Gen FC PV LFC LPV
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Gen-647 0.000597 0.00797 -10.7 2.10
## 2 Gen-405 0.00113 0.00888 -9.79 2.05
## 3 Gen-111 0.000631 0.00962 -10.6 2.02
## 4 Gen-511 0.0136 0.0130 -6.20 1.88
## 5 Gen-330 0.00225 0.0143 -8.79 1.84
#Gráfica volcano
Volcano <- ggplot(data = FC_y_PV,
mapping = aes(x = LFC, y = LPV)) +
geom_point(alpha = 1.2, color = "gray") +
theme_classic() +
labs(title = "Paulina Gallegos",subtitle = "Subtitulo",
caption = "Creado por:Paulina Gallegos") +
xlab(expression("Log"[2]~"(FC)")) +
ylab(expression("-Log"[10]~"(Valor de p)")) + # Subíndice en "10"
geom_hline(yintercept = p_Value,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_vline(xintercept = -FC_threshold,
linetype = "dashed",
color = "gray",
size = 1) +
geom_point(data = up.regulated,
x = up.regulated$LFC,
y = up.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_point(data = down.regulated,
x = down.regulated$LFC,
y = down.regulated$LPV,
alpha = 1,
size = 2.5,
color = "gray") +
geom_label_repel(data = top.down,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#1a5276",
fill = "#5499c7") +
geom_label_repel(data = top.up,
mapping = aes(x = LFC, y = LPV, label = Gen),
max.overlaps = 50,
color = "#7b241c",
fill = "#f1948a") +
theme( axis.line = element_line(size = 1.2),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.text.x = element_text(face = "bold"),
axis.text.y = element_text(face = "bold")) +
scale_x_continuous(breaks = seq(min(-12), max(12), by =2)) +
scale_y_continuous(breaks = seq(0, max(3), by = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Volcano

#Para guardar determine el directorio y activar el siguiente código
# Guardar <- ggsave(Volcano, file = "Ejercicio_examen.jpeg", width = 8, height = 6, dpi = 300)