Without activation functions, neural networks would just be linear regressions in disguise. These functions are what allow models to learn complex patterns and solve real-world problems.
# Load the required libraries
library(ggplot2)
library(gridExtra)
library(dplyr)
library(knitr)
library(kableExtra)
# Define the activation functions and their derivatives
# 1. Sigmoid function
sigmoid <- function(x) {
1 / (1 + exp(-x))
}
sigmoid_derivative <- function(x) {
s <- sigmoid(x)
s * (1 - s)
}
# 2. Tanh function
tanh_func <- function(x) {
tanh(x)
}
tanh_derivative <- function(x) {
1 - tanh(x)^2
}
# 3. ReLU function
relu <- function(x) {
pmax(0, x)
}
relu_derivative <- function(x) {
ifelse(x > 0, 1, 0)
}
# 4. Leaky ReLU function
leaky_relu <- function(x, alpha = 0.01) {
ifelse(x > 0, x, alpha * x)
}
leaky_relu_derivative <- function(x, alpha = 0.01) {
ifelse(x > 0, 1, alpha)
}
# 5. Swish function
swish <- function(x, beta = 1) {
x * sigmoid(beta * x)
}
swish_derivative <- function(x, beta = 1) {
s <- sigmoid(beta * x)
s + x * s * (1 - s) * beta
}
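As a quick sanity check (a minimal sketch that is not part of the original analysis; the helper `num_deriv` and the test points are illustrative choices), each analytic derivative can be compared against a central finite-difference approximation:

# Illustrative check: analytic derivatives vs. central finite differences
# (test points avoid x = 0, where the ReLU derivative is undefined)
x_test <- c(-2, -0.5, 0.5, 2)
num_deriv <- function(f, x, h = 1e-6) (f(x + h) - f(x - h)) / (2 * h)
c(
  sigmoid = max(abs(sigmoid_derivative(x_test) - num_deriv(sigmoid, x_test))),
  tanh    = max(abs(tanh_derivative(x_test) - num_deriv(tanh_func, x_test))),
  relu    = max(abs(relu_derivative(x_test) - num_deriv(relu, x_test))),
  leaky   = max(abs(leaky_relu_derivative(x_test) - num_deriv(leaky_relu, x_test))),
  swish   = max(abs(swish_derivative(x_test) - num_deriv(swish, x_test)))
)
# Each difference should be tiny (roughly 1e-9 or smaller)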
The sigmoid is one of the most classic activation functions, mapping any real value to the range (0, 1).
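It is defined as
\[f(x) = \sigma(x) = \frac{1}{1 + e^{-x}},\]
and its derivative is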
\[f'(x) = f(x) \cdot (1 - f(x)) = \frac{e^{-x}}{(1 + e^{-x})^2}\]
# Create data for plotting
x <- seq(-6, 6, 0.1)
# Plot the sigmoid function
p1 <- ggplot(data.frame(x = x, y = sigmoid(x)), aes(x, y)) +
  geom_line(color = "blue", linewidth = 1.2) +
  labs(title = "Sigmoid Function",
       x = "x", y = "f(x)") +
  theme_minimal() +
  geom_hline(yintercept = c(0, 1), linetype = "dashed", alpha = 0.5) +
  ylim(-0.1, 1.1)
# Plot the derivative
p2 <- ggplot(data.frame(x = x, y = sigmoid_derivative(x)), aes(x, y)) +
  geom_line(color = "red", linewidth = 1.2) +
  labs(title = "Sigmoid Derivative",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5)
grid.arrange(p1, p2, ncol = 2)
Characteristics:
- Output bounded to (0, 1), which makes it a natural choice for binary-classification output layers.
- Smooth and differentiable everywhere; the derivative peaks at 0.25 at x = 0.
- Saturates for large |x|, so gradients vanish in deep networks (see the quick numeric check below).
- Not zero-centered, which can slow down gradient-based optimization.
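The saturation is easy to verify numerically: a few units away from the origin the derivative is already close to zero (the values below are approximate).

# The sigmoid derivative peaks at 0.25 and decays rapidly
sigmoid_derivative(c(0, 2, 4, 6))
# approximately 0.2500, 0.1050, 0.0177, 0.0025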
The tanh function is similar to the sigmoid but zero-centered, with a range of (-1, 1).
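It is defined as
\[f(x) = \tanh(x) = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}},\]
and its derivative is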
\[f'(x) = 1 - \tanh^2(x) = 1 - f(x)^2\]
# Plot the tanh function
p3 <- ggplot(data.frame(x = x, y = tanh_func(x)), aes(x, y)) +
  geom_line(color = "green", linewidth = 1.2) +
  labs(title = "Tanh Function",
       x = "x", y = "f(x)") +
  theme_minimal() +
  geom_hline(yintercept = c(-1, 0, 1), linetype = "dashed", alpha = 0.5) +
  ylim(-1.1, 1.1)
# Plot the derivative
p4 <- ggplot(data.frame(x = x, y = tanh_derivative(x)), aes(x, y)) +
  geom_line(color = "darkgreen", linewidth = 1.2) +
  labs(title = "Tanh Derivative",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5)
grid.arrange(p3, p4, ncol = 2)
Characteristics:
- Range (-1, 1) and zero-centered, which generally makes optimization easier than with the sigmoid.
- Still saturates for large |x|, so it also suffers from vanishing gradients.
- Traditionally used in RNNs and classic hidden layers.
ReLU is the most popular activation function in deep learning because of its simplicity and effectiveness.
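It is defined as
\[f(x) = \max(0, x),\]
and its derivative is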
\[f'(x) = \begin{cases} 0 & \text{if } x < 0 \\ 1 & \text{if } x > 0 \\ \text{undefined} & \text{at } x = 0 \end{cases}\]
# Plot the ReLU function
p5 <- ggplot(data.frame(x = x, y = relu(x)), aes(x, y)) +
  geom_line(color = "purple", linewidth = 1.2) +
  labs(title = "ReLU Function",
       x = "x", y = "f(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
# Plot the derivative
p6 <- ggplot(data.frame(x = x, y = relu_derivative(x)), aes(x, y)) +
  geom_line(color = "darkmagenta", linewidth = 1.2) +
  labs(title = "ReLU Derivative",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  geom_hline(yintercept = c(0, 1), linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
  ylim(-0.1, 1.1)
grid.arrange(p5, p6, ncol = 2)
Characteristics:
- Range [0, +∞); extremely cheap to compute (a single comparison).
- Does not saturate for positive inputs, which mitigates the vanishing-gradient problem.
- Units that only receive negative inputs get zero gradient and can stop learning ("dying ReLU").
- The default choice for hidden layers, especially in CNNs.
Leaky ReLU is a variation of ReLU that allows a small gradient for negative values.
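It is defined as
\[f(x) = \begin{cases} x & \text{if } x > 0 \\ \alpha x & \text{if } x \le 0 \end{cases}\]
and its derivative is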
\[f'(x) = \begin{cases} \alpha & \text{if } x < 0 \\ 1 & \text{if } x > 0 \\ \text{undefined} & \text{at } x = 0 \end{cases}\]
# Plot the Leaky ReLU function
p7 <- ggplot(data.frame(x = x, y = leaky_relu(x)), aes(x, y)) +
  geom_line(color = "orange", linewidth = 1.2) +
  labs(title = "Leaky ReLU Function (α = 0.01)",
       x = "x", y = "f(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
# Plot the derivative
p8 <- ggplot(data.frame(x = x, y = leaky_relu_derivative(x)), aes(x, y)) +
  geom_line(color = "darkorange", linewidth = 1.2) +
  labs(title = "Leaky ReLU Derivative",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  geom_hline(yintercept = c(0, 0.01, 1), linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5) +
  ylim(-0.1, 1.1)
grid.arrange(p7, p8, ncol = 2)
Characteristics:
- Keeps a small slope α (0.01 here) for negative inputs, so units never lose their gradient completely (see the one-line comparison below).
- Introduces an extra hyperparameter α.
- A useful drop-in replacement when plain ReLU leaves many units inactive.
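The difference with ReLU shows up only on the negative side; for example, at an illustrative point x = -3:

# ReLU passes no gradient for negative inputs; Leaky ReLU passes a small one
c(relu_derivative(-3), leaky_relu_derivative(-3))
# 0.00 and 0.01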
Swish is an activation function proposed by Google that combines the advantages of ReLU and the sigmoid.
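It is defined as
\[f(x) = x \cdot \sigma(\beta x),\]
and its derivative is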
\[f'(x) = \sigma(\beta x) + x \cdot \sigma(\beta x) \cdot (1 - \sigma(\beta x)) \cdot \beta\]
where \(\sigma(x) = \frac{1}{1 + e^{-x}}\).
# Plot the Swish function
p9 <- ggplot(data.frame(x = x, y = swish(x)), aes(x, y)) +
  geom_line(color = "brown", linewidth = 1.2) +
  labs(title = "Swish Function (β = 1)",
       x = "x", y = "f(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
# Plot the derivative
p10 <- ggplot(data.frame(x = x, y = swish_derivative(x)), aes(x, y)) +
  geom_line(color = "darkred", linewidth = 1.2) +
  labs(title = "Swish Derivative",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  geom_hline(yintercept = 0, linetype = "dashed", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", alpha = 0.5)
grid.arrange(p9, p10, ncol = 2)
Characteristics:
- Smooth and non-monotonic: it dips slightly below zero before increasing (see the check below).
- With β = 1 it reduces to x · σ(x), also known as SiLU.
- Often matches or outperforms ReLU in deep models, at a somewhat higher computational cost.
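The dip below zero can be located numerically with base R's `optimize`; the search interval here is an illustrative choice:

# Find the minimum of Swish (β = 1) on a negative interval
optimize(swish, interval = c(-5, 0))
# minimum at x ≈ -1.28, with f(x) ≈ -0.28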
# Build a data frame with all the functions
df_functions <- data.frame(
x = rep(x, 5),
y = c(sigmoid(x), tanh_func(x), relu(x), leaky_relu(x), swish(x)),
Function = rep(c("Sigmoid", "Tanh", "ReLU", "Leaky ReLU", "Swish"), each = length(x))
)
# Comparative plot of the functions
p_comp1 <- ggplot(df_functions, aes(x, y, color = Function)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Comparison of Activation Functions",
       x = "x", y = "f(x)") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c("blue", "green", "purple", "orange", "brown"))
# Build a data frame with all the derivatives
df_derivatives <- data.frame(
x = rep(x, 5),
y = c(sigmoid_derivative(x), tanh_derivative(x), relu_derivative(x),
leaky_relu_derivative(x), swish_derivative(x)),
Function = rep(c("Sigmoid", "Tanh", "ReLU", "Leaky ReLU", "Swish"), each = length(x))
)
# Comparative plot of the derivatives
p_comp2 <- ggplot(df_derivatives, aes(x, y, color = Function)) +
  geom_line(linewidth = 1.2) +
  labs(title = "Comparison of Derivatives",
       x = "x", y = "f'(x)") +
  theme_minimal() +
  theme(legend.position = "bottom") +
  scale_color_manual(values = c("blue", "green", "purple", "orange", "brown"))
grid.arrange(p_comp1, p_comp2, nrow = 2)
# Build the comparison table
comparison_data <- data.frame(
  Function = c("Sigmoid", "Tanh", "ReLU", "Leaky ReLU", "Swish"),
  Range = c("(0, 1)", "(-1, 1)", "[0, +∞)", "(-∞, +∞)", "≈ [-0.28, +∞)"),
  Pros = c("Probabilistic interpretation", "Zero-centered", "Simple, efficient", "Avoids dying neurons", "Superior performance"),
  Cons = c("Vanishing gradient", "Vanishing gradient", "Dying ReLU", "Extra hyperparameter", "More computationally expensive"),
  Recommended_Use = c("Binary classification (output)", "RNNs, classic hidden layers", "Default choice, CNNs", "When ReLU fails", "High-performance models")
)
kable(comparison_data,
col.names = c("Function", "Range", "Pros", "Cons", "Recommended Use"),
caption = "Comparison of Activation Functions") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
column_spec(1, bold = TRUE) %>%
row_spec(0, bold = TRUE, color = "white", background = "#3498db")
| Function   | Range          | Pros                         | Cons                           | Recommended Use                |
|------------|----------------|------------------------------|--------------------------------|--------------------------------|
| Sigmoid    | (0, 1)         | Probabilistic interpretation | Vanishing gradient             | Binary classification (output) |
| Tanh       | (-1, 1)        | Zero-centered                | Vanishing gradient             | RNNs, classic hidden layers    |
| ReLU       | [0, +∞)        | Simple, efficient            | Dying ReLU                     | Default choice, CNNs           |
| Leaky ReLU | (-∞, +∞)       | Avoids dying neurons         | Extra hyperparameter           | When ReLU fails                |
| Swish      | ≈ [-0.28, +∞)  | Superior performance         | More computationally expensive | High-performance models        |