Este documento muestra el funcionamiento de una red neuronal simple 2–2–1 con función de activación sigmoide, incluyendo propagación hacia adelante, cálculo de error y retropropagación con actualización de pesos.
# Entradas y objetivo
x1 <- 0; x2 <- 1
t1 <- 1 # objetivo (target)
# Pesos iniciales
w1 <- 0.1; w2 <- 0.5
w3 <- -0.7; w4 <- 0.3
w5 <- 0.2; w6 <- 0.4
b1 <- 0; b2 <- 0; b3 <- 0 # sesgos
alpha <- 0.25 # tasa de aprendizaje
sigmoid <- function(z) 1/(1+exp(-z))
dsigmoid <- function(fz) fz*(1-fz) # derivada usando f(z)
z1 <- w1*x1 + w3*x2 + b1
h1 <- sigmoid(z1)
z2 <- w2*x1 + w4*x2 + b2
h2 <- sigmoid(z2)
z3 <- w5*h1 + w6*h2 + b3
y <- sigmoid(z3) # salida de la red
c(z1=z1, h1=h1, z2=z2, h2=h2, z3=z3, y=y)
## z1 h1 z2 h2 z3 y
## -0.7000000 0.3318122 0.3000000 0.5744425 0.2961395 0.5734985
Etotal <- 0.5*(t1 - y)^2
Etotal
## [1] 0.09095176
dE_dz3 <- (y - t1) * dsigmoid(y) # usando f'(z)=f(z)(1-f(z))
dE_dw5 <- dE_dz3 * h1
dE_dw6 <- dE_dz3 * h2
c(dE_dz3=dE_dz3, dE_dw5=dE_dw5, dE_dw6=dE_dw6)
## dE_dz3 dE_dw5 dE_dw6
## -0.10432140 -0.03461512 -0.05992665
w5_new <- w5 - alpha*dE_dw5
w6_new <- w6 - alpha*dE_dw6
c(w5_new=w5_new, w6_new=w6_new)
## w5_new w6_new
## 0.2086538 0.4149817
# Usamos los pesos *originales* (antes de actualizar) para las derivadas hacia atrás.
dE_dz1 <- dE_dz3 * w5 * dsigmoid(h1)
dE_dz2 <- dE_dz3 * w6 * dsigmoid(h2)
dE_dw1 <- dE_dz1 * x1
dE_dw3 <- dE_dz1 * x2
dE_dw2 <- dE_dz2 * x1
dE_dw4 <- dE_dz2 * x2
c(dE_dz1=dE_dz1, dE_dz2=dE_dz2,
dE_dw1=dE_dw1, dE_dw3=dE_dw3,
dE_dw2=dE_dw2, dE_dw4=dE_dw4)
## dE_dz1 dE_dz2 dE_dw1 dE_dw3 dE_dw2 dE_dw4
## -0.004625879 -0.010200893 0.000000000 -0.004625879 0.000000000 -0.010200893
w1_new <- w1 - alpha*dE_dw1
w3_new <- w3 - alpha*dE_dw3
w2_new <- w2 - alpha*dE_dw2
w4_new <- w4 - alpha*dE_dw4
c(w1_new=w1_new, w2_new=w2_new, w3_new=w3_new, w4_new=w4_new)
## w1_new w2_new w3_new w4_new
## 0.1000000 0.5000000 -0.6988435 0.3025502
old <- c(w1=w1,w2=w2,w3=w3,w4=w4,w5=w5,w6=w6)
new <- c(w1=w1_new,w2=w2_new,w3=w3_new,w4=w4_new,w5=w5_new,w6=w6_new)
delta <- new - old
tabla <- data.frame(Peso=names(old),
Inicial=as.numeric(old),
Gradiente=c(dE_dw1,dE_dw2,dE_dw3,dE_dw4,dE_dw5,dE_dw6),
Actualizado=as.numeric(new),
Cambio=as.numeric(delta))
knitr::kable(tabla, digits=6)
Peso | Inicial | Gradiente | Actualizado | Cambio |
---|---|---|---|---|
w1 | 0.1 | 0.000000 | 0.100000 | 0.000000 |
w2 | 0.5 | 0.000000 | 0.500000 | 0.000000 |
w3 | -0.7 | -0.004626 | -0.698844 | 0.001156 |
w4 | 0.3 | -0.010201 | 0.302550 | 0.002550 |
w5 | 0.2 | -0.034615 | 0.208654 | 0.008654 |
w6 | 0.4 | -0.059927 | 0.414982 | 0.014982 |
z1_n <- w1_new*x1 + w3_new*x2 + b1; h1_n <- sigmoid(z1_n)
z2_n <- w2_new*x1 + w4_new*x2 + b2; h2_n <- sigmoid(z2_n)
z3_n <- w5_new*h1_n + w6_new*h2_n + b3; y_n <- sigmoid(z3_n)
c(z1_new=z1_n, h1_new=h1_n, z2_new=z2_n, h2_new=h2_n,
z3_new=z3_n, y_new=y_n)
## z1_new h1_new z2_new h2_new z3_new y_new
## -0.6988435 0.3320687 0.3025502 0.5750658 0.3079292 0.5763797
w1_star <- w1_new; w2_star <- w2_new; w3_star <- w3_new
w4_star <- w4_new; w5_star <- w5_new; w6_star <- w6_new
# ========== FORWARD (usando pesos de la 1ª época: w*_star) ==========
z1_2 <- w1_star*x1 + w3_star*x2 + 0
h1_2 <- sigmoid(z1_2)
z2_2 <- w2_star*x1 + w4_star*x2 + 0
h2_2 <- sigmoid(z2_2)
z3_2 <- w5_star*h1_2 + w6_star*h2_2 + 0
y_2 <- sigmoid(z3_2)
Etotal_2 <- 0.5*(t1 - y_2)^2
c(z1_2=z1_2, h1_2=h1_2, z2_2=z2_2, h2_2=h2_2, z3_2=z3_2, y_2=y_2, Etotal_2=Etotal_2)
## z1_2 h1_2 z2_2 h2_2 z3_2 y_2
## -0.69884353 0.33206868 0.30255022 0.57506582 0.30792916 0.57637971
## Etotal_2
## 0.08972707
# ========== BACKPROP ==========
# OJO: usamos (t1 - y_2) para que el signo coincida con tus fórmulas
dE_dz3_2 <- (t1 - y_2) * dsigmoid(y_2)
dE_dw5_2 <- dE_dz3_2 * h1_2
dE_dw6_2 <- dE_dz3_2 * h2_2
# Gradientes hacia la capa oculta con los pesos de ESTA época (w5_star, w6_star)
dE_dz1_2 <- dE_dz3_2 * w5_star * dsigmoid(h1_2)
dE_dz2_2 <- dE_dz3_2 * w6_star * dsigmoid(h2_2)
dE_dw1_2 <- dE_dz1_2 * x1
dE_dw3_2 <- dE_dz1_2 * x2
dE_dw2_2 <- dE_dz2_2 * x1
dE_dw4_2 <- dE_dz2_2 * x2
c(dE_dz3_2=dE_dz3_2, dE_dw5_2=dE_dw5_2, dE_dw6_2=dE_dw6_2,
dE_dw1_2=dE_dw1_2, dE_dw3_2=dE_dw3_2, dE_dw2_2=dE_dw2_2, dE_dw4_2=dE_dw4_2)
## dE_dz3_2 dE_dw5_2 dE_dw6_2 dE_dw1_2 dE_dw3_2 dE_dw2_2
## 0.103433731 0.034347103 0.059481203 0.000000000 0.004786832 0.000000000
## dE_dw4_2
## 0.010488909
# ========== ACTUALIZACIÓN DE PESOS (segunda época) ==========
w1_star2 <- w1_star - alpha*dE_dw1_2
w2_star2 <- w2_star - alpha*dE_dw2_2
w3_star2 <- w3_star - alpha*dE_dw3_2
w4_star2 <- w4_star - alpha*dE_dw4_2
w5_star2 <- w5_star - alpha*dE_dw5_2
w6_star2 <- w6_star - alpha*dE_dw6_2
c(w1_star2=w1_star2, w2_star2=w2_star2, w3_star2=w3_star2,
w4_star2=w4_star2, w5_star2=w5_star2, w6_star2=w6_star2)
## w1_star2 w2_star2 w3_star2 w4_star2 w5_star2 w6_star2
## 0.1000000 0.5000000 -0.7000402 0.2999280 0.2000670 0.4001114