Objective function :
\[ g(w) = (\frac{1}{4}w_o - 2w_1)^2+(\frac{1}{4}w_o - \frac{1}{2}w_1)^2 \]
gradient :
\[ \nabla g(w) = \begin{bmatrix} \frac{\partial g}{\partial w_0} \\ \frac{\partial g}{\partial w_1} \end{bmatrix} = \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} \]
tolerance :
\[ ||g(w^{k})|| = \]
\[ w^{k} = w^{k-1} - \alpha \nabla g(w) \]
\[ w^{(k+1)} = w^{(k)} - \alpha \frac{\nabla g(w^{(k)})} {\|\nabla g(w^{(k)})\|} \]
\[ \nabla g(w) = \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} \]
\[ \left\|\nabla g(w)\right\| = \sqrt{ \left(\frac14 w_0 - \frac54 w_1\right)^2 + \left(-\frac54 w_0 + \frac{17}{2} w_1\right)^2 } \]
\[ \frac{\nabla g(w)} {\|\nabla g(w)\|} = \frac{ \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} }{ \sqrt{ \left(\frac14 w_0 - \frac54 w_1\right)^2 + \left(-\frac54 w_0 + \frac{17}{2} w_1\right)^2 } } \]
\[ w^{(k)} = w^{(k-1)} - \alpha \, \mathrm{sign}\!\left(\nabla g(w^{(k-1)})\right) \]
\[ w^{(k)} = w^{(k-1)} - a^{(k-1)} \odot \nabla g(w^{(k-1)}) \]
\[ a^{(k-1)} = \begin{bmatrix} \dfrac{\alpha}{ \left| \frac{\partial g}{\partial w_1}(w^{(k-1)}) \right| } \\[1em] \vdots \\[0.5em] \dfrac{\alpha}{ \left| \frac{\partial g}{\partial w_N}(w^{(k-1)}) \right| } \end{bmatrix} \]
gd <- function(g, g_prime, alpha_choice = 1,
max_its = 100, w0,
tol = 1e-5,
normalized = 0) {
w <- w0
w_history <- list(w)
g_history <- c(g(w))
for (k in 1:max_its) {
grad <- g_prime(w)
grad_length <- sqrt(sum(grad^2))
if (grad_length < tol) {
break
}
if (normalized == 1) {
grad <- grad / grad_length
}
if (alpha_choice == "diminishing") {
alpha <- 1 / k
} else {
alpha <- alpha_choice
}
w <- w - alpha * grad
w_history[[k + 1]] <- w
g_history[k + 1] <- g(w)
}
return(list(
w_final = w,
g_final = g(w),
w_history = w_history,
g_history = g_history
))
}
g <- function(w) {
(1/4 * w[1] - 2 * w[2])^2 +
(1/4 * w[1] - 1/2 * w[2])^2
}
g_prime <- function(w) {
c(
1/4 * w[1] - 5/4 * w[2],
-5/4 * w[1] + 17/2 * w[2]
)
}