Q1) Gradient Decent Algorithms

Objective function :

\[ g(w) = (\frac{1}{4}w_o - 2w_1)^2+(\frac{1}{4}w_o - \frac{1}{2}w_1)^2 \]

gradient :

\[ \nabla g(w) = \begin{bmatrix} \frac{\partial g}{\partial w_0} \\ \frac{\partial g}{\partial w_1} \end{bmatrix} = \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} \]

tolerance :

\[ ||g(w^{k})|| = \]

Normal GD

\[ w^{k} = w^{k-1} - \alpha \nabla g(w) \]

Normalized GD

\[ w^{(k+1)} = w^{(k)} - \alpha \frac{\nabla g(w^{(k)})} {\|\nabla g(w^{(k)})\|} \]

\[ \nabla g(w) = \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} \]

\[ \left\|\nabla g(w)\right\| = \sqrt{ \left(\frac14 w_0 - \frac54 w_1\right)^2 + \left(-\frac54 w_0 + \frac{17}{2} w_1\right)^2 } \]

\[ \frac{\nabla g(w)} {\|\nabla g(w)\|} = \frac{ \begin{bmatrix} \frac14 w_0 - \frac54 w_1 \\ -\frac54 w_0 + \frac{17}{2} w_1 \end{bmatrix} }{ \sqrt{ \left(\frac14 w_0 - \frac54 w_1\right)^2 + \left(-\frac54 w_0 + \frac{17}{2} w_1\right)^2 } } \]

Component-Normalized GD

\[ w^{(k)} = w^{(k-1)} - \alpha \, \mathrm{sign}\!\left(\nabla g(w^{(k-1)})\right) \]

\[ w^{(k)} = w^{(k-1)} - a^{(k-1)} \odot \nabla g(w^{(k-1)}) \]

\[ a^{(k-1)} = \begin{bmatrix} \dfrac{\alpha}{ \left| \frac{\partial g}{\partial w_1}(w^{(k-1)}) \right| } \\[1em] \vdots \\[0.5em] \dfrac{\alpha}{ \left| \frac{\partial g}{\partial w_N}(w^{(k-1)}) \right| } \end{bmatrix} \]

gd <- function(g, g_prime, alpha_choice = 1,
               max_its = 100, w0,
               tol = 1e-5,
               normalized = 0) {
  
  w <- w0
  w_history <- list(w)
  g_history <- c(g(w))
  
  for (k in 1:max_its) {
    
    grad <- g_prime(w)
    
    grad_length <- sqrt(sum(grad^2))
    
    if (grad_length < tol) {
      break
    }
    
    if (normalized == 1) {
      grad <- grad / grad_length
    }
    
    if (alpha_choice == "diminishing") {
      alpha <- 1 / k
    } else {
      alpha <- alpha_choice
    }
    
    w <- w - alpha * grad
    
    w_history[[k + 1]] <- w
    g_history[k + 1] <- g(w)
  }
  
  return(list(
    w_final = w,
    g_final = g(w),
    w_history = w_history,
    g_history = g_history
  ))
}

Functions

g <- function(w) {
  (1/4 * w[1] - 2 * w[2])^2 +
  (1/4 * w[1] - 1/2 * w[2])^2
}

g_prime <- function(w) {
  c(
    1/4 * w[1] - 5/4 * w[2],
    -5/4 * w[1] + 17/2 * w[2]
  )
}