Optimisasi Tanpa Kendala

Satu Peubah

Newton Rhapson

library("numDeriv")
newton.raphson <- function(f, a, b, tol = 1e-5, n = 1000) {
  require(numDeriv) # Package for computing f'(x)
  
  x0 <- a # Set start value to supplied lower bound
  k <- n # Initialize for iteration results
  
  # Check the upper and lower bounds to see if approximations result in 0
  fa <- f(a)
  if (fa == 0.0) {
    return(a)
  }
  
  fb <- f(b)
  if (fb == 0.0) {
    return(b)
  }
  
  for (i in 1:n) {
    dx <- genD(func = f, x = x0)$D[1] # First-order derivative f'(x0)
    x1 <- x0 - (f(x0) / dx) # Calculate next value x1
    k[i] <- x1 # Store x1
    # Once the difference between x0 and x1 becomes sufficiently small, output the results.
    if (abs(x1 - x0) < tol) {
      root.approx <- tail(k, n=1)
      res <- list('root approximation' = root.approx, 'iterations' = k)
      return(res)
    }
    # If Newton-Raphson has not yet reached convergence set x1 as x0 and continue
    x0 <- x1
  }
  print('Too many iterations in method')
}


#func2 <- function(x) {
#  x^3 - 2* x - 5
#}

func2 <- function(x) {
  x^3 + x^2 - 1
}

#func2 <- function(x) {
#  x^5-5*x^3-20*x+5
#}

newton.raphson(func2, -2.5, 2.5)
## $`root approximation`
## [1] 0.7548777
## 
## $iterations
##  [1] -1.7454545 -1.1663868 -0.4650477 -3.6088306 -2.5107810 -1.7531413
##  [7] -1.1730487 -0.4782675 -3.7361590 -2.5969500 -1.8142901 -1.2250904
## [13] -0.5732422 -5.9245450 -4.0641952 -2.8182367 -1.9693910 -1.3509739
## [19] -0.7594446  3.3150296  2.1427659  1.3991371  0.9728308  0.7916162
## [25]  0.7561786  0.7548794  0.7548777
f <- function(x) {
  x^3 + x^2 - 1
}
curve(f,xlim=c(0,3), col='steelblue',lwd=2)
abline(h=0)
abline(v=0)

Quadratic

#' Quadratic Interpolation Method.
#'
#' \code{quadraticinterpolation} is a one-dimensional minimization method that finds the minimizing
#' step length \code{alpha} through the approximation by a quadratic function. If the \code{alpha}
#' is not sot sufficiently to the minimum a refit of the function is used.
#'
#' @section Reduction of points, interval:
#' To reduce the interval between the three points, first evaluate the value of the function
#' in alpha and in the poinst: lower (\code{l}), upper (\code{u}) and middle (\code{m}).
#' After this evaluation, it is analyzed (through comparisons) at which two points the
#' alpha is between and are chosen the next values for the refite
#'
#' @param phi.f A univariate function.
#' @param l,u Numbers, initial interval (or points).
#' @param eps A number, used in the stop condition.
#' @return Returns the step length \code{alpha}.
#'
#' @section About the parameter phif:
#' \code{phif} should be a univariate function that returns the value of the function
#' at a given point.
#'
#' @seealso The documentation of the function \code{univariate_f} in this package.
#'
#' @examples
#' #phi <- function(alpha) {...}
#' #quadraticinterpolation(phi, l = 0, u = 10, eps = 1e-4)
#' #quadraticinterpolation(phi, 3, 7, 1e-5)
#' @references
#' \enumerate{
#' \item Rao, Singiresu S.; \emph{Engineering Optimization Theory and and Practice}, 4th ed., pages 273:279.
#'
#' }
#' @export


quadraticinterpolation <- function(phi.f, l = 0, u = 1, eps = 1e-6)
{
  #polynomials evaluated at 3 points(l, m, u) for interpolation: phi_fl, phi_fm, phi_fu
  if(l == u)
  {
    l <- 0
  }
  m <- (l + u)/2
  phi.fl <- phi.f(l)
  phi.fm <- phi.f(m)
  phi.fu <- phi.f(u)
  
  
  #a, b and c are coefficients of the polynomial (hx)
  y <- ((l - m)*(m - u)*(u - l))
  a <- (phi.fl*m*u*(u - m) + phi.fm*u*l*(l - u) + phi.fu*l*m*(m - l))/y
  b <- (phi.fl*(m^2 - u^2) + phi.fm*(u^2 - l^2) + phi.fu*(l^2 - m^2))/y
  c <- -(phi.fl*(m - u) + phi.fm*(u - l) + phi.fu*(l - m))/y
  
  alpha <- -b/(2*c)
  falpha <- phi.f(alpha)
  
  ncf <- 4
  
  hx <- function(x)  c*x^2 + b*x + a #environment global
  
  while((abs((hx(alpha) - falpha)) > eps))
  {
    #choice of l, m and C new values
    if(alpha <= m)
    {
      if(phi.fm >= falpha){
        u <- m; phi.fu <- phi.fm
        m <- alpha; phi.fm <- falpha
      }else{
        l <- alpha; phi.fl <- falpha
      }
    }else{
      if(phi.fm >= falpha){
        l <- m; phi.fl <- phi.fm
        m <- alpha; phi.fm <- falpha
      }else{
        u <- alpha; phi.fu <- falpha
      }
      
    }
    
    
    #refitting the function
    y <- ((l - m)*(m - u)*(u - l))
    a <- (phi.fl*m*u*(u - m) + phi.fm*u*l*(l - u) + phi.fu*l*m*(m - l))/y
    b <- (phi.fl*(m^2 - u^2) + phi.fm*(u^2 - l^2) + phi.fu*(l^2 - m^2))/y
    c <- -(phi.fl*(m - u) + phi.fm*(u - l) + phi.fu*(l - m))/y
    
    #quadratic interpolation
    
    alpha <- -b/(2*c)
    falpha <- phi.f(alpha)
    ncf <- ncf + 1
    
  }
  message("Method: Quadratic Interpolation. Number of calls of the objective function: ", ncf)
  return(alpha)
}

f <- function(x) {
  x^4 -2*x^2 + 1/4
}
quadraticinterpolation(f)
## Method: Quadratic Interpolation. Number of calls of the objective function: 4
## [1] -1.5
quadraticinterpolation(f, l = -1.5, u=7, eps=1e-5)
## Method: Quadratic Interpolation. Number of calls of the objective function: 34
## [1] 0.9974924
curve(f,xlim=c(-2,2), col='steelblue',lwd=2)
abline(h=0)
abline(v=0)

Multivariate

Simplex (Nelder-Mead)

#' Nelder-Mead Simplex Optimization
#' 
#' @param fn The objective function to minimize.
#' @param simplex A matrix where each row represents an initial simplex vertex.
#' @param alpha Reflection coefficient (default = 1.0)
#' @param beta Contraction coefficient (default = 0.5)
#' @param gamma Expansion coefficient (default = 2.0)
#' @param eps Convergence criterion (default = 0.2)
#' @param max_iter Maximum number of iterations

nelder_mead <- function(fn, simplex, alpha = 1.0, beta = 0.5, gamma = 2.0, eps = 0.2, max_iter = 1000) {
  
  # Number of dimensions (n) and vertices (n+1)
  n <- ncol(simplex)
  n_vertices <- nrow(simplex)
  
  iter <- 0
  
  while (iter < max_iter) {
    # 1. Evaluate function at all vertices
    f_vals <- apply(simplex, 1, fn)
    
    # 2. Order the vertices based on function values: f(x_1) <= f(x_2) <= ... <= f(x_{n+1})
    order_idx <- order(f_vals)
    simplex <- simplex[order_idx, ]
    f_vals <- f_vals[order_idx]
    
    # Check convergence: standard deviation of the function values <= eps
    f_mean <- mean(f_vals)
    f_std <- sqrt(sum((f_vals - f_mean)^2) / n_vertices)
    
    if (f_std <= eps) {
      message(sprintf("Converged successfully at iteration %d", iter))
      break
    }
    
    # Assign names to the best, second worst, and worst points for readability
    x_best <- simplex[1, ]
    f_best <- f_vals[1]
    
    x_second_worst <- simplex[n_vertices - 1, ]
    f_second_worst <- f_vals[n_vertices - 1]
    
    x_worst <- simplex[n_vertices, ]
    f_worst <- f_vals[n_vertices]
    
    # 3. Calculate the centroid of all points EXCEPT the worst point
    x_centroid <- colMeans(simplex[1:n, , drop = FALSE])
    
    # 4. Reflection
    x_r <- x_centroid + alpha * (x_centroid - x_worst)
    f_r <- fn(x_r)
    
    if (f_r >= f_best && f_r < f_second_worst) {
      # Reflected point is neither the best nor the worst. Replace the worst point.
      simplex[n_vertices, ] <- x_r
      
    } else if (f_r < f_best) {
      # 5. Expansion (Reflected point is the new best)
      x_e <- x_centroid + gamma * (x_r - x_centroid)
      f_e <- fn(x_e)
      
      if (f_e < f_r) {
        # Expansion is successful
        simplex[n_vertices, ] <- x_e
      } else {
        # Expansion failed, stick with reflected point
        simplex[n_vertices, ] <- x_r
      }
      
    } else {
      # 6. Contraction (Reflected point is worse than the second worst)
      if (f_r < f_worst) {
        # Outside contraction
        x_c <- x_centroid + beta * (x_r - x_centroid)
        f_c <- fn(x_c)
        
        if (f_c <= f_r) {
          simplex[n_vertices, ] <- x_c
        } else {
          # 7. Shrink (if contraction fails)
          for (i in 2:n_vertices) {
            simplex[i, ] <- x_best + 0.5 * (simplex[i, ] - x_best)
          }
        }
      } else {
        # Inside contraction
        x_c <- x_centroid - beta * (x_worst - x_centroid)
        f_c <- fn(x_c)
        
        if (f_c < f_worst) {
          simplex[n_vertices, ] <- x_c
        } else {
          # 7. Shrink (if contraction fails)
          for (i in 2:n_vertices) {
            simplex[i, ] <- x_best + 0.5 * (simplex[i, ] - x_best)
          }
        }
      }
    }
    
    iter <- iter + 1
  }
  
  if (iter == max_iter) {
    warning("Maximum iterations reached without full convergence.")
  }
  
  return(list(
    minimum_value = f_vals[1],
    optimal_parameters = simplex[1, ],
    iterations = iter,
    final_simplex = simplex
  ))
}

## CONTOH SOAL
# 1. Define the objective function
objective_function <- function(x) {
  x1 <- x[1]
  x2 <- x[2]
  return(x1 - x2 + 2*x1^2 + 2*x1*x2 + x2^2)
}

# 2. Define the initial simplex as a matrix (each row is X1, X2, X3)
initial_simplex <- matrix(c(
  4.0, 4.0,  # X1
  5.0, 4.0,  # X2
  4.0, 5.0   # X3
), nrow = 3, byrow = TRUE)

# 3. Run the optimization with the given parameters
# alpha = 1.0, beta = 0.5, gamma = 2.0, eps = 0.2
result <- nelder_mead(
  fn = objective_function, 
  simplex = initial_simplex, 
  alpha = 1.0, 
  beta = 0.5, 
  gamma = 2.0, 
  eps = 0.001 # adjust untuk dapat hasil yang lebih akurat
)
## Converged successfully at iteration 23
print(result)
## $minimum_value
## [1] -1.248632
## 
## $optimal_parameters
## [1] -1.030273  1.509033
## 
## $iterations
## [1] 23
## 
## $final_simplex
##            [,1]     [,2]
## [1,] -1.0302734 1.509033
## [2,] -1.0107422 1.470947
## [3,] -0.9550781 1.462402

Powell

#' Powell's Method Optimization
#' 
#' @param fn The objective function to minimize.
#' @param start_point A numeric vector indicating the initial starting point.
#' @param tol Convergence criterion based on step size (default = 1e-5).
#' @param max_iter Maximum number of full iterations (default = 100).
#' @param search_bounds The interval for the 1D line search (default = c(-10, 10)).
powells_method_opt <- function(fn, start_point, tol = 1e-5, max_iter = 100, search_bounds = c(-10, 10)) {
  
  n_dims <- length(start_point)
  x_current <- start_point
  
  # Initialize the search directions as the standard coordinate axes (Identity matrix)
  # For 2D, this is [1, 0] and [0, 1]
  directions <- diag(n_dims)
  
  iter <- 0
  converged <- FALSE
  
  while (iter < max_iter && !converged) {
    x_start_cycle <- x_current
    
    # 1. Perform 1D line searches along all current directions
    for (i in 1:n_dims) {
      u <- directions[, i] # Current search direction vector
      
      # Wrapper function to search along direction u using scalar lambda
      fn_1d <- function(lambda) {
        fn(x_current + lambda * u)
      }
      
      # Find the optimal step size (lambda) along this direction
      opt_res <- optimize(fn_1d, interval = search_bounds)
      
      # Move to the new point
      x_current <- x_current + opt_res$minimum * u
    }
    
    # 2. Calculate the net displacement over this cycle
    new_direction <- x_current - x_start_cycle
    
    # 3. Check for convergence (if the net movement is virtually zero)
    if (sqrt(sum(new_direction^2)) < tol) {
      converged <- TRUE
      message(sprintf("Converged successfully at iteration %d", iter + 1))
      break
    }
    
    # 4. Perform one final line search along this new displacement direction
    fn_1d_new <- function(lambda) {
      fn(x_current + lambda * new_direction)
    }
    opt_res_new <- optimize(fn_1d_new, interval = search_bounds)
    x_current <- x_current + opt_res_new$minimum * new_direction
    
    # 5. Update the direction set: drop the first direction, append the new one
    if (n_dims > 1) {
      directions <- cbind(directions[, 2:n_dims], new_direction)
    } else {
      directions <- as.matrix(new_direction)
    }
    
    iter <- iter + 1
  }
  
  if (!converged) {
    warning("Maximum iterations reached without full convergence.")
  }
  
  return(list(
    minimum_value = fn(x_current),
    optimal_parameters = x_current,
    iterations = iter
  ))
}

# 1. Define the objective function
objective_function <- function(x) {
  x1 <- x[1]
  x2 <- x[2]
  return(x1 - x2 + 2*x1^2 + 2*x1*x2 + x2^2)
}

# 2. Define the starting point
start_pt <- c(4.0, 4.0)

# 3. Run Powell's method
result_powell <- powells_method_opt(
  fn = objective_function, 
  start_point = start_pt,
  tol = 1e-5
)
## Converged successfully at iteration 3
# 4. Print the results
print(result_powell)
## $minimum_value
## [1] -1.25
## 
## $optimal_parameters
## [1] -1.0  1.5
## 
## $iterations
## [1] 2

Random Walk

#' Random Walk Optimization
#' 
#' @param fn The objective function to minimize.
#' @param start_point Initial starting point (a numeric vector).
#' @param initial_step Initial maximum distance to explore in any dimension.
#' @param min_step Convergence criterion: stop when step size falls below this.
#' @param max_tries Maximum number of random attempts around a point before shrinking the step size.
random_walk_opt <- function(fn, start_point, initial_step = 1.0, min_step = 1e-4, max_tries = 100) {
  
  x_current <- start_point
  f_current <- fn(x_current)
  n_dims <- length(start_point)
  
  step_size <- initial_step
  total_iterations <- 0
  
  # Continue searching as long as our search radius is larger than the minimum threshold
  while (step_size > min_step) {
    improved <- FALSE
    
    # Try random directions around the current point
    for (i in 1:max_tries) {
      total_iterations <- total_iterations + 1
      
      # Generate a random step in n-dimensions 
      # runif generates uniform random numbers between -step_size and +step_size
      random_step <- runif(n_dims, min = -step_size, max = step_size)
      
      x_new <- x_current + random_step
      f_new <- fn(x_new)
      
      # If the new random point is better, move there immediately and break the try-loop
      if (f_new < f_current) {
        x_current <- x_new
        f_current <- f_new
        improved <- TRUE
        break 
      }
    }
    
    # If we tried 'max_tries' times and didn't find a better point, shrink the search radius
    if (!improved) {
      step_size <- step_size / 2
    }
  }
  
  return(list(
    minimum_value = f_current,
    optimal_parameters = x_current,
    total_function_evaluations = total_iterations
  ))
}

# Contoh Soal
# 1. Define the objective function
objective_function <- function(x) {
  x1 <- x[1]
  x2 <- x[2]
  return(x1 - x2 + 2*x1^2 + 2*x1*x2 + x2^2)
}

# 2. Define the starting point
start_pt <- c(4.0, 4.0)

# 3. Run the random walk optimization
# We'll set the initial step to 1.0, similar to the alpha in your previous problem.
set.seed(42) # Set seed for reproducibility since this is a stochastic method
result_rw <- random_walk_opt(
  fn = objective_function, 
  start_point = start_pt, 
  initial_step = 1.0, 
  min_step = 0.001, # Stop when step size gets very small
  max_tries = 50
)

print(result_rw)
## $minimum_value
## [1] -1.25
## 
## $optimal_parameters
## [1] -1.000272  1.500291
## 
## $total_function_evaluations
## [1] 770

Univariate

#' Univariate Search Optimization (Alternating Variable Method)
#' 
#' @param fn The objective function to minimize.
#' @param start_point A numeric vector indicating the initial starting point.
#' @param lower_bound Lower limit for the 1D line search (default = -10).
#' @param upper_bound Upper limit for the 1D line search (default = 10).
#' @param tol Convergence criterion based on step size (default = 1e-5).
#' @param max_iter Maximum number of full cycles (default = 100).
univariate_search_opt <- function(fn, start_point, lower_bound = -10, upper_bound = 10, tol = 1e-5, max_iter = 100) {
  
  x_current <- start_point
  f_current <- fn(x_current)
  n_dims <- length(start_point)
  
  iter <- 0
  converged <- FALSE
  
  while (iter < max_iter && !converged) {
    x_prev <- x_current
    
    # Loop through each dimension one by one
    for (i in 1:n_dims) {
      
      # Create a 1D wrapper function where only x_i varies
      fn_1d <- function(xi) {
        x_temp <- x_current
        x_temp[i] <- xi
        return(fn(x_temp))
      }
      
      # Perform a 1D line search to find the minimum along this axis
      opt_res <- optimize(fn_1d, interval = c(lower_bound, upper_bound))
      
      # Update the current coordinate with the new optimal 1D value
      x_current[i] <- opt_res$minimum
      f_current <- opt_res$objective
    }
    
    # Check convergence: has the point stopped moving?
    step_size <- sqrt(sum((x_current - x_prev)^2))
    
    if (step_size < tol) {
      converged <- TRUE
      message(sprintf("Converged successfully at iteration %d", iter + 1))
    }
    
    iter <- iter + 1
  }
  
  if (!converged) {
    warning("Maximum iterations reached without full convergence.")
  }
  
  return(list(
    minimum_value = f_current,
    optimal_parameters = x_current,
    cycles = iter
  ))
}

# 1. Define the objective function
objective_function <- function(x) {
  x1 <- x[1]
  x2 <- x[2]
  return(x1 - x2 + 2*x1^2 + 2*x1*x2 + x2^2)
}

# 2. Define the starting point
start_pt <- c(4.0, 4.0)

# 3. Run the univariate search optimization
# We give it a generous search interval from -10 to 10 along each axis
result_uv <- univariate_search_opt(
  fn = objective_function, 
  start_point = start_pt,
  lower_bound = -10.0,
  upper_bound = 10.0,
  tol = 1e-5
)
## Converged successfully at iteration 19
# 4. Print the results
print(result_uv)
## $minimum_value
## [1] -1.25
## 
## $optimal_parameters
## [1] -1.000005  1.500005
## 
## $cycles
## [1] 19