random_search(``g,alpha_choice,max_its,num_samples,w0``)cost_history"diminishing"Brief layout :
Step 1 : Start somewhere \(w_0\)
Step 2 : Randomly Choose A direction \(d^{(k-1)}\) – extract from uniform distribution
Step 3 : Scale Direction to step size \(\alpha\)
Step 4 : Sum previous candidate pt \(w^{(k-1)}\) with candidate scaled-direction(s) \(\alpha d^{(k-1)}\) to get new point
Step 5 : Do this for each \(w_i\) candidate direction
Step 6 : Choose candidate direction with lowest cost
Step 7 : Repeat until stop condition ( max_its
)
Algorithm Weakness :
Doesnt scale to higher dimensions well – ie. takes a long ahh time
Finds stationary pt, not neccesarily global min
Recall :
\[ W^{(k)}=W^{(k-1)}+\alpha d^{(k-1)} \]
where \(||d^{k-1}||=1\) (ie. normalized) and we just sample from 0 to \(2\pi\) (0 to 360 degrees).
Meaning,
\[ d^{k-1}=\begin{bmatrix}\cos(\theta) \\ \sin(\theta)\end{bmatrix} \]
\[ X \sim \text{Unif}(0,2\pi) \]
random_direction_radians <-
function(num_samp = 1000){
radians <- runif(num_samp, min = 0, max = 2 * pi)
radians
}
# tst
random_direction_radians(3)
## [1] 1.806903 4.953067 2.569678
d_k_1 <- function(num_samp) {
theta <- random_direction_radians(num_samp)
cos_vals <- cos(theta)
sin_vals <- sin(theta)
values <- c(rbind(cos_vals, sin_vals))
names(values) <- paste0(
rep(c("cos_v", "sin_v"), times = num_samp),
rep(seq_len(num_samp), each = 2)
)
values
}
d_k_1(3)
## cos_v1 sin_v1 cos_v2 sin_v2 cos_v3 sin_v3
## 0.7418151 -0.6706045 0.9308533 -0.3653931 0.9593123 0.2823471
scale_direction <- function(num_samp, alpha_choice = 1){
direction <- d_k_1(num_samp)
if(alpha_choice=="diminishing"){
k_val <- rep(seq_len(num_samp), each = 2) # for cos and sin (twice)
dininishing_direction <- (1/k_val)*direction
dininishing_direction
}else{
alpha_choice*direction
}
}
set.seed(123)
list(v1=scale_direction(3), v2=scale_direction(3, 2), v3=scale_direction(3, "diminishing")) |> as.data.frame()
## v1 v2 v3
## cos_v1 -0.2339190 1.4836302 -0.9844481
## sin_v1 0.9722561 -1.3412089 -0.1756756
## cos_v2 0.2383614 1.8617066 0.3900561
## sin_v2 -0.9711765 -0.7307862 -0.3128198
## cos_v3 -0.8408661 1.9186246 -0.3160772
## sin_v3 0.5412432 0.5646943 -0.1058598
set.seed(123)
vec_to_matrix <- function(v) {
matrix(v, ncol = 2, byrow = TRUE)
}
plot_vectors <- function(v, main_title = "Vectors") {
mat <- vec_to_matrix(v)
plot(
0, 0,
xlim = range(c(0, mat[, 1])),
ylim = range(c(0, mat[, 2])),
xlab = "x",
ylab = "y",
main = main_title,
asp = 1
)
abline(h = 0, v = 0, col = "gray")
arrows(
x0 = 0,
y0 = 0,
x1 = mat[, 1],
y1 = mat[, 2],
length = 0.1
)
}
v_list <- list(
v1 = scale_direction(3),
v2 = scale_direction(3, 2),
v3 = scale_direction(3, "diminishing")
)
plot_vectors(v_list$v1, "Alpha = 1")
plot_vectors(v_list$v2, "Alpha = 2")
plot_vectors(v_list$v3, "Diminishing Step Size")
set.seed(1)
g_expr <- expression(100 * w1^2 + w2)
# Generate random inputs (as you wrote)
w1_vals <- rnorm(100, mean = 0, sd = 10)
w2_vals <- rnorm(100, mean = 0, sd = 10)
g_vals <- eval(g_expr, list(
w1 = w1_vals,
w2 = w2_vals
))
data.frame(g_vals) |> head(5)
## g_vals
## 1 3918.2401
## 2 337.6699
## 3 6973.6426
## 4 25450.7887
## 5 1079.2079
cat("each is a G value")
## each is a G value
# Create smooth curve (fix w2 at its mean)
w1_grid <- seq(-20, 20, length.out = 400)
w2_fixed <- mean(w2_vals)
g_curve <- eval(g_expr, list(
w1 = w1_grid,
w2 = w2_fixed
))
# Plot
plot(
w1_grid,
g_curve,
type = "l",
lwd = 2,
xlab = "w1",
ylab = "g(w1, w2)",
main = "g(w1, w2) = 100 w1^2 + w2",
col = "red"
)
points(
w1_vals,
g_vals,
pch = 19,
cex = 1.5
)
function_expression <- function(g) {
g_expr <- parse(text = g)
g_expr
}
evaluate_expression <- function(g, data) {
g_expr <- function_expression(g)
w1_vals <- data[seq(1, length(data), by = 2)]
w2_vals <- data[seq(2, length(data), by = 2)]
eval(g_expr, list(
w1 = w1_vals,
w2 = w2_vals
))
}
Recall :
\[ W^k=W^{k-1}+\alpha d^{k-1} \]
random_search(``g,alpha_choice,max_its,num_samples,w0``)random_search <- function(
g,
alpha_choice = 1,
max_its = 100,
num_samples = 1,
w0 = c(0, 0)
) {
# Parse once (do NOT re-parse every loop)
g_expr <- function_expression(g)
# Storage
weight_history <- matrix(NA, nrow = max_its + 1, ncol = 2)
colnames(weight_history) <- c("w1", "w2")
cost_history <- numeric(max_its + 1)
# Initial
w_current <- w0
weight_history[1, ] <- w_current
# evaluate cost function for w_i values
cost_history[1] <- eval(
g_expr,
list(w1 = w_current[1], w2 = w_current[2])
)
# Main loop : iterate across k-steps
for (k in seq_len(max_its)) {
# Generate multiple candidate directions
direction_vec <- scale_direction(num_samples, alpha_choice)
direction_mat <- matrix(direction_vec, ncol = 2, byrow = TRUE)
# Form candidate weights
w_candidates <- sweep(direction_mat, 2, w_current, "+") # row wise operation
# Evaluate objective at ALL candidates
candidate_costs <- eval(
g_expr,
list(
w1 = w_candidates[, 1],
w2 = w_candidates[, 2]
)
)
# Choose best direction (minimum)
best_index <- which.min(candidate_costs)
# Update using best candidate
w_current <- w_candidates[best_index, ]
# Store
weight_history[k + 1, ] <- w_current
cost_history[k + 1] <- candidate_costs[best_index]
}
list(
weight_history = weight_history,
cost_history = cost_history
)
}
set.seed(123)
result <- random_search(
g = "w1^2 + w2^2",
alpha_choice = "diminishing",
max_its = 100, # not inc. initial value
num_samples = 5, # try 5 directions
w0 = c(4, 4)
)
tail(result$weight_history) # we are heading down which is good
## w1 w2
## [96,] -0.11737643 -0.27862070
## [97,] -0.06628516 -0.03389701
## [98,] 0.11608902 -0.11599244
## [99,] -0.10940962 -0.00805559
## [100,] 0.06148494 0.09584364
## [101,] -0.03588856 -0.13441361
tail(result$cost_history) # nearing 0 which is good
## [1] 0.091406718 0.005542729 0.026930907 0.012035357 0.012966401 0.019355007
cost_historyThe function we are minimizing is – it contains multiple mininum:
https://www.desmos.com/3d/9kxi3uvf97
set.seed(123)
result <- random_search(
g = "100*(w2 - w1^2)^2 + (w1 - w2)^2",
alpha_choice = 1,
max_its = 30, # not inc. initial value
num_samples = 100, # try 100 directions
w0 = c(-3, -3)
)
plot(result$cost_history) # notice we get lower over each index
tail(result$cost_history)
## [1] 0.05972796 0.01844171 0.13096306 1.65886500 0.73447902 0.71095444
w_hist <- result$weight_history
plot(
w_hist[,1], w_hist[,2],
type = "o",
pch = 19,
col = "red",
xlab = "w1",
ylab = "w2",
main = "Weight History (Search Path)"
)
# mark start and end
points(w_hist[1,1], w_hist[1,2], col = "green", pch = 19, cex = 1.5) # start
points(w_hist[nrow(w_hist),1], w_hist[nrow(w_hist),2], col = "blue", pch = 19, cex = 1.5) # end
legend("topleft",
legend = c("Path", "Start", "End"),
col = c("red", "green", "blue"),
pch = 19,
cex = .7)
g_func <- function(w1, w2) {
100*(w2 - w1^2)^2 + (w1 - w2)^2
}
x <- seq(-5, 5, length.out = 100)
y <- seq(-5, 5, length.out = 100)
z <- outer(x, y, g_func)
contour(x, y, z,
xlab = "w1", ylab = "w2",
main = "Search Path on Cost Surface")
lines(w_hist[,1], w_hist[,2], col = "red", lwd = 2)
points(w_hist[,1], w_hist[,2], col = "red", pch = 19)
"diminishing"set.seed(123)
result <- random_search(
g = "100*(w2 - w1^2)^2 + (w1 - w2)^2",
alpha_choice = "diminishing",
max_its = 30, # not inc. initial value
num_samples = 100, # try 100 directions
w0 = c(-3, -3)
)
plot(result$cost_history) # notice we get lower over each index
tail(result$cost_history) # much lower value with diminishing
## [1] 7.245882e-06 2.768931e-04 5.799276e-04 7.535580e-04 2.382325e-04
## [6] 5.145663e-06
\[ f(w_1,w_2)=100(w_2-w_1^2)^2+(w_1-w_2)^2 \]
Since both terms are squares, \[ 100(w_2-w_1^2)^2 \ge 0 \qquad \text{and} \qquad (w_1-w_2)^2 \ge 0 \] so \[ f(w_1,w_2)\ge 0 \]
The minimum occurs when both squared terms are zero: \[ w_2-w_1^2=0 \qquad \text{and} \qquad w_1-w_2=0 \]
Thus, \[ w_2=w_1^2 \qquad \text{and} \qquad w_2=w_1 \]
Equating them gives \[ w_1^2=w_1 \] so \[ w_1(w_1-1)=0 \]
Hence \[ w_1=0 \quad \text{or} \quad w_1=1 \]
Using \(w_2=w_1\), we get the minimizers \[ (w_1,w_2)=(0,0)\quad \text{and}\quad (1,1) \]
At both points, \[ f(0,0)=0, \qquad f(1,1)=0 \]
Therefore, the true global minimum value is \[ 0 \] attained at \[ (0,0)\quad \text{and}\quad (1,1) \]
In other words, two inputs may result in the same global min. So
depending upon our initial value \(w_0\) we are subject to get different
results. Either way however, using unit-length (
alpha_choice = 1 ), we perform worse than
diminishing. We see that a better strategy is
diminishing step length for this cost function.
General Framework :
\[ \vec{w}^{(k)}=w^{(k-1)}+\alpha \vec{e}_i \]
Brief layout :
Step 1 : Start somewhere \(w_0\)
Step 2 : Randomly Choose from restricted-direction(s) \(u_i=\vec{e}_i\) – ie. look at coordinate axis and make a choice
Step 3 : Scale Direction to step size \(\alpha\)
Step 4 : Sum previous candidate pt \(w^{(k-1)}\) with candidate scaled-direction(s) \(\alpha u_i\) to get new point
Step 5 : Do this for each \(w_i\) candidate direction
Step 6 : Choose candidate direction with lowest cost
Step 7 : Repeat until stop condition ( max_its or
tol )
Algorithm Weakness :
Algorithm Strength :
random_search() for higher
dimensional inputu_i()`u_i <- function() {
rbind(
c(1, 0),
c(-1, 0),
c(0, 1),
c(0, -1)
)
}
stop_condition()stop_condition <- function(k, max_its, cost_history, tol = 1e-5) {
# stop if max iterations reached
if (k >= max_its) {
return(TRUE)
}
# need at least 2 values to compare improvement
if (k > 1) {
improvement <- abs(cost_history[k] - cost_history[k - 1])
if (improvement < tol) {
return(TRUE)
}
}
return(FALSE)
}
diminishing_func()diminishing_func <- function(directions, alpha_choice = 1, k) {
if (alpha_choice == "diminishing") {
return((1 / k) * directions)
} else {
return(alpha_choice * directions)
}
}
coordinate_search(g,alpha_choice,max_its,tol,w0)coordinate_search <- function(
g,
alpha_choice = 1,
max_its = 100,
tol = 1e-5,
w0 = c(0, 0)
) {
g_expr <- function_expression(g)
# storage
weight_history <- matrix(NA, nrow = max_its + 1, ncol = 2)
colnames(weight_history) <- c("w1", "w2")
cost_history <- numeric(max_its + 1)
# initialize
w_current <- w0
weight_history[1, ] <- w_current
cost_history[1] <- eval(
g_expr,
list(w1 = w_current[1], w2 = w_current[2])
)
k <- 1
repeat {
# basis directions (±e1, ±e2)
directions <- u_i()
# apply step size (handles diminishing)
direction_mat <- diminishing_func(directions, alpha_choice, k)
# form candidates
w_candidates <- sweep(direction_mat, 2, w_current, "+")
# evaluate
candidate_costs <- eval(
g_expr,
list(
w1 = w_candidates[, 1],
w2 = w_candidates[, 2]
)
)
# pick best
best_index <- which.min(candidate_costs)
# update
w_current <- w_candidates[best_index, ]
# store
k <- k + 1
weight_history[k, ] <- w_current
cost_history[k] <- candidate_costs[best_index]
# stop condition
if (stop_condition(k, max_its, cost_history, tol)) {
break
}
}
list(
weight_history = weight_history[1:k, ],
cost_history = cost_history[1:k]
)
}
result <- coordinate_search(
g = "w1^2 + w2^2",
alpha_choice = "diminishing",
max_its = 1000, # not inc. initial value
w0 = c(4, 4)
)
tail(result$weight_history) # we are heading down which is good
## w1 w2
## [995,] 0.2602721 0.2602721
## [996,] 0.2592671 0.2602721
## [997,] 0.2592671 0.2592681
## [998,] 0.2592671 0.2582651
## [999,] 0.2582651 0.2582651
## [1000,] 0.2582651 0.2572641
tail(result$cost_history) # nearing 0 which is good
## [1] 0.1354831 0.1349610 0.1344394 0.1339203 0.1334017 0.1328856
plot(result$cost_history)
cost_historyThe function we are minimizing is :
\[ x'Ax+c \text{ For :} A=I \\ \implies \\ x'x+c \]
Link : https://www.desmos.com/3d/9kxi3uvf97
result <- coordinate_search(
g = "w1^2 + w2^2 + 2",
alpha_choice = 1, # unit vector
max_its = 1000, # not inc. initial value
w0 = c(4, 4)
)
tail(result$weight_history) # we are heading down which is good
## w1 w2
## [995,] 0 0
## [996,] 1 0
## [997,] 0 0
## [998,] 1 0
## [999,] 0 0
## [1000,] 1 0
tail(result$cost_history) # not great, looks like we missed
## [1] 2 3 2 3 2 3
plot(result$cost_history)
plot(result$weight_history[,1], result$cost_history, main = "wt1")
plot(result$weight_history[,2], result$cost_history, main = "wt2")
Steep decline – good
we can see across time we are getting closer to the min
Notice however that this is a convex function so in the long, run we are guaranteed this
w_hist <- result$weight_history
plot(
w_hist[,1], w_hist[,2],
type = "o",
pch = 19,
col = "red",
xlab = "w1",
ylab = "w2",
main = "Weight History (Search Path)"
)
# mark start and end
points(w_hist[1,1], w_hist[1,2], col = "green", pch = 19, cex = 1.5) # start
points(w_hist[nrow(w_hist),1], w_hist[nrow(w_hist),2], col = "blue", pch = 19, cex = 1.5) # end
legend("bottomright",
legend = c("Path", "Start", "End"),
col = c("red", "green", "blue"),
pch = 19,
cex = .7)
g_func <- function(w1, w2) {
w1^2 + w2^2 + 2
}
x <- seq(-5, 5, length.out = 100)
y <- seq(-5, 5, length.out = 100)
z <- outer(x, y, g_func)
contour(x, y, z,
xlab = "w1", ylab = "w2",
main = "Search Path on Cost Surface")
lines(w_hist[,1], w_hist[,2], col = "red", lwd = 2)
points(w_hist[,1], w_hist[,2], col = "red", pch = 19)
"diminishing"result <- coordinate_search(
g = "w1^2 + w2^2 + 2",
alpha_choice = "diminishing", # unit vector
max_its = 1000, # not inc. initial value
w0 = c(4, 4)
)
tail(result$weight_history) # we are heading down which is good
## w1 w2
## [995,] 0.2602721 0.2602721
## [996,] 0.2592671 0.2602721
## [997,] 0.2592671 0.2592681
## [998,] 0.2592671 0.2582651
## [999,] 0.2582651 0.2582651
## [1000,] 0.2582651 0.2572641
tail(result$cost_history) # much better -- but not best
## [1] 2.135483 2.134961 2.134439 2.133920 2.133402 2.132886
plot(result$cost_history) # beatiful
w_hist <- result$weight_history
plot(
w_hist[,1], w_hist[,2],
type = "o",
pch = 19,
col = "red",
xlab = "w1",
ylab = "w2",
main = "Weight History (Search Path)"
)
# mark start and end
points(w_hist[1,1], w_hist[1,2], col = "green", pch = 19, cex = 1.5) # start
points(w_hist[nrow(w_hist),1], w_hist[nrow(w_hist),2], col = "blue", pch = 19, cex = 1.5) # end
legend("bottomright",
legend = c("Path", "Start", "End"),
col = c("red", "green", "blue"),
pch = 19,
cex = .7)
g_func <- function(w1, w2) {
w1^2 + w2^2 + 2
}
x <- seq(-5, 5, length.out = 100)
y <- seq(-5, 5, length.out = 100)
z <- outer(x, y, g_func)
contour(x, y, z,
xlab = "w1", ylab = "w2",
main = "Search Path on Cost Surface")
lines(w_hist[,1], w_hist[,2], col = "red", lwd = 2)
points(w_hist[,1], w_hist[,2], col = "red", pch = 19)
Okay so, imagine we didnt have the graph – how might we determine
We are provided :
\[ f(w_1, w_2)=c_1(w_1^2 + w_2^2)+c_2w_1w_2 \]
Where \(c_1 \ne c_2\)
Notice that our function is of the form :
\[ f(w_1,w_2)=x'Ax \]
(ie. quadratic form)
Recall :
v1 <- c(.26, -0.24)
v2 <- c( -0.24, .26)
A <- matrix(c(v1,v2), nrow=2, byrow=T)
cat("A Matrix =\n")
## A Matrix =
A
## [,1] [,2]
## [1,] 0.26 -0.24
## [2,] -0.24 0.26
cat("\n")
cat("eigen(A)$values\n")
## eigen(A)$values
eigen(A)$values
## [1] 0.50 0.02
And recall \(\nabla f =\vec{0}\) means we can find gradient.
And recall \(\nabla (x'Ax) = (A+A')\vec{x}\) except recall \(A\) is symmetric so \(A=A'\) ; so :
\[ \nabla f=\nabla f=(x'Ax)=2A\vec{x} \]
B <- 2*A
B
## [,1] [,2]
## [1,] 0.52 -0.48
## [2,] -0.48 0.52
and we want to know : \(\nabla f =\vec{0}\) – so :
\[ B\vec{x}=\vec{0} \]
Which is the definition of the kernal, so in other words \(\text{Ker}(B)\)
\[ \text{Solve } Ax = 0, \quad A = \begin{bmatrix} 0.26 & -0.24 \\ -0.24 & 0.26 \end{bmatrix}. \]
Multiply by \(100\) to clear decimals: \[ \begin{bmatrix} 26 & -24 \\ -24 & 26 \end{bmatrix} \begin{bmatrix} w_1 \\ w_2 \end{bmatrix} = \begin{bmatrix} 0 \\ 0 \end{bmatrix}. \]
Row reduce: \[ \begin{bmatrix} 26 & -24 \\ -24 & 26 \end{bmatrix} \xrightarrow{R_2 \leftarrow R_2 + R_1} \begin{bmatrix} 26 & -24 \\ 2 & 2 \end{bmatrix} \xrightarrow{R_2 \leftarrow \tfrac{1}{2}R_2} \begin{bmatrix} 26 & -24 \\ 1 & 1 \end{bmatrix} \xrightarrow{R_1 \leftarrow R_1 - 26R_2} \begin{bmatrix} 0 & -50 \\ 1 & 1 \end{bmatrix} \]
Thus, \[ -50w_2 = 0 \Rightarrow w_2 = 0, \quad w_1 + w_2 = 0 \Rightarrow w_1 = 0. \]
Hence, \[ \ker(A) = \left\{ \begin{bmatrix}0 \\ 0\end{bmatrix} \right\}, \] so the unique global minimizer is \((0,0)\).
Verify Result :
library(MASS)
Null(B)
##
## [1,]
## [2,]
or, we can just be lazy and check if our matrix is invertible and if so then we know the kernal space is just the “trivial solution” and therefore the min point is just (0,0)
det(B)
## [1] 0.04
analyze_results <- function(
results,
g_func = NULL,
orientation = "bottomright",
cex = 0.8
) {
# Extract histories
w_hist <- results$weight_history
cost_hist <- results$cost_history
# Print tail tables
cat("---- Tail of weight history ----\n")
print(tail(w_hist))
cat("\n---- Tail of cost history ----\n")
print(tail(cost_hist))
# Plot 1: cost history
plot(
cost_hist,
type = "p",
pch = 19,
col = "blue",
xlab = "Iteration",
ylab = "Cost",
main = "Cost History"
)
# Plot 2: w1 vs cost
plot(
w_hist[,1], cost_hist,
pch = 19,
col = "purple",
xlab = "w1",
ylab = "Cost",
main = "w1 vs Cost"
)
# Plot 3: w2 vs cost
plot(
w_hist[,2], cost_hist,
pch = 19,
col = "darkgreen",
xlab = "w2",
ylab = "Cost",
main = "w2 vs Cost"
)
# Plot 4: contour + search path
if (!is.null(g_func)) {
# Convert character input to function
if (is.character(g_func)) {
expr <- parse(text = g_func)
g_func <- function(w1, w2) {
eval(expr, list(w1 = w1, w2 = w2))
}
}
x <- seq(min(w_hist[, 1]) - 1, max(w_hist[, 1]) + 1, length.out = 100)
y <- seq(min(w_hist[, 2]) - 1, max(w_hist[, 2]) + 1, length.out = 100)
z <- outer(x, y, Vectorize(g_func))
contour(
x, y, z,
xlab = "w1",
ylab = "w2",
main = "Search Path on Cost Surface"
)
# Overlay path
lines(w_hist[, 1], w_hist[, 2], col = "red", lwd = 2)
points(w_hist[, 1], w_hist[, 2], col = "red", pch = 19)
# Start and end
points(w_hist[1, 1], w_hist[1, 2], col = "green", pch = 19, cex = 1.5)
points(w_hist[nrow(w_hist), 1], w_hist[nrow(w_hist), 2], col = "blue", pch = 19, cex = 1.5)
legend(
orientation,
legend = c("Path", "Start", "End"),
col = c("red", "green", "blue"),
pch = 19,
cex = cex
)
}
}
set.seed(123)
result <- random_search(
g = "0.26*(w1^2+w2^2)-.48*w1*w2",
alpha_choice = 1,
max_its = 1000, # not inc. initial value
num_samples = 100, # try 100 directions
w0 = c(4, 4)
)
g <- "0.26*(w1^2+w2^2)-.48*w1*w2"
plot(result$cost_history)
analyze_results(result, g_func="0.26*(w1^2+w2^2)-.48*w1*w2")
## ---- Tail of weight history ----
## w1 w2
## [996,] 0.4667857 0.4554810
## [997,] -0.2513049 -0.2404686
## [998,] 0.4569310 0.4655072
## [999,] -0.2521168 -0.2396532
## [1000,] 0.4366925 0.4852894
## [1001,] -0.2226319 -0.2665693
##
## ---- Tail of cost history ----
## [1] 0.008537707 0.002447769 0.008527310 0.002457213 0.009090920 0.002875802
set.seed(123)
result <- random_search(
g = "0.26*(w1^2+w2^2)-.48*w1*w2",
alpha_choice = "diminishing",
max_its = 1000, # not inc. initial value
num_samples = 100, # try 100 directions
w0 = c(4, 4)
)
g <- "0.26*(w1^2+w2^2)-.48*w1*w2"
plot(result$cost_history)
analyze_results(result, g_func="0.26*(w1^2+w2^2)-.48*w1*w2")
## ---- Tail of weight history ----
## w1 w2
## [996,] -0.005304488 -0.002771495
## [997,] 0.005274850 0.003591949
## [998,] -0.003747409 -0.002044737
## [999,] 0.004297576 0.007728131
## [1000,] -0.002052746 -0.002510637
## [1001,] 0.006562357 0.002566765
##
## ---- Tail of cost history ----
## [1] 2.256228e-06 1.494240e-06 1.060262e-06 4.388353e-06 2.606606e-07
## [6] 4.824597e-06
result <- coordinate_search(
g = "0.26*(w1^2+w2^2)-.48*w1*w2",
alpha_choice = 1,
max_its = 1000,
w0 = c(4, 4)
)
g <- "0.26*(w1^2+w2^2)-.48*w1*w2"
plot(result$cost_history)
analyze_results(result, g_func="0.26*(w1^2+w2^2)-.48*w1*w2")
## ---- Tail of weight history ----
## w1 w2
## [995,] 0 0
## [996,] 1 0
## [997,] 0 0
## [998,] 1 0
## [999,] 0 0
## [1000,] 1 0
##
## ---- Tail of cost history ----
## [1] 0.00 0.26 0.00 0.26 0.00 0.26
result <- coordinate_search(
g = "0.26*(w1^2+w2^2)-.48*w1*w2",
alpha_choice = "diminishing",
max_its = 1000,
w0 = c(4, 4)
)
plot(result$cost_history)
tail(result$cost_history)
## [1] 0.002709663 0.002699462 0.002688787 0.002678646 0.002668034 0.002657953
analyze_results(result, g_func="0.26*(w1^2+w2^2)-.48*w1*w2")
## ---- Tail of weight history ----
## w1 w2
## [995,] 0.2602721 0.2602721
## [996,] 0.2592671 0.2602721
## [997,] 0.2592671 0.2592681
## [998,] 0.2592671 0.2582651
## [999,] 0.2582651 0.2582651
## [1000,] 0.2582651 0.2572641
##
## ---- Tail of cost history ----
## [1] 0.002709663 0.002699462 0.002688787 0.002678646 0.002668034 0.002657953
Brief layout :
Step 1 : Start somewhere \(w_0\)
Recall :
Therefore
\[ h(\eta)\le \lambda h(x_1)+(1-\lambda)h(x_2) \]
So its also convex