\[ g(w) = (\frac{1}{4}w_0-2w_1)^2+(\frac{1}{4}w_0-\frac{1}{2}w_1)^2 \]
Notice that it is a quadratic form:
\[ q(w)=w'Aw \]
where the (symmetric) matrix A is:
# Symmetric matrix of the quadratic form g(w) = w' A w.
# Expanding g gives (1/8) w_0^2 - (5/4) w_0 w_1 + (17/4) w_1^2, so each
# off-diagonal entry is half of the cross-term coefficient.
A <- matrix(c(1 / 8, -5 / 8, -5 / 8, 17 / 4), nrow = 2, byrow = TRUE)
A
## [,1] [,2]
## [1,] 0.125 -0.625
## [2,] -0.625 4.250
\[ \nabla^2 q(w)=\nabla(2Aw) = 2A \]
Using Sylvester’s criterion (every leading principal minor of the Hessian 2A must be positive), we see:
# Hessian of the quadratic form: the constant matrix 2 * A.
2 * A
## [,1] [,2]
## [1,] 0.25 -1.25
## [2,] -1.25 8.50
# First leading principal minor: the top-left entry of the Hessian.
2 * A[1, 1]
## [1] 0.25
# Second leading principal minor: the determinant of the whole Hessian.
det(2 * A) > 0
## [1] TRUE
# we suspect the eigenvalues are positive
numerical check :
# Eigenvalues of the symmetric matrix A.
eigen(A, symmetric = TRUE)$values
## [1] 4.34261746 0.03238254
# Element-wise sign test: both eigenvalues are strictly positive.
eigen(A, symmetric = TRUE)$values > 0
## [1] TRUE TRUE
Conclusion: all eigenvalues are positive, so A is positive definite; the function is strictly convex and a unique global minimum exists.
Recall that a nonzero scalar multiple of a matrix has the same null space, so we can just take the null space of A: the stationarity condition is \(\nabla q(w)=0 \iff 2Aw = 0\), and
\[ \text{Ker}(2A) =\text{Ker}(A) \]
library(MASS)
# Basis of the null space of A, requested through the MASS namespace.
# NOTE(review): per the MASS documentation Null() returns the *left* null
# space; that coincides with Ker(A) only because A is symmetric - confirm if
# A ever changes.
MASS::Null(A)
##
## [1,]
## [2,]
# The printed basis has two rows and no columns, i.e. the only solution of
# A w = 0 is w = 0.
Here's the surface:
link : https://www.desmos.com/3d/sullcehznj
\[ \vec{w}^{(k)}=\vec{w}^{(k-1)}-\alpha \frac{\nabla g(w^{(k-1)})}{\|\nabla g(w^{(k-1)})\|_2} \]
for which the key point is that every step has length \(\alpha\): \(\left\|\alpha \frac{\nabla g(w^{(k-1)})}{\|\nabla g(w^{(k-1)})\|_2}\right\|_2=\alpha\)
Previously, with the unnormalized descent direction
\[ d^{(k-1)} = - \nabla g(w^{(k-1)}) \]
\[ \vec{w}^{(k)}=\vec{w}^{(k-1)}+\alpha d^{(k-1)} \]
# Objective g(w) for a weight vector w = (w_0, w_1):
#   g(w) = (w_0 / 4 - 2 * w_1)^2 + (w_0 / 4 - w_1 / 2)^2
# Only the first two elements of `w` are read.
objective_function <- function(w) {
  quarter_w0 <- 0.25 * w[1]
  first_term <- quarter_w0 - 2 * w[2]
  second_term <- quarter_w0 - 0.5 * w[2]
  first_term^2 + second_term^2
}
# Sanity check at w = (1, 2).
objective_function(c(1, 2))
## [1] 14.625
\[ \nabla g(w) = \begin{bmatrix} \frac{\partial g}{\partial w_0} \\ \frac{\partial g}{\partial w_1} \end{bmatrix} \]
# Unevaluated form of the objective so that D() can differentiate it
# symbolically with respect to each weight.
objective_function_exp <- expression(
  ((1/4) * w_0 - 2 * w_1)^2 +
    ((1/4) * w_0 - (1/2) * w_1)^2
)
# Partial derivative with respect to w_0.
p1 <- D(objective_function_exp, name = "w_0")
p1
## 2 * ((1/4) * ((1/4) * w_0 - 2 * w_1)) + 2 * ((1/4) * ((1/4) *
## w_0 - (1/2) * w_1))
# Visual separator between the two printed derivatives.
print("---")
## [1] "---"
# Partial derivative with respect to w_1.
p2 <- D(objective_function_exp, name = "w_1")
p2
## -(2 * ((1/2) * ((1/4) * w_0 - (1/2) * w_1)) + 2 * (2 * ((1/4) *
## w_0 - 2 * w_1)))
# Gradient as a list of two unevaluated calls; evaluate an entry with
# eval(gradient[[i]], list(w_0 = ..., w_1 = ...)).
gradient <- list(p1, p2)
gradient
## [[1]]
## 2 * ((1/4) * ((1/4) * w_0 - 2 * w_1)) + 2 * ((1/4) * ((1/4) *
## w_0 - (1/2) * w_1))
##
## [[2]]
## -(2 * ((1/2) * ((1/4) * w_0 - (1/2) * w_1)) + 2 * (2 * ((1/4) *
## w_0 - 2 * w_1)))
# Mean vector of X = (X_1, ..., X_5)', stored as a 5 x 1 column matrix.
mu <- c(2, 4, -1, 3, 0)
mu <- matrix(mu)
# Symmetric 5 x 5 matrix Sigma, entered one row per line.
Sigma <- matrix(
  c(
    4, -1, 1 / 2, -1 / 2, 0,
    -1, 3, 1, -1, 0,
    1 / 2, 1, 6, 1, -4,
    -1 / 2, -1, 1, 4, 0,
    0, 0, -4, 0, 2
  ),
  nrow = 5, ncol = 5, byrow = TRUE
)
# Coefficient matrices: A is 2 x 2 and B is 2 x 3.
# NOTE: this assignment replaces any matrix previously bound to `A`.
A <- matrix(c(-1, 1, 1, 1 / 2), ncol = 2, byrow = TRUE)
B <- matrix(c(1, 1, 1 / 2, -2, 1, -2), nrow = 2, byrow = TRUE)
\[ X^{(1)}=(X_1, X_2)' \]
\[ X^{(2)}=(X_3, X_4, X_5)' \]
\[ \mathbb{E}(X^{(1)}) \]
# First two entries of mu, shown as a column.
as.matrix(mu[1:2])
## [,1]
## [1,] 2
## [2,] 4
# The same sub-vector pre-multiplied by A; the product is already a matrix.
A %*% mu[1:2]
## [,1]
## [1,] 2
## [2,] 4
# Top-left 2 x 2 block of Sigma.
Sigma[1:2, 1:2]
## [,1] [,2]
## [1,] 4 -1
## [2,] -1 3
# That block sandwiched between A and its transpose: A Sigma_11 A'.
A %*% Sigma[1:2, 1:2] %*% t(A)
## [,1] [,2]
## [1,] 9 -3.00
## [2,] -3 3.75
# Entries 3 to the end of mu, shown as a column.
as.matrix(mu[3:length(mu)])
## [,1]
## [1,] -1
## [2,] 3
## [3,] 0
# The same sub-vector pre-multiplied by B; the product is already a matrix.
B %*% mu[3:length(mu)]
## [,1]
## [1,] 2
## [2,] 5
# Bottom-right block of Sigma (rows and columns 3 to length(mu)).
Sigma[3:length(mu), 3:length(mu)]
## [,1] [,2] [,3]
## [1,] 6 1 -4
## [2,] 1 4 0
## [3,] -4 0 2
# That block sandwiched between B and its transpose: B Sigma_22 B'.
B %*% Sigma[3:length(mu), 3:length(mu)] %*% t(B)
## [,1] [,2]
## [1,] 8.5 1
## [2,] 1.0 0
# Cross block of Sigma between X^(1) = (X_1, X_2)' and
# X^(2) = (X_3, X_4, X_5)': rows 1-2 against columns 3 to length(mu).
# (The previous indices, [1:3, 4:5] and [4:5, 1:3], did not match this
# partition.)
Sigma[1:2, 3:length(mu)]
## [,1] [,2] [,3]
## [1,] 0.5 -0.5 0
## [2,] 1.0 -1.0 0
# Cross-covariance of the transformed vectors: A Sigma_12 B'.
A %*% Sigma[1:2, 3:length(mu)] %*% t(B)
## [,1] [,2]
## [1,] 0 -1.5
## [2,] 0 -3.0