#| label: Load necessary libraries
library(MASS)
set.seed(101317) # For reproducibilityLab1_ data 609
Problem 1: Gradient Descent
f <- function(x, y, a) {
return(x^2 + a * y^2)
}
gradient <- function(x, y, a) {
return(c(2 * x, 2 * a * y))
}
gradientfunction (x, y, a)
{
return(c(2 * x, 2 * a * y))
}
#| label: Gradient Descent Function
gradient_descent <- function(a, alpha, max_iter = 100, tol = 1e-6) {
x <- 1 # Initial x
y <- 1 # Initial y
history <- matrix(NA, nrow = max_iter, ncol = 2)
for (i in 1:max_iter) {
grad <- gradient(x, y, a)
x <- x - alpha * grad[1]
y <- y - alpha * grad[2]
history[i, ] <- c(x, y)
if (sqrt(sum(grad^2)) < tol) {
break
}
}
return(list(final_x = x, final_y = y, history = history))
}#| label: Experiment with different learning rates
alpha_values <- seq(0.001, 1, length.out = 100)
errors_a1 <- numeric(length(alpha_values))
errors_a01 <- numeric(length(alpha_values))
for (i in 1:length(alpha_values)) {
result_a1 <- gradient_descent(a = 1, alpha = alpha_values[i])
result_a01 <- gradient_descent(a = 0.01, alpha = alpha_values[i])
errors_a1[i] <- log10(sqrt(result_a1$final_x^2 + result_a1$final_y^2))
errors_a01[i] <- log10(sqrt(result_a01$final_x^2 + result_a01$final_y^2))
}
#| label: Plot results
par(mfrow = c(1, 2))
plot(alpha_values, errors_a1, type = "l", col = "red",
xlab = "Learning Rate (alpha)", ylab = "Log10(Final Error)",
main = "Final Error vs Learning Rate (a = 1)")
plot(alpha_values, errors_a01, type = "l", col = "blue",
xlab = "Learning Rate (alpha)", ylab = "Log10(Final Error)",
main = "Final Error vs Learning Rate (a = 0.01)")critical_alpha <- 0.1 # Pick based on reversal trend
result_critical <- gradient_descent(a = 0.01, alpha = critical_alpha)
x_vals <- seq(-1, 1, length.out = 100)
y_vals <- seq(-1, 1, length.out = 100)
z_vals <- outer(x_vals, y_vals, Vectorize(function(x, y) f(x, y, 0.01)))
contour(x_vals, y_vals, z_vals, levels = 10, main = "Gradient Descent Trajectory")
lines(result_critical$history[,1], result_critical$history[,2], col = "red", lwd = 2)
points(0, 0, col = "blue", pch = 19)Problem 2: Solving Least Squares Problems
#| label: Generate random 20x10 matrix A and 20x1 vector b (Gaussian distribution)
A <- matrix(rnorm(20 * 10), nrow = 20, ncol = 10)
b <- rnorm(20)
#| label: (a) Pseudoinverse solution: x = A^+ * b
A_pinv <- ginv(A) # Compute Moore-Penrose pseudoinverse
x_pinv <- A_pinv %*% b
#| label: (b) Built-in least squares function
x_lm <- lm.fit(A, b)$coefficients
#| label: (c) QR Factorization: x = R^{-1} Q^T b
QR <- qr(A)
Q <- qr.Q(QR) # Extract Q
R <- qr.R(QR) # Extract R
x_qr <- solve(R, t(Q) %*% b)
#| label: Convert all solutions to numeric vectors for correct comparison
x_pinv_vec <- as.vector(x_pinv)
x_qr_vec <- as.vector(x_qr)#| label: (d) Verify solutions are nearly equal
equal_pinv_lm <- all.equal(x_pinv_vec, x_lm)
equal_lm_qr <- all.equal(x_lm, x_qr_vec)residuals_pinv <- A %*% x_pinv - b
residuals_lm <- A %*% x_lm - b
residuals_qr <- A %*% x_qr - borthogonality_pinv <- t(residuals_pinv) %*% (A %*% x_pinv)
orthogonality_lm <- t(residuals_lm) %*% (A %*% x_lm)
orthogonality_qr <- t(residuals_qr) %*% (A %*% x_qr)
#| label: Print results
list(
solutions_equal = list(equal_pinv_lm, equal_lm_qr),
orthogonality_pinv = orthogonality_pinv,
orthogonality_lm = orthogonality_lm,
orthogonality_qr = orthogonality_qr
)$solutions_equal
$solutions_equal[[1]]
[1] "names for current but not for target"
$solutions_equal[[2]]
[1] "names for target but not for current"
$orthogonality_pinv
[,1]
[1,] 1.035501e-15
$orthogonality_lm
[,1]
[1,] -6.896032e-16
$orthogonality_qr
[,1]
[1,] -9.018485e-16
Problem 3: Iterative Solutions to Least Squares: Richardson Iteration for Least Squares:
A <- matrix(rnorm(20 * 10), nrow = 20, ncol = 10)
b <- rnorm(20)
#| label: Compute exact least squares solution using the pseudoinverse
x_exact <- ginv(A) %*% b
x_exact [,1]
[1,] -0.497875053
[2,] -0.394771337
[3,] -0.669152261
[4,] 0.094448027
[5,] 0.306222311
[6,] 0.136908920
[7,] -0.105711833
[8,] -0.006290893
[9,] 0.737834607
[10,] 0.237205856
# Compute the step size parameter: mu = 1 / ||A||^2
A_norm <- norm(A, type = "F") # Frobenius norm ||A||
mu <- 1 / (A_norm^2)
#| label: Initialize x^(0) = 0 and set iteration parameters
x_k <- rep(0, 10) # Initial guess (vector of zeros)
max_iter <- 500 # Number of iterations
errors <- numeric(max_iter) # Store errors ||x^(k) - x||
errors [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[75] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[112] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[149] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[186] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[223] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[260] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[297] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[334] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[371] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[408] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[445] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
[482] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
for (k in 1:max_iter) {
x_k <- x_k - mu * t(A) %*% (A %*% x_k - b) # Richardson update
errors[k] <- norm(x_k - x_exact, type = "2") # Compute error
}#| label: Plot error decay
plot(1:max_iter, log10(errors), type = "l", col = "blue", lwd = 2,
xlab = "Iteration (k)", ylab = "log10(||x^(k) - x||)",
main = "Convergence of Richardson Iteration")
grid()