Somesh Biswas
BSD-CC-2420
Necessary libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
gradient_descent <- function(f, grad_f, x0, eta, epsilon, max_iter, data = NULL) {
x <- as.vector(x0)
path <- matrix(x, nrow = 1, ncol = length(x))
history <- data.frame(iteration = 0, loss = f(x, data), norm_grad = Inf)
for (i in 1:max_iter) {
grad <- grad_f(x, data)
norm_grad <- sqrt(sum(grad^2))
history <- rbind(history, c(i, f(x, data), norm_grad))
path <- rbind(path, as.vector(x))
if (norm_grad < epsilon) {
cat("Converged after", i, "iterations\n")
return(list(solution = x, history = history, path = path, iterations = i))
}
x <- x - eta * as.vector(grad)
}
cat("Reached maximum iterations (", max_iter, ") without convergence\n")
return(list(solution = x, history = history, path = path, iterations = max_iter))
}
Plotting Function
plot_convergence <- function(history, title) {
ggplot(history, aes(x = iteration, y = loss)) +
geom_line() +
labs(title = title, x = "Iteration", y = "Loss") +
theme_minimal()
}
Problem 1: Linear Regression
problem1_data <- function(n = 100) {
set.seed(123)
x <- rnorm(n, mean = 1, sd = 2)
y <- 2 + 3 * x + rnorm(n, mean = 0, sd = sqrt(5))
X <- cbind(1, x) # Design matrix with intercept
return(list(X = X, y = y))
}
f1 <- function(beta, data) {
X <- data$X
y <- data$y
n <- length(y)
(1/(2*n)) * sum((y - X %*% beta)^2)
}
grad_f1 <- function(beta, data) {
X <- data$X
y <- data$y
n <- length(y)
-(1/n) * t(X) %*% (y - X %*% beta)
}
Problem 2: Logistic Regression
load_problem2_data <- function(file_path) {
data <- read.csv(file_path)
X <- as.matrix(data[, c("x1", "x2")]) # Feature matrix (n x 2)
y <- as.vector(data$y) # Response vector
return(list(X = X, y = y))
}
f2 <- function(beta, data) {
X <- data$X
y <- data$y
sum(log(1 + exp(-y * (X %*% beta))))
}
grad_f2 <- function(beta, data) {
X <- data$X
y <- data$y
sigmoid <- function(z) 1 / (1 + exp(-z))
z <- y * (X %*% beta)
s <- sigmoid(z)
grad <- -t(X) %*% (y * (1 - s))
as.vector(grad)
}
Problem 3: Quadratic Function
A <- matrix(c(2, 0, 0, 4), 2, 2)
b <- c(-4, -8)
f3 <- function(x, data = NULL) {
as.numeric(t(x) %*% A %*% x + t(b) %*% x)
}
grad_f3 <- function(x, data = NULL) {
2 * A %*% x + b
}
Problem 4: Normal Distribution MLE
f4 <- function(params, data) {
mu <- params[1]
sigma <- params[2]
x <- data$x
n <- length(x)
sum(log(sigma) + (x - mu)^2 / (2 * sigma^2))
}
grad_f4 <- function(params, data) {
mu <- params[1]
sigma <- params[2]
x <- data$x
n <- length(x)
c(
sum(-(x - mu) / sigma^2),
sum(1/sigma - (x - mu)^2 / sigma^3)
)
}
Problem 5: Rosenbrock Function
f5 <- function(x, data = NULL) {
(1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
}
grad_f5 <- function(x, data = NULL) {
c(
-2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2),
200 * (x[2] - x[1]^2)
)
}
Main Execution Function:-
run_problem <- function(prob_num, f, grad_f, x0, eta, epsilon, max_iter = 10000, data = NULL) {
result <- gradient_descent(f, grad_f, x0, eta, epsilon, max_iter, data)
print(plot_convergence(result$history, paste("Problem", prob_num, "Convergence")))
cat("Problem", prob_num, "Solution:", result$solution, "\n")
return(result)
}
Running all the problems:-
set.seed(123)
# Problem 1
data1 <- problem1_data()
result1 <- run_problem(1, f1, grad_f1, x0 = c(0, 0), eta = 0.01, epsilon = 1e-6, data = data1)
## Converged after 2029 iterations

## Problem 1 Solution: 1.828789 2.941335
# Problem 2
file_path <- "C:/Users/swapa/Downloads/Copy of synthetic data - problem-2.csv"
data2 <- load_problem2_data(file_path)
result2 <- run_problem(2, f2, grad_f2, x0 = c(0, 0), eta = 0.05, epsilon = 1e-5, data = data2)
## Converged after 254 iterations

## Problem 2 Solution: 2.384688 -2.855388
# Problem 3
result3 <- run_problem(3, f3, grad_f3, x0 = c(1, 1), eta = 0.1, epsilon = 1e-6)
## Converged after 1 iterations

## Problem 3 Solution: 1 1
# Problem 4
data4 <- read.csv("C:/Users/swapa/Downloads/synthetic data - problem-4.csv")
result4 <- run_problem(4, f4, grad_f4, x0 = c(0, 1), eta = 0.01, epsilon = 1e-5, data = data4)
## Converged after 474 iterations

## Problem 4 Solution: 5.013222 2.093897
# Problem 5
result5 <- run_problem(5, f5, grad_f5, x0 = c(-1, 1), eta = 0.001, epsilon = 1e-6)
## Reached maximum iterations ( 10000 ) without convergence

## Problem 5 Solution: 0.9924817 0.9849898