Somesh Biswas

BSD-CC-2420

Necessary libraries

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr) 
## Warning: package 'readr' was built under R version 4.4.3
gradient_descent <- function(f, grad_f, x0, eta, epsilon, max_iter, data = NULL) {
  x <- as.vector(x0)
  path <- matrix(x, nrow = 1, ncol = length(x))
  history <- data.frame(iteration = 0, loss = f(x, data), norm_grad = Inf)
  
  for (i in 1:max_iter) {
    grad <- grad_f(x, data)
    norm_grad <- sqrt(sum(grad^2))
    
    history <- rbind(history, c(i, f(x, data), norm_grad))
    path <- rbind(path, as.vector(x))
    
    if (norm_grad < epsilon) {
      cat("Converged after", i, "iterations\n")
      return(list(solution = x, history = history, path = path, iterations = i))
    }
    
    x <- x - eta * as.vector(grad)
  }
  
  cat("Reached maximum iterations (", max_iter, ") without convergence\n")
  return(list(solution = x, history = history, path = path, iterations = max_iter))
}

Plotting Function

plot_convergence <- function(history, title) {
  ggplot(history, aes(x = iteration, y = loss)) +
    geom_line() +
    labs(title = title, x = "Iteration", y = "Loss") +
    theme_minimal()
}

Problem 1: Linear Regression

problem1_data <- function(n = 100) {
  set.seed(123)
  x <- rnorm(n, mean = 1, sd = 2)
  y <- 2 + 3 * x + rnorm(n, mean = 0, sd = sqrt(5))
  X <- cbind(1, x) # Design matrix with intercept
  return(list(X = X, y = y))
}

f1 <- function(beta, data) {
  X <- data$X
  y <- data$y
  n <- length(y)
  (1/(2*n)) * sum((y - X %*% beta)^2)
}

grad_f1 <- function(beta, data) {
  X <- data$X
  y <- data$y
  n <- length(y)
  -(1/n) * t(X) %*% (y - X %*% beta)
}

Problem 2: Logistic Regression

load_problem2_data <- function(file_path) {
  data <- read.csv(file_path)
  X <- as.matrix(data[, c("x1", "x2")]) # Feature matrix (n x 2)
  y <- as.vector(data$y) # Response vector
  return(list(X = X, y = y))
}

f2 <- function(beta, data) {
  X <- data$X
  y <- data$y
  sum(log(1 + exp(-y * (X %*% beta))))
}

grad_f2 <- function(beta, data) {
  X <- data$X
  y <- data$y
  sigmoid <- function(z) 1 / (1 + exp(-z))
  
  z <- y * (X %*% beta)
  s <- sigmoid(z)
  grad <- -t(X) %*% (y * (1 - s))
  
  as.vector(grad)
}

Problem 3: Quadratic Function

A <- matrix(c(2, 0, 0, 4), 2, 2)
b <- c(-4, -8)

f3 <- function(x, data = NULL) {
  as.numeric(t(x) %*% A %*% x + t(b) %*% x)
}

grad_f3 <- function(x, data = NULL) {
  2 * A %*% x + b
}

Problem 4: Normal Distribution MLE

f4 <- function(params, data) {
  mu <- params[1]
  sigma <- params[2]
  x <- data$x
  n <- length(x)
  sum(log(sigma) + (x - mu)^2 / (2 * sigma^2))
}

grad_f4 <- function(params, data) {
  mu <- params[1]
  sigma <- params[2]
  x <- data$x
  n <- length(x)
  c(
    sum(-(x - mu) / sigma^2), 
    sum(1/sigma - (x - mu)^2 / sigma^3)
  )
}

Problem 5: Rosenbrock Function

f5 <- function(x, data = NULL) {
  (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
}

grad_f5 <- function(x, data = NULL) {
  c(
    -2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2),
    200 * (x[2] - x[1]^2)
  )
}

Main Execution Function:-

run_problem <- function(prob_num, f, grad_f, x0, eta, epsilon, max_iter = 10000, data = NULL) {
  result <- gradient_descent(f, grad_f, x0, eta, epsilon, max_iter, data)
  print(plot_convergence(result$history, paste("Problem", prob_num, "Convergence")))
  cat("Problem", prob_num, "Solution:", result$solution, "\n")
  return(result)
}

Running all the problems:-

set.seed(123)

# Problem 1
data1 <- problem1_data()
result1 <- run_problem(1, f1, grad_f1, x0 = c(0, 0), eta = 0.01, epsilon = 1e-6, data = data1)
## Converged after 2029 iterations

## Problem 1 Solution: 1.828789 2.941335
# Problem 2
file_path <- "C:/Users/swapa/Downloads/Copy of synthetic data - problem-2.csv"
data2 <- load_problem2_data(file_path)
result2 <- run_problem(2, f2, grad_f2, x0 = c(0, 0), eta = 0.05, epsilon = 1e-5, data = data2)
## Converged after 254 iterations

## Problem 2 Solution: 2.384688 -2.855388
# Problem 3
result3 <- run_problem(3, f3, grad_f3, x0 = c(1, 1), eta = 0.1, epsilon = 1e-6)
## Converged after 1 iterations

## Problem 3 Solution: 1 1
# Problem 4
data4 <- read.csv("C:/Users/swapa/Downloads/synthetic data - problem-4.csv")
result4 <- run_problem(4, f4, grad_f4, x0 = c(0, 1), eta = 0.01, epsilon = 1e-5, data = data4)
## Converged after 474 iterations

## Problem 4 Solution: 5.013222 2.093897
# Problem 5
result5 <- run_problem(5, f5, grad_f5, x0 = c(-1, 1), eta = 0.001, epsilon = 1e-6)
## Reached maximum iterations ( 10000 ) without convergence

## Problem 5 Solution: 0.9924817 0.9849898