gradient_descent <- function(expr, learning_rate, steps, x0) {
  # Compute derivative
  grad <- D(expr, "x")
  # Initialize x
  x <- x0
  cat("Step 0: x =", x, "\n")
  # Perform gradient descent
  for (k in 1:steps) {
    # Evaluate derivative at current x
    grad_val <- eval(grad, list(x = x))
    # Update rule
    x <- x - learning_rate * grad_val
    cat("Step", k, ": x =", x, "\n")
  }
  # Return final value
  return(x)
}
Proof that it works :)
f <- expression(x^4 - 6*x^2 + 4*x + 18)
gradient_descent(f, learning_rate = 0.1, steps = 3, x0 = 1) #same results
## Step 0: x = 1
## Step 1 : x = 1.4
## Step 2 : x = 1.5824
## Step 3 : x = 1.496355
## [1] 1.496355
gradient_descent(f, learning_rate = 0.1, steps = 3, x0 = 0) #same results
## Step 0: x = 0
## Step 1 : x = -0.4
## Step 2 : x = -1.2544
## Step 3 : x = -2.370151
## [1] -2.370151
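As a quick sanity check (my own addition, not part of the original output), the first update from x0 = 1 can be verified by hand, and the stationary points of f can be located directly by solving f'(x) = 0 with polyroot():
# Sanity check: f(x) = x^4 - 6*x^2 + 4*x + 18, so f'(x) = 4*x^3 - 12*x + 4.
# From x0 = 1: f'(1) = 4 - 12 + 4 = -4, so x1 = 1 - 0.1 * (-4) = 1.4, matching Step 1 above.
# Stationary points: real roots of 4*x^3 - 12*x + 4 = 0
# (polyroot takes coefficients in increasing order of power).
crit <- sort(Re(polyroot(c(4, -12, 0, 4))))
crit                                             # roughly -1.879, 0.347, 1.532
sapply(crit, function(x) eval(f, list(x = x)))   # f at each stationary point
# f has local minima near x = -1.879 (the global minimum) and x = 1.532, and a
# local maximum near x = 0.347; the run from x0 = 1 is approaching the local
# minimum near 1.532.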
gradient_vis <- function(expr, learning_rate, steps, x0, x_range = c(-3, 3)) {
  # Compute derivative
  grad <- D(expr, "x")
  # Create a sequence of x values for plotting
  x_vals <- seq(x_range[1], x_range[2], length.out = 300)
  y_vals <- sapply(x_vals, function(x) eval(expr, list(x = x)))
  # Initialize vectors to store x_k and f(x_k)
  x_hist <- numeric(steps + 1)
  y_hist <- numeric(steps + 1)
  x <- x0
  x_hist[1] <- x
  y_hist[1] <- eval(expr, list(x = x))
  # Perform gradient descent
  for (k in 1:steps) {
    grad_val <- eval(grad, list(x = x))
    x <- x - learning_rate * grad_val
    x_hist[k + 1] <- x
    y_hist[k + 1] <- eval(expr, list(x = x))
  }
  # Plot the function curve
  plot(x_vals, y_vals, type = "l", lwd = 2, col = "steelblue",
       main = paste("Gradient Descent Visualization (x0 =", x0, ")"),
       xlab = "x", ylab = "f(x)")
  # Add points for descent steps
  points(x_hist, y_hist, col = "red", pch = 19)
  lines(x_hist, y_hist, col = "red", lwd = 1.5, lty = 2)
  # Annotate points with step numbers
  text(x_hist, y_hist, labels = 0:steps, pos = 3, cex = 0.8, col = "darkred")
}
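A small aside on the implementation (my own note, not a change to the function above): because R arithmetic is vectorized, eval() can evaluate the expression over the entire plotting grid at once, so the sapply() loop could be replaced by a single call, e.g.:
# Vectorized alternative to the sapply() call inside gradient_vis:
# eval() evaluates the expression for the whole grid in one pass.
x_grid <- seq(-2.5, 2.5, length.out = 300)
y_grid <- eval(f, list(x = x_grid))   # same values as sapply(x_grid, ...)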
gradient_vis(expr = f, learning_rate = 0.1, steps = 20, x0 = 0, x_range = c(-2.5, 2.5))
Starting at 1 shows us it's doing pretty well.
gradient_vis(expr = f, learning_rate = 0.1, steps = 20, x0 = 1, x_range = c(-2.5, 2.5))
Starting at 0 shows us it still has some work to do.
gradient_vis(expr = f, learning_rate = 0.01, steps = 20, x0 = 1, x_range = c(-2.5, 2.5))
gradient_vis(expr = f, learning_rate = 0.01, steps = 20, x0 = 0, x_range = c(-2.5, 2.5))
As we can see, in both cases we need more steps at this learning rate; it's clear we haven't found the minimum.
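One way to make "more steps" concrete (a sketch of my own, not part of the original functions) is to iterate until the gradient is small rather than for a fixed number of steps. The hypothetical helper below, gradient_descent_tol, stops once |f'(x)| drops below a tolerance and reports how many iterations that took:
# Hypothetical variant: iterate until |f'(x)| < tol or max_steps is reached
gradient_descent_tol <- function(expr, learning_rate, x0, tol = 1e-6, max_steps = 10000) {
  grad <- D(expr, "x")
  x <- x0
  for (k in 1:max_steps) {
    grad_val <- eval(grad, list(x = x))
    if (abs(grad_val) < tol) {
      cat("Converged after", k - 1, "steps: x =", x, "\n")
      return(x)
    }
    x <- x - learning_rate * grad_val
  }
  cat("Did not converge within", max_steps, "steps; x =", x, "\n")
  x
}
# Example: at learning_rate = 0.01 the same starting points need well over 20
# iterations before |f'(x)| is near zero.
gradient_descent_tol(f, learning_rate = 0.01, x0 = 1)
gradient_descent_tol(f, learning_rate = 0.01, x0 = 0)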
rm(list=ls())
library(readr)
df <- read_csv("Downloads/College.csv")
## New names:
## • `` -> `...1`
## Rows: 777 Columns: 19
## ── Column specification ────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): ...1, Private
## dbl (17): Apps, Accept, Enroll, Top10perc, Top25perc, F.Undergrad, P.Undergr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- df %>% select(-...1)
# Split into training (first 600 rows) and test sets
train_idx <- 1:600
train <- df[train_idx, ]
test <- df[-train_idx, ] # 177 rows
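An aside on the split (my own note; the results reported below still use the sequential split above): taking the first 600 rows assumes the rows of College.csv are in no particular order. If that is uncertain, a random split is a common alternative, for example:
# Hypothetical alternative: a random 600/177 split instead of the first 600 rows.
# Different object names are used so the sequential split above is left intact.
set.seed(1)
rand_idx   <- sample(nrow(df), 600)
train_rand <- df[rand_idx, ]
test_rand  <- df[-rand_idx, ]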
We want to predict the number of applications received ("Apps") using the other variables in the dataset.
fit <- lm(Apps ~ ., data = train)
pred <- predict(fit, newdata = test)
# Compute test error metrics
residuals <- test$Apps - pred
mse <- mean(residuals^2)
rmse <- sqrt(mse)
mae <- mean(abs(residuals))
r2_test <- 1 - sum(residuals^2) / sum((test$Apps - mean(test$Apps))^2)
# Print the test-set metrics (three decimal places, as shown below)
cat(sprintf("Test MSE : %.3f\n", mse))
cat(sprintf("Test RMSE: %.3f\n", rmse))
cat(sprintf("Test MAE : %.3f\n", mae))
cat(sprintf("Test R^2 : %.3f\n", r2_test))
## Test MSE : 1502077.435
## Test RMSE: 1225.593
## Test MAE : 759.369
## Test R^2 : 0.917
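To put the test R^2 in context (a quick check of my own, not part of the original output), the RMSE can be compared with the spread of Apps in the test set; one minus the squared ratio of the two approximates the test R^2:
# Since R^2 = 1 - SSE/SST and SST = (n - 1) * var(test$Apps),
# 1 - (rmse / sd(test$Apps))^2 matches the test R^2 up to an n/(n-1) factor.
sd_apps <- sd(test$Apps)
c(rmse = rmse, sd_apps = sd_apps, approx_r2 = 1 - (rmse / sd_apps)^2)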