set.seed(123)
x <- rnorm(500, 0, 1)
summary(x)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.66100 -0.57460 0.02072 0.03459 0.68520 3.24100
y <- x + rnorm(500) + 1  # true relationship: slope = 1, intercept = 1
summary(y)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.28300 0.08475 1.04600 1.03200 1.89000 5.45500
plot(x,y)
rnorm() generates standard normal random variables, i.e. with a mean of 0 and a standard deviation of 1. These defaults can be changed through the mean and sd arguments.
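For example (the mean of 10 and sd of 2 here are purely illustrative):
z <- rnorm(500, mean = 10, sd = 2)  # normal draws centred at 10 with spread 2
summary(z)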
Linear_Regression <- lm(y ~ x)
plot(x, y, col = "blue")
abline(coef(Linear_Regression), lwd = 3, col = "red")
coef(Linear_Regression)
## (Intercept) x
## 0.9995322 0.9460271
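Since y was generated as x + rnorm(500) + 1, the true slope and intercept are both 1, and the fitted coefficients above land close to those values. For standard errors and R-squared you can also inspect the full model summary:
summary(Linear_Regression)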
The generalized equation of the loss function for linear regression is: Loss Function (LF) = \sum_{i} (y_{i} - \hat{y}_{i})^{2}, where \hat{y}_{i} = m x_{i} + c is the predicted value. The code below divides this sum by 2n, which rescales the loss but does not move its minimum.
Note: %*% is matrix multiplication. The dimensions must conform: an m x n matrix times an n x p matrix yields an m x p matrix.
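For instance (a small illustrative example, not part of the analysis):
A <- matrix(1:6, nrow = 2)  # a 2 x 3 matrix
b <- matrix(1:3, ncol = 1)  # a 3 x 1 matrix
A %*% b                     # (2 x 3) %*% (3 x 1) gives a 2 x 1 result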
# Loss function: sum of squared errors scaled by 1/(2n)
LF <- function(X, y, theta) {
  sum( (X %*% theta - y)^2 ) / (2*length(y))
}
# Define learning rate and iteration limit
alpha <- 0.01
num_iters <- 500
# Save history
LF_history <- double(num_iters)
theta_history <- vector("list", num_iters)  # pre-allocate a list of length num_iters
# initialize coefficients
theta <- matrix(c(0,0), nrow=2)
# add a column of 1's for the intercept coefficient
X <- cbind(1, matrix(x))
# gradient descent
for (i in 1:num_iters) {
  error <- (X %*% theta - y)            # residuals for the current theta
  delta <- t(X) %*% error / length(y)   # gradient of LF with respect to theta
  theta <- theta - alpha * delta        # take a step of size alpha downhill
  LF_history[i] <- LF(X, y, theta)      # record the loss at this iteration
  theta_history[[i]] <- theta           # record the coefficients at this iteration
}
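For reference, the loop implements the update \theta \leftarrow \theta - \alpha \cdot \frac{1}{n} X^{T}(X\theta - y): error holds the residuals X\theta - y and delta holds the gradient of LF with respect to \theta.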
# Print the final value of m & c
print(theta)
## [,1]
## [1,] 0.9941104
## [2,] 0.9390584
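As a quick sanity check (my addition, not from the original tutorial), the gradient-descent coefficients should be close to the lm() fit, and the recorded loss should level off as the algorithm converges:
coef(Linear_Regression)  # closed-form fit from lm()
theta                    # fit from gradient descent
plot(LF_history, type = "l", xlab = "Iteration", ylab = "Loss Function (LF)")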
m<-seq(-100, 100, 5)
c<-seq(-100, 100, 5)
Y2<-sum(y^2)
X2<-sum(x^2)
XY<-sum(x*y)
X<-sum(x)
Y<-sum(y)
# Closed-form expansion of sum((y - (m*x + c))^2) in terms of the sums above
f <- function(m, c) {Y2 + X2*m^2 + c^2*length(y) - 2*XY*m - 2*Y*c + 2*X*m*c}
z <- outer(m, c, f)
persp(m, c, z, phi = 30, theta = 30,col = "orange",xlab = "m (Slope of the Line)",ylab = "c (Intercept on the Y-axis)",zlab = "Loss Function")
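As an optional check (again my addition, not in the original post), the bottom of this bowl can be located exactly with the normal equations. Since the name X was reused above to store sum(x), the design matrix is rebuilt locally here as Xd:
Xd <- cbind(1, x)                 # design matrix with an intercept column
solve(t(Xd) %*% Xd, t(Xd) %*% y)  # should be close to coef(Linear_Regression)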
#### 2D Heat map (Loss Function, m, c)
m<-seq(-100, 100, .5)
c<-seq(-100, 100, .5)
Y2<-sum(y^2)
X2<-sum(x^2)
XY<-sum(x*y)
X<-sum(x)
Y<-sum(y)
# Same closed-form loss function, evaluated on the finer grid
f <- function(m, c) {Y2 + X2*m^2 + c^2*length(y) - 2*XY*m - 2*Y*c + 2*X*m*c}
z <- outer(m, c, f)
image(m,c,z,xlab = "m (Slope of the Line)",ylab = "c (Intercept on the Y-axis)",main="Loss Function Vs (m & c)")
par(new=TRUE)
There are many other arguments that can be used to fine-tune the output of the contour() function; type ?contour to see them (an illustrative example follows the plot below).
contour(m,c,z, xaxt='n', yaxt='n',lwd = 2)
abline(h=1,col="green")
abline(v=1,col="blue")
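As an illustration of that fine-tuning (the argument values here are my own choices, not from the original plot), the contour overlay can be redrawn with more, labelled levels on top of the existing heat map:
contour(m, c, z, add = TRUE, nlevels = 30, labcex = 0.8, lwd = 1, col = "grey30")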
I used the following link to learn gradient descent, along with some code from ISLR: http://machinelearningmastery.com/linear-regression-tutorial-using-gradient-descent-for-machine-learning/