library(tidyverse, quietly = TRUE)
# Keep only the numeric columns and min-max scale each one to [0, 1]
diamonds <- diamonds %>% select_if(is.numeric)
diamonds <- apply(diamonds, 2, function(x) (x - min(x)) / (max(x) - min(x))) %>%
  as.data.frame()
glimpse(diamonds)
## Observations: 53,940
## Variables: 7
## $ carat <dbl> 0.006237006, 0.002079002, 0.006237006, 0.018711019, 0.02...
## $ depth <dbl> 0.5138889, 0.4666667, 0.3861111, 0.5388889, 0.5638889, 0...
## $ table <dbl> 0.2307692, 0.3461538, 0.4230769, 0.2884615, 0.2884615, 0...
## $ price <dbl> 0.000000e+00, 0.000000e+00, 5.406282e-05, 4.325026e-04, ...
## $ x <dbl> 0.3677840, 0.3621974, 0.3770950, 0.3910615, 0.4040968, 0...
## $ y <dbl> 0.06757216, 0.06519525, 0.06910017, 0.07181664, 0.073853...
## $ z <dbl> 0.07641509, 0.07264151, 0.07264151, 0.08270440, 0.086477...
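As an aside (not part of the original code), the same min-max scaling can be written with dplyr verbs so the data stays a tibble throughout; a minimal sketch, assuming dplyr 1.0 or later for across() and where():
# Rescale each numeric column of the raw diamonds data to [0, 1]
rescale01 <- function(x) (x - min(x)) / (max(x) - min(x))
diamonds_scaled <- ggplot2::diamonds %>%   # the raw data, since `diamonds` was overwritten above
  select(where(is.numeric)) %>%
  mutate(across(everything(), rescale01))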
The closed-form solution we are after is $\beta = (X^{T}X)^{-1}X^{T}Y$, where $^{T}$ denotes the transpose.
ols <- function(y, x) {
  x <- as.matrix(x)
  x <- cbind(intercept = 1, x)        # add a column of ones for the intercept
  solve(t(x) %*% x) %*% t(x) %*% y    # beta = (X'X)^(-1) X'y
}
ols(y = diamonds$price, x = diamonds %>% select(-price)) %>% print()
## [,1]
## intercept 0.50575312
## carat 2.84430795
## depth -0.85263614
## table -0.51704494
## x -0.75069185
## y 0.20753028
## z 0.07032677
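As a side note (my own variation, not the code used above), the explicit matrix inverse can be avoided by passing both cross-products to solve(), which solves the normal equations directly and is numerically safer:
ols2 <- function(y, x) {
  x <- cbind(intercept = 1, as.matrix(x))
  # solve (X'X) beta = X'y without forming (X'X)^(-1) explicitly
  solve(crossprod(x), crossprod(x, y))
}
ols2(y = diamonds$price, x = diamonds %>% select(-price))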
Let's verify the result against R's built-in lm() function.
print(lm(price~.,diamonds))
##
## Call:
## lm(formula = price ~ ., data = diamonds)
##
## Coefficients:
## (Intercept) carat depth table x
## 0.51467 2.77889 -0.39539 -0.28800 -0.76392
## y z
## 0.21119 0.07157
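To make the two fits easier to compare, we can line the estimates up side by side (a small convenience snippet of mine, reusing the ols() function and the scaled data from above):
fit_lm <- lm(price ~ ., data = diamonds)
beta_ne <- ols(y = diamonds$price, x = diamonds %>% select(-price))
# Both columns are estimated on the min-max scaled data, so they are directly comparable
cbind(normal_equation = as.vector(beta_ne), lm = coef(fit_lm))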
gradient_descent <- function(x, y, theta, alpha, steps) {
  res <- rep(0, steps)                      # loss value recorded at each step
  x <- cbind(intercept = 1, as.matrix(x))   # add the intercept column
  for (i in 1:steps) {
    theta <- theta - alpha * (t(x) %*% (x %*% theta - y))  # gradient step
    res[i] <- sum((x %*% theta - y)^2) / 2                 # squared-error loss
  }
  return(list('theta' = theta, 'res' = res))
}
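For reference, the loop above is plain batch gradient descent on the squared-error loss; in matrix form each iteration does

$$J(\theta) = \tfrac{1}{2}\lVert X\theta - y\rVert^2, \qquad \theta \leftarrow \theta - \alpha\,X^{T}(X\theta - y),$$

and res[i] stores $J(\theta)$ right after the i-th update.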
tt <- rep(0, 7)  # start from a zero vector (intercept + 6 predictors)
result <- gradient_descent(x = diamonds %>% select(-price), y = diamonds$price,
                           theta = tt, alpha = 0.000005, steps = 2000)
The result returned by the function has two components: theta, the estimated coefficients, and res, the loss value at each iteration. Let's visualize them.
plot(result$res, ylab = "Error per iteration", xlab = "iteration", main = "loss value of each iteration")
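If you prefer ggplot2 (already attached with the tidyverse), the same loss curve can be drawn like this; it is only an alternative way to display result$res:
tibble(iteration = seq_along(result$res), loss = result$res) %>%
  ggplot(aes(iteration, loss)) +
  geom_line() +
  labs(title = "loss value of each iteration",
       x = "iteration", y = "Error per iteration")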
print(result$theta)
## [,1]
## intercept -0.2592776
## carat 1.1926290
## depth -0.1256820
## table -0.1548032
## x 0.7206690
## y 0.1344053
## z 0.1501132
To be honest, I don't know yet why gradient descent gives a different result; I will explain the reason later.