# Read the full housing data set and the separate training and test CSV files
# from the current working directory.
data <- read.csv("kc_house_data.csv")
training_data <- read.csv("kc_house_train_data.csv")
test_data <- read.csv("kc_house_test_data.csv")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Wrap each of the three data frames with dplyr's tbl_df().
training_data <- tbl_df(training_data)
test_data <- tbl_df(test_data)
data <- tbl_df(data)
# Compute predictions as the matrix product of the features and the weights.
#
# feature_matrix: anything as.matrix() can coerce (matrix, data frame, ...),
#                 one column per feature.
# weights:        vector or single-column matrix conformable with the features.
# Returns the result of `as.matrix(feature_matrix) %*% weights`.
predictions <- function(feature_matrix, weights) {
  design <- as.matrix(feature_matrix)
  design %*% weights
}
# Scale every column of `feature_matrix` to unit Euclidean (L2) norm.
#
# feature_matrix: a numeric matrix or a data frame (including tibbles) whose
#                 columns are all numeric.
# Returns an object of the same container type as the input in which each
# column has been divided by the square root of its sum of squares.
#
# The scaling is done column-wise in one vectorised step instead of looping
# over rows, so it stays fast on large data frames and also works on zero-row
# input. A column whose norm is 0 is divided by 0, exactly as before, so it
# comes back as NaN.
normalize_features <- function(feature_matrix) {
  norms <- sqrt(colSums(as.matrix(feature_matrix)^2))

  if (is.data.frame(feature_matrix)) {
    # `[] <-` replaces the columns in place, preserving the data frame's class
    # and column names.
    feature_matrix[] <- Map(`/`, feature_matrix, norms)
  } else {
    feature_matrix <- sweep(feature_matrix, 2, norms, `/`)
  }

  feature_matrix
}
We test the `normalize_features` function on a small example.
# Sanity check on a 2 x 3 example: build the matrix, wrap it as a tbl_df and
# print the normalised result.
X <- tbl_df(as.data.frame(matrix(c(3, 4, 6, 8, 9, 12), nrow = 2, ncol = 3)))
normalize_features(X)
## Source: local data frame [2 x 3]
##
## V1 V2 V3
## (dbl) (dbl) (dbl)
## 1 0.6 0.6 0.6
## 2 0.8 0.8 0.8
We need to compute ro[i] = SUM( feature_i * (output - prediction omitting feature i) ), where the sum is taken over all observations.
# Simple feature matrix: a constant column (added by cbind(1, ...)) followed by
# the two features sqft_living and bedrooms.
simple_feature_matrix <- cbind(1, data %>% select(sqft_living, bedrooms))
# The response is the price column.
output <- data %>% select(price)
# Normalise the feature columns and wrap the result as a tbl_df.
normalized_sfm <- simple_feature_matrix %>%
  normalize_features() %>%
  tbl_df()
# Initial weights, one per column of the feature matrix, as a 3 x 1 matrix.
weights <- matrix(c(1, 4, 1), nrow = 3, ncol = 1)
We define a function `ro`, which takes an integer `i` (the index of a feature column) as input.
# Compute ro[i] = sum over observations of
#   feature_i * (output - prediction made without feature i).
#
# i:              1-based index of the feature column.
# feature_matrix: features, one column per feature (matrix or data frame).
#                 Defaults to the global `normalized_sfm`.
# target:         observed outputs (vector, single-column matrix or data
#                 frame). Defaults to the global `output`.
# coefficients:   current weights, one per feature column (vector or
#                 single-column matrix). Defaults to the global `weights`.
# Returns a 1 x 1 matrix holding ro[i].
#
# Calling `ro(i)` with only the index keeps the old behaviour of reading the
# three globals. The extra arguments let the function be used on other data.
# The defaults are evaluated lazily, so the globals are only looked up when an
# argument is not supplied.
ro <- function(i, feature_matrix = normalized_sfm, target = output,
               coefficients = weights) {
  features <- as.matrix(feature_matrix)
  observed <- as.matrix(target)
  # as.numeric() accepts both a plain vector and a column matrix of weights.
  coef_without_i <- as.matrix(as.numeric(coefficients)[-i])

  # Prediction using every feature except column i.
  partial_prediction <- features[, -i, drop = FALSE] %*% coef_without_i
  residual <- observed - partial_prediction

  # drop = FALSE keeps column i as an n x 1 matrix so that its transpose times
  # the residual is a 1 x 1 matrix.
  t(features[, i, drop = FALSE]) %*% residual
}
# One coordinate-descent update of weight `i` for LASSO regression.
#
# i:              1-based index of the weight to update. Column 1 is treated
#                 as the intercept (the script builds its feature matrix with
#                 cbind(1, ...)) and is never penalised.
# feature_matrix: features, one column per weight (matrix or data frame).
# output:         observed outputs (vector, single-column matrix or data frame).
# weights:        current weights (vector or single-column matrix).
# l1_penalty:     L1 penalty strength.
# Returns the new value of weight `i` as a single number:
#   ro_i                   for the intercept,
#   ro_i + l1_penalty / 2  if ro_i < -l1_penalty / 2,
#   ro_i - l1_penalty / 2  if ro_i >  l1_penalty / 2,
#   0                      otherwise,
# where ro_i = sum(feature_i * (output - prediction without feature i)).
#
# ro_i is computed from the arguments passed in, not from global variables.
lasso_coordinate_descent_step <- function(i, feature_matrix, output, weights,
                                          l1_penalty) {
  features <- as.matrix(feature_matrix)
  target <- as.numeric(as.matrix(output))
  coefs <- as.numeric(weights)
  stopifnot(
    length(i) == 1L, i >= 1, i <= ncol(features),
    length(coefs) == ncol(features),
    length(target) == nrow(features)
  )

  # Prediction using every feature except column i.
  partial_prediction <- features[, -i, drop = FALSE] %*% coefs[-i]
  ro_i <- sum(features[, i] * (target - partial_prediction))

  half_penalty <- l1_penalty / 2
  if (i == 1) {
    # Intercept: no soft-thresholding.
    ro_i
  } else if (ro_i < -half_penalty) {
    # The space in `< -` matters: `ro_i<-x` parses as an assignment to ro_i,
    # not as a comparison.
    ro_i + half_penalty
  } else if (ro_i > half_penalty) {
    ro_i - half_penalty
  } else {
    0
  }
}