# Data preparation for testing
# Read the sample data once; the prediction frame starts out as a copy of the
# training frame, so a second parse of the same file is unnecessary.
dfTrain <- read_csv("~/Downloads/sample_knn_regresion.csv",
  col_types = cols(id = col_integer()))
dfPredictions <- dfTrain
# Dimensions of the training frame (rows, columns); the row count is used
# below to locate the last training id.
dfTrainDim <- dim(dfTrain)
# Build the prediction frame: drop the first five rows, give the remaining
# rows fresh ids that continue after the last training id, fill every
# non-id column with random values drawn uniformly from that column's range
# in the training data, and blank out the last column (the value to predict).
dfPredictions <- dfPredictions[-c(1:5), ]
# seq_len() makes the intended "last id + 1, + 2, ..." explicit (the original
# relied on `:` binding tighter than `+`) and stays correct for zero rows.
dfPredictions$id <- dfTrain$id[dfTrainDim[1]] + seq_len(nrow(dfPredictions))
for (i in 2:ncol(dfPredictions)) {
  dfPredictions[[i]] <- runif(
    nrow(dfPredictions),
    min(dfTrain[[i]]),
    max(dfTrain[[i]])
  )
}
# The last column is the target; it is unknown for the rows to be predicted.
dfPredictions[, ncol(dfPredictions)] <- NA
#' K-nearest-neighbours regression on the last column of a data frame
#'
#' Both inputs must share the same column layout: column 1 is an identifier,
#' the last column is the numeric target, and every column in between is a
#' numeric predictor. For each row of `df_Predictions` the target is set to
#' the mean target of the `kn` closest training rows, where closeness is the
#' Euclidean distance over the predictor columns only.
#'
#' @param df_Train Data frame (or tibble) of training rows with known targets.
#' @param df_Predictions Data frame with the same columns whose target column
#'   is to be filled in.
#' @param kn Number of neighbours to average; must be between 1 and the number
#'   of training rows that have a non-missing target.
#' @param method Currently unused; kept so existing callers keep working.
#' @return Invisibly, the row-bound training and prediction rows with `id`
#'   renumbered 1..n and the prediction targets filled in. The distance
#'   matrix and the resulting frame are also printed.
knn <- function(df_Train, df_Predictions, kn, method) {
  n_train <- nrow(df_Train)
  n_pred <- nrow(df_Predictions)
  target_col <- ncol(df_Train)
  stopifnot(
    ncol(df_Predictions) == target_col,
    target_col >= 3L,
    is.numeric(kn),
    length(kn) == 1L,
    kn >= 1
  )

  # Only training rows with a known target can act as neighbours.
  train_target <- df_Train[[target_col]]
  candidates <- which(!is.na(train_target))
  if (kn > length(candidates)) {
    stop("`kn` exceeds the number of training rows with a known target",
      call. = FALSE)
  }

  # Take both data frames and turn them into one, renumbering the ids.
  aux_df <- rbind(df_Train, df_Predictions)
  aux_df$id <- seq_len(nrow(aux_df))

  # Distance between every pair of rows over the predictors only. The
  # parentheses matter: `2:n - 1` parses as `(2:n) - 1`, which would pull the
  # id column into the distance and drop the last predictor.
  feature_cols <- 2:(target_col - 1L)
  dist_MN <- as.matrix(dist(aux_df[, feature_cols, drop = FALSE]))
  print(dist_MN)

  # For each row to estimate, average the targets of its kn nearest training
  # rows. Restricting the search to training rows means a prediction never
  # depends on another (missing or already-estimated) prediction row.
  pred_rows <- n_train + seq_len(n_pred)
  predictions <- vapply(
    pred_rows,
    function(i) {
      nearest <- candidates[order(dist_MN[i, candidates])[seq_len(kn)]]
      mean(train_target[nearest])
    },
    numeric(1)
  )
  aux_df[[target_col]][pred_rows] <- predictions

  print(aux_df)
  invisible(aux_df)
}
# Run the 5-nearest-neighbour estimate on the prepared frames.
knn(dfTrain, dfPredictions, kn = 5, method = 1)
## 1 2 3 4 5 6 7
## 1 0.000000 19.026616 15.144636 11.437220 6.406247 10.326665 26.685015
## 2 19.026616 0.000000 4.152120 8.283966 14.321177 10.792409 8.604423
## 3 15.144636 4.152120 0.000000 4.134005 10.229369 6.711185 11.708544
## 4 11.437220 8.283966 4.134005 0.000000 6.181424 2.830194 15.308821
## 5 6.406247 14.321177 10.229369 6.181424 0.000000 4.242641 21.100948
## 6 10.326665 10.792409 6.711185 2.830194 4.242641 0.000000 17.036725
## 7 26.685015 8.604423 11.708544 15.308821 21.100948 17.036725 0.000000
## 8 18.402174 6.362083 5.388877 7.211796 12.409674 8.246211 9.069179
## 9 23.414739 7.625752 9.220087 12.089665 17.478272 13.345037 4.476606
## 10 15.822768 10.011998 7.280110 6.331666 9.467840 5.660389 13.345037
## 11 10.630113 17.868572 13.952811 10.216315 6.209430 7.387877 22.786097
## 12 15.513891 12.844167 9.886466 8.040542 9.185022 6.344864 15.870153
## 13 22.991647 11.043421 11.007503 12.442393 16.676242 12.695937 8.798681
## 14 13.114205 21.179355 17.368459 13.753019 9.681043 10.925310 25.426881
## 15 19.116102 14.324723 12.165054 11.185099 12.832510 9.852226 15.264674
## 8 9 10 11 12 13 14
## 1 18.402174 23.414739 15.822768 10.630113 15.513891 22.991647 13.114205
## 2 6.362083 7.625752 10.011998 17.868572 12.844167 11.043421 21.179355
## 3 5.388877 9.220087 7.280110 13.952811 9.886466 11.007503 17.368459
## 4 7.211796 12.089665 6.331666 10.216315 8.040542 12.442393 13.753019
## 5 12.409674 17.478272 9.467840 6.209430 9.185022 16.676242 9.681043
## 6 8.246211 13.345037 5.660389 7.387877 6.344864 12.695937 10.925310
## 7 9.069179 4.476606 13.345037 22.786097 15.870153 8.798681 25.426881
## 8 0.000000 5.107837 4.476606 13.765197 7.295618 5.632370 16.565271
## 9 5.107837 0.000000 9.055937 18.539444 11.467303 4.685368 21.044399
## 10 4.476606 9.055937 0.000000 9.484511 2.915478 7.247822 12.120310
## 11 13.765197 18.539444 9.484511 0.000000 7.449921 16.151419 3.617834
## 12 7.295618 11.467303 2.915478 7.449921 0.000000 8.745664 9.612380
## 13 5.632370 4.685368 7.247822 16.151419 8.745664 0.000000 18.060125
## 14 16.565271 21.044399 12.120310 3.617834 9.612380 18.060125 0.000000
## 15 8.063512 10.814646 5.000007 10.247390 3.674757 6.891854 11.487830
## 15
## 1 19.116102
## 2 14.324723
## 3 12.165054
## 4 11.185099
## 5 12.832510
## 6 9.852226
## 7 15.264674
## 8 8.063512
## 9 10.814646
## 10 5.000007
## 11 10.247390
## 12 3.674757
## 13 6.891854
## 14 11.487830
## 15 0.000000
## # A tibble: 15 x 4
## id height age weight
## <int> <dbl> <dbl> <dbl>
## 1 1 5 45 77
## 2 2 5.11 26 47
## 3 3 5.6 30 55
## 4 4 5.9 34 59
## 5 5 4.8 40 72
## 6 6 5.8 36 60
## 7 7 5.3 19 40
## 8 8 5.8 28 60
## 9 9 5.5 23 45
## 10 10 5.6 32 58
## 11 11 5.51 41.4 38
## 12 12 5.10 34.1 43.2
## 13 13 5.89 25.4 41.2
## 14 14 5.74 43.4 42.6
## 15 15 5.59 32.0 52.5