# K-Nearest Neighbors (KNN)
# Load the Las Vegas hotel reviews data and binarize the target:
# scores of 3 or below become class 0 (low), scores above 3 become class 1 (high)
dataset=read.csv("lasvegas.csv")
dataset$Score <- with(dataset, ifelse(Score<=3,0,1))
dataset$Score<-factor(dataset$Score)
View(dataset)
# Keep only the Nr. reviews, Nr. hotel reviews, Helpful votes and Score attributes (columns 2-5)
dataset=dataset[2:5]
View(dataset)
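# Optional sanity check (a sketch, not part of the analysis above): the
# 3-or-below threshold can leave the two classes imbalanced, which is worth
# knowing before reading the accuracy reported at the end.
table(dataset$Score)
prop.table(table(dataset$Score))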
# Splitting the data into training (2/3) and test (1/3) sets, stratified on Score
library(caTools)
set.seed(123)
split=sample.split(Y=dataset$Score,SplitRatio=2/3)
train_set=subset(x=dataset,split==TRUE)
test_set=subset(x=dataset,split==FALSE)
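# Quick check (a sketch, not in the original run): sample.split stratifies on
# Score, so the class proportions in both subsets should be close to those of
# the full data.
prop.table(table(train_set$Score))
prop.table(table(test_set$Score))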
# Feature Scaling (here each set is standardized with its own mean and sd)
train_set[-4]=scale(train_set[-4])
test_set[-4]=scale(test_set[-4])
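# Alternative scaling (a sketch, independent of the in-place scaling above):
# standardize the test set with the training set's centers and spreads, so the
# test data are transformed exactly as the training data were. The *_raw,
# train_scaled and test_alt names are introduced here for illustration only
# and are not used below.
train_raw=subset(x=dataset,split==TRUE)
test_raw=subset(x=dataset,split==FALSE)
train_scaled=scale(train_raw[-4])   # keeps the centers/sds as attributes
test_alt=test_raw
test_alt[-4]=scale(test_raw[-4],
                   center=attr(train_scaled,"scaled:center"),
                   scale=attr(train_scaled,"scaled:scale"))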
str(train_set)
## 'data.frame': 336 obs. of 4 variables:
## $ Nr..reviews : num -0.48 0.945 -0.441 -0.599 -0.309 ...
## $ Nr..hotel.reviews: num -0.508 0.24 -0.376 -0.64 -0.552 ...
## $ Helpful.votes : num -0.365 0.866 -0.346 -0.544 -0.465 ...
## $ Score : Factor w/ 2 levels "0","1": 2 1 2 2 2 1 1 1 1 1 ...
str(test_set)
## 'data.frame': 168 obs. of 4 variables:
## $ Nr..reviews : num -0.185 -0.6059 -0.2529 -0.0629 -0.5108 ...
## $ Nr..hotel.reviews: num -0.303 -0.456 -0.341 -0.189 -0.379 ...
## $ Helpful.votes : num -0.167 -0.681 -0.122 0.303 -0.479 ...
## $ Score : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 1 1 1 2 ...
# Fitting the KNN classifier (k = 5) and predicting the test-set classes
library(class)
y_pred=knn(train=train_set[,-4],test=test_set[,-4],cl=train_set[,4],k=5)
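# k = 5 above is a guess; a common way to choose k (a sketch, not part of the
# original run) is leave-one-out cross-validation on the training set with
# class::knn.cv, scanning a few odd values.
for(k in c(1,3,5,7,9,11)){
  cv_pred=knn.cv(train=train_set[,-4],cl=train_set[,4],k=k)
  print(paste("k =",k,"LOOCV accuracy:",round(mean(cv_pred==train_set[,4]),3)))
}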
# Helper that prints classification metrics from a 2x2 confusion matrix CM,
# treating the class in row/column 1 ("0", the low-score class) as positive.
# Note: the indexing below assumes rows hold predictions and columns actuals;
# cm further down is built as table(actual, predicted), so read the printed
# labels with that transposition in mind.
err_metric=function(CM)
{
  TN=CM[2,2]
  TP=CM[1,1]
  FP=CM[1,2]
  FN=CM[2,1]
  precision=(TP)/(TP+FP)
  recall_score=(TP)/(TP+FN)
  f1_score=2*((precision*recall_score)/(precision+recall_score))
  accuracy_model=(TP+TN)/(TP+TN+FP+FN)
  False_positive_rate=(FP)/(FP+TN)
  False_negative_rate=(FN)/(FN+TP)
  print(paste("Precision value of the model: ",round(precision,3)))
  print(paste("Accuracy of the model: ",round(accuracy_model,3)))
  print(paste("Recall value of the model: ",round(recall_score,3)))
  print(paste("False Positive rate of the model: ",round(False_positive_rate,2)))
  print(paste("False Negative rate of the model: ",round(False_negative_rate,2)))
  print(paste("f1 score of the model: ",round(f1_score,2)))
}
# Confusion matrix: rows are the actual Score, columns the predicted class
cm=table(test_set[,4],y_pred)
cm
##    y_pred
##       0   1
##   0   3  35
##   1  15 115
err_metric(cm)
## [1] "Precision value of the model: 0.079"
## [1] "Accuracy of the model: 0.702"
## [1] "Recall value of the model: 0.167"
## [1] "False Positive rate of the model: 0.23"
## [1] "False Negative rate of the model: 0.83"
## [1] "f1 score of the model: 0.11"