# Import the dataset and keep only column 5 and columns 8 to 13
# (the columns shown in the summary output below).
data <- read.csv("lasvegas.csv")
View(data)
data <- data[c(5, 8:13)]
summary(data)
## Score Pool Gym Tennis.court
## Min. :1.000 Length:504 Length:504 Length:504
## 1st Qu.:4.000 Class :character Class :character Class :character
## Median :4.000 Mode :character Mode :character Mode :character
## Mean :4.123
## 3rd Qu.:5.000
## Max. :5.000
## Spa Casino Free.internet
## Length:504 Length:504 Length:504
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Collapse the numeric score into two classes and store it as a factor:
# scores of 3 or lower become "Worst", anything higher becomes "Best".
data$Score <- factor(ifelse(data$Score <= 3, "Worst", "Best"))
View(data)
# Splitting the dataset: sample.split() is given the Score labels and a
# 2/3 ratio; rows flagged TRUE form the training set, the rest the test set.
library(caTools)
set.seed(899)  # fixed seed so the same split is drawn on every run
split <- sample.split(data$Score, SplitRatio = 2 / 3)
# Use the logical mask directly instead of comparing against T / F, which
# are ordinary variables that can be reassigned.
training_set <- subset(data, split)
test_set <- subset(data, !split)
dim(training_set)
## [1] 336 7
# Fit a naive Bayes classifier on the training set: every column except the
# first is passed as a predictor, and Score is the class label.
library(e1071)
classifier <- naiveBayes(x = training_set[-1], y = training_set$Score)
summary(classifier)
## Length Class Mode
## apriori 2 table numeric
## tables 6 -none- list
## levels 2 -none- character
## isnumeric 6 -none- logical
## call 3 -none- call
# Predict a class label for every row of the test set and show the result.
y_pred <- predict(classifier, newdata = test_set)
y_pred
## [1] Worst Worst Worst Worst Best Best Best Best Best Best Best Best
## [13] Best Best Best Best Best Best Best Best Best Best Best Best
## [25] Best Best Best Best Best Best Best Best Best Best Best Best
## [37] Best Best Best Best Best Best Best Best Best Best Best Best
## [49] Best Best Best Best Best Best Best Best Best Best Best Best
## [61] Best Best Best Best Best Best Best Best Best Best Best Best
## [73] Best Best Best Best Best Best Best Best Best Best Best Best
## [85] Best Best Best Best Best Best Best Best Best Best Best Best
## [97] Best Best Best Best Best Best Best Best Best Best Best Best
## [109] Best Best Best Best Best Best Best Best Best Best Best Best
## [121] Best Best Best Best Best Best Best Best Best Best Best Best
## [133] Best Best Best Best Best Best Best Best Best Best Best Best
## [145] Best Best Best Best Best Best Best Best Best Best Best Best
## [157] Best Best Best Best Best Best Best Best Best Best Best Best
## Levels: Best Worst
#Confusion matrix
# Print classification metrics computed from a 2x2 confusion matrix.
#
# Layout assumed by the indexing below (positive class first):
#   CM[1, 1] true positives    CM[1, 2] false positives
#   CM[2, 1] false negatives   CM[2, 2] true negatives
# i.e. rows hold the predicted class and columns the actual class.
#
# Prints precision, accuracy, recall, false positive rate, false negative
# rate and f1 score (in that order), each rounded to two decimals. The value
# of the last print() call (the f1 line) is what the function returns.
err_metric <- function(CM) {
  true_pos <- CM[1, 1]
  true_neg <- CM[2, 2]
  false_pos <- CM[1, 2]
  false_neg <- CM[2, 1]

  precision <- true_pos / (true_pos + false_pos)
  recall <- true_pos / (true_pos + false_neg)
  f1 <- 2 * ((precision * recall) / (precision + recall))
  accuracy <- (true_pos + true_neg) /
    (true_pos + true_neg + false_pos + false_neg)
  fp_rate <- false_pos / (false_pos + true_neg)
  fn_rate <- false_neg / (false_neg + true_pos)

  # One labelled line per metric, value rounded to two decimals.
  report <- function(label, value) {
    print(paste(label, round(value, 2)))
  }

  report("Precision value of the model: ", precision)
  report("Accuracy of the model: ", accuracy)
  report("Recall value of the model: ", recall)
  report("False Positive rate of the model: ", fp_rate)
  report("False Negative rate of the model: ", fn_rate)
  report("f1 score of the model: ", f1)
}
# Confusion matrix for the test set. err_metric() reads CM[1, 2] as false
# positives and CM[2, 1] as false negatives, so it needs predictions in the
# rows and actual labels in the columns. Building the table the other way
# round swaps FP and FN, which reports precision and recall under each
# other's labels and distorts the false positive / false negative rates.
# "Best" is the first factor level and is therefore the positive class.
cm <- table(Predicted = y_pred, Actual = test_set$Score)
cm
##          Actual
## Predicted Best Worst
##     Best   129    35
##     Worst    1     3
err_metric(cm)
## [1] "Precision value of the model:  0.79"
## [1] "Accuracy of the model:  0.79"
## [1] "Recall value of the model:  0.99"
## [1] "False Positive rate of the model:  0.92"
## [1] "False Negative rate of the model:  0.01"
## [1] "f1 score of the model:  0.88"