Required Packages
# install.packages("e1071")
Classification using Naive Bayes
# Fit a naive Bayes classifier on the complete iris data set, then score it
# on the very same rows it was trained on (no held-out data in this chunk).
library("e1071")

df <- iris
model <- naiveBayes(Species ~ ., data = df)

# Predict from every column except the Species label.
predicted_values <- predict(model, df[, names(df) != "Species"])

# Count the rows whose predicted class equals the true class.
correctly_predicted <- sum(predicted_values == df$Species)
print(paste("Correctly predicted", correctly_predicted))
## [1] "Correctly predicted 144"

# Fraction of rows classified correctly.
accuracy <- correctly_predicted / nrow(df)
print(paste("accuracy", accuracy))
## [1] "accuracy 0.96"
Train/test splitting.
# Hold out part of iris as a test set, fit naive Bayes on the remaining rows,
# and report accuracy on the held-out rows only.
library("e1071")

df <- iris

# Share of the rows used for training; the remainder becomes the test set.
train_test_split_percentage <- 0.66

# Draw the training row indices at random (without replacement).
train_rows <- sample(nrow(df), nrow(df) * train_test_split_percentage)
train_data <- df[train_rows, ]
test_data <- df[-train_rows, ]

model <- naiveBayes(Species ~ ., data = train_data)

# Predict the held-out rows from every column except the Species label.
predicted_values_test <- predict(
  model,
  test_data[, names(test_data) != "Species"]
)

correctly_predicted_test <- sum(predicted_values_test == test_data$Species)
print(paste("Correctly predicted on TEST", correctly_predicted_test))
## [1] "Correctly predicted on TEST 48"

accuracy <- correctly_predicted_test / nrow(test_data)
print(paste("accuracy on Test Dataset", accuracy))
## [1] "accuracy on Test Dataset 0.941176470588235"
Installing the rpart package
# install.packages("rpart")
Decision trees in R
# Fit an rpart decision tree on the complete iris data set, then score it on
# the very same rows it was trained on (no held-out data in this chunk).
library("e1071")
library("rpart")

df <- iris
model <- rpart(Species ~ ., data = df)

# type = "class" asks predict() for class labels, which is what the
# comparison against the Species column below needs.
predicted_values <- predict(
  model,
  df[, names(df) != "Species"],
  type = "class"
)

# Count the rows whose predicted class equals the true class.
correctly_predicted <- sum(predicted_values == df$Species)
print(paste("Correctly predicted", correctly_predicted))
## [1] "Correctly predicted 144"

# Fraction of rows classified correctly.
accuracy <- correctly_predicted / nrow(df)
print(paste("accuracy", accuracy))
## [1] "accuracy 0.96"
# Train/test split for the decision tree: hold out part of iris as a test
# set, fit an rpart tree on the remaining rows, and report accuracy on the
# held-out rows only.
library("e1071")
library("rpart")

df <- iris

# Share of the rows used for training; the remainder becomes the test set.
train_test_split_percentage <- 0.66

# Draw the training row indices at random (without replacement). No seed is
# set here, so call set.seed() beforehand if a repeatable split is needed.
train_rows <- sample(nrow(df), nrow(df) * train_test_split_percentage)
train_data <- df[train_rows, ]
test_data <- df[-train_rows, ]

# Fit the decision tree. This chunk previously fitted naiveBayes() here, so
# it never evaluated a tree on held-out data.
model <- rpart(Species ~ ., data = train_data)

# type = "class" asks predict() for class labels, which is what the
# comparison against the Species column below needs.
predicted_values_test <- predict(model, test_data[, 1:4], type = "class")

correctly_predicted_test <- sum(predicted_values_test == test_data[, "Species"])
print(paste("Correctly predicted on TEST", correctly_predicted_test))
## Output varies from run to run: the split above is drawn without a fixed seed.

accuracy <- correctly_predicted_test / nrow(test_data)
print(paste("accuracy on Test Dataset", accuracy))
## Output varies from run to run: the split above is drawn without a fixed seed.