library(e1071)
# Load the student placement records from CSV into the data frame `m`.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this exact file exists.
m <- read.csv(
  file = "C:/Users/pradeep/OneDrive/datasets/students_placement_data.csv"
)
# Show the first rows as a quick check of the columns that were read.
head(m)
## Roll.No Gender Section SSC.Percentage inter_Diploma_percentage
## 1 1 M A 87.30 65.3
## 2 2 F B 89.00 92.4
## 3 3 F A 67.00 68.0
## 4 4 M A 71.00 70.4
## 5 5 M A 67.00 65.5
## 6 6 M A 81.26 68.0
## B.Tech_percentage Backlogs registered_for_.Placement_Training
## 1 40.00 18 NO
## 2 71.45 0 yes
## 3 45.26 13 yes
## 4 36.47 17 yes
## 5 42.52 17 yes
## 6 62.20 6 yes
## placement.status
## 1 Not placed
## 2 Placed
## 3 Not placed
## 4 Not placed
## 5 Not placed
## 6 Not placed
# Split the rows of `m` into training data and test data.
n <- nrow(m) # n is the total number of rows
set.seed(101) # fixed seed so the same rows are drawn on every run
# Draw round(0.85 * n) row indices WITH replacement, so a row may be sampled
# (and appear in the training data) more than once. The test data is every row
# that was never drawn, so it is usually larger than 15 percent of the rows.
# NOTE(review): use replace = FALSE if an 85/15 partition without duplicated
# training rows is intended.
data_index <- sample(seq_len(n), size = round(0.85 * n), replace = TRUE)
train_data <- m[data_index, ]
test_data <- m[-data_index, ]
print(head(train_data))
## Roll.No Gender Section SSC.Percentage inter_Diploma_percentage
## 44 44 M B 86.00 92.5
## 6 6 M A 81.26 68.0
## 84 84 M A 89.00 88.9
## 77 77 M A 78.00 59.0
## 30 30 M B 72.00 88.1
## 36 36 M B 87.33 90.0
## B.Tech_percentage Backlogs registered_for_.Placement_Training
## 44 70.85 0 yes
## 6 62.20 6 yes
## 84 63.00 1 NO
## 77 51.06 17 NO
## 30 69.60 0 yes
## 36 68.70 0 yes
## placement.status
## 44 Not placed
## 6 Not placed
## 84 Not placed
## 77 Not placed
## 30 Placed
## 36 Placed
# Preview the first six rows of the test data.
print(head(test_data, n = 6L))
## Roll.No Gender Section SSC.Percentage inter_Diploma_percentage
## 1 1 M A 87.30 65.3
## 4 4 M A 71.00 70.4
## 7 7 M A 71.00 56.5
## 8 8 F A 84.83 79.3
## 11 11 F B 82.33 76.3
## 12 12 M A 91.00 66.0
## B.Tech_percentage Backlogs registered_for_.Placement_Training
## 1 40.00 18 NO
## 4 36.47 17 yes
## 7 33.77 20 yes
## 8 61.02 3 NO
## 11 71.49 0 NO
## 12 49.67 16 yes
## placement.status
## 1 Not placed
## 4 Not placed
## 7 Not placed
## 8 Not placed
## 11 Placed
## 12 Not placed
# Fit a naive Bayes classifier for placement.status on the training rows,
# using backlogs, gender and the three percentage columns as predictors.
stu_model1 <- naiveBayes(
  placement.status ~ Backlogs + Gender + B.Tech_percentage +
    SSC.Percentage + inter_Diploma_percentage,
  data = train_data
)
# Predict a class label for every row of the test data and display the result.
p <- predict(stu_model1, newdata = test_data, type = "class")
print(p)
## [1] Not placed Not placed Not placed Not placed Placed Not placed
## [7] Placed Not placed Placed Placed Placed Placed
## [13] Placed Not placed Placed Not placed Not placed Placed
## [19] Placed Placed Not placed Not placed Placed Not placed
## [25] Placed Not placed Not placed Placed Placed Not placed
## [31] Placed Placed Placed Placed Not placed Placed
## [37] Not placed Placed Not placed Not placed Not placed Placed
## [43] Placed Not placed Placed Not placed Not placed Placed
## [49] Placed
## Levels: Not placed Placed
# The "table" command is used to build the confusion matrix. "test_data[,9]"
# holds the original class labels and "p" holds the predicted class labels.
# The confusion matrix gives the number of correct predictions and the number
# of wrong predictions.
# Cross-tabulate the labels in column 9 of test_data (rows) against the
# predicted labels `p` (columns), then display the resulting table.
t <- table(test_data[, 9], p)
print(t)
## p
## Not placed Placed
## Not placed 22 9
## Placed 1 17
# In the above table, (22 + 17) are correct predictions and (9 + 1) are wrong
# predictions.
# The accuracy of the model is the number of correct predictions in the test
# set divided by the total number of samples in the test set.
# Accuracy: the diagonal of `t` counts the cases where the row label equals the
# column label; divide that count by the total number of tabulated samples.
print(sum(diag(t)) / sum(t))
## [1] 0.7959184