# Load the input data ----
# The working directory is pointed at the folder that holds the CSV; the file
# itself is still read through its absolute path.
setwd("C:/Users/acer/Downloads")
csv_path <- "C:/Users/acer/Downloads/Maternal Health Risk Data Set(Machine Learning).csv"
# read.csv2() expects ';' as the field separator and ',' as the decimal mark.
dataset <- read.csv2(file = csv_path)
# Show the first rows as a quick sanity check of the import.
head(dataset)
##   Age SystolicBP DiastolicBP   BS BodyTemp HeartRate RiskLevel
## 1  25        130          80 15.0       98        86 high risk
## 2  35        140          90 13.0       98        70 high risk
## 3  29         90          70  8.0      100        80 high risk
## 4  30        140          85  7.0       98        70 high risk
## 5  35        120          60  6.1       98        76  low risk
## 6  23        140          80  7.0       98        70 high risk
# Required packages
library(e1071) # provides naiveBayes(), used below to fit the classifier
library(caret) # provides confusionMatrix(), used below to evaluate the predictions
# Split the data into a training set (75%) and a testing set (25%) ----
# Rows are drawn WITHOUT replacement: sampling with replacement would put
# duplicated rows into the training set and leave far more than 25% of the
# rows in the testing set. Any output recorded with the old split will
# therefore no longer match.
set.seed(123) # fixed seed so the split (and the reported metrics) is reproducible
n_obs <- nrow(dataset)
sampel <- sample(seq_len(n_obs), size = floor(0.75 * n_obs), replace = FALSE)
training <- dataset[sampel, , drop = FALSE]
# setdiff() instead of negative indexing so an empty `sampel` does not yield
# an empty testing set by accident.
testing <- dataset[setdiff(seq_len(n_obs), sampel), , drop = FALSE]

# Encode the response as a factor with the same levels in both subsets, so
# that predictions and reference labels always share one set of classes even
# if a class happens to be absent from one subset.
risk_levels <- sort(unique(as.character(dataset$RiskLevel)))
training$RiskLevel <- factor(training$RiskLevel, levels = risk_levels)
testing$RiskLevel <- factor(testing$RiskLevel, levels = risk_levels)

# Fit the naive Bayes model: RiskLevel explained by all other columns.
nb_model <- naiveBayes(RiskLevel ~ ., data = training)

# Predict the class of every row in the testing set.
predictions_nb <- predict(nb_model, testing)

# Evaluate the model: confusion matrix plus accuracy and per-class statistics.
hasil <- confusionMatrix(data = predictions_nb, reference = testing$RiskLevel)
hasil
## Confusion Matrix and Statistics
## 
##               
## predictions_nb high risk low risk mid risk
##      high risk        95        4       10
##      low risk         18      169      109
##      mid risk         21       15       35
## 
## Overall Statistics
##                                          
##                Accuracy : 0.6282         
##                  95% CI : (0.583, 0.6717)
##     No Information Rate : 0.395          
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.4205         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
## 
## Statistics by Class:
## 
##                      Class: high risk Class: low risk Class: mid risk
## Sensitivity                    0.7090          0.8989         0.22727
## Specificity                    0.9591          0.5590         0.88820
## Pos Pred Value                 0.8716          0.5709         0.49296
## Neg Pred Value                 0.8937          0.8944         0.70617
## Prevalence                     0.2815          0.3950         0.32353
## Detection Rate                 0.1996          0.3550         0.07353
## Detection Prevalence           0.2290          0.6218         0.14916
## Balanced Accuracy              0.8340          0.7290         0.55774