# Read the cardio CSV into a data frame and print a per-column summary
# to eyeball the value ranges before any preprocessing.
cardio <- utils::read.csv(file = "C:/Users/Arifin/Downloads/cardio.csv")
summary(object = cardio)
##    patientid          age            gender      chestpain      restingBP    
##  Min.   : 1.00   Min.   :39.00   Min.   :0.0   Min.   :0.00   Min.   :120.0  
##  1st Qu.: 3.25   1st Qu.:47.75   1st Qu.:0.0   1st Qu.:0.25   1st Qu.:128.5  
##  Median : 5.50   Median :53.50   Median :1.0   Median :1.00   Median :133.5  
##  Mean   : 5.50   Mean   :53.20   Mean   :0.6   Mean   :1.30   Mean   :134.0  
##  3rd Qu.: 7.75   3rd Qu.:60.25   3rd Qu.:1.0   3rd Qu.:2.00   3rd Qu.:139.5  
##  Max.   :10.00   Max.   :63.00   Max.   :1.0   Max.   :3.00   Max.   :150.0  
##  serumcholestrol fastingbloodsugar restingelectro  maxheartrate   exerciseangia
##  Min.   :210.0   Min.   :0.0       Min.   :0.00   Min.   :138.0   Min.   :0.0  
##  1st Qu.:232.5   1st Qu.:0.0       1st Qu.:0.25   1st Qu.:142.8   1st Qu.:0.0  
##  Median :247.5   Median :0.0       Median :1.00   Median :149.0   Median :0.0  
##  Mean   :249.5   Mean   :0.4       Mean   :1.00   Mean   :150.1   Mean   :0.4  
##  3rd Qu.:267.5   3rd Qu.:1.0       3rd Qu.:1.75   3rd Qu.:157.2   3rd Qu.:1.0  
##  Max.   :290.0   Max.   :1.0       Max.   :2.00   Max.   :165.0   Max.   :1.0  
##     oldpeak          slope     noofmajorvessels     target   
##  Min.   :0.500   Min.   :1.0   Min.   :0.00     Min.   :0.0  
##  1st Qu.:0.925   1st Qu.:1.0   1st Qu.:0.00     1st Qu.:0.0  
##  Median :1.250   Median :2.0   Median :0.50     Median :0.5  
##  Mean   :1.450   Mean   :1.6   Mean   :0.80     Mean   :0.5  
##  3rd Qu.:2.050   3rd Qu.:2.0   3rd Qu.:1.75     3rd Qu.:1.0  
##  Max.   :2.500   Max.   :2.0   Max.   :2.00     Max.   :1.0
# Drop the ID column and impute missing feature values.
# Columns are selected by name rather than by position, so this step does not
# depend on column order (in the raw file `patientid` is column 1 and
# `target` is column 14).
library(mice)        # Missing-value imputation
## Warning: package 'mice' was built under R version 4.4.3
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
set.seed(123)
# Impute the features only (ID and target are excluded).
feature_cols <- setdiff(names(cardio), c("patientid", "target"))
# `printFlag` is spelled out in full instead of relying on partial matching.
dataset_impute <- mice(cardio[, feature_cols], printFlag = FALSE)
## Warning: Number of logged events: 2
# Take the first completed dataset and put the target back as the first
# column, named explicitly at construction time.
cardio <- data.frame(target = cardio$target, complete(dataset_impute, 1))
summary(cardio)
##      target         age            gender      chestpain      restingBP    
##  Min.   :0.0   Min.   :39.00   Min.   :0.0   Min.   :0.00   Min.   :120.0  
##  1st Qu.:0.0   1st Qu.:47.75   1st Qu.:0.0   1st Qu.:0.25   1st Qu.:128.5  
##  Median :0.5   Median :53.50   Median :1.0   Median :1.00   Median :133.5  
##  Mean   :0.5   Mean   :53.20   Mean   :0.6   Mean   :1.30   Mean   :134.0  
##  3rd Qu.:1.0   3rd Qu.:60.25   3rd Qu.:1.0   3rd Qu.:2.00   3rd Qu.:139.5  
##  Max.   :1.0   Max.   :63.00   Max.   :1.0   Max.   :3.00   Max.   :150.0  
##  serumcholestrol fastingbloodsugar restingelectro  maxheartrate   exerciseangia
##  Min.   :210.0   Min.   :0.0       Min.   :0.00   Min.   :138.0   Min.   :0.0  
##  1st Qu.:232.5   1st Qu.:0.0       1st Qu.:0.25   1st Qu.:142.8   1st Qu.:0.0  
##  Median :247.5   Median :0.0       Median :1.00   Median :149.0   Median :0.0  
##  Mean   :249.5   Mean   :0.4       Mean   :1.00   Mean   :150.1   Mean   :0.4  
##  3rd Qu.:267.5   3rd Qu.:1.0       3rd Qu.:1.75   3rd Qu.:157.2   3rd Qu.:1.0  
##  Max.   :290.0   Max.   :1.0       Max.   :2.00   Max.   :165.0   Max.   :1.0  
##     oldpeak          slope     noofmajorvessels
##  Min.   :0.500   Min.   :1.0   Min.   :0.00    
##  1st Qu.:0.925   1st Qu.:1.0   1st Qu.:0.00    
##  Median :1.250   Median :2.0   Median :0.50    
##  Mean   :1.450   Mean   :1.6   Mean   :0.80    
##  3rd Qu.:2.050   3rd Qu.:2.0   3rd Qu.:1.75    
##  Max.   :2.500   Max.   :2.0   Max.   :2.00
# Split data
library(caTools)     # Split data
## Warning: package 'caTools' was built under R version 4.4.3
# Fix the RNG seed, then let sample.split() flag roughly 70% of the rows
# (based on the target labels) for training; the remaining rows form the
# test set.
set.seed(150)
split <- sample.split(Y = cardio$target, SplitRatio = 0.7)
training_set <- cardio[split, , drop = FALSE]
test_set <- cardio[!split, , drop = FALSE]

# Remove the target column from the test set so only features remain.
# The column is dropped by name, so this keeps working if the column order
# or the number of features changes.
topredict_set <- test_set[, setdiff(names(test_set), "target"), drop = FALSE]

# Naive Bayes implementation
library(e1071)       # Naive Bayes
## Warning: package 'e1071' was built under R version 4.4.3
# Fit the Naive Bayes classifier on the training rows.
# The 0/1 target is converted to a factor first: naiveBayes() takes a class
# vector, and an explicit factor pins down the class levels that predict()
# will return instead of leaving that to how a numeric response is coerced.
model_naive <- naiveBayes(
  x = training_set[, setdiff(names(training_set), "target"), drop = FALSE],
  y = factor(training_set$target)
)

# Predict class labels for the held-out feature rows.
preds_naive <- predict(model_naive, newdata = topredict_set)

# Evaluate the results
library(caret)       # Evaluasi dan confusion matrix
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
# Cross-tabulate predictions (rows) against the true labels (columns).
# The true labels are turned into a factor with the same levels as the
# predictions so the table stays square even when a class is missing from
# the test labels; confusionMatrix() needs a square table.
conf_matrix_naive <- table(
  preds_naive,
  factor(test_set$target, levels = levels(preds_naive))
)
# NOTE(review): the printed output reports class 0 as the 'Positive' class;
# pass positive = "1" if 1 is the outcome of interest -- confirm.
confusionMatrix(conf_matrix_naive)
## Confusion Matrix and Statistics
## 
##            
## preds_naive 0 1
##           0 1 0
##           1 0 1
##                                      
##                Accuracy : 1          
##                  95% CI : (0.1581, 1)
##     No Information Rate : 0.5        
##     P-Value [Acc > NIR] : 0.25       
##                                      
##                   Kappa : 1          
##                                      
##  Mcnemar's Test P-Value : NA         
##                                      
##             Sensitivity : 1.0        
##             Specificity : 1.0        
##          Pos Pred Value : 1.0        
##          Neg Pred Value : 1.0        
##              Prevalence : 0.5        
##          Detection Rate : 0.5        
##    Detection Prevalence : 0.5        
##       Balanced Accuracy : 1.0        
##                                      
##        'Positive' Class : 0          
##