# Load the cardio dataset.
# The CSV location defaults to the original author's local path. On another
# machine, set the CARDIO_CSV environment variable to the file's location
# instead of editing this script.
cardio_path <- Sys.getenv("CARDIO_CSV", unset = "")
if (!nzchar(cardio_path)) {
  cardio_path <- "C:/Users/Arifin/Downloads/cardio.csv"
}
# Fail early with an actionable message rather than a bare connection error.
if (!file.exists(cardio_path)) {
  stop(
    "Dataset not found at '", cardio_path,
    "'. Set the CARDIO_CSV environment variable to the CSV location.",
    call. = FALSE
  )
}
cardio <- read.csv(cardio_path)
summary(cardio)
## patientid age gender chestpain restingBP
## Min. : 1.00 Min. :39.00 Min. :0.0 Min. :0.00 Min. :120.0
## 1st Qu.: 3.25 1st Qu.:47.75 1st Qu.:0.0 1st Qu.:0.25 1st Qu.:128.5
## Median : 5.50 Median :53.50 Median :1.0 Median :1.00 Median :133.5
## Mean : 5.50 Mean :53.20 Mean :0.6 Mean :1.30 Mean :134.0
## 3rd Qu.: 7.75 3rd Qu.:60.25 3rd Qu.:1.0 3rd Qu.:2.00 3rd Qu.:139.5
## Max. :10.00 Max. :63.00 Max. :1.0 Max. :3.00 Max. :150.0
## serumcholestrol fastingbloodsugar restingelectro maxheartrate exerciseangia
## Min. :210.0 Min. :0.0 Min. :0.00 Min. :138.0 Min. :0.0
## 1st Qu.:232.5 1st Qu.:0.0 1st Qu.:0.25 1st Qu.:142.8 1st Qu.:0.0
## Median :247.5 Median :0.0 Median :1.00 Median :149.0 Median :0.0
## Mean :249.5 Mean :0.4 Mean :1.00 Mean :150.1 Mean :0.4
## 3rd Qu.:267.5 3rd Qu.:1.0 3rd Qu.:1.75 3rd Qu.:157.2 3rd Qu.:1.0
## Max. :290.0 Max. :1.0 Max. :2.00 Max. :165.0 Max. :1.0
## oldpeak slope noofmajorvessels target
## Min. :0.500 Min. :1.0 Min. :0.00 Min. :0.0
## 1st Qu.:0.925 1st Qu.:1.0 1st Qu.:0.00 1st Qu.:0.0
## Median :1.250 Median :2.0 Median :0.50 Median :0.5
## Mean :1.450 Mean :1.6 Mean :0.80 Mean :0.5
## 3rd Qu.:2.050 3rd Qu.:2.0 3rd Qu.:1.75 3rd Qu.:1.0
## Max. :2.500 Max. :2.0 Max. :2.00 Max. :1.0
# Drop the ID column and impute missing feature values.
# Columns are selected by name (not by position) so that a change in the CSV
# column order cannot silently feed the ID or the target into the imputation.
library(mice) # multiple imputation of missing values
## Warning: package 'mice' was built under R version 4.4.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
id_col <- "patientid"
target_col <- "target"
stopifnot(all(c(id_col, target_col) %in% names(cardio)))
# Every remaining column, in its original order, is treated as a feature.
feature_cols <- setdiff(names(cardio), c(id_col, target_col))
set.seed(123)
# Impute the features only; `printFlag` is spelled out in full instead of
# relying on partial matching of `print`.
dataset_impute <- mice(cardio[, feature_cols, drop = FALSE], printFlag = FALSE)
## Warning: Number of logged events: 2
# NOTE(review): mice logged 2 events here; dataset_impute$loggedEvents lists
# them if they need to be checked.
# Rebuild the working data frame as target + first completed feature set, with
# the target column named directly instead of renamed afterwards.
cardio <- data.frame(
  target = cardio[[target_col]],
  complete(dataset_impute, 1)
)
summary(cardio)
## target age gender chestpain restingBP
## Min. :0.0 Min. :39.00 Min. :0.0 Min. :0.00 Min. :120.0
## 1st Qu.:0.0 1st Qu.:47.75 1st Qu.:0.0 1st Qu.:0.25 1st Qu.:128.5
## Median :0.5 Median :53.50 Median :1.0 Median :1.00 Median :133.5
## Mean :0.5 Mean :53.20 Mean :0.6 Mean :1.30 Mean :134.0
## 3rd Qu.:1.0 3rd Qu.:60.25 3rd Qu.:1.0 3rd Qu.:2.00 3rd Qu.:139.5
## Max. :1.0 Max. :63.00 Max. :1.0 Max. :3.00 Max. :150.0
## serumcholestrol fastingbloodsugar restingelectro maxheartrate exerciseangia
## Min. :210.0 Min. :0.0 Min. :0.00 Min. :138.0 Min. :0.0
## 1st Qu.:232.5 1st Qu.:0.0 1st Qu.:0.25 1st Qu.:142.8 1st Qu.:0.0
## Median :247.5 Median :0.0 Median :1.00 Median :149.0 Median :0.0
## Mean :249.5 Mean :0.4 Mean :1.00 Mean :150.1 Mean :0.4
## 3rd Qu.:267.5 3rd Qu.:1.0 3rd Qu.:1.75 3rd Qu.:157.2 3rd Qu.:1.0
## Max. :290.0 Max. :1.0 Max. :2.00 Max. :165.0 Max. :1.0
## oldpeak slope noofmajorvessels
## Min. :0.500 Min. :1.0 Min. :0.00
## 1st Qu.:0.925 1st Qu.:1.0 1st Qu.:0.00
## Median :1.250 Median :2.0 Median :0.50
## Mean :1.450 Mean :1.6 Mean :0.80
## 3rd Qu.:2.050 3rd Qu.:2.0 3rd Qu.:1.75
## Max. :2.500 Max. :2.0 Max. :2.00
# Split the data into training and test sets
library(caTools) # provides sample.split()
## Warning: package 'caTools' was built under R version 4.4.3
set.seed(150)
# Logical mask over the rows of `cardio`: TRUE marks a training row.
split <- sample.split(cardio[["target"]], SplitRatio = 0.7)
training_set <- cardio[split, ]
test_set <- cardio[!split, ]
# Feature-only copy of the test set (columns 2 to 13; the target is column 1)
topredict_set <- test_set[2:13]
# Fit the Naive Bayes classifier on the training rows
library(e1071) # provides naiveBayes()
## Warning: package 'e1071' was built under R version 4.4.3
model_naive <- naiveBayes(target ~ ., data = training_set)
# Predict class labels for the held-out rows (features only)
preds_naive <- predict(model_naive, newdata = topredict_set)
# Evaluate the predictions with a confusion matrix
library(caret) # provides confusionMatrix()
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
# Give the reference labels the same level set as the predictions so the table
# stays square even when a class is absent from the test rows.
conf_matrix_naive <- table(
  preds_naive,
  factor(test_set$target, levels = levels(preds_naive))
)
# caret takes the first level ("0") as the positive class unless told
# otherwise, so the positive class is set explicitly.
# NOTE(review): assumes target == 1 marks the condition of interest; confirm
# against the dataset's data dictionary.
confusionMatrix(conf_matrix_naive, positive = "1")
## Confusion Matrix and Statistics
##
##
## preds_naive 0 1
## 0 1 0
## 1 0 1
##
## Accuracy : 1
## 95% CI : (0.1581, 1)
## No Information Rate : 0.5
## P-Value [Acc > NIR] : 0.25
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0
## Specificity : 1.0
## Pos Pred Value : 1.0
## Neg Pred Value : 1.0
## Prevalence : 0.5
## Detection Rate : 0.5
## Detection Prevalence : 0.5
## Balanced Accuracy : 1.0
##
## 'Positive' Class : 1
##