library(readr)
## Warning: package 'readr' was built under R version 4.4.3
heart <- read_csv("D:/heart.csv", show_col_types = FALSE)
summary(heart)
## age sex cp trestbps
## Min. :29.00 Min. :0.0000 Min. :0.0000 Min. : 94.0
## 1st Qu.:48.00 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:120.0
## Median :56.00 Median :1.0000 Median :1.0000 Median :130.0
## Mean :54.43 Mean :0.6956 Mean :0.9424 Mean :131.6
## 3rd Qu.:61.00 3rd Qu.:1.0000 3rd Qu.:2.0000 3rd Qu.:140.0
## Max. :77.00 Max. :1.0000 Max. :3.0000 Max. :200.0
## chol fbs restecg thalach
## Min. :126 Min. :0.0000 Min. :0.0000 Min. : 71.0
## 1st Qu.:211 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:132.0
## Median :240 Median :0.0000 Median :1.0000 Median :152.0
## Mean :246 Mean :0.1493 Mean :0.5298 Mean :149.1
## 3rd Qu.:275 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:166.0
## Max. :564 Max. :1.0000 Max. :2.0000 Max. :202.0
## exang oldpeak slope ca
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.800 Median :1.000 Median :0.0000
## Mean :0.3366 Mean :1.072 Mean :1.385 Mean :0.7541
## 3rd Qu.:1.0000 3rd Qu.:1.800 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.200 Max. :2.000 Max. :4.0000
## thal target
## Min. :0.000 Min. :0.0000
## 1st Qu.:2.000 1st Qu.:0.0000
## Median :2.000 Median :1.0000
## Mean :2.324 Mean :0.5132
## 3rd Qu.:3.000 3rd Qu.:1.0000
## Max. :3.000 Max. :1.0000
str(heart)
## spc_tbl_ [1,025 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ age : num [1:1025] 52 53 70 61 62 58 58 55 46 54 ...
## $ sex : num [1:1025] 1 1 1 1 0 0 1 1 1 1 ...
## $ cp : num [1:1025] 0 0 0 0 0 0 0 0 0 0 ...
## $ trestbps: num [1:1025] 125 140 145 148 138 100 114 160 120 122 ...
## $ chol : num [1:1025] 212 203 174 203 294 248 318 289 249 286 ...
## $ fbs : num [1:1025] 0 1 0 0 1 0 0 0 0 0 ...
## $ restecg : num [1:1025] 1 0 1 1 1 0 2 0 0 0 ...
## $ thalach : num [1:1025] 168 155 125 161 106 122 140 145 144 116 ...
## $ exang : num [1:1025] 0 1 1 0 0 0 0 1 0 1 ...
## $ oldpeak : num [1:1025] 1 3.1 2.6 0 1.9 1 4.4 0.8 0.8 3.2 ...
## $ slope : num [1:1025] 2 0 0 2 1 1 0 1 2 1 ...
## $ ca : num [1:1025] 2 0 0 1 3 0 3 1 0 2 ...
## $ thal : num [1:1025] 3 3 3 3 2 2 1 3 3 2 ...
## $ target : num [1:1025] 0 0 0 0 0 1 0 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. age = col_double(),
## .. sex = col_double(),
## .. cp = col_double(),
## .. trestbps = col_double(),
## .. chol = col_double(),
## .. fbs = col_double(),
## .. restecg = col_double(),
## .. thalach = col_double(),
## .. exang = col_double(),
## .. oldpeak = col_double(),
## .. slope = col_double(),
## .. ca = col_double(),
## .. thal = col_double(),
## .. target = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
library(mice)
## Warning: package 'mice' was built under R version 4.4.3
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
set.seed(123)
dataset_impute <- mice(heart[,2:13], print = FALSE) #Imputasi hanya fitur (tanpa ID dan target)
heart <- cbind(heart[, 14], complete(dataset_impute, 1)) #Gabungkan target + fitur
colnames(heart)[1] <- "target" #Beri nama kolom target agar konsisten
summary(heart)
## target sex cp trestbps
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 94.0
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:120.0
## Median :1.0000 Median :1.0000 Median :1.0000 Median :130.0
## Mean :0.5132 Mean :0.6956 Mean :0.9424 Mean :131.6
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:2.0000 3rd Qu.:140.0
## Max. :1.0000 Max. :1.0000 Max. :3.0000 Max. :200.0
## chol fbs restecg thalach
## Min. :126 Min. :0.0000 Min. :0.0000 Min. : 71.0
## 1st Qu.:211 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:132.0
## Median :240 Median :0.0000 Median :1.0000 Median :152.0
## Mean :246 Mean :0.1493 Mean :0.5298 Mean :149.1
## 3rd Qu.:275 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:166.0
## Max. :564 Max. :1.0000 Max. :2.0000 Max. :202.0
## exang oldpeak slope ca
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.800 Median :1.000 Median :0.0000
## Mean :0.3366 Mean :1.072 Mean :1.385 Mean :0.7541
## 3rd Qu.:1.0000 3rd Qu.:1.800 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :6.200 Max. :2.000 Max. :4.0000
## thal
## Min. :0.000
## 1st Qu.:2.000
## Median :2.000
## Mean :2.324
## 3rd Qu.:3.000
## Max. :3.000
library(caTools) #library untuk pembagian dataset
## Warning: package 'caTools' was built under R version 4.4.3
set.seed(150)
split = sample.split(heart, SplitRatio = 0.7) #Membagi data training dan data testing
training_set=subset(heart, split==TRUE)
test_set=subset(heart, split==FALSE)
dim(training_set) #Dimensi data training
## [1] 711 13
dim(test_set)
## [1] 314 13
topredict_set <- test_set[2:13]
dim(topredict_set)
## [1] 314 12
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
model_naive <- naiveBayes(target ~ ., data = training_set)
preds_naive <- predict(model_naive, newdata = topredict_set)
(conf_matrix_naive <- table(preds_naive, test_set$target))
##
## preds_naive 0 1
## 0 117 16
## 1 32 149
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Loading required package: lattice
confusionMatrix(conf_matrix_naive)
## Confusion Matrix and Statistics
##
##
## preds_naive 0 1
## 0 117 16
## 1 32 149
##
## Accuracy : 0.8471
## 95% CI : (0.8025, 0.8851)
## No Information Rate : 0.5255
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.6919
##
## Mcnemar's Test P-Value : 0.03038
##
## Sensitivity : 0.7852
## Specificity : 0.9030
## Pos Pred Value : 0.8797
## Neg Pred Value : 0.8232
## Prevalence : 0.4745
## Detection Rate : 0.3726
## Detection Prevalence : 0.4236
## Balanced Accuracy : 0.8441
##
## 'Positive' Class : 0
##