Importing the required libraries

library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Loading required package: lattice

Importing the dataset

data <- read_excel("D:/SOBAT KARIER/PORTOFOLIO/Jantung/hearth.xlsx")
head(data)
## # A tibble: 6 × 14
##     age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak slope
##   <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>
## 1    69     1     0      160   234     1       2     131     0     0.1     1
## 2    69     0     0      140   239     0       0     151     0     1.8     0
## 3    66     0     0      150   226     0       0     114     0     2.6     2
## 4    65     1     0      138   282     1       2     174     0     1.4     1
## 5    64     1     0      110   211     0       2     144     1     1.8     1
## 6    64     1     0      170   227     0       2     155     0     0.6     1
## # … with 3 more variables: ca <dbl>, thal <dbl>, condition <dbl>
str(data)
## tibble [297 × 14] (S3: tbl_df/tbl/data.frame)
##  $ age      : num [1:297] 69 69 66 65 64 64 63 61 60 59 ...
##  $ sex      : num [1:297] 1 0 0 1 1 1 1 1 0 1 ...
##  $ cp       : num [1:297] 0 0 0 0 0 0 0 0 0 0 ...
##  $ trestbps : num [1:297] 160 140 150 138 110 170 145 134 150 178 ...
##  $ chol     : num [1:297] 234 239 226 282 211 227 233 234 240 270 ...
##  $ fbs      : num [1:297] 1 0 0 1 0 0 1 0 0 0 ...
##  $ restecg  : num [1:297] 2 0 0 2 2 2 2 0 0 2 ...
##  $ thalach  : num [1:297] 131 151 114 174 144 155 150 145 171 145 ...
##  $ exang    : num [1:297] 0 0 0 0 1 0 0 0 0 0 ...
##  $ oldpeak  : num [1:297] 0.1 1.8 2.6 1.4 1.8 0.6 2.3 2.6 0.9 4.2 ...
##  $ slope    : num [1:297] 1 0 2 1 1 1 2 1 0 2 ...
##  $ ca       : num [1:297] 1 2 0 1 0 0 0 2 0 0 ...
##  $ thal     : num [1:297] 0 0 0 0 0 2 1 0 0 2 ...
##  $ condition: num [1:297] 0 0 0 1 0 0 0 1 0 0 ...
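
All 14 columns are read in as numeric, although several of them (sex, cp, fbs, restecg, exang, slope, ca, thal, and the target condition) are really categorical codes. If stricter typing is wanted, they could be converted to factors with dplyr; a minimal optional sketch (not applied in the steps below, which pass the numeric columns through as-is):

# Optional: convert the coded categorical columns to factors
cat_cols <- c("sex", "cp", "fbs", "restecg", "exang",
              "slope", "ca", "thal", "condition")
data_factored <- data %>% mutate(across(all_of(cat_cols), as.factor))
str(data_factored)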

Splitting the data into features (X) and the target variable (y)

X <- data[, -14]      # All columns except the last (the features)
y <- data$condition   # The last column as the target variable
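
Before modelling, it is also worth a quick check of how balanced the two classes of the target are:

table(y)               # counts of condition 0 vs condition 1
prop.table(table(y))   # class proportions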

Splitting the dataset into training and testing data

set.seed(123)   # Set the seed for reproducibility
n <- round(nrow(data) * 0.7)   # Number of training rows (70% of the data)
sample <- sample(seq_len(nrow(data)), size = n)   # Row indices for the training sample
train <- data[sample, ]   # Training data
test <- data[-sample, ]   # Testing data
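# Aside: caret (already loaded) offers a stratified split that preserves the
# class proportions of condition; a sketch of that alternative (not used below):
idx_strat <- createDataPartition(factor(data$condition), p = 0.7, list = FALSE)[, 1]
train_strat <- data[idx_strat, ]
test_strat  <- data[-idx_strat, ]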
# Fit the classification model with the Random Forest method
model <- randomForest(factor(condition) ~ ., data = train)
# Display a summary of the model
print(model)
## 
## Call:
##  randomForest(formula = factor(condition) ~ ., data = train) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 20.19%
## Confusion matrix:
##    0  1 class.error
## 0 94 17   0.1531532
## 1 25 72   0.2577320
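
To see which predictors drive the classification, the variable importance of the fitted forest can be inspected directly:

# Mean decrease in Gini for each predictor
importance(model)
varImpPlot(model, main = "Variable Importance")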

Making predictions on the testing data

predictions <- predict(model, newdata = test)
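
If class probabilities are needed rather than hard labels (for a custom decision threshold, for example), predict() can return them as well:

# Per-class probabilities for the test set
probabilities <- predict(model, newdata = test, type = "prob")
head(probabilities)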

Creating the confusion matrix

# Create a confusion matrix
cm <- confusionMatrix(data = as.factor(predictions), reference = as.factor(test$condition))

# Print the confusion matrix
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 41  5
##          1  8 35
##                                           
##                Accuracy : 0.8539          
##                  95% CI : (0.7632, 0.9199)
##     No Information Rate : 0.5506          
##     P-Value [Acc > NIR] : 1.077e-09       
##                                           
##                   Kappa : 0.7069          
##                                           
##  Mcnemar's Test P-Value : 0.5791          
##                                           
##             Sensitivity : 0.8367          
##             Specificity : 0.8750          
##          Pos Pred Value : 0.8913          
##          Neg Pred Value : 0.8140          
##              Prevalence : 0.5506          
##          Detection Rate : 0.4607          
##    Detection Prevalence : 0.5169          
##       Balanced Accuracy : 0.8559          
##                                           
##        'Positive' Class : 0               
## 
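
Note that confusionMatrix() has treated the level 0 as the positive class, so the sensitivity and specificity above describe how well class 0 is detected. If the other class (1) should be treated as positive instead, the positive argument can be set explicitly; a small sketch:

# Recompute the confusion matrix with condition = 1 as the positive class
cm_pos <- confusionMatrix(data = as.factor(predictions),
                          reference = as.factor(test$condition),
                          positive = "1")
print(cm_pos)
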
# randomForest does not store individual trees in a directly plottable form, so
# getTree() is used here to inspect the first tree; a plotted tree follows via rpart
head(getTree(model, k = 1, labelVar = TRUE))

# Load the rpart package for decision tree
library(rpart)

# Train the decision tree model
model <- rpart(condition ~ ., data = train, method = "class")

# Visualize the decision tree
plot(model)
text(model, use.n = TRUE, all = TRUE, cex = 0.8)

library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.2.3
# Build the decision tree model
model <- rpart(condition ~ ., data = train, method = "class")

# Display the decision tree with rpart.plot
rpart.plot(model)
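
Finally, as an optional extension, the forest's mtry parameter could be tuned with cross-validation through caret. A minimal sketch; the fold count and mtry grid are illustrative assumptions, not values used in the analysis above:

# 5-fold cross-validated tuning of mtry for the random forest
set.seed(123)
ctrl  <- trainControl(method = "cv", number = 5)
tuned <- train(x = as.data.frame(train[, -14]),
               y = factor(train$condition),
               method = "rf",
               trControl = ctrl,
               tuneGrid = expand.grid(mtry = c(2, 3, 4, 6)))
print(tuned)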