Importing the required libraries

library(randomForest)
## Warning: package 'randomForest' was built under R version 4.2.3
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Loading required package: lattice

Importing the dataset

data <- read_excel("D:/SOBAT KARIER/PORTOFOLIO/Jantung/hearth.xlsx")
head(data)
## # A tibble: 6 × 14
##     age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak slope
##   <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>   <dbl>   <dbl> <dbl>   <dbl> <dbl>
## 1    69     1     0      160   234     1       2     131     0     0.1     1
## 2    69     0     0      140   239     0       0     151     0     1.8     0
## 3    66     0     0      150   226     0       0     114     0     2.6     2
## 4    65     1     0      138   282     1       2     174     0     1.4     1
## 5    64     1     0      110   211     0       2     144     1     1.8     1
## 6    64     1     0      170   227     0       2     155     0     0.6     1
## # … with 3 more variables: ca <dbl>, thal <dbl>, condition <dbl>
str(data)
## tibble [297 × 14] (S3: tbl_df/tbl/data.frame)
##  $ age      : num [1:297] 69 69 66 65 64 64 63 61 60 59 ...
##  $ sex      : num [1:297] 1 0 0 1 1 1 1 1 0 1 ...
##  $ cp       : num [1:297] 0 0 0 0 0 0 0 0 0 0 ...
##  $ trestbps : num [1:297] 160 140 150 138 110 170 145 134 150 178 ...
##  $ chol     : num [1:297] 234 239 226 282 211 227 233 234 240 270 ...
##  $ fbs      : num [1:297] 1 0 0 1 0 0 1 0 0 0 ...
##  $ restecg  : num [1:297] 2 0 0 2 2 2 2 0 0 2 ...
##  $ thalach  : num [1:297] 131 151 114 174 144 155 150 145 171 145 ...
##  $ exang    : num [1:297] 0 0 0 0 1 0 0 0 0 0 ...
##  $ oldpeak  : num [1:297] 0.1 1.8 2.6 1.4 1.8 0.6 2.3 2.6 0.9 4.2 ...
##  $ slope    : num [1:297] 1 0 2 1 1 1 2 1 0 2 ...
##  $ ca       : num [1:297] 1 2 0 1 0 0 0 2 0 0 ...
##  $ thal     : num [1:297] 0 0 0 0 0 2 1 0 0 2 ...
##  $ condition: num [1:297] 0 0 0 1 0 0 0 1 0 0 ...
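
All 14 columns are read in as numeric, although several of them (sex, cp, fbs, restecg, exang, slope, ca, thal, and the target condition) are really categorical codes. If stricter typing is wanted, they could be converted to factors with dplyr; a minimal optional sketch (not applied in the steps below, which pass the numeric columns through as-is):

# Optional: convert the coded categorical columns to factors
cat_cols <- c("sex", "cp", "fbs", "restecg", "exang",
              "slope", "ca", "thal", "condition")
data_factored <- data %>% mutate(across(all_of(cat_cols), as.factor))
str(data_factored)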

Splitting the data into features (X) and the target variable (y)

X <- data[, -14]      # All columns except the last (the features)
y <- data$condition   # The last column as the target variable
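
Before modelling, it is also worth a quick check of how balanced the two classes of the target are:

table(y)               # counts of condition 0 vs condition 1
prop.table(table(y))   # class proportions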

Splitting the dataset into training and testing data

set.seed(123)   # Set the seed for reproducibility
n <- round(nrow(data) * 0.7)   # Number of training rows (70% of the data)
sample <- sample(seq_len(nrow(data)), size = n)   # Row indices for the training sample
train <- data[sample, ]   # Training data
test <- data[-sample, ]   # Testing data
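# Aside: caret (already loaded) offers a stratified split that preserves the
# class proportions of condition; a sketch of that alternative (not used below):
idx_strat <- createDataPartition(factor(data$condition), p = 0.7, list = FALSE)[, 1]
train_strat <- data[idx_strat, ]
test_strat  <- data[-idx_strat, ]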
# Fit the classification model with the Random Forest method
model <- randomForest(factor(condition) ~ ., data = train)
# Display a summary of the model
print(model)
## 
## Call:
##  randomForest(formula = factor(condition) ~ ., data = train) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 3
## 
##         OOB estimate of  error rate: 20.19%
## Confusion matrix:
##    0  1 class.error
## 0 94 17   0.1531532
## 1 25 72   0.2577320
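
To see which predictors drive the classification, the variable importance of the fitted forest can be inspected directly:

# Mean decrease in Gini for each predictor
importance(model)
varImpPlot(model, main = "Variable Importance")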

Making predictions on the testing data

predictions <- predict(model, newdata = test)
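
If class probabilities are needed rather than hard labels (for a custom decision threshold, for example), predict() can return them as well:

# Per-class probabilities for the test set
probabilities <- predict(model, newdata = test, type = "prob")
head(probabilities)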

Creating the confusion matrix

# Create a confusion matrix
cm <- confusionMatrix(data = as.factor(predictions), reference = as.factor(test$condition))

# Print the confusion matrix
print(cm)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 41  5
##          1  8 35
##                                           
##                Accuracy : 0.8539          
##                  95% CI : (0.7632, 0.9199)
##     No Information Rate : 0.5506          
##     P-Value [Acc > NIR] : 1.077e-09       
##                                           
##                   Kappa : 0.7069          
##                                           
##  Mcnemar's Test P-Value : 0.5791          
##                                           
##             Sensitivity : 0.8367          
##             Specificity : 0.8750          
##          Pos Pred Value : 0.8913          
##          Neg Pred Value : 0.8140          
##              Prevalence : 0.5506          
##          Detection Rate : 0.4607          
##    Detection Prevalence : 0.5169          
##       Balanced Accuracy : 0.8559          
##                                           
##        'Positive' Class : 0               
## 
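
Note that confusionMatrix() has treated the level 0 as the positive class, so the sensitivity and specificity above describe how well class 0 is detected. If the other class (1) should be treated as positive instead, the positive argument can be set explicitly; a small sketch:

# Recompute the confusion matrix with condition = 1 as the positive class
cm_pos <- confusionMatrix(data = as.factor(predictions),
                          reference = as.factor(test$condition),
                          positive = "1")
print(cm_pos)
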
# randomForest does not store individual trees in a directly plottable form, so
# getTree() is used here to inspect the first tree; a plotted tree follows via rpart
head(getTree(model, k = 1, labelVar = TRUE))

# Load the rpart package for decision tree
library(rpart)

# Train the decision tree model
model <- rpart(condition ~ ., data = train, method = "class")

# Visualize the decision tree
plot(model)
text(model, use.n = TRUE, all = TRUE, cex = 0.8)

library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.2.3
# Build the decision tree model
model <- rpart(condition ~ ., data = train, method = "class")

# Display the decision tree with rpart.plot
rpart.plot(model)
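
Finally, as an optional extension, the forest's mtry parameter could be tuned with cross-validation through caret. A minimal sketch; the fold count and mtry grid are illustrative assumptions, not values used in the analysis above:

# 5-fold cross-validated tuning of mtry for the random forest
set.seed(123)
ctrl  <- trainControl(method = "cv", number = 5)
tuned <- train(x = as.data.frame(train[, -14]),
               y = factor(train$condition),
               method = "rf",
               trControl = ctrl,
               tuneGrid = expand.grid(mtry = c(2, 3, 4, 6)))
print(tuned)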