1. Import dataset

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readxl)
library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

# Read the Pima Indian Diabetes workbook into `df_pima` and list its columns.
# NOTE(review): the path below is absolute and machine-specific; point it at
# your own copy of the workbook before running this on another machine.
df_pima <- read_excel("/Users/thien/Desktop/R-dir/R studying/dataset/Pima Indian Diabetes Dta.xlsx")
colnames(df_pima)

##  [1] "id"                       "Pregnancies"             
##  [3] "Glucose"                  "BloodPressure"           
##  [5] "SkinThickness"            "Insulin"                 
##  [7] "bmi"                      "DiabetesPedigreeFunction"
##  [9] "Age"                      "Outcome"

2. Xác định Kết quả Lâm sàng

# Build the ROC curve for `Glucose` as a predictor of `Outcome`.
# `levels` (0 = control, 1 = case) and `direction` (controls score lower than
# cases) are stated explicitly rather than auto-detected by pROC, so the
# orientation of the curve cannot silently flip if the data change.
roc_obj <- roc(
  response = df_pima$Outcome,
  predictor = df_pima$Glucose,
  levels = c(0, 1),
  direction = "<"
)

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

# Draw the ROC curve.
plot.roc(
  roc_obj,
  main = "ROC Curve for Glucose",
  col = "darkorange",
  lwd = 2
)

# Compute and display the area under the ROC curve.
auc_value <- auc(roc_obj)
print(auc_value)

## Area under the curve: 0.7881

# Youden's index (sensitivity + specificity - 1) at every ROC threshold.
youden_index <- roc_obj[["sensitivities"]] + roc_obj[["specificities"]] - 1
# Keep the threshold with the largest index; which.max() picks the first
# one if several thresholds tie.
best_threshold <- roc_obj[["thresholds"]][which.max(youden_index)]
cat("Cut-off point tốt nhất:", best_threshold, "\n")

## Cut-off point tốt nhất: 123.5

Ghi chú: Đường cong ROC giúp trực quan hóa hiệu suất của mô hình phân loại.
Youden’s Index (J = độ nhạy + độ đặc hiệu − 1) được tính cho mỗi ngưỡng (threshold) trên đường ROC; ngưỡng có trị số Youden cao nhất được chọn làm cut-off point tối ưu.
Cut-off point tốt nhất: Giá trị glucose tối ưu để phân biệt giữa bệnh nhân tiểu đường và không tiểu đường.

3. Dự đoán kết quả với cut-off point đã chọn

# Add `predicted`: 1 when Glucose is at or above the chosen cut-off, else 0.
# as.numeric() maps TRUE/FALSE to 1/0 and keeps NA as NA.
df_pima <- mutate(
  df_pima,
  predicted = as.numeric(Glucose >= best_threshold)
)

4. Tính toán các chỉ số đánh giá

# Accuracy: proportion of rows where `predicted` equals `Outcome`.
accuracy <- with(df_pima, mean(predicted == Outcome))
accuracy

## [1] 0.7213542

# Cross-tabulate actual outcome (rows) against predicted class (columns).
# Both factors are given the full set of levels so the table is always 2x2,
# even when a class never occurs; this keeps later cell lookups valid.
confusion_matrix <- table(
  factor(df_pima$Outcome, levels = c(0, 1)),
  factor(df_pima$predicted, levels = c(0, 1))
)
confusion_matrix

##    
##       0   1
##   0 366 134
##   1  80 188

# Precision: true positives divided by everything predicted positive
# (the "1" column of the confusion matrix). Cells are addressed by label.
precision <- confusion_matrix["1", "1"] / sum(confusion_matrix[, "1"])
precision

## [1] 0.5838509

# Recall (sensitivity): true positives divided by all actual positives
# (the "1" row of the confusion matrix). Cells are addressed by label.
recall <- confusion_matrix["1", "1"] / sum(confusion_matrix["1", ])
recall

## [1] 0.7014925

Cut-off point for clinical application

Vu Thien

2024-06-14

1. Import dataset

2. Xác định Kết quả Lâm sàng

3. Dự đoán kết quả với cut-off point đã chọn

4. Tính toán các chỉ số đánh giá