calculating-AUC-ROC-between-two-continuous.R

library(pROC)

## Warning: 程辑包'pROC'是用R版本4.2.2 来建造的

## Type 'citation("pROC")' for a citation.

## 
## 载入程辑包：'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

# Generate sample data
set.seed(123)
n <- 100
true_values <- rnorm(n)  # true values
pred_values <- true_values + rnorm(n, 0, 0.5)  # predicted values (with noise)
head(true_values)

## [1] -0.56047565 -0.23017749  1.55870831  0.07050839  0.12928774  1.71506499

head(pred_values)

## [1] -0.9156789 -0.1017356  1.4353624 -0.1032629 -0.3465215  1.6925511

# Method 1: Use median as threshold to binarize true values
threshold <- median(true_values)
binary_true <- ifelse(true_values >= threshold, 1, 0)

# Calculate AUC using median threshold
auc1 <- roc(binary_true, pred_values)$auc

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

print(paste("AUC using median threshold:", round(auc1, 3)))

## [1] "AUC using median threshold: 0.936"

# Method 2: Use specific percentile (75th) as threshold
threshold_75 <- quantile(true_values, 0.75)
binary_true_75 <- ifelse(true_values >= threshold_75, 1, 0)
auc2 <- roc(binary_true_75, pred_values)$auc

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

print(paste("AUC using 75th percentile threshold:", round(auc2, 3)))

## [1] "AUC using 75th percentile threshold: 0.936"

# Method 3: Use custom threshold
custom_threshold <- 0  # e.g., use 0 for standardized data
binary_true_custom <- ifelse(true_values >= custom_threshold, 1, 0)
auc3 <- roc(binary_true_custom, pred_values)$auc

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

print(paste("AUC using custom threshold:", round(auc3, 3)))

## [1] "AUC using custom threshold: 0.924"

# Visualization
par(mfrow=c(1,3))  # Set up 1x3 plot layout

# Plot original scatter plot
plot(true_values, pred_values, 
     main="Original Values Scatter Plot",
     xlab="True Values", 
     ylab="Predicted Values")
abline(h=median(pred_values), col="red", lty=2)
abline(v=threshold, col="blue", lty=2)

# Plot ROC curve using median threshold
plot(roc(binary_true, pred_values), 
     main="ROC Curve (Median Threshold)",
     col="blue")

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

# Plot ROC curve using 75th percentile threshold
plot(roc(binary_true_75, pred_values), 
     main="ROC Curve (75th Percentile)",
     col="red")

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

# Reset plot layout
par(mfrow=c(1,1))

# Compare results with different thresholds
results <- data.frame(
  Threshold = c("Median", "75th Percentile", "Custom"),
  AUC = c(auc1, auc2, auc3)
)
print("AUC comparison with different thresholds:")

## [1] "AUC comparison with different thresholds:"

print(results)

##         Threshold       AUC
## 1          Median 0.9364000
## 2 75th Percentile 0.9360000
## 3          Custom 0.9242788

# Function to calculate evaluation metrics
calculate_metrics <- function(true_vals, pred_vals, threshold) {
  predicted_class <- ifelse(pred_vals >= threshold, 1, 0)
  true_class <- ifelse(true_vals >= threshold, 1, 0)
  
  # Calculate confusion matrix elements
  TP <- sum(predicted_class == 1 & true_class == 1)
  TN <- sum(predicted_class == 0 & true_class == 0)
  FP <- sum(predicted_class == 1 & true_class == 0)
  FN <- sum(predicted_class == 0 & true_class == 1)
  
  # Calculate metrics
  accuracy <- (TP + TN) / (TP + TN + FP + FN)
  precision <- TP / (TP + FP)
  recall <- TP / (TP + FN)
  f1 <- 2 * (precision * recall) / (precision + recall)
  
  return(c(Accuracy = accuracy,
           Precision = precision,
           Recall = recall,
           F1 = f1))
}

# Calculate and display metrics for each threshold
metrics_median <- calculate_metrics(true_values, pred_values, threshold)
metrics_75 <- calculate_metrics(true_values, pred_values, threshold_75)
metrics_custom <- calculate_metrics(true_values, pred_values, custom_threshold)

print("Evaluation metrics using median threshold:")

## [1] "Evaluation metrics using median threshold:"

print(round(metrics_median, 3))

##  Accuracy Precision    Recall        F1 
##     0.860     0.846     0.880     0.863

print("Evaluation metrics using 75th percentile threshold:")

## [1] "Evaluation metrics using 75th percentile threshold:"

print(round(metrics_75, 3))

##  Accuracy Precision    Recall        F1 
##       0.9       0.8       0.8       0.8

print("Evaluation metrics using custom threshold:")

## [1] "Evaluation metrics using custom threshold:"

print(round(metrics_custom, 3))

##  Accuracy Precision    Recall        F1 
##     0.830     0.818     0.865     0.841

# Additional analysis options:

# 1. Data preprocessing - Standardization
scaled_true <- scale(true_values)
scaled_pred <- scale(pred_values)

# 2. Cross-validation approach
library(caret)

## Warning: 程辑包'caret'是用R版本4.2.2 来建造的

## 载入需要的程辑包：ggplot2

## Warning: 程辑包'ggplot2'是用R版本4.2.3 来建造的

## 载入需要的程辑包：lattice

# Create folds for cross-validation
folds <- createFolds(binary_true, k = 5)
cv_aucs <- sapply(folds, function(test_idx) {
  train_idx <- setdiff(1:length(binary_true), test_idx)
  roc(binary_true[test_idx], pred_values[test_idx])$auc
})

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

## Setting levels: control = 0, case = 1

## Setting direction: controls < cases

print("Cross-validated AUC results:")

## [1] "Cross-validated AUC results:"

print(summary(cv_aucs))

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.860   0.950   0.960   0.942   0.960   0.980

# 3. Confidence intervals for AUC
ci_auc <- ci.auc(roc(binary_true, pred_values))

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

print("AUC Confidence Interval:")

## [1] "AUC Confidence Interval:"

print(ci_auc)

## 95% CI: 0.8938-0.979 (DeLong)

# 4. Plot with more details
roc_obj <- roc(binary_true, pred_values)

## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

plot(roc_obj,
     main = "Detailed ROC Curve Analysis",
     col = "blue",
     lwd = 2,
     print.auc = TRUE,  # Print AUC on plot
     print.thres = TRUE,  # Show optimal threshold
     grid = TRUE)  # Add grid lines

calculating-AUC-ROC-between-two-continuous.R

liyix

2024-11-13