library(pROC)
## Warning: 程辑包'pROC'是用R版本4.2.2 来建造的
## Type 'citation("pROC")' for a citation.
##
## 载入程辑包:'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# Generate sample data
set.seed(123)
n <- 100
true_values <- rnorm(n) # true values
pred_values <- true_values + rnorm(n, 0, 0.5) # predicted values (with noise)
head(true_values)
## [1] -0.56047565 -0.23017749 1.55870831 0.07050839 0.12928774 1.71506499
head(pred_values)
## [1] -0.9156789 -0.1017356 1.4353624 -0.1032629 -0.3465215 1.6925511
# Method 1: Use median as threshold to binarize true values
threshold <- median(true_values)
binary_true <- ifelse(true_values >= threshold, 1, 0)
# Calculate AUC using median threshold
auc1 <- roc(binary_true, pred_values)$auc
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
print(paste("AUC using median threshold:", round(auc1, 3)))
## [1] "AUC using median threshold: 0.936"
# Method 2: Use specific percentile (75th) as threshold
threshold_75 <- quantile(true_values, 0.75)
binary_true_75 <- ifelse(true_values >= threshold_75, 1, 0)
auc2 <- roc(binary_true_75, pred_values)$auc
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
print(paste("AUC using 75th percentile threshold:", round(auc2, 3)))
## [1] "AUC using 75th percentile threshold: 0.936"
# Method 3: Use custom threshold
custom_threshold <- 0 # e.g., use 0 for standardized data
binary_true_custom <- ifelse(true_values >= custom_threshold, 1, 0)
auc3 <- roc(binary_true_custom, pred_values)$auc
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
print(paste("AUC using custom threshold:", round(auc3, 3)))
## [1] "AUC using custom threshold: 0.924"
# Visualization
par(mfrow=c(1,3)) # Set up 1x3 plot layout
# Plot original scatter plot
plot(true_values, pred_values,
main="Original Values Scatter Plot",
xlab="True Values",
ylab="Predicted Values")
abline(h=median(pred_values), col="red", lty=2)
abline(v=threshold, col="blue", lty=2)
# Plot ROC curve using median threshold
plot(roc(binary_true, pred_values),
main="ROC Curve (Median Threshold)",
col="blue")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve using 75th percentile threshold
plot(roc(binary_true_75, pred_values),
main="ROC Curve (75th Percentile)",
col="red")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

# Reset plot layout
par(mfrow=c(1,1))
# Compare results with different thresholds
results <- data.frame(
Threshold = c("Median", "75th Percentile", "Custom"),
AUC = c(auc1, auc2, auc3)
)
print("AUC comparison with different thresholds:")
## [1] "AUC comparison with different thresholds:"
print(results)
## Threshold AUC
## 1 Median 0.9364000
## 2 75th Percentile 0.9360000
## 3 Custom 0.9242788
# Function to calculate evaluation metrics
calculate_metrics <- function(true_vals, pred_vals, threshold) {
predicted_class <- ifelse(pred_vals >= threshold, 1, 0)
true_class <- ifelse(true_vals >= threshold, 1, 0)
# Calculate confusion matrix elements
TP <- sum(predicted_class == 1 & true_class == 1)
TN <- sum(predicted_class == 0 & true_class == 0)
FP <- sum(predicted_class == 1 & true_class == 0)
FN <- sum(predicted_class == 0 & true_class == 1)
# Calculate metrics
accuracy <- (TP + TN) / (TP + TN + FP + FN)
precision <- TP / (TP + FP)
recall <- TP / (TP + FN)
f1 <- 2 * (precision * recall) / (precision + recall)
return(c(Accuracy = accuracy,
Precision = precision,
Recall = recall,
F1 = f1))
}
# Calculate and display metrics for each threshold
metrics_median <- calculate_metrics(true_values, pred_values, threshold)
metrics_75 <- calculate_metrics(true_values, pred_values, threshold_75)
metrics_custom <- calculate_metrics(true_values, pred_values, custom_threshold)
print("Evaluation metrics using median threshold:")
## [1] "Evaluation metrics using median threshold:"
print(round(metrics_median, 3))
## Accuracy Precision Recall F1
## 0.860 0.846 0.880 0.863
print("Evaluation metrics using 75th percentile threshold:")
## [1] "Evaluation metrics using 75th percentile threshold:"
print(round(metrics_75, 3))
## Accuracy Precision Recall F1
## 0.9 0.8 0.8 0.8
print("Evaluation metrics using custom threshold:")
## [1] "Evaluation metrics using custom threshold:"
print(round(metrics_custom, 3))
## Accuracy Precision Recall F1
## 0.830 0.818 0.865 0.841
# Additional analysis options:
# 1. Data preprocessing - Standardization
scaled_true <- scale(true_values)
scaled_pred <- scale(pred_values)
# 2. Cross-validation approach
library(caret)
## Warning: 程辑包'caret'是用R版本4.2.2 来建造的
## 载入需要的程辑包:ggplot2
## Warning: 程辑包'ggplot2'是用R版本4.2.3 来建造的
## 载入需要的程辑包:lattice
# Create folds for cross-validation
folds <- createFolds(binary_true, k = 5)
cv_aucs <- sapply(folds, function(test_idx) {
train_idx <- setdiff(1:length(binary_true), test_idx)
roc(binary_true[test_idx], pred_values[test_idx])$auc
})
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
print("Cross-validated AUC results:")
## [1] "Cross-validated AUC results:"
print(summary(cv_aucs))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.860 0.950 0.960 0.942 0.960 0.980
# 3. Confidence intervals for AUC
ci_auc <- ci.auc(roc(binary_true, pred_values))
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
print("AUC Confidence Interval:")
## [1] "AUC Confidence Interval:"
print(ci_auc)
## 95% CI: 0.8938-0.979 (DeLong)
# 4. Plot with more details
roc_obj <- roc(binary_true, pred_values)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj,
main = "Detailed ROC Curve Analysis",
col = "blue",
lwd = 2,
print.auc = TRUE, # Print AUC on plot
print.thres = TRUE, # Show optimal threshold
grid = TRUE) # Add grid lines
