library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(pROC)
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(ggplot2)
df <- read.csv("classification-output-data.csv")
str(df)
## 'data.frame': 181 obs. of 11 variables:
## $ pregnant : int 7 2 3 1 4 1 9 8 1 2 ...
## $ glucose : int 124 122 107 91 83 100 89 120 79 123 ...
## $ diastolic : int 70 76 62 64 86 74 62 78 60 48 ...
## $ skinfold : int 33 27 13 24 19 12 0 0 42 32 ...
## $ insulin : int 215 200 48 0 0 46 0 0 48 165 ...
## $ bmi : num 25.5 35.9 22.9 29.2 29.3 19.5 22.5 25 43.5 42.1 ...
## $ pedigree : num 0.161 0.483 0.678 0.192 0.317 0.149 0.142 0.409 0.678 0.52 ...
## $ age : int 37 26 23 21 34 28 33 64 23 26 ...
## $ class : int 0 0 1 0 0 0 0 0 0 0 ...
## $ scored.class : int 0 0 0 0 0 0 0 0 0 0 ...
## $ scored.probability: num 0.328 0.273 0.11 0.056 0.1 ...
head(df)
## pregnant glucose diastolic skinfold insulin bmi pedigree age class
## 1 7 124 70 33 215 25.5 0.161 37 0
## 2 2 122 76 27 200 35.9 0.483 26 0
## 3 3 107 62 13 48 22.9 0.678 23 1
## 4 1 91 64 24 0 29.2 0.192 21 0
## 5 4 83 86 19 0 29.3 0.317 34 0
## 6 1 100 74 12 46 19.5 0.149 28 0
## scored.class scored.probability
## 1 0 0.32845226
## 2 0 0.27319044
## 3 0 0.10966039
## 4 0 0.05599835
## 5 0 0.10049072
## 6 0 0.05515460
cm_raw <- table(Actual = df$class, Predicted = df$scored.class)
cm_raw
## Predicted
## Actual 0 1
## 0 119 5
## 1 30 27
\[ \text{Accuracy} = \frac{TP + TN}{TP + FP + TN + FN} \]
\[ \text{Classification Error Rate} = \frac{FP + FN}{TP + FP + TN + FN} \]
\[ \text{Precision} = \frac{TP}{TP + FP} \]
\[ \text{Sensitivity} = \frac{TP}{TP + FN} \]
\[ \text{Specificity} = \frac{TN}{TN + FP} \]
\[ F1 \text{ Score} = \frac{2 \times \text{Precision} \times \text{Sensitivity}}{\text{Precision} + \text{Sensitivity}} \]
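As a quick worked check, read the cm_raw table above with class 1 as the positive class (rows are actual values, columns are predictions): TN = 119, FP = 5, FN = 30, and TP = 27, for 181 observations in total. Substituting these counts gives, for example,
\[ \text{Accuracy} = \frac{27 + 119}{181} \approx 0.8066, \qquad \text{Sensitivity} = \frac{27}{27 + 30} \approx 0.4737, \qquad \text{Precision} = \frac{27}{27 + 5} \approx 0.8438, \]
which match the values produced by the functions defined next.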
# Confusion counts
confusion_counts <- function(actual, pred, pos = 1) {
  TP <- sum(actual == pos & pred == pos)
  FP <- sum(actual != pos & pred == pos)
  TN <- sum(actual != pos & pred != pos)
  FN <- sum(actual == pos & pred != pos)
  list(TP = TP, FP = FP, TN = TN, FN = FN)
}

# Accuracy
accuracy_fn <- function(df, pos = 1) {
  cc <- confusion_counts(df$class, df$scored.class, pos)
  (cc$TP + cc$TN) / (cc$TP + cc$FP + cc$TN + cc$FN)
}

# Error rate
error_rate_fn <- function(df, pos = 1) {
  cc <- confusion_counts(df$class, df$scored.class, pos)
  (cc$FP + cc$FN) / (cc$TP + cc$FP + cc$TN + cc$FN)
}

# Precision
precision_fn <- function(df, pos = 1) {
  cc <- confusion_counts(df$class, df$scored.class, pos)
  cc$TP / (cc$TP + cc$FP)
}

# Sensitivity
sensitivity_fn <- function(df, pos = 1) {
  cc <- confusion_counts(df$class, df$scored.class, pos)
  cc$TP / (cc$TP + cc$FN)
}

# Specificity
specificity_fn <- function(df, pos = 1) {
  cc <- confusion_counts(df$class, df$scored.class, pos)
  cc$TN / (cc$TN + cc$FP)
}

# F1 Score
f1_fn <- function(df, pos = 1) {
  prec <- precision_fn(df, pos)
  rec <- sensitivity_fn(df, pos)
  2 * prec * rec / (prec + rec)
}
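As a quick sanity check on the helper (a sketch added here for illustration), calling confusion_counts() directly on the data with class 1 as the positive class should reproduce the cell counts of the cm_raw table above (TP = 27, FP = 5, TN = 119, FN = 30):
# Sanity check: counts should match the cm_raw table above
confusion_counts(df$class, df$scored.class, pos = 1)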
Before we move on, let’s consider a question that was asked: what are the bounds on the F1 score? We show below that the F1 score always lies between 0 and 1.
The F1 score is defined as \[ F1 = \frac{2PS}{P + S} \] where P is Precision and S is Sensitivity. By definition, both P and S are proportions, so their values must be between 0 and 1.
Lower Bound: Since P and S are non-negative, the numerator 2PS is non-negative, and the denominator P + S is strictly positive whenever F1 is defined. Therefore, the F1 score must be >= 0.
Upper Bound: We can prove the upper bound by starting with the fact that the square of any real number is non-negative. \[ (P - S)^2 \ge 0 \]
Expanding this gives: \[ P^2 - 2PS + S^2 \ge 0 \]
Add 4PS to both sides of the inequality: \[ P^2 + 2PS + S^2 \ge 4PS \]
The left side is now a perfect square, (P+S)^2: \[ (P + S)^2 \ge 4PS \]
Since P and S are non-negative, we can divide both sides by P+S (assuming it is not zero): \[ P + S \ge \frac{4PS}{P+S} \]
Now, divide both sides by 2: \[ \frac{P + S}{2} \ge \frac{2PS}{P+S} \]
The term on the right is the F1 score. The term on the left is the arithmetic mean of P and S. Since both P and S are less than or equal to 1, their arithmetic mean must also be less than or equal to 1. Therefore: \[ 1 \ge \frac{P + S}{2} \ge \frac{2PS}{P+S} = F1 \]
This proves that F1 <= 1, and combined with the lower bound, 0 <= F1 <= 1.
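The algebra can also be verified numerically. The short sketch below (an illustrative addition, not part of the original analysis) evaluates the F1 formula over a grid of precision and sensitivity values in (0, 1] and confirms that every result falls within [0, 1]:
# Numerical check of the F1 bounds over a grid of P and S values
ps_grid <- expand.grid(P = seq(0.01, 1, by = 0.01), S = seq(0.01, 1, by = 0.01))
f1_vals <- with(ps_grid, 2 * P * S / (P + S))
range(f1_vals) # both endpoints should lie within [0, 1]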
# Metrics
pos_label <- 1
roc_from_probs <- function(df, pos = 1, thresholds = seq(0, 1, by = 0.01)) {
  actual <- df$class
  probs <- df$scored.probability
  tpr <- fpr <- numeric(length(thresholds))
  for (i in seq_along(thresholds)) {
    thr <- thresholds[i]
    # Classify as positive whenever the scored probability meets the threshold
    pred <- ifelse(probs >= thr, pos, 1 - pos)
    cc <- confusion_counts(actual, pred, pos)
    tpr[i] <- cc$TP / (cc$TP + cc$FN)
    fpr[i] <- cc$FP / (cc$FP + cc$TN)
  }
  # Reverse so the points run from FPR = 0 to FPR = 1 for integration
  fpr_sorted <- rev(fpr)
  tpr_sorted <- rev(tpr)
  # Approximate the AUC with the trapezoidal rule
  auc_est <- sum(diff(fpr_sorted) * (head(tpr_sorted, -1) + tail(tpr_sorted, -1)) / 2)
  df_roc <- data.frame(Threshold = thresholds, FPR = fpr, TPR = tpr)
  p <- ggplot(df_roc, aes(FPR, TPR)) +
    geom_line(color = "blue") +
    geom_abline(linetype = "dashed", color = "gray") +
    theme_minimal() +
    labs(title = paste0("Custom ROC Curve (AUC = ", round(auc_est, 4), ")"),
         x = "False Positive Rate (1 - Specificity)",
         y = "True Positive Rate (Sensitivity)")
  list(auc = auc_est, plot = p)
}
roc_res <- roc_from_probs(df, pos_label)
roc_res$auc
## [1] 0.8488964
roc_res$plot
The ROC curve and AUC generated by the custom function are consistent with the results from the pROC package shown later in this report. The AUC values differ only slightly, because the custom function evaluates the curve on a fixed 0.01 threshold grid and approximates the area with the trapezoidal rule, while pROC computes the curve exactly at every observed probability. This agreement supports the accuracy of our implementation.
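As an additional cross-check (an illustrative sketch, not part of the original output), the AUC can also be computed analytically from the rank-based Mann-Whitney formulation, which needs no threshold grid and should essentially match the value pROC reports later:
# Rank-based (Mann-Whitney) AUC: the probability that a randomly chosen
# positive case is scored above a randomly chosen negative case (ties count as 0.5)
ranks <- rank(df$scored.probability)
n_pos <- sum(df$class == 1)
n_neg <- sum(df$class == 0)
(sum(ranks[df$class == 1]) - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)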
acc <- accuracy_fn(df, pos_label)
err <- error_rate_fn(df, pos_label)
prec <- precision_fn(df, pos_label)
sens <- sensitivity_fn(df, pos_label)
spec <- specificity_fn(df, pos_label)
f1 <- f1_fn(df, pos_label)
metrics <- data.frame(
  Metric = c("Accuracy", "Error Rate", "Precision", "Sensitivity (Recall)", "Specificity", "F1 Score"),
  Value = round(c(acc, err, prec, sens, spec, f1), 4)
)
# Display metrics neatly
knitr::kable(metrics, caption = "Classification Metrics Summary")
| Metric | Value |
|---|---|
| Accuracy | 0.8066 |
| Error Rate | 0.1934 |
| Precision | 0.8438 |
| Sensitivity (Recall) | 0.4737 |
| Specificity | 0.9597 |
| F1 Score | 0.6067 |
# Check that Accuracy + Error Rate = 1
check_sum <- acc + err
cat("Accuracy + Error Rate =", round(check_sum, 4))
## Accuracy + Error Rate = 1
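This identity holds by construction, since the two numerators partition the total number of observations N = TP + FP + TN + FN:
\[ \text{Accuracy} + \text{Classification Error Rate} = \frac{TP + TN}{N} + \frac{FP + FN}{N} = \frac{N}{N} = 1 \]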
The results from caret's confusionMatrix() function, shown below, validate the calculations from our custom functions. The Sensitivity and Specificity values produced by caret are identical to the values we calculated manually, which confirms that the helper confusion_counts() and the metric functions sensitivity_fn() and specificity_fn() are implemented correctly.
df$class <- factor(df$class)
df$scored.class <- factor(df$scored.class, levels = levels(df$class))
cm <- confusionMatrix(df$scored.class, df$class, positive = "1")
cm
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 119 30
## 1 5 27
##
## Accuracy : 0.8066
## 95% CI : (0.7415, 0.8615)
## No Information Rate : 0.6851
## P-Value [Acc > NIR] : 0.0001712
##
## Kappa : 0.4916
##
## Mcnemar's Test P-Value : 4.976e-05
##
## Sensitivity : 0.4737
## Specificity : 0.9597
## Pos Pred Value : 0.8438
## Neg Pred Value : 0.7987
## Prevalence : 0.3149
## Detection Rate : 0.1492
## Detection Prevalence : 0.1768
## Balanced Accuracy : 0.7167
##
## 'Positive' Class : 1
##
caret_sens <- sensitivity(df$scored.class, df$class, positive = "1")
caret_spec <- specificity(df$scored.class, df$class, negative = "0")
caret_results <- data.frame(
  Metric = c("Caret Sensitivity", "Caret Specificity"),
  Value = round(c(caret_sens, caret_spec), 4)
)
caret_results
## Metric Value
## 1 Caret Sensitivity 0.4737
## 2 Caret Specificity 0.9597
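To make the agreement with the custom functions explicit, the short sketch below (added for illustration) compares caret's estimates with sensitivity_fn() and specificity_fn(); both checks should return TRUE:
# Compare caret's estimates with the custom implementations
all.equal(as.numeric(caret_sens), sens)
all.equal(as.numeric(caret_spec), spec)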
roc_obj <- roc(response = df$class,
               predictor = df$scored.probability,
               levels = c("0", "1"),
               direction = "<")
auc_val <- auc(roc_obj)
plot(roc_obj, main = paste("pROC ROC Curve (AUC =", round(auc_val, 4), ")"))
auc_val
## Area under the curve: 0.8503
The results from the caret functions match the values produced by my custom functions for accuracy, sensitivity, and specificity. This confirms that the manual implementations are correct.
The pROC package is a specialized, highly optimized tool for ROC analysis, and the area under the curve (AUC) it computes is nearly identical to the AUC from our custom roc_from_probs function:
Custom function AUC: 0.8489; pROC package AUC: 0.8503.
The small gap reflects the custom function's fixed 0.01 threshold grid and trapezoidal approximation rather than any error in its logic, which confirms the correctness of the custom ROC function.
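To confirm that the remaining gap comes from the threshold grid rather than the logic, the custom function can be re-run (a sketch reusing roc_from_probs from above) with the observed scored probabilities themselves as thresholds, so that the empirical ROC curve is evaluated exactly at every step; the resulting AUC should sit essentially on top of the pROC value:
# Re-run the custom ROC at every observed probability (Inf adds the (0, 0) corner)
thr_exact <- sort(unique(c(0, df$scored.probability, Inf)))
roc_exact <- roc_from_probs(df, pos = 1, thresholds = thr_exact)
roc_exact$auc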