Measuring Performance in Classification Models
library(dplyr)
library(ggplot2)
library(knitr)
# Read in data
df = read.csv("classification-output-data.csv")

Confusion Matrix
Using the built-in table function, we can generate a raw confusion matrix for the given dataset. The rows represent the predicted class and the columns the actual class.
# Determine cross reference on the classes
confusion_table = table(df[,'scored.class'], df[,'class'])
confusion_table
##    
##       0   1
##   0 119  30
##   1   5  27
Define the four possible outcomes
# Initialize 4 possible outcomes [TP, TN, FP, FN]
# True Positive, True Negative, False Positive, False Negative
confusion_table[1,1] = 'TN'
confusion_table[1,2] = 'FN'
confusion_table[2,1] = 'FP'
confusion_table[2,2] = 'TP'
confusion_table
##    
##     0  1 
##   0 TN FN
##   1 FP TP
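Every metric function below re-derives the same four counts from this cross-table. As an optional refactor (a sketch only; the helper name get_confusion_counts is not part of the original code), that extraction could be centralized:

# Hypothetical helper: pull TP, TN, FP, FN out of the predicted-vs-actual table once
get_confusion_counts <- function(df, predicted, actual){
  ct <- table(df[,predicted], df[,actual])
  list(TN = ct[1,1], FN = ct[1,2], FP = ct[2,1], TP = ct[2,2])
}
# Example usage:
# counts <- get_confusion_counts(df, 'scored.class', 'class')

The functions below are kept self-contained instead, which keeps each metric definition readable on its own.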
Accuracy
get_accuracy <- function(df, predicted, actual){
  # Accuracy = (TP + TN) / (TP + TN + FP + FN)
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  accuracy = round((TP + TN) / sum(TP,FP,TN,FN), 2)
  return(accuracy)
}

Classification Error Rate
get_classification_error_rate <- function(df, predicted, actual){
  # Classification error rate = (FP + FN) / (TP + TN + FP + FN) = 1 - accuracy
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  classification_error_rate = round((FP + FN) / sum(TP,FP,TN,FN), 2)
  return(classification_error_rate)
}

Precision
get_precision <- function(df, predicted, actual){
  # Precision = TP / (TP + FP)
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  precision = round(TP / (TP + FP), 2)
  return(precision)
}

Sensitivity
get_sensitivity <- function(df, predicted, actual){
  # Sensitivity (recall, true positive rate) = TP / (TP + FN)
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  sensitivity = round(TP / (TP + FN), 2)
  return(sensitivity)
}

Specificity
get_specificity <- function(df, predicted, actual){
  # Specificity (true negative rate) = TN / (TN + FP)
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  specificity = round(TN / (TN + FP), 2)
  return(specificity)
}

F1
get_f1_score <- function(df, predicted, actual){
  # F1 = 2 * precision * sensitivity / (precision + sensitivity)
  confusion_table = table(df[,predicted], df[,actual])
  TP = confusion_table[2,2]
  TN = confusion_table[1,1]
  FN = confusion_table[1,2]
  FP = confusion_table[2,1]
  precision = round(TP / (TP + FP), 2)
  sensitivity = round(TP / (TP + FN), 2)
  f1_score = round((2 * precision * sensitivity) / (precision + sensitivity), 2)
  return(f1_score)
}

What are the bounds on the F1 score?
Both precision and sensitivity are bounded between 0 and 1, since each is a ratio of a count to a total that is at least as large. We can use their minimum, midpoint, and maximum values to show that the F1 score also falls between 0 and 1.
Min: (2 x 0 x 0) / (0 + 0) is undefined (division by zero), but as precision and sensitivity approach 0, the F1 score approaches 0
Mid: (2 x 0.5 x 0.5) / (0.5 + 0.5) = 0.5 / 1 = 0.5
Max: (2 x 1 x 1) / (1 + 1) = 2 / 2 = 1
Programming test: generate 100 random fractional values between 0 and 1 and test the following: if (0 < a < 1) and (0 < b < 1), then (ab < a). Since ab < a and ab < b together give 2ab < a + b, it follows that F1 = 2ps / (p + s) < 1 whenever precision and sensitivity are both strictly below 1.
a <- runif(100)
b <- runif(100)
for (i in 1:100){
  v <- a[i]*b[i]
  if (v > a[i]){
    print(v)
  }
}

The loop never prints because ab is always smaller than a.
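As an additional numeric check (a sketch, not part of the original assignment; the variable names are arbitrary), we can sample random precision/sensitivity pairs directly and confirm that the resulting F1 scores stay within (0, 1]:

# Random precision (p) and sensitivity (s) pairs strictly between 0 and 1
p <- runif(100)
s <- runif(100)
f1 <- (2 * p * s) / (p + s)
range(f1)              # endpoints fall strictly between 0 and 1
all(f1 > 0 & f1 <= 1)  # expected to be TRUE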
Manual Calculation of Receiver Operating Characteristic (ROC) and Area Under the Curve (AUC)
get_roc <- function(df){
  # Define threshold values between 0 and 1, incrementing by 0.01
  threshold <- seq(0, 1, 0.01)
  sens <- c()
  fpr <- c()
  # For every threshold value, compute the true positive rate (sensitivity)
  # and the false positive rate (1 - specificity)
  for (t in threshold){
    sens <- append(sens, sum(df$scored.probability >= t & df$class == 1) / sum(df$class == 1))
    fpr <- append(fpr, sum(df$scored.probability >= t & df$class == 0) / sum(df$class == 0))
  }
  # Push the resulting vectors to a data frame for plotting
  tmp_df <- data.frame(sens=sens, fpr=fpr)
  # Plot sensitivity against the false positive rate
  roc_plot <- ggplot(tmp_df, aes(x=fpr, y=sens, group=1)) +
    geom_line() +
    geom_point() +
    geom_abline(intercept = 0, slope = 1)
  # Area Under the Curve (AUC): the probability that a randomly chosen positive
  # case is scored higher than a randomly chosen negative case
  pos = df$scored.probability[df$class == 1]
  neg = df$scored.probability[df$class == 0]
  auc_value = mean(replicate(100000, sample(pos, size=1) > sample(neg, size=1)))
  return(list(plot=roc_plot, auc=auc_value))
}
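As a cross-check on the sampling-based AUC estimate (a sketch only; the function name get_auc_trapezoid is made up here), the area can also be approximated by applying the trapezoidal rule to the same (false positive rate, sensitivity) points:

get_auc_trapezoid <- function(df){
  # Recompute the ROC points over the same thresholds used in get_roc
  threshold <- seq(0, 1, 0.01)
  sens <- sapply(threshold, function(t) sum(df$scored.probability >= t & df$class == 1) / sum(df$class == 1))
  fpr  <- sapply(threshold, function(t) sum(df$scored.probability >= t & df$class == 0) / sum(df$class == 0))
  # Sort by increasing false positive rate, then integrate with the trapezoidal rule
  ord <- order(fpr)
  fpr <- fpr[ord]
  sens <- sens[ord]
  sum(diff(fpr) * (head(sens, -1) + tail(sens, -1)) / 2)
}
# Example usage:
# get_auc_trapezoid(df)

Both estimates should land close to the pROC value reported at the end of this write-up.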
rocauc <- get_roc(df)
rocauc$plot

Complete Evaluation
score = data.frame(accuracy=get_accuracy(df, 'scored.class', 'class'),
                   classification_error_rate=get_classification_error_rate(df, 'scored.class', 'class'),
                   precision=get_precision(df, 'scored.class', 'class'),
                   sensitivity=get_sensitivity(df, 'scored.class', 'class'),
                   specificity=get_specificity(df, 'scored.class', 'class'),
                   f1_score=get_f1_score(df, 'scored.class', 'class'),
                   auc=rocauc$auc)
kable(score)

| accuracy| classification_error_rate| precision| sensitivity| specificity| f1_score|     auc|
|--------:|-------------------------:|---------:|-----------:|-----------:|--------:|-------:|
|     0.81|                      0.19|      0.84|        0.47|        0.96|      0.6| 0.84713|
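As a quick sanity check, accuracy and the classification error rate partition every prediction between them, so they should sum to one:

# Accuracy + classification error rate should equal 1 (0.81 + 0.19 above)
score$accuracy + score$classification_error_rate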
Measurements using the caret package
Sensitivity and specificity appear reversed relative to my results. This is because confusionMatrix treats the first factor level (0) as the positive class by default (note the 'Positive' Class : 0 line in the output), whereas my manual calculations treat class 1 as positive. The overall values match my manual calculations; a sketch of an aligned call follows the output below.
library(caret)
confusionMatrix(df[,'scored.class'], df[,'class'])

## Confusion Matrix and Statistics
##
##           Reference
## Prediction   0   1
##          0 119  30
##          1   5  27
##
## Accuracy : 0.8066
## 95% CI : (0.7415, 0.8615)
## No Information Rate : 0.6851
## P-Value [Acc > NIR] : 0.0001712
##
## Kappa : 0.4916
## Mcnemar's Test P-Value : 4.976e-05
##
## Sensitivity : 0.9597
## Specificity : 0.4737
## Pos Pred Value : 0.7987
## Neg Pred Value : 0.8438
## Prevalence : 0.6851
## Detection Rate : 0.6575
## Detection Prevalence : 0.8232
## Balanced Accuracy : 0.7167
##
## 'Positive' Class : 0
##
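A minimal sketch of an aligned call, assuming a recent caret version (which requires factor inputs) and using the positive argument to select class 1:

# Convert to factors and declare class 1 as the positive class so that
# sensitivity and specificity line up with the manual calculations
confusionMatrix(factor(df$scored.class, levels = c(0, 1)),
                factor(df$class, levels = c(0, 1)),
                positive = "1")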
ROC and AUC using the pROC package
library(pROC)
roc(df$class ~ df$scored.probability, df, plot=TRUE)

##
## Call:
## roc.formula(formula = df$class ~ df$scored.probability, data = df, plot = TRUE)
##
## Data: df$scored.probability in 124 controls (df$class 0) < 57 cases (df$class 1).
## Area under the curve: 0.8503
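For completeness, a sketch of extracting the AUC programmatically and comparing it with the manual Monte Carlo estimate (the object name roc_obj is arbitrary):

roc_obj <- roc(df$class, df$scored.probability)
auc(roc_obj)   # pROC's AUC, 0.8503 above
rocauc$auc     # Monte Carlo estimate from the manual get_roc function

The two values agree to roughly two decimal places.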