# Classification Performance Metrics Analysis

# Load necessary libraries
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)

# Read the classification model performance data

CMP <- read.csv("https://raw.githubusercontent.com/MRobinson112/Classification_Performance_Metrics_ec/main/classification_model_performance.csv", header = TRUE, stringsAsFactors = FALSE)
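
The analysis below relies on two columns: the observed label `class` (0/1) and the model's predicted probability `scored.probability`. A quick structural check (a minimal sketch, output not shown) confirms they are present:

# Inspect the columns used throughout: class (0/1 label) and scored.probability
str(CMP)
summary(CMP$scored.probability)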

# Calculate Null Error Rate
most_frequent_class <- names(which.max(table(CMP$class))) 
null_error_rate <- sum(CMP$class != most_frequent_class) / nrow(CMP) 
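
The null error rate is the error we would incur by always predicting the majority class, so it is the baseline any threshold choice should beat. A minimal check simply reports it:

# Baseline error from always predicting the most frequent class
cat("Most frequent class:", most_frequent_class, "\n")
cat("Null error rate:", round(null_error_rate, 4), "\n")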

# Plot Class Distribution
ggplot(CMP, aes(x=factor(class), fill=factor(class))) + 
  geom_bar() +
  labs(x ="Target", y = "Count")

# Analyze performance at each threshold
thresholds <- c(0.2, 0.5, 0.8)
outcome <- data.frame(threshold=double(), TP=numeric(), FP=numeric(), TN=numeric(), FN=numeric())

for (thresh in thresholds) {
  predicted <- ifelse(CMP$scored.probability > thresh, 1, 0)
  TP <- sum(predicted == 1 & CMP$class == 1)  # true positives
  FP <- sum(predicted == 1 & CMP$class == 0)  # false positives
  TN <- sum(predicted == 0 & CMP$class == 0)  # true negatives
  FN <- sum(predicted == 0 & CMP$class == 1)  # false negatives
  
  outcome <- rbind(outcome, data.frame(threshold=thresh, TP=TP, FP=FP, TN=TN, FN=FN))
}
}

print(outcome)
##   threshold TP FP  TN FN
## 1       0.2 49 51  73  8
## 2       0.5 27  5 119 30
## 3       0.8  9  1 123 48
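
Since dplyr is already loaded, the same confusion counts can be computed without growing a data frame inside a loop. This is a minimal alternative sketch, assuming the same column names; it should reproduce the table above:

# dplyr alternative: build one row of confusion counts per threshold, then bind them
outcome_dplyr <- lapply(thresholds, function(t) {
  CMP %>%
    mutate(predicted = as.integer(scored.probability > t)) %>%
    summarise(threshold = t,
              TP = sum(predicted == 1 & class == 1),
              FP = sum(predicted == 1 & class == 0),
              TN = sum(predicted == 0 & class == 0),
              FN = sum(predicted == 0 & class == 1))
}) %>% bind_rows()
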
# Calculate Performance Metrics for Each Threshold
performance_metrics <- data.frame(threshold=double(), Accuracy=double(), Precision=double(), Recall=double(), F1_Score=double())

for (i in 1:nrow(outcome)) {
  TP <- outcome$TP[i]
  FP <- outcome$FP[i]
  TN <- outcome$TN[i]
  FN <- outcome$FN[i]
  
  Accuracy <- (TP + TN) / (TP + FP + TN + FN)
  Precision <- TP / (TP + FP)
  Recall <- TP / (TP + FN)
  F1_Score <- 2 * (Precision * Recall) / (Precision + Recall)
  
  performance_metrics <- rbind(performance_metrics, data.frame(threshold=outcome$threshold[i], Accuracy=Accuracy, Precision=Precision, Recall=Recall, F1_Score=F1_Score))
}

print(performance_metrics)
##   threshold  Accuracy Precision    Recall  F1_Score
## 1       0.2 0.6740331   0.49000 0.8596491 0.6242038
## 2       0.5 0.8066298   0.84375 0.4736842 0.6067416
## 3       0.8 0.7292818   0.90000 0.1578947 0.2686567
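
As a quick hand-check of the formulas, the 0.5 threshold row can be recomputed directly from its confusion counts (TP = 27, FP = 5, TN = 119, FN = 30 in the table above):

# Sanity check of the 0.5 threshold row using the counts printed above
tp <- 27; fp <- 5; tn <- 119; fn <- 30
acc  <- (tp + tn) / (tp + fp + tn + fn)  # (27 + 119) / 181 ~ 0.8066
prec <- tp / (tp + fp)                   # 27 / 32  = 0.84375
rec  <- tp / (tp + fn)                   # 27 / 57  ~ 0.4737
f1   <- 2 * prec * rec / (prec + rec)    # ~ 0.6067
c(Accuracy = acc, Precision = prec, Recall = rec, F1 = f1)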

Question 4:

  1. A 0.2 scored-probability threshold would be preferable in situations where missing a positive case has severe consequences, such as medical screening: at this threshold recall is highest (about 0.86), at the cost of many more false positives.
  2. A 0.8 scored-probability threshold would be preferable when we want fewer but more certain positive predictions, for example a credit card company flagging likely fraud: at this threshold precision is highest (0.90), at the cost of missing most true positives (recall about 0.16). The threshold sweep sketched below visualizes this trade-off.
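
To visualize that trade-off, a finer grid of thresholds can be swept and plotted. This is a sketch under the same column-name assumptions; thresholds that yield no positive predictions produce non-finite precision values, which ggplot drops with a warning:

# Sweep a grid of thresholds and plot how accuracy, precision, and recall move
grid <- seq(0.05, 0.95, by = 0.05)
sweep_metrics <- lapply(grid, function(t) {
  predicted <- as.integer(CMP$scored.probability > t)
  TP <- sum(predicted == 1 & CMP$class == 1)
  FP <- sum(predicted == 1 & CMP$class == 0)
  TN <- sum(predicted == 0 & CMP$class == 0)
  FN <- sum(predicted == 0 & CMP$class == 1)
  data.frame(threshold = t,
             Accuracy  = (TP + TN) / nrow(CMP),
             Precision = TP / (TP + FP),
             Recall    = TP / (TP + FN))
}) %>% bind_rows()

sweep_metrics %>%
  pivot_longer(-threshold, names_to = "metric", values_to = "value") %>%
  ggplot(aes(x = threshold, y = value, colour = metric)) +
  geom_line() +
  labs(x = "Scored probability threshold", y = "Metric value")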