library(readr)
library(RCurl)
library(ggplot2)
# Read the dataset 


URL <- "https://raw.githubusercontent.com/acatlin/data/master/penguin_predictions.csv"
URL_handle <- RCurl::getURL(URL)
df <- read.csv(text = URL_handle, header = TRUE, sep = ",")
pillar::glimpse(df)
## Rows: 93
## Columns: 3
## $ .pred_female <dbl> 0.99217462, 0.95423945, 0.98473504, 0.18702056, 0.9947012…
## $ .pred_class  <chr> "female", "female", "female", "male", "female", "female",…
## $ sex          <chr> "female", "female", "female", "female", "female", "female…
print("This is the size of the dataframen and let's take a look at its contents")
## [1] "This is the size of the dataframen and let's take a look at its contents"
head(df)
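# A minimal alternative sketch (not part of the original workflow): since readr
# is already loaded, read_csv() can read the URL directly, so the RCurl step is
# not strictly necessary. The df_readr name is just for illustration; assumes
# readr >= 2.0 for the show_col_types argument.
df_readr <- readr::read_csv(URL, show_col_types = FALSE)
identical(dim(df_readr), dim(df))  # expected to be TRUE (93 rows, 3 columns)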
# Calculate the frequency table of the actual outcome variable.
freq_table <- table(df$sex)
# Calculate the majority class count and total observations
majority_count <- max(freq_table)
total_count <- sum(freq_table)

# Compute the null error rate: the proportion of observations in the majority class.
null_error_rate <- majority_count / total_count

# Print the null error rate
print(paste("Null Error Rate:", round(null_error_rate, 3)))
## [1] "Null Error Rate: 0.581"
# The null error rate is 0.581 (58.1%): if we always predicted the
# majority class (male), we would be correct 58.1% of the time.
# The null error rate serves as a baseline performance metric;
# a classification model is only considered useful if it performs
# better than this baseline.
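
# As a quick cross-check of the baseline above, prop.table() gives the class
# proportions directly (a sketch using the same df):
class_props <- prop.table(table(df$sex))
print(round(class_props, 3))   # female ~0.419, male ~0.581
print(paste("Null Error Rate:", round(max(class_props), 3)))   # should match 0.581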

# Create a bar plot to show the distribution of the actual outcome variable
ggplot(df, aes(x = sex, fill = sex)) +
  geom_bar() +
  labs(title = "Distribution of Actual Sex in the Dataset",
       x = "Sex",
       y = "Count") +
  theme_minimal()
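
# A small variant of the plot above that labels each bar with its count
# (a sketch; uses ggplot2's after_stat(), available in recent ggplot2 versions):
ggplot(df, aes(x = sex, fill = sex)) +
  geom_bar() +
  geom_text(stat = "count", aes(label = after_stat(count)), vjust = -0.5) +
  labs(title = "Distribution of Actual Sex in the Dataset",
       x = "Sex",
       y = "Count") +
  theme_minimal()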

# Confusion matrix

# Define the thresholds to evaluate
thresholds <- c(0.2, 0.5, 0.8)
# Loop through each threshold and compute the confusion matrix
for (thr in thresholds) {
  # Generate predicted class: if .pred_female is at least the threshold, 
  #predict "female", otherwise "male"
  predicted <- ifelse(df$.pred_female >= thr, "female", "male")
  
  # Calculate confusion matrix components (treating "female" as positive)
  TP <- sum(predicted == "female" & df$sex == "female")
  FP <- sum(predicted == "female" & df$sex == "male")
  TN <- sum(predicted == "male"   & df$sex == "male")
  FN <- sum(predicted == "male"   & df$sex == "female")
  
  # Print out the confusion matrix in a readable format
  cat("-------------------------------------------------\n")
  cat("Threshold:", thr, "\n")
  cat("-------------------------------------------------\n")
  cat(sprintf("%20s %15s\n", "Actual: female", "Actual: male"))
  cat(sprintf("Predicted female: %5d %15d\n", TP, FP))
  cat(sprintf("Predicted male:   %5d %15d\n", FN, TN))
  cat("\n")
}
## -------------------------------------------------
## Threshold: 0.2 
## -------------------------------------------------
##       Actual: female    Actual: male
## Predicted female:    37               6
## Predicted male:       2              48
## 
## -------------------------------------------------
## Threshold: 0.5 
## -------------------------------------------------
##       Actual: female    Actual: male
## Predicted female:    36               3
## Predicted male:       3              51
## 
## -------------------------------------------------
## Threshold: 0.8 
## -------------------------------------------------
##       Actual: female    Actual: male
## Predicted female:    36               2
## Predicted male:       3              52
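
# The counts above can be cross-checked with base table(); shown here for the
# 0.5 threshold only (a sketch equivalent to the manual TP/FP/TN/FN sums):
predicted_05 <- ifelse(df$.pred_female >= 0.5, "female", "male")
print(table(Predicted = predicted_05, Actual = df$sex))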
# 3. Compute accuracy, precision, recall, and F1 at each threshold

# Create an empty data frame to store the metrics
metrics_table <- data.frame(
  Threshold = thresholds,
  Accuracy  = NA,
  Precision = NA,
  Recall    = NA,
  F1        = NA
)
# Loop through each threshold, rebuild the confusion matrix, and compute the metrics
for (i in seq_along(thresholds)) {
  thr <- thresholds[i]
  # Generate predicted class: if .pred_female is at least the 
  #threshold, predict "female", otherwise "male"
  predicted <- ifelse(df$.pred_female >= thr, "female", "male")
  
  # Calculate confusion matrix components (treating "female" as positive)
  TP <- sum(predicted == "female" & df$sex == "female")
  FP <- sum(predicted == "female" & df$sex == "male")
  TN <- sum(predicted == "male"   & df$sex == "male")
  FN <- sum(predicted == "male"   & df$sex == "female")
  
  # Calculate the evaluation metrics
  accuracy  <- (TP + TN) / (TP + TN + FP + FN)
  precision <- if ((TP + FP) > 0) TP / (TP + FP) else NA
  recall    <- if ((TP + FN) > 0) TP / (TP + FN) else NA
  f1        <- if (!is.na(precision) && !is.na(recall) && 
                   (precision + recall) > 0) {
    2 * precision * recall / (precision + recall)
  } else {
    NA
  }
  
  # Store the metrics in the table
  metrics_table[i, "Accuracy"]  <- round(accuracy, 3)
  metrics_table[i, "Precision"] <- round(precision, 3)
  metrics_table[i, "Recall"]    <- round(recall, 3)
  metrics_table[i, "F1"]        <- round(f1, 3)
}
# Display the resulting table
print(metrics_table)
##   Threshold Accuracy Precision Recall    F1
## 1       0.2    0.914     0.860  0.949 0.902
## 2       0.5    0.935     0.923  0.923 0.923
## 3       0.8    0.946     0.947  0.923 0.935
# In this case recall is highest at threshold 0.2 (~0.95), while at 0.5 and 0.8
# it drops slightly to ~0.92. Precision and overall accuracy, on the other hand,
# improve as the threshold increases, with 0.8 giving the best accuracy and F1
# in this comparison.
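
# To see how the metrics trade off beyond the three thresholds above, the same
# calculation can be swept over a finer grid (a sketch; precision and recall are
# computed exactly as in the loop above, and the grid choice is arbitrary):
grid <- seq(0.05, 0.95, by = 0.05)
sweep_metrics <- do.call(rbind, lapply(grid, function(thr) {
  predicted <- ifelse(df$.pred_female >= thr, "female", "male")
  TP <- sum(predicted == "female" & df$sex == "female")
  FP <- sum(predicted == "female" & df$sex == "male")
  FN <- sum(predicted == "male"   & df$sex == "female")
  data.frame(Threshold = thr,
             Precision = ifelse(TP + FP > 0, TP / (TP + FP), NA),
             Recall    = ifelse(TP + FN > 0, TP / (TP + FN), NA))
}))
ggplot(sweep_metrics, aes(x = Threshold)) +
  geom_line(aes(y = Precision, color = "Precision")) +
  geom_line(aes(y = Recall, color = "Recall")) +
  labs(title = "Precision and Recall Across Thresholds",
       y = "Score", color = "Metric") +
  theme_minimal()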