# Evaluate the classifier at several probability cut-offs.
#
# For each threshold, `mydata$.pred_female` is turned into a predicted label
# ("female" when the value is strictly above the threshold, "male" otherwise),
# cross-tabulated against `mydata$sex`, and summarised as confusion counts plus
# accuracy, precision, recall and F1. The result is the `metrics` data frame,
# one row per threshold.

# Thresholds to test
thresholds <- c(0.2, 0.5, 0.8)

# Which class counts as "positive". TP/FP/TN/FN (and therefore precision,
# recall and F1) are defined relative to this choice; swap the two values to
# report the metrics from the other class's point of view.
positive <- "female"
negative <- "male"
class_levels <- c(positive, negative)

# Ratio that yields NA (instead of NaN/Inf) when the denominator is zero.
safe_ratio <- function(numerator, denominator) {
  if (denominator > 0) numerator / denominator else NA_real_
}

# Prepare the result table up front, including F1, so every column exists
# with its final type before the loop starts filling it in.
metrics <- data.frame(
  Threshold = thresholds,
  TP = NA_integer_, FP = NA_integer_, TN = NA_integer_, FN = NA_integer_,
  Accuracy = NA_real_,
  Precision = NA_real_,
  Recall = NA_real_,
  F1 = NA_real_
)

# Loop over thresholds
for (i in seq_along(thresholds)) {
  thresh <- thresholds[i]

  # Convert probabilities to predicted labels based on threshold
  pred <- ifelse(mydata$.pred_female > thresh, "female", "male")

  # Confusion matrix: rows = actual class, columns = predicted class.
  # Fixing the factor levels guarantees a full 2x2 table even when a class
  # is never predicted (or never observed) at this threshold, so every cell
  # can be indexed directly and simply holds 0 when empty.
  cm <- table(
    factor(mydata$sex, levels = class_levels),
    pred = factor(pred, levels = class_levels)
  )

  TP <- cm[positive, positive]
  FP <- cm[negative, positive]
  TN <- cm[negative, negative]
  FN <- cm[positive, negative]

  # Store confusion counts
  metrics$TP[i] <- TP
  metrics$FP[i] <- FP
  metrics$TN[i] <- TN
  metrics$FN[i] <- FN

  # Compute performance metrics; anything undefined is reported as NA
  precision <- safe_ratio(TP, TP + FP)
  recall <- safe_ratio(TP, TP + FN)

  metrics$Accuracy[i] <- safe_ratio(TP + TN, TP + TN + FP + FN)
  metrics$Precision[i] <- precision
  metrics$Recall[i] <- recall

  # F1 is the harmonic mean of precision and recall. It is undefined when
  # either input is undefined or when both are zero (0 / 0), in which case
  # NA is stored, matching how Precision and Recall are handled.
  if (is.na(precision) || is.na(recall) || (precision + recall) <= 0) {
    metrics$F1[i] <- NA_real_
  } else {
    metrics$F1[i] <- 2 * precision * recall / (precision + recall)
  }
}

# Show final table
metrics