Final Project - Placement Prediction

Author

Amandeep Singh

Importing libraries

# Load the packages used throughout the analysis. suppressWarnings() silences
# attach-time warnings; the package startup messages below still print.
suppressWarnings({
  library(tidyverse)   # also attaches dplyr, ggplot2 and readr
  library(tidymodels)  # also attaches recipes
  library(dplyr)
  library(caret)
  library(corrr)
  library(ggplot2)
  library(plotly)
  library(corrplot)
  library(recipes)
  library(readr)
})
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
── Attaching packages ────────────────────────────────────── tidymodels 1.1.1 ──

✔ broom        1.0.5     ✔ rsample      1.2.0
✔ dials        1.2.0     ✔ tune         1.1.2
✔ infer        1.0.5     ✔ workflows    1.1.3
✔ modeldata    1.2.0     ✔ workflowsets 1.0.1
✔ parsnip      1.1.1     ✔ yardstick    1.2.0
✔ recipes      1.0.8     

── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
✖ scales::discard() masks purrr::discard()
✖ dplyr::filter()   masks stats::filter()
✖ recipes::fixed()  masks stringr::fixed()
✖ dplyr::lag()      masks stats::lag()
✖ yardstick::spec() masks readr::spec()
✖ recipes::step()   masks stats::step()
• Learn how to get started at https://www.tidymodels.org/start/

Loading required package: lattice


Attaching package: 'caret'


The following objects are masked from 'package:yardstick':

    precision, recall, sensitivity, specificity


The following object is masked from 'package:purrr':

    lift



Attaching package: 'plotly'


The following object is masked from 'package:ggplot2':

    last_plot


The following object is masked from 'package:stats':

    filter


The following object is masked from 'package:graphics':

    layout


corrplot 0.92 loaded

Importing Dataset

# Set the working directory to the location where the CSV file is stored
setwd("C:\\MSBA\\MGT_665 - Solving Problems with Machine Learning\\Final Project")

# Read the CSV file into R
placement_data <- read.csv("Placement_Data_Full_Class.csv", header = TRUE)


# View the first few rows of the data to ensure it has been imported correctly
head(placement_data)
  sl_no gender ssc_p   ssc_b hsc_p   hsc_b    hsc_s degree_p  degree_t workex
1     1      M 67.00  Others 91.00  Others Commerce    58.00  Sci&Tech     No
2     2      M 79.33 Central 78.33  Others  Science    77.48  Sci&Tech    Yes
3     3      M 65.00 Central 68.00 Central     Arts    64.00 Comm&Mgmt     No
4     4      M 56.00 Central 52.00 Central  Science    52.00  Sci&Tech     No
5     5      M 85.80 Central 73.60 Central Commerce    73.30 Comm&Mgmt     No
6     6      M 55.00  Others 49.80  Others  Science    67.25  Sci&Tech    Yes
  etest_p specialisation mba_p     status salary
1    55.0         Mkt&HR 58.80     Placed 270000
2    86.5        Mkt&Fin 66.28     Placed 200000
3    75.0        Mkt&Fin 57.80     Placed 250000
4    66.0         Mkt&HR 59.43 Not Placed     NA
5    96.8        Mkt&Fin 55.50     Placed 425000
6    55.0        Mkt&Fin 51.58 Not Placed     NA
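
Note that since R 4.0.0, read.csv() no longer converts character columns to factors by default (stringsAsFactors = FALSE), which is why the categorical columns are converted to factors explicitly in the pre-processing step below. A minimal alternative sketch using readr, which is already attached via the tidyverse (it returns a tibble rather than a data frame):

placement_data <- read_csv("Placement_Data_Full_Class.csv", show_col_types = FALSE)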

Exploratory Data Analysis (EDA) & Data Pre-processing

glimpse(placement_data)
Rows: 215
Columns: 15
$ sl_no          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, …
$ gender         <chr> "M", "M", "M", "M", "M", "M", "F", "M", "M", "M", "M", …
$ ssc_p          <dbl> 67.00, 79.33, 65.00, 56.00, 85.80, 55.00, 46.00, 82.00,…
$ ssc_b          <chr> "Others", "Central", "Central", "Central", "Central", "…
$ hsc_p          <dbl> 91.00, 78.33, 68.00, 52.00, 73.60, 49.80, 49.20, 64.00,…
$ hsc_b          <chr> "Others", "Others", "Central", "Central", "Central", "O…
$ hsc_s          <chr> "Commerce", "Science", "Arts", "Science", "Commerce", "…
$ degree_p       <dbl> 58.00, 77.48, 64.00, 52.00, 73.30, 67.25, 79.00, 66.00,…
$ degree_t       <chr> "Sci&Tech", "Sci&Tech", "Comm&Mgmt", "Sci&Tech", "Comm&…
$ workex         <chr> "No", "Yes", "No", "No", "No", "Yes", "No", "Yes", "No"…
$ etest_p        <dbl> 55.00, 86.50, 75.00, 66.00, 96.80, 55.00, 74.28, 67.00,…
$ specialisation <chr> "Mkt&HR", "Mkt&Fin", "Mkt&Fin", "Mkt&HR", "Mkt&Fin", "M…
$ mba_p          <dbl> 58.80, 66.28, 57.80, 59.43, 55.50, 51.58, 53.29, 62.14,…
$ status         <chr> "Placed", "Placed", "Placed", "Not Placed", "Placed", "…
$ salary         <int> 270000, 200000, 250000, NA, 425000, NA, NA, 252000, 231…

Checking missing values

# Check for missing values in each column
missing_values <- colSums(is.na(placement_data))

# Display the sum of missing values in each column
print(missing_values)
         sl_no         gender          ssc_p          ssc_b          hsc_p 
             0              0              0              0              0 
         hsc_b          hsc_s       degree_p       degree_t         workex 
             0              0              0              0              0 
       etest_p specialisation          mba_p         status         salary 
             0              0              0              0             67 

We have 67 NAs in the salary column, which matches the 67 students who were not placed. These missing salaries are expected, so no further treatment is required.
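
A quick sanity check (a minimal sketch; it assumes salary is missing exactly for the non-placed students) confirms the two counts line up:

sum(is.na(placement_data$salary))            # expected: 67
sum(placement_data$status == "Not Placed")   # expected: 67
all(is.na(placement_data$salary) == (placement_data$status == "Not Placed"))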

# Summary statistics
summary(placement_data)
     sl_no          gender              ssc_p          ssc_b          
 Min.   :  1.0   Length:215         Min.   :40.89   Length:215        
 1st Qu.: 54.5   Class :character   1st Qu.:60.60   Class :character  
 Median :108.0   Mode  :character   Median :67.00   Mode  :character  
 Mean   :108.0                      Mean   :67.30                     
 3rd Qu.:161.5                      3rd Qu.:75.70                     
 Max.   :215.0                      Max.   :89.40                     
                                                                      
     hsc_p          hsc_b              hsc_s              degree_p    
 Min.   :37.00   Length:215         Length:215         Min.   :50.00  
 1st Qu.:60.90   Class :character   Class :character   1st Qu.:61.00  
 Median :65.00   Mode  :character   Mode  :character   Median :66.00  
 Mean   :66.33                                         Mean   :66.37  
 3rd Qu.:73.00                                         3rd Qu.:72.00  
 Max.   :97.70                                         Max.   :91.00  
                                                                      
   degree_t            workex             etest_p     specialisation    
 Length:215         Length:215         Min.   :50.0   Length:215        
 Class :character   Class :character   1st Qu.:60.0   Class :character  
 Mode  :character   Mode  :character   Median :71.0   Mode  :character  
                                       Mean   :72.1                     
                                       3rd Qu.:83.5                     
                                       Max.   :98.0                     
                                                                        
     mba_p          status              salary      
 Min.   :51.21   Length:215         Min.   :200000  
 1st Qu.:57.95   Class :character   1st Qu.:240000  
 Median :62.00   Mode  :character   Median :265000  
 Mean   :62.28                      Mean   :288655  
 3rd Qu.:66.25                      3rd Qu.:300000  
 Max.   :77.89                      Max.   :940000  
                                    NA's   :67      
table(placement_data$status)

Not Placed     Placed 
        67        148 
table(placement_data$gender)

  F   M 
 76 139 
table(placement_data$ssc_b)

Central  Others 
    116      99 
table(placement_data$hsc_s)

    Arts Commerce  Science 
      11      113       91 
table(placement_data$degree_t)

Comm&Mgmt    Others  Sci&Tech 
      145        11        59 
table(placement_data$specialisation)

Mkt&Fin  Mkt&HR 
    120      95 
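
Before modeling, it can also help to cross-tabulate the outcome against a categorical predictor; a small illustrative sketch (work experience is an arbitrary choice here):

# Row-wise proportions of placement status by prior work experience
prop.table(table(placement_data$workex, placement_data$status), margin = 1)
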
ggplot(placement_data, aes(x = ssc_p)) + 
  geom_histogram(fill = "lightblue", color = "black") + 
  labs(title = "Distribution of Secondary Education Percentage", x = "Secondary Education %age", y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(placement_data, aes(x = hsc_p)) + 
  geom_histogram(fill = "lightblue", color = "black") + 
  labs(title = "Distribution of Higher Secondary Education Percentage", x = "Higher Secondary Education %age", y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(placement_data, aes(x = degree_p)) + 
  geom_histogram(fill = "lightblue", color = "black") + 
  labs(title = "Distribution of Undergrad Percentage", x = "Undergrad %age", y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(placement_data, aes(x = etest_p)) + 
  geom_histogram(fill = "lightblue", color = "black") + 
  labs(title = "Distribution of Employability Test Percentage", x = "Emp Test %age", y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(placement_data, aes(x = mba_p)) + 
  geom_histogram(fill = "lightblue", color = "black") + 
  labs(title = "Distribution of MBA Percentage", x = "MBA %age", y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
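
The repeated stat_bin() messages above are just a reminder to choose the binning explicitly; a minimal sketch that silences the message (the bin count of 20 is an arbitrary choice):

ggplot(placement_data, aes(x = ssc_p)) +
  geom_histogram(bins = 20, fill = "lightblue", color = "black") +
  labs(title = "Distribution of Secondary Education Percentage",
       x = "Secondary Education %age", y = "Count")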

ggplot(placement_data, aes(x = status, fill = status)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Placement Status", x = "Placement Status", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

ggplot(placement_data, aes(x = gender, fill = gender)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Gender", x = "Gender", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

ggplot(placement_data, aes(x = ssc_b, fill = ssc_b)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Board of Education", x = "Board of Education", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

ggplot(placement_data, aes(x = hsc_s, fill = hsc_s)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Specialization in Higher Secondary Education", x = "HSE Specialization", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

ggplot(placement_data, aes(x = degree_t, fill = degree_t)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Specialization in Undergraduate Course", x = "UG Specialization", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

ggplot(placement_data, aes(x = specialisation, fill = specialisation)) +
  geom_bar(fill = "lightblue", color = "black") +
  labs(title = "Distribution of Specialization in Graduate Course (MBA)", x = "MBA Specialization", y = "Count") +
  geom_text(stat = 'count', aes(label = after_stat(count)), position = position_stack(vjust = 0.5))

Correlation between numerical variables

# Select only the numerical columns for the correlation matrix
numerical_data <- placement_data %>%
  select(-sl_no) %>%
  select_if(is.numeric)

# Calculate the correlation matrix
correlation_matrix <- cor(numerical_data, use = "pairwise.complete.obs")

# Visualize the correlation matrix as a heatmap with values and color coding
corrplot(correlation_matrix, method = "color", type = "upper", order = "hclust", 
         tl.cex = 0.8, tl.col = "black", col = colorRampPalette(c("skyblue", "red"))(100),
         addCoef.col = "black", number.cex = 0.7)
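
The corrr package loaded at the top offers a tidy view of the same correlations; a minimal sketch:

numerical_data %>%
  correlate(use = "pairwise.complete.obs") %>%
  rearrange()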

Mutating variables - converting categorical data into factors

placement_data <- placement_data %>%
  mutate(gender = as.factor(gender),
         ssc_b = as.factor(ssc_b),
         hsc_b = as.factor(hsc_b),
         hsc_s = as.factor(hsc_s),
         degree_t = as.factor(make.names(degree_t)),
         workex = as.factor(workex),
         specialisation = as.factor(make.names(specialisation)),
         status = as.factor(status),
         sl_no = as.factor(sl_no))

str(placement_data)
'data.frame':   215 obs. of  15 variables:
 $ sl_no         : Factor w/ 215 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ gender        : Factor w/ 2 levels "F","M": 2 2 2 2 2 2 1 2 2 2 ...
 $ ssc_p         : num  67 79.3 65 56 85.8 ...
 $ ssc_b         : Factor w/ 2 levels "Central","Others": 2 1 1 1 1 2 2 1 1 1 ...
 $ hsc_p         : num  91 78.3 68 52 73.6 ...
 $ hsc_b         : Factor w/ 2 levels "Central","Others": 2 2 1 1 1 2 2 1 1 1 ...
 $ hsc_s         : Factor w/ 3 levels "Arts","Commerce",..: 2 3 1 3 2 3 2 3 2 2 ...
 $ degree_p      : num  58 77.5 64 52 73.3 ...
 $ degree_t      : Factor w/ 3 levels "Comm.Mgmt","Others",..: 3 3 1 3 1 3 1 3 1 1 ...
 $ workex        : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 2 1 2 1 1 ...
 $ etest_p       : num  55 86.5 75 66 96.8 ...
 $ specialisation: Factor w/ 2 levels "Mkt.Fin","Mkt.HR": 2 1 1 2 1 1 1 1 1 1 ...
 $ mba_p         : num  58.8 66.3 57.8 59.4 55.5 ...
 $ status        : Factor w/ 2 levels "Not Placed","Placed": 2 2 2 1 2 1 1 2 2 1 ...
 $ salary        : int  270000 200000 250000 NA 425000 NA NA 252000 231000 NA ...

Splitting the data into training (80%) and testing (20%)

set.seed(221023)

#clean model data
placement_data_model <- placement_data %>%
  select(-salary)%>%
  mutate(status = as.factor(make.names(status)))

#split into two datasets
split <- createDataPartition(placement_data_model$status,
                             p =0.8, 
                             list = FALSE)

train_data <- placement_data_model[split,]
test_data <- placement_data_model[-split,]
#store the ID variable in its original format
y_test <- test_data$sl_no
y_train <- train_data$sl_no

#removing sl_no from the data to be processed (salary was already dropped above)
test_data <- test_data %>% select(-sl_no)
train_data <- train_data %>% select(-sl_no)

#center and scale our data
preProcess_range_model <- preProcess(train_data, method=c("center", "scale"))

train_data <- predict(preProcess_range_model, newdata = train_data)
test_data <- predict(preProcess_range_model, newdata = test_data)
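
Two quick checks on the split and the pre-processing (a sketch; createDataPartition() stratifies on status, and preProcess() only transforms the numeric columns):

# Class balance should be similar in both sets thanks to the stratified split
prop.table(table(train_data$status))
prop.table(table(test_data$status))

# Numeric predictors should now be roughly mean 0 / sd 1; factors are untouched
sapply(select_if(train_data, is.numeric), function(x) c(mean = mean(x), sd = sd(x)))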

Applied 10-fold cross-validation repeated 5 times

train.control <- trainControl(method = "repeatedcv", 
                              number = 10,
                              repeats = 5,
                              classProbs = T)
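
With classProbs = TRUE, caret can also report ROC-based resampling summaries; an optional variant (a sketch, not used below):

train.control_roc <- trainControl(method = "repeatedcv",
                                  number = 10,
                                  repeats = 5,
                                  classProbs = TRUE,
                                  summaryFunction = twoClassSummary)
# e.g. train(status ~ ., data = train_data, method = "glm",
#            trControl = train.control_roc, metric = "ROC")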

Modeling

Training four models: logistic regression, a decision tree classifier, k-nearest neighbors (KNN), and a linear SVM

# Logistic Regression model
logistic_model <- train(status ~ ., data = train_data, method = "glm", trControl = train.control)

# Decision Tree classifier model
decision_tree_model <- train(status ~ ., data = train_data, method = "rpart", trControl = train.control)

# KNN model
knn_model <- train(status ~ ., data = train_data, method = "knn", trControl = train.control)

# SVM linear model
svm_linear_model <- train(status ~ ., data = train_data, method = "svmLinear", trControl = train.control)
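
Before scoring the hold-out set, the cross-validated results of the four fits can be compared directly; a minimal sketch using caret::resamples():

cv_results <- resamples(list(Logistic = logistic_model,
                             DecisionTree = decision_tree_model,
                             KNN = knn_model,
                             SVMLinear = svm_linear_model))
summary(cv_results)   # cross-validated Accuracy and Kappa for each model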

Predicting with the four trained models

# Predict using the test dataset
logistic_predictions <- predict(logistic_model, newdata = test_data)
decision_tree_predictions <- predict(decision_tree_model, newdata = test_data)
knn_predictions <- predict(knn_model, newdata = test_data)
svm_linear_predictions <- predict(svm_linear_model, newdata = test_data)

Evaluating models

# Evaluation of Logistic Regression model
logistic_predictions <- predict(logistic_model, newdata = test_data)
logistic_conf_matrix <- confusionMatrix(logistic_predictions, test_data$status)
logistic_accuracy <- logistic_conf_matrix$overall['Accuracy']
logistic_precision <- logistic_conf_matrix$byClass['Pos Pred Value']
logistic_recall <- logistic_conf_matrix$byClass['Sensitivity']
logistic_f1 <- logistic_conf_matrix$byClass['F1']

# Evaluation of Decision Tree classifier model
decision_tree_predictions <- predict(decision_tree_model, newdata = test_data)
decision_tree_conf_matrix <- confusionMatrix(decision_tree_predictions, test_data$status)
decision_tree_accuracy <- decision_tree_conf_matrix$overall['Accuracy']
decision_tree_precision <- decision_tree_conf_matrix$byClass['Pos Pred Value']
decision_tree_recall <- decision_tree_conf_matrix$byClass['Sensitivity']
decision_tree_f1 <- decision_tree_conf_matrix$byClass['F1']

# Evaluation of KNN model
knn_predictions <- predict(knn_model, newdata = test_data)
knn_conf_matrix <- confusionMatrix(knn_predictions, test_data$status)
knn_accuracy <- knn_conf_matrix$overall['Accuracy']
knn_precision <- knn_conf_matrix$byClass['Pos Pred Value']
knn_recall <- knn_conf_matrix$byClass['Sensitivity']
knn_f1 <- knn_conf_matrix$byClass['F1']

# Evaluation of SVM linear model
svm_linear_predictions <- predict(svm_linear_model, newdata = test_data)
svm_linear_conf_matrix <- confusionMatrix(svm_linear_predictions, test_data$status)
svm_linear_accuracy <- svm_linear_conf_matrix$overall['Accuracy']
svm_linear_precision <- svm_linear_conf_matrix$byClass['Pos Pred Value']
svm_linear_recall <- svm_linear_conf_matrix$byClass['Sensitivity']
svm_linear_f1 <- svm_linear_conf_matrix$byClass['F1']

# Displaying the results
print("Logistic Regression Model:")
[1] "Logistic Regression Model:"
print(paste("Accuracy:", logistic_accuracy))
[1] "Accuracy: 0.761904761904762"
print(paste("Precision:", logistic_precision))
[1] "Precision: 0.714285714285714"
print(paste("Recall:", logistic_recall))
[1] "Recall: 0.384615384615385"
print(paste("F1 Score:", logistic_f1))
[1] "F1 Score: 0.5"
print(logistic_conf_matrix$table)
            Reference
Prediction   Not.Placed Placed
  Not.Placed          5      2
  Placed              8     27
print("Decision Tree Classifier Model:")
[1] "Decision Tree Classifier Model:"
print(paste("Accuracy:", decision_tree_accuracy))
[1] "Accuracy: 0.833333333333333"
print(paste("Precision:", decision_tree_precision))
[1] "Precision: 0.8"
print(paste("Recall:", decision_tree_recall))
[1] "Recall: 0.615384615384615"
print(paste("F1 Score:", decision_tree_f1))
[1] "F1 Score: 0.695652173913043"
print(decision_tree_conf_matrix$table)
            Reference
Prediction   Not.Placed Placed
  Not.Placed          8      2
  Placed              5     27
print("KNN Model:")
[1] "KNN Model:"
print(paste("Accuracy:", knn_accuracy))
[1] "Accuracy: 0.785714285714286"
print(paste("Precision:", knn_precision))
[1] "Precision: 1"
print(paste("Recall:", knn_recall))
[1] "Recall: 0.307692307692308"
print(paste("F1 Score:", knn_f1))
[1] "F1 Score: 0.470588235294118"
print(knn_conf_matrix$table)
            Reference
Prediction   Not.Placed Placed
  Not.Placed          4      0
  Placed              9     29
print("SVM Linear Model:")
[1] "SVM Linear Model:"
print(paste("Accuracy:", svm_linear_accuracy))
[1] "Accuracy: 0.80952380952381"
print(paste("Precision:", svm_linear_precision))
[1] "Precision: 0.857142857142857"
print(paste("Recall:", svm_linear_recall))
[1] "Recall: 0.461538461538462"
print(paste("F1 Score:", svm_linear_f1))
[1] "F1 Score: 0.6"
print(svm_linear_conf_matrix$table)
            Reference
Prediction   Not.Placed Placed
  Not.Placed          6      1
  Placed              7     28
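
One caveat when reading these numbers: confusionMatrix() treats the first factor level, Not.Placed, as the positive class by default, so the precision, recall, and F1 reported above describe how well each model identifies students who were not placed. A minimal sketch of the same evaluation with Placed as the positive class (shown for the logistic model only):

logistic_conf_placed <- confusionMatrix(logistic_predictions,
                                        test_data$status,
                                        positive = "Placed")
logistic_conf_placed$byClass[c("Pos Pred Value", "Sensitivity", "F1")]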

Visualization of results

draw_confusion_matrix <- function(cm, model_name) {

  layout(matrix(c(1,1,2)))
  par(mar=c(2,2,2,2))
  plot(c(100, 345), c(300, 450), type = "n", xlab="", ylab="", xaxt='n', yaxt='n')
  title(paste("Confusion Matrix for", model_name), cex.main=2)

  # create the matrix 
  rect(150, 430, 240, 370, col='green')
  text(195, 440, 'Not Placed', cex=1.2)
  rect(250, 430, 340, 370, col='red')
  text(295, 440, 'Placed', cex=1.2)
  text(125, 370, 'Predicted', cex=1.3, srt=90, font=2)
  text(245, 450, 'Actual', cex=1.3, font=2)
  rect(150, 305, 240, 365, col='red')
  rect(250, 305, 340, 365, col='green')
  text(140, 400, 'Not Placed', cex=1.2, srt=90)
  text(140, 335, 'Placed', cex=1.2, srt=90)
  
  # add in the cm results 
  res <- as.numeric(cm$table)
  text(195, 400, res[1], cex=1.6, font=2, col='black')
  text(195, 335, res[2], cex=1.6, font=2, col='black')
  text(295, 400, res[3], cex=1.6, font=2, col='black')
  text(295, 335, res[4], cex=1.6, font=2, col='black')

  # add in the specifics 
  plot(c(100, 0), c(100, 0), type = "n", xlab="", ylab="", main = "DETAILS", xaxt='n', yaxt='n')
  text(10, 85, names(cm$byClass[1]), cex=1.5, font=2)
  text(10, 63, round(as.numeric(cm$byClass[1]), 3), cex=1.2)
  text(30, 85, names(cm$byClass[2]), cex=1.5, font=2)
  text(30, 63, round(as.numeric(cm$byClass[2]), 3), cex=1.2)
  text(50, 85, names(cm$byClass[5]), cex=1.5, font=2)
  text(50, 63, round(as.numeric(cm$byClass[5]), 3), cex=1.2)
  text(70, 85, names(cm$byClass[6]), cex=1.5, font=2)
  text(70, 63, round(as.numeric(cm$byClass[6]), 3), cex=1.2)
  text(90, 85, names(cm$byClass[7]), cex=1.5, font=2)
  text(90, 63, round(as.numeric(cm$byClass[7]), 3), cex=1.2)

  # add in the accuracy information 
  text(30, 40, names(cm$overall[1]), cex=1.5, font=2)
  text(30, 20, round(as.numeric(cm$overall[1]), 3), cex=1.2)
  text(70, 40, names(cm$overall[2]), cex=1.5, font=2)
  text(70, 20, round(as.numeric(cm$overall[2]), 3), cex=1.2) }

cm <- confusionMatrix(logistic_predictions, test_data$status)
draw_confusion_matrix(cm, "Logistic Regression Model")

cm <- confusionMatrix(decision_tree_predictions, test_data$status)
draw_confusion_matrix(cm, "Decision Tree Classifier Model")

cm <- confusionMatrix(knn_predictions, test_data$status)
draw_confusion_matrix(cm, "KNN Model")

cm <- confusionMatrix(svm_linear_predictions, test_data$status)
draw_confusion_matrix(cm, "SVM Linear Model")

Determining the winning model

determine_winning_model <- function(metrics) {
  # Pick the model with the highest accuracy; if several models tie on
  # accuracy, break the tie with the F1 score
  top_models <- names(metrics$accuracy)[metrics$accuracy == max(metrics$accuracy)]
  if (length(top_models) == 1) {
    return(top_models)
  }
  tied_f1 <- metrics$f1[top_models]
  names(tied_f1)[which.max(tied_f1)]
}

# Sample usage of the function
metrics <- list(
  accuracy = c("Logistic Regression" = 0.7619, "Decision Tree" = 0.8333, "KNN" = 0.7857, "SVM Linear" = 0.8095),
  precision = c("Logistic Regression" = 0.7143, "Decision Tree" = 0.8000, "KNN" = 1.0000, "SVM Linear" = 0.8571),
  recall = c("Logistic Regression" = 0.3846, "Decision Tree" = 0.6154, "KNN" = 0.3077, "SVM Linear" = 0.4615),
  f1 = c("Logistic Regression" = 0.5000, "Decision Tree" = 0.6957, "KNN" = 0.4706, "SVM Linear" = 0.6000)
)

winning_model <- determine_winning_model(metrics)
cat("Winning Model: ", winning_model, "\n")
Winning Model:  Decision Tree
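
The metrics list can also be assembled directly from the confusion-matrix objects computed earlier instead of re-typing rounded values; a sketch:

model_cms <- list("Logistic Regression" = logistic_conf_matrix,
                  "Decision Tree"       = decision_tree_conf_matrix,
                  "KNN"                 = knn_conf_matrix,
                  "SVM Linear"          = svm_linear_conf_matrix)

metrics_computed <- list(
  accuracy  = sapply(model_cms, function(cm) cm$overall[["Accuracy"]]),
  precision = sapply(model_cms, function(cm) cm$byClass[["Pos Pred Value"]]),
  recall    = sapply(model_cms, function(cm) cm$byClass[["Sensitivity"]]),
  f1        = sapply(model_cms, function(cm) cm$byClass[["F1"]])
)

determine_winning_model(metrics_computed)   # should again return "Decision Tree"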