library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.1
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.5.2
library(corrplot)
## corrplot 0.95 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.2
library(dplyr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

Exploratory Data Analysis

** Conduct outlier and missing data analysis

# import the obesity-levels dataset
# (note: the object is named `diabetes` throughout, but the data describe obesity levels)
diabetes <- read.csv("ObesityDataSet_raw_and_data_sinthetic.csv")
head(diabetes)
##   Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female  21   1.62   64.0                            yes   no    2   3
## 2 Female  21   1.52   56.0                            yes   no    3   3
## 3   Male  23   1.80   77.0                            yes   no    2   3
## 4   Male  27   1.80   87.0                             no   no    3   3
## 5   Male  22   1.78   89.8                             no   no    2   1
## 6   Male  29   1.62   53.0                             no  yes    2   3
##        CAEC SMOKE CH2O SCC FAF TUE       CALC                MTRANS
## 1 Sometimes    no    2  no   0   1         no Public_Transportation
## 2 Sometimes   yes    3 yes   3   0  Sometimes Public_Transportation
## 3 Sometimes    no    2  no   2   1 Frequently Public_Transportation
## 4 Sometimes    no    2  no   2   0 Frequently               Walking
## 5 Sometimes    no    2  no   0   0  Sometimes Public_Transportation
## 6 Sometimes    no    2  no   0   0  Sometimes            Automobile
##            NObeyesdad
## 1       Normal_Weight
## 2       Normal_Weight
## 3       Normal_Weight
## 4  Overweight_Level_I
## 5 Overweight_Level_II
## 6       Normal_Weight
colSums(is.na(diabetes))
##                         Gender                            Age 
##                              0                              0 
##                         Height                         Weight 
##                              0                              0 
## family_history_with_overweight                           FAVC 
##                              0                              0 
##                           FCVC                            NCP 
##                              0                              0 
##                           CAEC                          SMOKE 
##                              0                              0 
##                           CH2O                            SCC 
##                              0                              0 
##                            FAF                            TUE 
##                              0                              0 
##                           CALC                         MTRANS 
##                              0                              0 
##                     NObeyesdad 
##                              0
library(DataExplorer)
plot_missing(diabetes)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the DataExplorer package.
##   Please report the issue at
##   <https://github.com/boxuancui/DataExplorer/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

str(diabetes)
## 'data.frame':    2111 obs. of  17 variables:
##  $ Gender                        : chr  "Female" "Female" "Male" "Male" ...
##  $ Age                           : num  21 21 23 27 22 29 23 22 24 22 ...
##  $ Height                        : num  1.62 1.52 1.8 1.8 1.78 1.62 1.5 1.64 1.78 1.72 ...
##  $ Weight                        : num  64 56 77 87 89.8 53 55 53 64 68 ...
##  $ family_history_with_overweight: chr  "yes" "yes" "yes" "no" ...
##  $ FAVC                          : chr  "no" "no" "no" "no" ...
##  $ FCVC                          : num  2 3 2 3 2 2 3 2 3 2 ...
##  $ NCP                           : num  3 3 3 3 1 3 3 3 3 3 ...
##  $ CAEC                          : chr  "Sometimes" "Sometimes" "Sometimes" "Sometimes" ...
##  $ SMOKE                         : chr  "no" "yes" "no" "no" ...
##  $ CH2O                          : num  2 3 2 2 2 2 2 2 2 2 ...
##  $ SCC                           : chr  "no" "yes" "no" "no" ...
##  $ FAF                           : num  0 3 2 2 0 0 1 3 1 1 ...
##  $ TUE                           : num  1 0 1 0 0 0 0 0 1 1 ...
##  $ CALC                          : chr  "no" "Sometimes" "Frequently" "Frequently" ...
##  $ MTRANS                        : chr  "Public_Transportation" "Public_Transportation" "Public_Transportation" "Walking" ...
##  $ NObeyesdad                    : chr  "Normal_Weight" "Normal_Weight" "Normal_Weight" "Overweight_Level_I" ...
# check outliers
boxplot(diabetes$Age, main = "Age")

boxplot(diabetes$Height, main = "Height")

boxplot(diabetes$Weight, main = "Weight")
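The boxplots flag values beyond the whiskers visually. As a minimal numeric check (assuming the same 1.5 * IQR rule that boxplot() uses for its whiskers; the helper below is not part of the original analysis):

# count potential outliers per column with the 1.5 * IQR rule (hypothetical helper)
iqr_outliers <- function(x) {
  q <- quantile(x, c(0.25, 0.75), na.rm = TRUE)
  iqr <- q[2] - q[1]
  sum(x < q[1] - 1.5 * iqr | x > q[2] + 1.5 * iqr, na.rm = TRUE)
}
sapply(diabetes[c("Age", "Height", "Weight")], iqr_outliers)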

** Visualize data

# Single-variable visualizations

library(tidyverse)
num_vars <- diabetes %>% select(where(is.numeric))

num_cols <- names(num_vars)

for (col in num_cols){
  graph <- ggplot(diabetes, aes(x = .data[[col]])) +
    geom_histogram(fill = "skyblue", color = "black", bins = 30) +
    ggtitle(paste("Histogram of", col)) + 
    theme_minimal() 
  
  print(graph)
}

# Bivariate visualizations

ggplot(diabetes, aes(x = Gender, y = Weight, fill = Gender))+
  geom_boxplot() +
  ggtitle("Weight Distribution by Gender")+
  theme_minimal()

ggplot(diabetes, aes(x = Height, y = Weight, color = Gender)) +
  geom_point(alpha = 0.6) + 
  ggtitle("Height vs Weight by Gender") + 
  theme_minimal()

# Correlation matrix

library(corrplot)
cor_matrix <- cor(num_vars)
corrplot(cor_matrix, method = "color", type = "upper", tl.col = "black", tl.cex = 0.8)

** Feature analysis
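As a brief, illustrative sketch of feature analysis (not run in the original code), the numeric predictors can be summarized within each obesity level to see which features separate the classes:

# mean of selected numeric features within each NObeyesdad class (illustrative only)
diabetes %>%
  group_by(NObeyesdad) %>%
  summarise(across(c(Age, Height, Weight, FCVC, FAF), mean), .groups = "drop")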

Data Modeling Techniques

# data preprocessing
# 1. separate the predictors from the target variable
X <- diabetes %>% select(-NObeyesdad)
y <- as.factor(diabetes$NObeyesdad)


# convert character columns to factors so dummyVars() treats them as categorical
X <- X %>% mutate(across(where(is.character), as.factor))

# 2. one-hot encode the categorical predictors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
dummy <- dummyVars(~ ., data = X, fullRank = TRUE)
X_encoded <- data.frame(predict(dummy, newdata = X, na.action = na.pass))
# 3. normalization: center and scale the encoded predictors
preProc <- preProcess(X_encoded, method = c('center', 'scale'))
X_scaled <- predict(preProc, X_encoded)
# 4. split the dataset into training and test sets
set.seed(123)
trainIndex <- createDataPartition(y, p = 0.8, list = FALSE)

X_train <- X_scaled[trainIndex,, drop = FALSE]
y_train <- y[trainIndex]

X_test <- X_scaled[-trainIndex, , drop = FALSE]
y_test <- y[-trainIndex]
cat("Train: ", nrow(X_train), "observations\n")
## Train:  1691 observations
cat("Test: ", nrow(X_test), "observations\n")
## Test:  420 observations
table(y_train)
## y_train
## Insufficient_Weight       Normal_Weight      Obesity_Type_I     Obesity_Type_II 
##                 218                 230                 281                 238 
##    Obesity_Type_III  Overweight_Level_I Overweight_Level_II 
##                 260                 232                 232
table(y_test)
## y_test
## Insufficient_Weight       Normal_Weight      Obesity_Type_I     Obesity_Type_II 
##                  54                  57                  70                  59 
##    Obesity_Type_III  Overweight_Level_I Overweight_Level_II 
##                  64                  58                  58
set.seed(123)
# 5-fold cross-validation settings, reused for all four models
trCtrl <- trainControl(method = "cv",
                       number = 5,
                       classProbs = FALSE,
                       summaryFunction = defaultSummary,
                       savePredictions = "final")
# random forest
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
set.seed(123)
rf_fit <- train(x = X_train, y = y_train,
                method = "rf",
                metric = "Accuracy",
                trControl = trCtrl,
                tuneLength = 5)

rf_pred <- predict(rf_fit, X_test)
rf_cm <- confusionMatrix(rf_pred, y_test)
rf_acc <- rf_cm$overall["Accuracy"]

cat("Random Forest Accuracy:", round(as.numeric(rf_acc), 4), "\n")
## Random Forest Accuracy: 0.969
print(rf_cm$table)
##                      Reference
## Prediction            Insufficient_Weight Normal_Weight Obesity_Type_I
##   Insufficient_Weight                  51             1              0
##   Normal_Weight                         3            56              0
##   Obesity_Type_I                        0             0             68
##   Obesity_Type_II                       0             0              0
##   Obesity_Type_III                      0             0              0
##   Overweight_Level_I                    0             0              0
##   Overweight_Level_II                   0             0              2
##                      Reference
## Prediction            Obesity_Type_II Obesity_Type_III Overweight_Level_I
##   Insufficient_Weight               0                0                  0
##   Normal_Weight                     0                0                  3
##   Obesity_Type_I                    0                0                  0
##   Obesity_Type_II                  59                0                  0
##   Obesity_Type_III                  0               64                  0
##   Overweight_Level_I                0                0                 52
##   Overweight_Level_II               0                0                  3
##                      Reference
## Prediction            Overweight_Level_II
##   Insufficient_Weight                   0
##   Normal_Weight                         0
##   Obesity_Type_I                        1
##   Obesity_Type_II                       0
##   Obesity_Type_III                      0
##   Overweight_Level_I                    0
##   Overweight_Level_II                  57
# evaluation metrics
library(caret)

pred_rf <- predict(rf_fit, X_test)
cm_rf <- confusionMatrix(pred_rf, y_test)

cat("\n==============================\n")
## 
## ==============================
cat("Evaluation of Random Forest Model\n")
## Evaluation of Random Forest Model
cat("==============================\n")
## ==============================
cm_rf$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.690476e-01   9.638504e-01   9.476520e-01   9.834183e-01   1.666667e-01 
## AccuracyPValue  McnemarPValue 
##  3.107087e-294            NaN
cm_rf$byClass
##                            Sensitivity Specificity Pos Pred Value
## Class: Insufficient_Weight   0.9444444   0.9972678      0.9807692
## Class: Normal_Weight         0.9824561   0.9834711      0.9032258
## Class: Obesity_Type_I        0.9714286   0.9971429      0.9855072
## Class: Obesity_Type_II       1.0000000   1.0000000      1.0000000
## Class: Obesity_Type_III      1.0000000   1.0000000      1.0000000
## Class: Overweight_Level_I    0.8965517   1.0000000      1.0000000
## Class: Overweight_Level_II   0.9827586   0.9861878      0.9193548
##                            Neg Pred Value Precision    Recall        F1
## Class: Insufficient_Weight      0.9918478 0.9807692 0.9444444 0.9622642
## Class: Normal_Weight            0.9972067 0.9032258 0.9824561 0.9411765
## Class: Obesity_Type_I           0.9943020 0.9855072 0.9714286 0.9784173
## Class: Obesity_Type_II          1.0000000 1.0000000 1.0000000 1.0000000
## Class: Obesity_Type_III         1.0000000 1.0000000 1.0000000 1.0000000
## Class: Overweight_Level_I       0.9836957 1.0000000 0.8965517 0.9454545
## Class: Overweight_Level_II      0.9972067 0.9193548 0.9827586 0.9500000
##                            Prevalence Detection Rate Detection Prevalence
## Class: Insufficient_Weight  0.1285714      0.1214286            0.1238095
## Class: Normal_Weight        0.1357143      0.1333333            0.1476190
## Class: Obesity_Type_I       0.1666667      0.1619048            0.1642857
## Class: Obesity_Type_II      0.1404762      0.1404762            0.1404762
## Class: Obesity_Type_III     0.1523810      0.1523810            0.1523810
## Class: Overweight_Level_I   0.1380952      0.1238095            0.1238095
## Class: Overweight_Level_II  0.1380952      0.1357143            0.1476190
##                            Balanced Accuracy
## Class: Insufficient_Weight         0.9708561
## Class: Normal_Weight               0.9829636
## Class: Obesity_Type_I              0.9842857
## Class: Obesity_Type_II             1.0000000
## Class: Obesity_Type_III            1.0000000
## Class: Overweight_Level_I          0.9482759
## Class: Overweight_Level_II         0.9844732
# decision tree
set.seed(123)
rpart_fit <- train(x = X_train, y = y_train,
                   method = "rpart",
                   metric = "Accuracy",
                   trControl = trCtrl,
                   tuneLength = 10)

rpart_pred <- predict(rpart_fit, X_test)
rpart_cm <- confusionMatrix(rpart_pred, y_test)
rpart_acc <- rpart_cm$overall["Accuracy"]

cat("Decision Tree Accuracy:", round(as.numeric(rpart_acc), 4), "\n")
## Decision Tree Accuracy: 0.8214
print(rpart_cm$table)
##                      Reference
## Prediction            Insufficient_Weight Normal_Weight Obesity_Type_I
##   Insufficient_Weight                  49             6              0
##   Normal_Weight                         5            33              0
##   Obesity_Type_I                        0             0             59
##   Obesity_Type_II                       0             0              7
##   Obesity_Type_III                      0             0              0
##   Overweight_Level_I                    0            18              2
##   Overweight_Level_II                   0             0              2
##                      Reference
## Prediction            Obesity_Type_II Obesity_Type_III Overweight_Level_I
##   Insufficient_Weight               0                0                  0
##   Normal_Weight                     0                0                  1
##   Obesity_Type_I                    6                0                  0
##   Obesity_Type_II                  53                0                  0
##   Obesity_Type_III                  0               64                  0
##   Overweight_Level_I                0                0                 55
##   Overweight_Level_II               0                0                  2
##                      Reference
## Prediction            Overweight_Level_II
##   Insufficient_Weight                   0
##   Normal_Weight                         0
##   Obesity_Type_I                       10
##   Obesity_Type_II                       1
##   Obesity_Type_III                      0
##   Overweight_Level_I                   15
##   Overweight_Level_II                  32
# evaluation metrics

pred_rpart <- predict(rpart_fit, X_test)
cm_rpart <- confusionMatrix(pred_rpart, y_test)

cat("\n==============================\n")
## 
## ==============================
cat("Evaluation of Decision Tree Model\n")
## Evaluation of Decision Tree Model
cat("==============================\n")
## ==============================
cm_rpart$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.214286e-01   7.913272e-01   7.813832e-01   8.568725e-01   1.666667e-01 
## AccuracyPValue  McnemarPValue 
##  8.157771e-191            NaN
cm_rpart$byClass
##                            Sensitivity Specificity Pos Pred Value
## Class: Insufficient_Weight   0.9074074   0.9836066      0.8909091
## Class: Normal_Weight         0.5789474   0.9834711      0.8461538
## Class: Obesity_Type_I        0.8428571   0.9542857      0.7866667
## Class: Obesity_Type_II       0.8983051   0.9778393      0.8688525
## Class: Obesity_Type_III      1.0000000   1.0000000      1.0000000
## Class: Overweight_Level_I    0.9482759   0.9033149      0.6111111
## Class: Overweight_Level_II   0.5517241   0.9889503      0.8888889
##                            Neg Pred Value Precision    Recall        F1
## Class: Insufficient_Weight      0.9863014 0.8909091 0.9074074 0.8990826
## Class: Normal_Weight            0.9370079 0.8461538 0.5789474 0.6875000
## Class: Obesity_Type_I           0.9681159 0.7866667 0.8428571 0.8137931
## Class: Obesity_Type_II          0.9832869 0.8688525 0.8983051 0.8833333
## Class: Obesity_Type_III         1.0000000 1.0000000 1.0000000 1.0000000
## Class: Overweight_Level_I       0.9909091 0.6111111 0.9482759 0.7432432
## Class: Overweight_Level_II      0.9322917 0.8888889 0.5517241 0.6808511
##                            Prevalence Detection Rate Detection Prevalence
## Class: Insufficient_Weight  0.1285714     0.11666667           0.13095238
## Class: Normal_Weight        0.1357143     0.07857143           0.09285714
## Class: Obesity_Type_I       0.1666667     0.14047619           0.17857143
## Class: Obesity_Type_II      0.1404762     0.12619048           0.14523810
## Class: Obesity_Type_III     0.1523810     0.15238095           0.15238095
## Class: Overweight_Level_I   0.1380952     0.13095238           0.21428571
## Class: Overweight_Level_II  0.1380952     0.07619048           0.08571429
##                            Balanced Accuracy
## Class: Insufficient_Weight         0.9455070
## Class: Normal_Weight               0.7812092
## Class: Obesity_Type_I              0.8985714
## Class: Obesity_Type_II             0.9380722
## Class: Obesity_Type_III            1.0000000
## Class: Overweight_Level_I          0.9257954
## Class: Overweight_Level_II         0.7703372
# multinomial logistic regression
set.seed(123)
multi_fit <- train(x = X_train, y = y_train,
                   method = "multinom",
                   metric = "Accuracy",
                   trControl = trCtrl,
                   trace = FALSE)

multi_pred <- predict(multi_fit, X_test)
multi_cm <- confusionMatrix(multi_pred, y_test)
multi_acc <- multi_cm$overall["Accuracy"]

cat("Multinomial Logistic Accuracy:", round(as.numeric(multi_acc), 4), "\n")
## Multinomial Logistic Accuracy: 0.969
print(multi_cm$table)
##                      Reference
## Prediction            Insufficient_Weight Normal_Weight Obesity_Type_I
##   Insufficient_Weight                  53             2              0
##   Normal_Weight                         1            52              0
##   Obesity_Type_I                        0             0             67
##   Obesity_Type_II                       0             0              0
##   Obesity_Type_III                      0             0              2
##   Overweight_Level_I                    0             3              0
##   Overweight_Level_II                   0             0              1
##                      Reference
## Prediction            Obesity_Type_II Obesity_Type_III Overweight_Level_I
##   Insufficient_Weight               0                0                  0
##   Normal_Weight                     0                0                  1
##   Obesity_Type_I                    0                0                  0
##   Obesity_Type_II                  59                0                  0
##   Obesity_Type_III                  0               64                  0
##   Overweight_Level_I                0                0                 56
##   Overweight_Level_II               0                0                  1
##                      Reference
## Prediction            Overweight_Level_II
##   Insufficient_Weight                   0
##   Normal_Weight                         0
##   Obesity_Type_I                        0
##   Obesity_Type_II                       0
##   Obesity_Type_III                      0
##   Overweight_Level_I                    2
##   Overweight_Level_II                  56
# evaluation metrics

pred_multi <- predict(multi_fit, X_test)
cm_multi <- confusionMatrix(pred_multi, y_test)

cat("\n==============================\n")
## 
## ==============================
cat("Evaluation of Multinomial Logistic Model\n")
## Evaluation of Multinomial Logistic Model
cat("==============================\n")
## ==============================
cm_multi$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   9.690476e-01   9.638542e-01   9.476520e-01   9.834183e-01   1.666667e-01 
## AccuracyPValue  McnemarPValue 
##  3.107087e-294            NaN
cm_multi$byClass
##                            Sensitivity Specificity Pos Pred Value
## Class: Insufficient_Weight   0.9814815   0.9945355      0.9636364
## Class: Normal_Weight         0.9122807   0.9944904      0.9629630
## Class: Obesity_Type_I        0.9571429   1.0000000      1.0000000
## Class: Obesity_Type_II       1.0000000   1.0000000      1.0000000
## Class: Obesity_Type_III      1.0000000   0.9943820      0.9696970
## Class: Overweight_Level_I    0.9655172   0.9861878      0.9180328
## Class: Overweight_Level_II   0.9655172   0.9944751      0.9655172
##                            Neg Pred Value Precision    Recall        F1
## Class: Insufficient_Weight      0.9972603 0.9636364 0.9814815 0.9724771
## Class: Normal_Weight            0.9863388 0.9629630 0.9122807 0.9369369
## Class: Obesity_Type_I           0.9915014 1.0000000 0.9571429 0.9781022
## Class: Obesity_Type_II          1.0000000 1.0000000 1.0000000 1.0000000
## Class: Obesity_Type_III         1.0000000 0.9696970 1.0000000 0.9846154
## Class: Overweight_Level_I       0.9944290 0.9180328 0.9655172 0.9411765
## Class: Overweight_Level_II      0.9944751 0.9655172 0.9655172 0.9655172
##                            Prevalence Detection Rate Detection Prevalence
## Class: Insufficient_Weight  0.1285714      0.1261905            0.1309524
## Class: Normal_Weight        0.1357143      0.1238095            0.1285714
## Class: Obesity_Type_I       0.1666667      0.1595238            0.1595238
## Class: Obesity_Type_II      0.1404762      0.1404762            0.1404762
## Class: Obesity_Type_III     0.1523810      0.1523810            0.1571429
## Class: Overweight_Level_I   0.1380952      0.1333333            0.1452381
## Class: Overweight_Level_II  0.1380952      0.1333333            0.1380952
##                            Balanced Accuracy
## Class: Insufficient_Weight         0.9880085
## Class: Normal_Weight               0.9533855
## Class: Obesity_Type_I              0.9785714
## Class: Obesity_Type_II             1.0000000
## Class: Obesity_Type_III            0.9971910
## Class: Overweight_Level_I          0.9758525
## Class: Overweight_Level_II         0.9799962
# SVM
set.seed(123)
svm_fit <- train(x = X_train, y = y_train,
                 method = "svmRadial",
                 metric = "Accuracy",
                 trControl = trCtrl,
                 tuneLength = 5)

svm_pred <- predict(svm_fit, X_test)
svm_cm <- confusionMatrix(svm_pred, y_test)
svm_acc <- svm_cm$overall["Accuracy"]

cat("SVM Accuracy:", round(as.numeric(svm_acc), 4), "\n")
## SVM Accuracy: 0.8786
print(svm_cm$table)
##                      Reference
## Prediction            Insufficient_Weight Normal_Weight Obesity_Type_I
##   Insufficient_Weight                  47             5              0
##   Normal_Weight                         7            40              4
##   Obesity_Type_I                        0             0             63
##   Obesity_Type_II                       0             0              0
##   Obesity_Type_III                      0             0              0
##   Overweight_Level_I                    0            11              1
##   Overweight_Level_II                   0             1              2
##                      Reference
## Prediction            Obesity_Type_II Obesity_Type_III Overweight_Level_I
##   Insufficient_Weight               0                0                  0
##   Normal_Weight                     1                0                  4
##   Obesity_Type_I                    0                0                  0
##   Obesity_Type_II                  58                0                  0
##   Obesity_Type_III                  0               64                  0
##   Overweight_Level_I                0                0                 51
##   Overweight_Level_II               0                0                  3
##                      Reference
## Prediction            Overweight_Level_II
##   Insufficient_Weight                   0
##   Normal_Weight                         3
##   Obesity_Type_I                        4
##   Obesity_Type_II                       1
##   Obesity_Type_III                      0
##   Overweight_Level_I                    4
##   Overweight_Level_II                  46
# evaluation metrics

pred_svm <- predict(svm_fit, X_test)
cm_svm <- confusionMatrix(pred_svm, y_test)

cat("\n==============================\n")
## 
## ==============================
cat("Evaluation of SVM Model\n")
## Evaluation of SVM Model
cat("==============================\n")
## ==============================
cm_svm$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##   8.785714e-01   8.582020e-01   8.434492e-01   9.082373e-01   1.666667e-01 
## AccuracyPValue  McnemarPValue 
##  1.138903e-225            NaN
cm_svm$byClass
##                            Sensitivity Specificity Pos Pred Value
## Class: Insufficient_Weight   0.8703704   0.9863388      0.9038462
## Class: Normal_Weight         0.7017544   0.9476584      0.6779661
## Class: Obesity_Type_I        0.9000000   0.9885714      0.9402985
## Class: Obesity_Type_II       0.9830508   0.9972299      0.9830508
## Class: Obesity_Type_III      1.0000000   1.0000000      1.0000000
## Class: Overweight_Level_I    0.8793103   0.9558011      0.7611940
## Class: Overweight_Level_II   0.7931034   0.9834254      0.8846154
##                            Neg Pred Value Precision    Recall        F1
## Class: Insufficient_Weight      0.9809783 0.9038462 0.8703704 0.8867925
## Class: Normal_Weight            0.9529086 0.6779661 0.7017544 0.6896552
## Class: Obesity_Type_I           0.9801700 0.9402985 0.9000000 0.9197080
## Class: Obesity_Type_II          0.9972299 0.9830508 0.9830508 0.9830508
## Class: Obesity_Type_III         1.0000000 1.0000000 1.0000000 1.0000000
## Class: Overweight_Level_I       0.9801700 0.7611940 0.8793103 0.8160000
## Class: Overweight_Level_II      0.9673913 0.8846154 0.7931034 0.8363636
##                            Prevalence Detection Rate Detection Prevalence
## Class: Insufficient_Weight  0.1285714      0.1119048            0.1238095
## Class: Normal_Weight        0.1357143      0.0952381            0.1404762
## Class: Obesity_Type_I       0.1666667      0.1500000            0.1595238
## Class: Obesity_Type_II      0.1404762      0.1380952            0.1404762
## Class: Obesity_Type_III     0.1523810      0.1523810            0.1523810
## Class: Overweight_Level_I   0.1380952      0.1214286            0.1595238
## Class: Overweight_Level_II  0.1380952      0.1095238            0.1238095
##                            Balanced Accuracy
## Class: Insufficient_Weight         0.9283546
## Class: Normal_Weight               0.8247064
## Class: Obesity_Type_I              0.9442857
## Class: Obesity_Type_II             0.9901404
## Class: Obesity_Type_III            1.0000000
## Class: Overweight_Level_I          0.9175557
## Class: Overweight_Level_II         0.8882644

** Describe the rationale behind choosing the particular methods

This dataset represents a multiclass classification problem (the target NObeyesdad has seven classes), so four classification algorithms were trained on the same preprocessed features and compared for predictive performance and interpretability: a random forest, a decision tree (rpart), a multinomial logistic regression, and a radial-kernel SVM.

** Detail the process of tuning parameters and validating each model's performance
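All four models share the same trainControl object (trCtrl), which performs 5-fold cross-validation on the training set; tuneLength controls how many candidate values caret tries for each method's default tuning parameter (mtry for the random forest, cp for the decision tree, sigma and C for the radial SVM, and decay for the multinomial model). A minimal sketch for inspecting the tuning results, shown here for the random forest fit (the same accessors work for the other fits):

# hyperparameter values selected by cross-validation
rf_fit$bestTune
# cross-validated accuracy for every candidate in the tuning grid
rf_fit$results
# accuracy profile across the tuning grid
plot(rf_fit)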

** Results & Interpretation
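On the held-out test set, the random forest and the multinomial logistic regression both reach about 96.9% accuracy, followed by the radial SVM at 87.9% and the decision tree at 82.1%. As a minimal sketch (using the confusion-matrix objects computed above), the test-set accuracies can be collected into a single comparison table:

# collect held-out accuracies from the confusion matrices computed above
model_results <- data.frame(
  Model = c("Random Forest", "Decision Tree", "Multinomial Logistic", "SVM (radial)"),
  Accuracy = c(rf_cm$overall["Accuracy"],
               rpart_cm$overall["Accuracy"],
               multi_cm$overall["Accuracy"],
               svm_cm$overall["Accuracy"])
)
model_results[order(-model_results$Accuracy), ]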

Writing and Personal Contribution Statement

** This project was completed entirely by me. ChatGPT was used only for spelling and grammar correction and to improve the clarity of the written expression.