Data preprocessing

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.1
## Warning: package 'ggplot2' was built under R version 4.5.2
## Warning: package 'forcats' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 4.5.2
library(corrplot)
## corrplot 0.95 loaded
library(GGally)
## Warning: package 'GGally' was built under R version 4.5.2
library(dplyr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths
# import dataset
obesity <- read.csv("ObesityDataSet.csv")
head(obesity)
##   Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female  21   1.62   64.0                            yes   no    2   3
## 2 Female  21   1.52   56.0                            yes   no    3   3
## 3   Male  23   1.80   77.0                            yes   no    2   3
## 4   Male  27   1.80   87.0                             no   no    3   3
## 5   Male  22   1.78   89.8                             no   no    2   1
## 6   Male  29   1.62   53.0                             no  yes    2   3
##        CAEC SMOKE CH2O SCC FAF TUE       CALC                MTRANS NObeyesdad
## 1 Sometimes    no    2  no   0   1         no Public_Transportation     Normal
## 2 Sometimes   yes    3 yes   3   0  Sometimes Public_Transportation     Normal
## 3 Sometimes    no    2  no   2   1 Frequently Public_Transportation     Normal
## 4 Sometimes    no    2  no   2   0 Frequently               Walking Overweight
## 5 Sometimes    no    2  no   0   0  Sometimes Public_Transportation Overweight
## 6 Sometimes    no    2  no   0   0  Sometimes            Automobile     Normal
colSums(is.na(obesity))
##                         Gender                            Age 
##                              0                              0 
##                         Height                         Weight 
##                              0                              0 
## family_history_with_overweight                           FAVC 
##                              0                              0 
##                           FCVC                            NCP 
##                              0                              0 
##                           CAEC                          SMOKE 
##                              0                              0 
##                           CH2O                            SCC 
##                              0                              0 
##                            FAF                            TUE 
##                              0                              0 
##                           CALC                         MTRANS 
##                              0                              0 
##                     NObeyesdad 
##                              0
str(obesity)
## 'data.frame':    2111 obs. of  17 variables:
##  $ Gender                        : chr  "Female" "Female" "Male" "Male" ...
##  $ Age                           : num  21 21 23 27 22 29 23 22 24 22 ...
##  $ Height                        : num  1.62 1.52 1.8 1.8 1.78 1.62 1.5 1.64 1.78 1.72 ...
##  $ Weight                        : num  64 56 77 87 89.8 53 55 53 64 68 ...
##  $ family_history_with_overweight: chr  "yes" "yes" "yes" "no" ...
##  $ FAVC                          : chr  "no" "no" "no" "no" ...
##  $ FCVC                          : num  2 3 2 3 2 2 3 2 3 2 ...
##  $ NCP                           : num  3 3 3 3 1 3 3 3 3 3 ...
##  $ CAEC                          : chr  "Sometimes" "Sometimes" "Sometimes" "Sometimes" ...
##  $ SMOKE                         : chr  "no" "yes" "no" "no" ...
##  $ CH2O                          : num  2 3 2 2 2 2 2 2 2 2 ...
##  $ SCC                           : chr  "no" "yes" "no" "no" ...
##  $ FAF                           : num  0 3 2 2 0 0 1 3 1 1 ...
##  $ TUE                           : num  1 0 1 0 0 0 0 0 1 1 ...
##  $ CALC                          : chr  "no" "Sometimes" "Frequently" "Frequently" ...
##  $ MTRANS                        : chr  "Public_Transportation" "Public_Transportation" "Public_Transportation" "Walking" ...
##  $ NObeyesdad                    : chr  "Normal" "Normal" "Normal" "Overweight" ...
# check height, age and weight 
boxplot(obesity$Height, main = "Height")

boxplot(obesity$Weight, main = "Weight")

boxplot(obesity$Age, main = "Age")

as there is no extreme data in height and weight, so I can compute BMI directly

library(stringr)
# add new column "BMI" and compute BMI value
obesity <- obesity %>%
  mutate(
    BMI = round(Weight / (Height^2), 2)
  )
head(obesity)
##   Gender Age Height Weight family_history_with_overweight FAVC FCVC NCP
## 1 Female  21   1.62   64.0                            yes   no    2   3
## 2 Female  21   1.52   56.0                            yes   no    3   3
## 3   Male  23   1.80   77.0                            yes   no    2   3
## 4   Male  27   1.80   87.0                             no   no    3   3
## 5   Male  22   1.78   89.8                             no   no    2   1
## 6   Male  29   1.62   53.0                             no  yes    2   3
##        CAEC SMOKE CH2O SCC FAF TUE       CALC                MTRANS NObeyesdad
## 1 Sometimes    no    2  no   0   1         no Public_Transportation     Normal
## 2 Sometimes   yes    3 yes   3   0  Sometimes Public_Transportation     Normal
## 3 Sometimes    no    2  no   2   1 Frequently Public_Transportation     Normal
## 4 Sometimes    no    2  no   2   0 Frequently               Walking Overweight
## 5 Sometimes    no    2  no   0   0  Sometimes Public_Transportation Overweight
## 6 Sometimes    no    2  no   0   0  Sometimes            Automobile     Normal
##     BMI
## 1 24.39
## 2 24.24
## 3 23.77
## 4 26.85
## 5 28.34
## 6 20.20

Since BMI assessment criteria differ for individuals under 18 and adults, and my final app is intended solely for adult evaluation, I will remove all data entries for participants under 18.

obesity_adult <- obesity %>%
  filter(Age >= 18)
cat("total data", nrow(obesity), "\n")
## total data 2111
cat("Adult data", nrow(obesity_adult), "\n")
## Adult data 1998
write.csv(obesity_adult, "ObesityDataSet_adults_only.csv", row.names = FALSE)
min(obesity_adult$Age)
## [1] 18
# classify according to the BMI
obesity_adult <- obesity_adult %>%
  mutate(
    BMI_Class = case_when(
      BMI < 18.5 ~ "Insufficient",
      BMI >= 18.5 & BMI < 25 ~ "Normal",
      BMI >= 25 & BMI <30 ~ "Overweight",
      BMI >= 30~ "Obesity",
      TRUE ~ NA_character_
    )
  )

obesity_adult <- obesity_adult %>%
  mutate(
    Label_simple = case_when(
      str_detect(tolower(NObeyesdad), "insufficient") ~ "Insufficient",
      str_detect(tolower(NObeyesdad), "normal") ~ "Normal",
      str_detect(tolower(NObeyesdad), "overweight") ~ "Overweight",
      str_detect(tolower(NObeyesdad), "obesity|obese") ~ "Obesity",
      TRUE ~ NA_character_
    )
  )


wrong_rows <- obesity_adult %>%
  filter(!is.na(BMI_Class) & !is.na(Label_simple) & BMI_Class != Label_simple)

cat("Wrong data:", nrow(wrong_rows), "\n")
## Wrong data: 17
print(wrong_rows)
##    Gender      Age   Height   Weight family_history_with_overweight FAVC
## 1  Female 25.00000 1.560000 45.00000                             no  yes
## 2    Male 20.00000 1.560000 45.00000                             no   no
## 3  Female 20.22540 1.550648 44.64180                             no  yes
## 4  Female 29.97045 1.610863 49.51603                            yes  yes
## 5  Female 22.37800 1.699568 54.98774                            yes  yes
## 6  Female 32.59313 1.721903 72.74890                            yes  yes
## 7  Female 23.71259 1.588597 62.33900                             no  yes
## 8  Female 37.21816 1.593894 63.32063                            yes  yes
## 9  Female 42.24475 1.768231 75.62931                            yes  yes
## 10 Female 18.00000 1.498561 55.37651                             no  yes
## 11   Male 20.31094 1.849425 85.22812                            yes  yes
## 12 Female 18.54944 1.545196 72.46786                            yes   no
## 13   Male 46.49186 1.718097 88.60088                            yes  yes
## 14 Female 18.19832 1.543338 71.79998                            yes   no
## 15 Female 18.85047 1.550053 72.95180                            yes   no
## 16 Female 23.00000 1.665199 83.15115                            yes  yes
## 17 Female 22.89974 1.661715 82.59579                            yes  yes
##        FCVC      NCP       CAEC SMOKE     CH2O SCC      FAF      TUE      CALC
## 1  2.000000 3.000000  Sometimes    no 1.000000  no 0.000000 0.000000 Sometimes
## 2  2.000000 3.000000  Sometimes    no 2.000000  no 1.000000 1.000000 Sometimes
## 3  3.000000 2.857787 Frequently    no 1.000000  no 0.754646 0.000000 Sometimes
## 4  2.059138 3.904858 Frequently    no 2.000000  no 0.821977 0.000000        no
## 5  3.000000 3.000000 Frequently    no 2.000000  no 0.139808 0.875464        no
## 6  2.000000 3.000000  Sometimes    no 1.000000  no 0.000000 1.339232 Sometimes
## 7  2.397280 2.656588  Sometimes    no 2.061062  no 1.912981 1.887386 Sometimes
## 8  2.374640 3.000000  Sometimes    no 2.000000  no 2.892922 0.480813 Sometimes
## 9  3.000000 2.951837  Sometimes    no 2.112032  no 0.378683 0.000000 Sometimes
## 10 2.000000 3.000000  Sometimes    no 1.274718 yes 0.129902 0.978574 Sometimes
## 11 2.146598 3.000000  Sometimes    no 2.100112  no 1.171160 0.833761 Sometimes
## 12 3.000000 3.014808  Sometimes    no 2.000000  no 1.997529 1.000000 Sometimes
## 13 2.129969 3.000000  Sometimes    no 1.568035  no 0.870127 0.000000        no
## 14 3.000000 3.087119  Sometimes    no 2.000000  no 1.403872 1.000000        no
## 15 3.000000 3.000974  Sometimes    no 2.000000  no 2.274248 1.000000 Sometimes
## 16 2.928234 1.458507  Sometimes    no 2.777379  no 0.354541 1.707018        no
## 17 1.203754 1.355354  Sometimes    no 2.765593  no 0.128342 1.659476 Sometimes
##                   MTRANS   NObeyesdad   BMI    BMI_Class Label_simple
## 1  Public_Transportation       Normal 18.49 Insufficient       Normal
## 2  Public_Transportation       Normal 18.49 Insufficient       Normal
## 3  Public_Transportation Insufficient 18.57       Normal Insufficient
## 4  Public_Transportation Insufficient 19.08       Normal Insufficient
## 5  Public_Transportation Insufficient 19.04       Normal Insufficient
## 6             Automobile   Overweight 24.54       Normal   Overweight
## 7  Public_Transportation   Overweight 24.70       Normal   Overweight
## 8             Automobile   Overweight 24.92       Normal   Overweight
## 9             Automobile   Overweight 24.19       Normal   Overweight
## 10 Public_Transportation   Overweight 24.66       Normal   Overweight
## 11 Public_Transportation   Overweight 24.92       Normal   Overweight
## 12 Public_Transportation   Overweight 30.35      Obesity   Overweight
## 13            Automobile   Overweight 30.02      Obesity   Overweight
## 14 Public_Transportation   Overweight 30.14      Obesity   Overweight
## 15 Public_Transportation   Overweight 30.36      Obesity   Overweight
## 16 Public_Transportation      Obesity 29.99   Overweight      Obesity
## 17 Public_Transportation      Obesity 29.91   Overweight      Obesity
# remove the wrong data
obesity_clean <- obesity_adult %>%
  filter(!(BMI_Class != Label_simple))

write.csv(obesity_clean, "obesity_clean.csv", row.names = FALSE)

feature checking

ggplot(obesity_clean, aes(x = Height, y = Weight, color = Gender)) +
  geom_point(alpha = 0.6) + 
  labs(
    x = "Height (meters)", 
    y = "Weight (kg)",
    title = "Height vs Weight by Gender"
  ) +
  theme_minimal()

ggplot(obesity_clean, aes(x = Gender, y = Weight, fill = Gender))+
  geom_boxplot() +
  ggtitle("Weight Distribution by Gender")+
  theme_minimal()

# explore the relationship between features and BMI classification

# numerical correlation heatmap
numeric_vars <- obesity_clean %>%
  select_if(is.numeric)

corr_matrix <- cor(numeric_vars)

corrplot(corr_matrix, method = "color", type = "upper", t1.cex = 0.8, number.cex = 0.7)
## Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
## tl.srt, : "t1.cex" is not a graphical parameter
## Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
## tl.col, : "t1.cex" is not a graphical parameter
## Warning in title(title, ...): "t1.cex" is not a graphical parameter

Based on the heatmap, BMI shows strong correlations only with weight and height, while the other features display relatively weak associations.

# CHI-SQUARE tests for categorical features

categorical_vars <- obesity_clean %>%
  select_if(is.character) %>%
  select(-NObeyesdad, -Label_simple)  


chi_results <- data.frame(
  Feature = character(),
  p_value = numeric(),
  stringsAsFactors = FALSE
)

cat("===== CHI-SQUARE tests(Categorical Features vs BMI_Class) =====\n")
## ===== CHI-SQUARE tests(Categorical Features vs BMI_Class) =====
for (col in names(categorical_vars)) {
  tbl <- table(obesity_clean[[col]], obesity_clean$BMI_Class)
  chi <- chisq.test(tbl)

  chi_results <- rbind(
    chi_results,
    data.frame(Feature = col, p_value = chi$p.value)
  )

  cat("Feature:", col, "  p-value =", chi$p.value, "\n")
}
## Feature: Gender   p-value = 2.978115e-12 
## Feature: family_history_with_overweight   p-value = 2.714267e-119 
## Feature: FAVC   p-value = 8.079538e-42 
## Feature: CAEC   p-value = 6.34903e-156
## Warning in chisq.test(tbl): Chi-squared approximation may be incorrect
## Feature: SMOKE   p-value = 0.003230958 
## Feature: SCC   p-value = 1.293436e-20
## Warning in chisq.test(tbl): Chi-squared approximation may be incorrect
## Feature: CALC   p-value = 1.036632e-13
## Warning in chisq.test(tbl): Chi-squared approximation may be incorrect
## Feature: MTRANS   p-value = 1.967326e-25 
## Feature: BMI_Class   p-value = 0
cat("\n===== P-Value(p < 0.05) =====\n")
## 
## ===== P-Value(p < 0.05) =====
print(chi_results %>% filter(p_value < 0.05))
##                          Feature       p_value
## 1                         Gender  2.978115e-12
## 2 family_history_with_overweight 2.714267e-119
## 3                           FAVC  8.079538e-42
## 4                           CAEC 6.349030e-156
## 5                          SMOKE  3.230958e-03
## 6                            SCC  1.293436e-20
## 7                           CALC  1.036632e-13
## 8                         MTRANS  1.967326e-25
## 9                      BMI_Class  0.000000e+00
# visualization: categorical features vs. BMI

for (col in names(categorical_vars)) {
  p <- ggplot(obesity_clean, aes_string(x = col, y = "BMI")) +
    geom_violin(fill = "lightblue", alpha = 0.6) +
    geom_boxplot(width = 0.1, alpha = 0.7, outlier.color = "red") +
    theme_minimal() +
    labs(title = paste("BMI by", col), x = col, y = "BMI") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(p)

chi_summary <- chi_results %>% arrange(p_value)

print(chi_summary)
##                          Feature       p_value
## 1                      BMI_Class  0.000000e+00
## 2                           CAEC 6.349030e-156
## 3 family_history_with_overweight 2.714267e-119
## 4                           FAVC  8.079538e-42
## 5                         MTRANS  1.967326e-25
## 6                            SCC  1.293436e-20
## 7                           CALC  1.036632e-13
## 8                         Gender  2.978115e-12
## 9                          SMOKE  3.230958e-03
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
obesity_clean$BMI_Class <- as.factor(obesity_clean$BMI_Class)

# Random Forest model
rf_model <- randomForest(
  BMI_Class ~ .,
  data = obesity_clean %>% select(-BMI, -NObeyesdad, -Label_simple),
  importance = TRUE,
  ntree = 500
)


importance_df <- as.data.frame(importance(rf_model))
importance_df$Feature <- rownames(importance_df)

# MeanDecreaseGini for importance
importance_df <- importance_df %>%
  arrange(desc(MeanDecreaseGini))

# visualization
ggplot(importance_df,
       aes(x = reorder(Feature, MeanDecreaseGini),
           y = MeanDecreaseGini)) +
  geom_col(fill = "lightblue") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Feature Importance for BMI Classification",
       x = "Feature",
       y = "Importance (MeanDecreaseGini)")

# analyze the features’ contribution to BMI classification. As BMI is calculate by weight and height, so the result is expected.

# XGBoots model
library(xgboost)
## Warning: package 'xgboost' was built under R version 4.5.2
## 
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
## 
##     slice
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# check distribution of each class
set.seed(123)
table(obesity_clean$BMI_Class)
## 
## Insufficient       Normal      Obesity   Overweight 
##          220          260          962          539

data distribution is unbalanced.

obesity_df <- obesity_clean %>% mutate_if(is.character, as.factor)

obesity_df <- obesity_df %>% select(-BMI, -NObeyesdad, -Label_simple)  

# one-hot encoding
X_full <- model.matrix(~ . - BMI_Class - 1, data = obesity_df)
X_full <- Matrix(X_full, sparse = TRUE)
y_full <- as.numeric(obesity_df$BMI_Class) - 1
classes <- levels(obesity_df$BMI_Class)
num_class <- length(classes)
cat("Classes:", classes, " Num:", num_class, "\n")
## Classes: Insufficient Normal Obesity Overweight  Num: 4
# stratified train/test split

train_idx <- createDataPartition(y_full, p = 0.8, list = FALSE)
test_idx  <- setdiff(seq_len(nrow(X_full)), train_idx)

X_train <- X_full[train_idx, ]
X_test  <- X_full[test_idx, ]
y_train <- y_full[train_idx]
y_test  <- y_full[test_idx]

cat("Train:", length(y_train), " Test:", length(y_test), "\n")
## Train: 1586  Test: 395
table(y_train)  
## y_train
##   0   1   2   3 
## 185 206 763 432
# compute class-based sample weights

freq <- table(y_train)
total <- length(y_train)
weight_per_class <- as.numeric(total / (num_class * freq))
names(weight_per_class) <- names(freq)
weight_per_class  
##         0         1         2         3 
## 2.1432432 1.9247573 0.5196592 0.9178241
# map to each training instance
train_weights <- weight_per_class[ as.character(y_train) ]

# sanity check
table(y_train, train_weights)
##        train_weights
## y_train 0.519659239842726 0.917824074074074 1.9247572815534 2.14324324324324
##       0                 0                 0               0              185
##       1                 0                 0             206                0
##       2               763                 0               0                0
##       3                 0               432               0                0
# build DMatrix with weights
dtrain <- xgb.DMatrix(data = X_train, label = y_train)
dtest  <- xgb.DMatrix(data = X_test,  label = y_test)
setinfo(dtrain, "weight", train_weights)
## [1] TRUE
# xgboost params & training

params <- list(
  objective = "multi:softprob",
  eval_metric = "mlogloss",
  num_class = num_class,
  eta = 0.05,
  max_depth = 6,
  subsample = 0.8,
  colsample_bytree = 0.8
)

watchlist <- list(train = dtrain, eval = dtest)
bst <- xgb.train(params = params,
                 data = dtrain,
                 nrounds = 1000,
                 watchlist = watchlist,
                 early_stopping_rounds = 30,
                 verbose = 1)
## [1]  train-mlogloss:1.305884 eval-mlogloss:1.307361 
## Multiple eval metrics are present. Will use eval_mlogloss for early stopping.
## Will train until eval_mlogloss hasn't improved in 30 rounds.
## 
## [2]  train-mlogloss:1.242716 eval-mlogloss:1.257060 
## [3]  train-mlogloss:1.183840 eval-mlogloss:1.207443 
## [4]  train-mlogloss:1.123313 eval-mlogloss:1.147494 
## [5]  train-mlogloss:1.064457 eval-mlogloss:1.091956 
## [6]  train-mlogloss:1.018452 eval-mlogloss:1.051462 
## [7]  train-mlogloss:0.977128 eval-mlogloss:1.012791 
## [8]  train-mlogloss:0.933218 eval-mlogloss:0.969834 
## [9]  train-mlogloss:0.899515 eval-mlogloss:0.941642 
## [10] train-mlogloss:0.857730 eval-mlogloss:0.902630 
## [11] train-mlogloss:0.817551 eval-mlogloss:0.863471 
## [12] train-mlogloss:0.786637 eval-mlogloss:0.833265 
## [13] train-mlogloss:0.749788 eval-mlogloss:0.797725 
## [14] train-mlogloss:0.714223 eval-mlogloss:0.763942 
## [15] train-mlogloss:0.688215 eval-mlogloss:0.742605 
## [16] train-mlogloss:0.662864 eval-mlogloss:0.717678 
## [17] train-mlogloss:0.637296 eval-mlogloss:0.693191 
## [18] train-mlogloss:0.614818 eval-mlogloss:0.673634 
## [19] train-mlogloss:0.587420 eval-mlogloss:0.648291 
## [20] train-mlogloss:0.567515 eval-mlogloss:0.630925 
## [21] train-mlogloss:0.547602 eval-mlogloss:0.613813 
## [22] train-mlogloss:0.525600 eval-mlogloss:0.593427 
## [23] train-mlogloss:0.505416 eval-mlogloss:0.573245 
## [24] train-mlogloss:0.485806 eval-mlogloss:0.553608 
## [25] train-mlogloss:0.466864 eval-mlogloss:0.533977 
## [26] train-mlogloss:0.453156 eval-mlogloss:0.522554 
## [27] train-mlogloss:0.437298 eval-mlogloss:0.506799 
## [28] train-mlogloss:0.422517 eval-mlogloss:0.494378 
## [29] train-mlogloss:0.409197 eval-mlogloss:0.482705 
## [30] train-mlogloss:0.394362 eval-mlogloss:0.468119 
## [31] train-mlogloss:0.379297 eval-mlogloss:0.453627 
## [32] train-mlogloss:0.367063 eval-mlogloss:0.442552 
## [33] train-mlogloss:0.353809 eval-mlogloss:0.428988 
## [34] train-mlogloss:0.340168 eval-mlogloss:0.414947 
## [35] train-mlogloss:0.327973 eval-mlogloss:0.402038 
## [36] train-mlogloss:0.316667 eval-mlogloss:0.390314 
## [37] train-mlogloss:0.305941 eval-mlogloss:0.378862 
## [38] train-mlogloss:0.294851 eval-mlogloss:0.367634 
## [39] train-mlogloss:0.286120 eval-mlogloss:0.359869 
## [40] train-mlogloss:0.276988 eval-mlogloss:0.351278 
## [41] train-mlogloss:0.267935 eval-mlogloss:0.342412 
## [42] train-mlogloss:0.258912 eval-mlogloss:0.332808 
## [43] train-mlogloss:0.251439 eval-mlogloss:0.326074 
## [44] train-mlogloss:0.242261 eval-mlogloss:0.316383 
## [45] train-mlogloss:0.232949 eval-mlogloss:0.306806 
## [46] train-mlogloss:0.225677 eval-mlogloss:0.299353 
## [47] train-mlogloss:0.218456 eval-mlogloss:0.291920 
## [48] train-mlogloss:0.213328 eval-mlogloss:0.287015 
## [49] train-mlogloss:0.207237 eval-mlogloss:0.280611 
## [50] train-mlogloss:0.200335 eval-mlogloss:0.273708 
## [51] train-mlogloss:0.194656 eval-mlogloss:0.268158 
## [52] train-mlogloss:0.189988 eval-mlogloss:0.263385 
## [53] train-mlogloss:0.185453 eval-mlogloss:0.259774 
## [54] train-mlogloss:0.180563 eval-mlogloss:0.254739 
## [55] train-mlogloss:0.175166 eval-mlogloss:0.249007 
## [56] train-mlogloss:0.170333 eval-mlogloss:0.244424 
## [57] train-mlogloss:0.165247 eval-mlogloss:0.238954 
## [58] train-mlogloss:0.160899 eval-mlogloss:0.234460 
## [59] train-mlogloss:0.156636 eval-mlogloss:0.229601 
## [60] train-mlogloss:0.153542 eval-mlogloss:0.227369 
## [61] train-mlogloss:0.149897 eval-mlogloss:0.223288 
## [62] train-mlogloss:0.145907 eval-mlogloss:0.219091 
## [63] train-mlogloss:0.142468 eval-mlogloss:0.215701 
## [64] train-mlogloss:0.139052 eval-mlogloss:0.212204 
## [65] train-mlogloss:0.134413 eval-mlogloss:0.206907 
## [66] train-mlogloss:0.131635 eval-mlogloss:0.203992 
## [67] train-mlogloss:0.128979 eval-mlogloss:0.201449 
## [68] train-mlogloss:0.125384 eval-mlogloss:0.197860 
## [69] train-mlogloss:0.122329 eval-mlogloss:0.194934 
## [70] train-mlogloss:0.118843 eval-mlogloss:0.191538 
## [71] train-mlogloss:0.115542 eval-mlogloss:0.188013 
## [72] train-mlogloss:0.111901 eval-mlogloss:0.184037 
## [73] train-mlogloss:0.108954 eval-mlogloss:0.181100 
## [74] train-mlogloss:0.106026 eval-mlogloss:0.177889 
## [75] train-mlogloss:0.103642 eval-mlogloss:0.175954 
## [76] train-mlogloss:0.101040 eval-mlogloss:0.173122 
## [77] train-mlogloss:0.098622 eval-mlogloss:0.170781 
## [78] train-mlogloss:0.096735 eval-mlogloss:0.169264 
## [79] train-mlogloss:0.094656 eval-mlogloss:0.166697 
## [80] train-mlogloss:0.093132 eval-mlogloss:0.165540 
## [81] train-mlogloss:0.091156 eval-mlogloss:0.163627 
## [82] train-mlogloss:0.089020 eval-mlogloss:0.160943 
## [83] train-mlogloss:0.087183 eval-mlogloss:0.159165 
## [84] train-mlogloss:0.085415 eval-mlogloss:0.157325 
## [85] train-mlogloss:0.083363 eval-mlogloss:0.154936 
## [86] train-mlogloss:0.081347 eval-mlogloss:0.152296 
## [87] train-mlogloss:0.079221 eval-mlogloss:0.149549 
## [88] train-mlogloss:0.076960 eval-mlogloss:0.147337 
## [89] train-mlogloss:0.075306 eval-mlogloss:0.145004 
## [90] train-mlogloss:0.073715 eval-mlogloss:0.143127 
## [91] train-mlogloss:0.072047 eval-mlogloss:0.141073 
## [92] train-mlogloss:0.070522 eval-mlogloss:0.139245 
## [93] train-mlogloss:0.069268 eval-mlogloss:0.138216 
## [94] train-mlogloss:0.067945 eval-mlogloss:0.137008 
## [95] train-mlogloss:0.066535 eval-mlogloss:0.135788 
## [96] train-mlogloss:0.065120 eval-mlogloss:0.134242 
## [97] train-mlogloss:0.063701 eval-mlogloss:0.132267 
## [98] train-mlogloss:0.062265 eval-mlogloss:0.130759 
## [99] train-mlogloss:0.060717 eval-mlogloss:0.128795 
## [100]    train-mlogloss:0.059281 eval-mlogloss:0.126924 
## [101]    train-mlogloss:0.058232 eval-mlogloss:0.125907 
## [102]    train-mlogloss:0.056823 eval-mlogloss:0.124241 
## [103]    train-mlogloss:0.055496 eval-mlogloss:0.122680 
## [104]    train-mlogloss:0.054371 eval-mlogloss:0.121538 
## [105]    train-mlogloss:0.053224 eval-mlogloss:0.120130 
## [106]    train-mlogloss:0.052193 eval-mlogloss:0.118939 
## [107]    train-mlogloss:0.051152 eval-mlogloss:0.117739 
## [108]    train-mlogloss:0.050063 eval-mlogloss:0.116218 
## [109]    train-mlogloss:0.049140 eval-mlogloss:0.115105 
## [110]    train-mlogloss:0.048278 eval-mlogloss:0.113939 
## [111]    train-mlogloss:0.047209 eval-mlogloss:0.112767 
## [112]    train-mlogloss:0.046164 eval-mlogloss:0.111852 
## [113]    train-mlogloss:0.045146 eval-mlogloss:0.110727 
## [114]    train-mlogloss:0.044389 eval-mlogloss:0.109952 
## [115]    train-mlogloss:0.043901 eval-mlogloss:0.109632 
## [116]    train-mlogloss:0.043339 eval-mlogloss:0.109037 
## [117]    train-mlogloss:0.042406 eval-mlogloss:0.107734 
## [118]    train-mlogloss:0.041550 eval-mlogloss:0.106599 
## [119]    train-mlogloss:0.040622 eval-mlogloss:0.105239 
## [120]    train-mlogloss:0.039917 eval-mlogloss:0.104336 
## [121]    train-mlogloss:0.039251 eval-mlogloss:0.103739 
## [122]    train-mlogloss:0.038658 eval-mlogloss:0.103126 
## [123]    train-mlogloss:0.037891 eval-mlogloss:0.101974 
## [124]    train-mlogloss:0.037267 eval-mlogloss:0.101172 
## [125]    train-mlogloss:0.036670 eval-mlogloss:0.100435 
## [126]    train-mlogloss:0.036333 eval-mlogloss:0.100120 
## [127]    train-mlogloss:0.035525 eval-mlogloss:0.099507 
## [128]    train-mlogloss:0.034836 eval-mlogloss:0.098435 
## [129]    train-mlogloss:0.034479 eval-mlogloss:0.098188 
## [130]    train-mlogloss:0.033914 eval-mlogloss:0.097800 
## [131]    train-mlogloss:0.033244 eval-mlogloss:0.096603 
## [132]    train-mlogloss:0.032752 eval-mlogloss:0.096109 
## [133]    train-mlogloss:0.032289 eval-mlogloss:0.095504 
## [134]    train-mlogloss:0.031724 eval-mlogloss:0.094588 
## [135]    train-mlogloss:0.031215 eval-mlogloss:0.093789 
## [136]    train-mlogloss:0.030599 eval-mlogloss:0.093037 
## [137]    train-mlogloss:0.030037 eval-mlogloss:0.092255 
## [138]    train-mlogloss:0.029554 eval-mlogloss:0.091931 
## [139]    train-mlogloss:0.029053 eval-mlogloss:0.091242 
## [140]    train-mlogloss:0.028635 eval-mlogloss:0.090661 
## [141]    train-mlogloss:0.028254 eval-mlogloss:0.090245 
## [142]    train-mlogloss:0.027787 eval-mlogloss:0.089734 
## [143]    train-mlogloss:0.027229 eval-mlogloss:0.088743 
## [144]    train-mlogloss:0.026703 eval-mlogloss:0.087978 
## [145]    train-mlogloss:0.026271 eval-mlogloss:0.087201 
## [146]    train-mlogloss:0.025868 eval-mlogloss:0.086671 
## [147]    train-mlogloss:0.025480 eval-mlogloss:0.086054 
## [148]    train-mlogloss:0.025109 eval-mlogloss:0.085626 
## [149]    train-mlogloss:0.024624 eval-mlogloss:0.084865 
## [150]    train-mlogloss:0.024199 eval-mlogloss:0.084073 
## [151]    train-mlogloss:0.023784 eval-mlogloss:0.083425 
## [152]    train-mlogloss:0.023491 eval-mlogloss:0.082986 
## [153]    train-mlogloss:0.023217 eval-mlogloss:0.082660 
## [154]    train-mlogloss:0.022870 eval-mlogloss:0.082205 
## [155]    train-mlogloss:0.022604 eval-mlogloss:0.082065 
## [156]    train-mlogloss:0.022282 eval-mlogloss:0.081398 
## [157]    train-mlogloss:0.021958 eval-mlogloss:0.081151 
## [158]    train-mlogloss:0.021669 eval-mlogloss:0.080772 
## [159]    train-mlogloss:0.021314 eval-mlogloss:0.080302 
## [160]    train-mlogloss:0.021032 eval-mlogloss:0.079852 
## [161]    train-mlogloss:0.020737 eval-mlogloss:0.079567 
## [162]    train-mlogloss:0.020457 eval-mlogloss:0.079262 
## [163]    train-mlogloss:0.020253 eval-mlogloss:0.079065 
## [164]    train-mlogloss:0.019969 eval-mlogloss:0.078657 
## [165]    train-mlogloss:0.019622 eval-mlogloss:0.077994 
## [166]    train-mlogloss:0.019390 eval-mlogloss:0.077799 
## [167]    train-mlogloss:0.019148 eval-mlogloss:0.077477 
## [168]    train-mlogloss:0.018802 eval-mlogloss:0.076875 
## [169]    train-mlogloss:0.018640 eval-mlogloss:0.076649 
## [170]    train-mlogloss:0.018453 eval-mlogloss:0.076497 
## [171]    train-mlogloss:0.018144 eval-mlogloss:0.075932 
## [172]    train-mlogloss:0.017852 eval-mlogloss:0.075347 
## [173]    train-mlogloss:0.017585 eval-mlogloss:0.074883 
## [174]    train-mlogloss:0.017401 eval-mlogloss:0.074994 
## [175]    train-mlogloss:0.017248 eval-mlogloss:0.074909 
## [176]    train-mlogloss:0.017002 eval-mlogloss:0.074527 
## [177]    train-mlogloss:0.016817 eval-mlogloss:0.074268 
## [178]    train-mlogloss:0.016587 eval-mlogloss:0.073887 
## [179]    train-mlogloss:0.016434 eval-mlogloss:0.073629 
## [180]    train-mlogloss:0.016343 eval-mlogloss:0.073598 
## [181]    train-mlogloss:0.016164 eval-mlogloss:0.073476 
## [182]    train-mlogloss:0.015935 eval-mlogloss:0.072979 
## [183]    train-mlogloss:0.015788 eval-mlogloss:0.072912 
## [184]    train-mlogloss:0.015571 eval-mlogloss:0.072660 
## [185]    train-mlogloss:0.015405 eval-mlogloss:0.072459 
## [186]    train-mlogloss:0.015215 eval-mlogloss:0.072177 
## [187]    train-mlogloss:0.015079 eval-mlogloss:0.071919 
## [188]    train-mlogloss:0.014956 eval-mlogloss:0.071891 
## [189]    train-mlogloss:0.014821 eval-mlogloss:0.071788 
## [190]    train-mlogloss:0.014612 eval-mlogloss:0.071509 
## [191]    train-mlogloss:0.014483 eval-mlogloss:0.071186 
## [192]    train-mlogloss:0.014362 eval-mlogloss:0.071095 
## [193]    train-mlogloss:0.014188 eval-mlogloss:0.070921 
## [194]    train-mlogloss:0.014016 eval-mlogloss:0.070790 
## [195]    train-mlogloss:0.013866 eval-mlogloss:0.070666 
## [196]    train-mlogloss:0.013733 eval-mlogloss:0.070440 
## [197]    train-mlogloss:0.013585 eval-mlogloss:0.070135 
## [198]    train-mlogloss:0.013472 eval-mlogloss:0.069991 
## [199]    train-mlogloss:0.013325 eval-mlogloss:0.069508 
## [200]    train-mlogloss:0.013253 eval-mlogloss:0.069460 
## [201]    train-mlogloss:0.013105 eval-mlogloss:0.069309 
## [202]    train-mlogloss:0.012926 eval-mlogloss:0.069179 
## [203]    train-mlogloss:0.012827 eval-mlogloss:0.069158 
## [204]    train-mlogloss:0.012729 eval-mlogloss:0.069009 
## [205]    train-mlogloss:0.012645 eval-mlogloss:0.068952 
## [206]    train-mlogloss:0.012529 eval-mlogloss:0.068662 
## [207]    train-mlogloss:0.012435 eval-mlogloss:0.068451 
## [208]    train-mlogloss:0.012370 eval-mlogloss:0.068453 
## [209]    train-mlogloss:0.012265 eval-mlogloss:0.068452 
## [210]    train-mlogloss:0.012137 eval-mlogloss:0.068182 
## [211]    train-mlogloss:0.012026 eval-mlogloss:0.068000 
## [212]    train-mlogloss:0.011920 eval-mlogloss:0.067908 
## [213]    train-mlogloss:0.011824 eval-mlogloss:0.067849 
## [214]    train-mlogloss:0.011728 eval-mlogloss:0.067868 
## [215]    train-mlogloss:0.011599 eval-mlogloss:0.067517 
## [216]    train-mlogloss:0.011502 eval-mlogloss:0.067457 
## [217]    train-mlogloss:0.011405 eval-mlogloss:0.067331 
## [218]    train-mlogloss:0.011340 eval-mlogloss:0.067433 
## [219]    train-mlogloss:0.011280 eval-mlogloss:0.067339 
## [220]    train-mlogloss:0.011186 eval-mlogloss:0.067162 
## [221]    train-mlogloss:0.011044 eval-mlogloss:0.066891 
## [222]    train-mlogloss:0.010996 eval-mlogloss:0.066827 
## [223]    train-mlogloss:0.010915 eval-mlogloss:0.066666 
## [224]    train-mlogloss:0.010860 eval-mlogloss:0.066580 
## [225]    train-mlogloss:0.010770 eval-mlogloss:0.066386 
## [226]    train-mlogloss:0.010689 eval-mlogloss:0.066274 
## [227]    train-mlogloss:0.010604 eval-mlogloss:0.066291 
## [228]    train-mlogloss:0.010497 eval-mlogloss:0.066191 
## [229]    train-mlogloss:0.010407 eval-mlogloss:0.065835 
## [230]    train-mlogloss:0.010343 eval-mlogloss:0.065941 
## [231]    train-mlogloss:0.010277 eval-mlogloss:0.065859 
## [232]    train-mlogloss:0.010193 eval-mlogloss:0.065583 
## [233]    train-mlogloss:0.010117 eval-mlogloss:0.065401 
## [234]    train-mlogloss:0.010068 eval-mlogloss:0.065240 
## [235]    train-mlogloss:0.009981 eval-mlogloss:0.065001 
## [236]    train-mlogloss:0.009908 eval-mlogloss:0.064922 
## [237]    train-mlogloss:0.009859 eval-mlogloss:0.064806 
## [238]    train-mlogloss:0.009774 eval-mlogloss:0.064629 
## [239]    train-mlogloss:0.009714 eval-mlogloss:0.064534 
## [240]    train-mlogloss:0.009649 eval-mlogloss:0.064347 
## [241]    train-mlogloss:0.009579 eval-mlogloss:0.064343 
## [242]    train-mlogloss:0.009527 eval-mlogloss:0.064344 
## [243]    train-mlogloss:0.009473 eval-mlogloss:0.064425 
## [244]    train-mlogloss:0.009420 eval-mlogloss:0.064386 
## [245]    train-mlogloss:0.009372 eval-mlogloss:0.064268 
## [246]    train-mlogloss:0.009337 eval-mlogloss:0.064181 
## [247]    train-mlogloss:0.009280 eval-mlogloss:0.064238 
## [248]    train-mlogloss:0.009219 eval-mlogloss:0.064069 
## [249]    train-mlogloss:0.009138 eval-mlogloss:0.063877 
## [250]    train-mlogloss:0.009073 eval-mlogloss:0.063901 
## [251]    train-mlogloss:0.009039 eval-mlogloss:0.063760 
## [252]    train-mlogloss:0.008978 eval-mlogloss:0.063662 
## [253]    train-mlogloss:0.008943 eval-mlogloss:0.063649 
## [254]    train-mlogloss:0.008889 eval-mlogloss:0.063496 
## [255]    train-mlogloss:0.008840 eval-mlogloss:0.063363 
## [256]    train-mlogloss:0.008777 eval-mlogloss:0.063178 
## [257]    train-mlogloss:0.008716 eval-mlogloss:0.063065 
## [258]    train-mlogloss:0.008674 eval-mlogloss:0.063130 
## [259]    train-mlogloss:0.008612 eval-mlogloss:0.063027 
## [260]    train-mlogloss:0.008562 eval-mlogloss:0.062887 
## [261]    train-mlogloss:0.008525 eval-mlogloss:0.062968 
## [262]    train-mlogloss:0.008470 eval-mlogloss:0.062904 
## [263]    train-mlogloss:0.008438 eval-mlogloss:0.062917 
## [264]    train-mlogloss:0.008395 eval-mlogloss:0.062786 
## [265]    train-mlogloss:0.008365 eval-mlogloss:0.062775 
## [266]    train-mlogloss:0.008317 eval-mlogloss:0.062683 
## [267]    train-mlogloss:0.008276 eval-mlogloss:0.062618 
## [268]    train-mlogloss:0.008239 eval-mlogloss:0.062710 
## [269]    train-mlogloss:0.008209 eval-mlogloss:0.062604 
## [270]    train-mlogloss:0.008177 eval-mlogloss:0.062452 
## [271]    train-mlogloss:0.008126 eval-mlogloss:0.062329 
## [272]    train-mlogloss:0.008101 eval-mlogloss:0.062269 
## [273]    train-mlogloss:0.008062 eval-mlogloss:0.062214 
## [274]    train-mlogloss:0.008014 eval-mlogloss:0.062054 
## [275]    train-mlogloss:0.007984 eval-mlogloss:0.061952 
## [276]    train-mlogloss:0.007949 eval-mlogloss:0.062049 
## [277]    train-mlogloss:0.007919 eval-mlogloss:0.062060 
## [278]    train-mlogloss:0.007877 eval-mlogloss:0.061981 
## [279]    train-mlogloss:0.007851 eval-mlogloss:0.062044 
## [280]    train-mlogloss:0.007827 eval-mlogloss:0.061959 
## [281]    train-mlogloss:0.007783 eval-mlogloss:0.061775 
## [282]    train-mlogloss:0.007748 eval-mlogloss:0.061724 
## [283]    train-mlogloss:0.007717 eval-mlogloss:0.061699 
## [284]    train-mlogloss:0.007691 eval-mlogloss:0.061604 
## [285]    train-mlogloss:0.007657 eval-mlogloss:0.061517 
## [286]    train-mlogloss:0.007629 eval-mlogloss:0.061509 
## [287]    train-mlogloss:0.007590 eval-mlogloss:0.061434 
## [288]    train-mlogloss:0.007558 eval-mlogloss:0.061375 
## [289]    train-mlogloss:0.007527 eval-mlogloss:0.061313 
## [290]    train-mlogloss:0.007495 eval-mlogloss:0.061277 
## [291]    train-mlogloss:0.007463 eval-mlogloss:0.061241 
## [292]    train-mlogloss:0.007431 eval-mlogloss:0.061309 
## [293]    train-mlogloss:0.007410 eval-mlogloss:0.061387 
## [294]    train-mlogloss:0.007377 eval-mlogloss:0.061343 
## [295]    train-mlogloss:0.007345 eval-mlogloss:0.061331 
## [296]    train-mlogloss:0.007314 eval-mlogloss:0.061309 
## [297]    train-mlogloss:0.007274 eval-mlogloss:0.061226 
## [298]    train-mlogloss:0.007248 eval-mlogloss:0.061125 
## [299]    train-mlogloss:0.007225 eval-mlogloss:0.061140 
## [300]    train-mlogloss:0.007191 eval-mlogloss:0.060935 
## [301]    train-mlogloss:0.007158 eval-mlogloss:0.060915 
## [302]    train-mlogloss:0.007129 eval-mlogloss:0.060753 
## [303]    train-mlogloss:0.007101 eval-mlogloss:0.060719 
## [304]    train-mlogloss:0.007075 eval-mlogloss:0.060734 
## [305]    train-mlogloss:0.007043 eval-mlogloss:0.060769 
## [306]    train-mlogloss:0.007015 eval-mlogloss:0.060616 
## [307]    train-mlogloss:0.006981 eval-mlogloss:0.060458 
## [308]    train-mlogloss:0.006956 eval-mlogloss:0.060408 
## [309]    train-mlogloss:0.006936 eval-mlogloss:0.060342 
## [310]    train-mlogloss:0.006911 eval-mlogloss:0.060349 
## [311]    train-mlogloss:0.006890 eval-mlogloss:0.060240 
## [312]    train-mlogloss:0.006874 eval-mlogloss:0.060285 
## [313]    train-mlogloss:0.006849 eval-mlogloss:0.060281 
## [314]    train-mlogloss:0.006829 eval-mlogloss:0.060270 
## [315]    train-mlogloss:0.006798 eval-mlogloss:0.060113 
## [316]    train-mlogloss:0.006782 eval-mlogloss:0.060073 
## [317]    train-mlogloss:0.006762 eval-mlogloss:0.060056 
## [318]    train-mlogloss:0.006734 eval-mlogloss:0.060045 
## [319]    train-mlogloss:0.006713 eval-mlogloss:0.059939 
## [320]    train-mlogloss:0.006687 eval-mlogloss:0.059916 
## [321]    train-mlogloss:0.006665 eval-mlogloss:0.059863 
## [322]    train-mlogloss:0.006645 eval-mlogloss:0.059861 
## [323]    train-mlogloss:0.006625 eval-mlogloss:0.059871 
## [324]    train-mlogloss:0.006604 eval-mlogloss:0.059887 
## [325]    train-mlogloss:0.006583 eval-mlogloss:0.059897 
## [326]    train-mlogloss:0.006570 eval-mlogloss:0.059802 
## [327]    train-mlogloss:0.006547 eval-mlogloss:0.059759 
## [328]    train-mlogloss:0.006523 eval-mlogloss:0.059741 
## [329]    train-mlogloss:0.006495 eval-mlogloss:0.059690 
## [330]    train-mlogloss:0.006472 eval-mlogloss:0.059714 
## [331]    train-mlogloss:0.006456 eval-mlogloss:0.059646 
## [332]    train-mlogloss:0.006428 eval-mlogloss:0.059524 
## [333]    train-mlogloss:0.006412 eval-mlogloss:0.059444 
## [334]    train-mlogloss:0.006398 eval-mlogloss:0.059432 
## [335]    train-mlogloss:0.006381 eval-mlogloss:0.059385 
## [336]    train-mlogloss:0.006354 eval-mlogloss:0.059411 
## [337]    train-mlogloss:0.006342 eval-mlogloss:0.059446 
## [338]    train-mlogloss:0.006326 eval-mlogloss:0.059465 
## [339]    train-mlogloss:0.006303 eval-mlogloss:0.059453 
## [340]    train-mlogloss:0.006287 eval-mlogloss:0.059503 
## [341]    train-mlogloss:0.006268 eval-mlogloss:0.059499 
## [342]    train-mlogloss:0.006242 eval-mlogloss:0.059417 
## [343]    train-mlogloss:0.006233 eval-mlogloss:0.059429 
## [344]    train-mlogloss:0.006217 eval-mlogloss:0.059401 
## [345]    train-mlogloss:0.006203 eval-mlogloss:0.059473 
## [346]    train-mlogloss:0.006183 eval-mlogloss:0.059406 
## [347]    train-mlogloss:0.006166 eval-mlogloss:0.059370 
## [348]    train-mlogloss:0.006149 eval-mlogloss:0.059405 
## [349]    train-mlogloss:0.006128 eval-mlogloss:0.059416 
## [350]    train-mlogloss:0.006111 eval-mlogloss:0.059364 
## [351]    train-mlogloss:0.006085 eval-mlogloss:0.059414 
## [352]    train-mlogloss:0.006067 eval-mlogloss:0.059453 
## [353]    train-mlogloss:0.006050 eval-mlogloss:0.059315 
## [354]    train-mlogloss:0.006037 eval-mlogloss:0.059350 
## [355]    train-mlogloss:0.006015 eval-mlogloss:0.059368 
## [356]    train-mlogloss:0.006004 eval-mlogloss:0.059441 
## [357]    train-mlogloss:0.005992 eval-mlogloss:0.059456 
## [358]    train-mlogloss:0.005975 eval-mlogloss:0.059376 
## [359]    train-mlogloss:0.005964 eval-mlogloss:0.059327 
## [360]    train-mlogloss:0.005952 eval-mlogloss:0.059270 
## [361]    train-mlogloss:0.005933 eval-mlogloss:0.059163 
## [362]    train-mlogloss:0.005918 eval-mlogloss:0.059159 
## [363]    train-mlogloss:0.005905 eval-mlogloss:0.059134 
## [364]    train-mlogloss:0.005890 eval-mlogloss:0.058997 
## [365]    train-mlogloss:0.005866 eval-mlogloss:0.058905 
## [366]    train-mlogloss:0.005849 eval-mlogloss:0.058890 
## [367]    train-mlogloss:0.005837 eval-mlogloss:0.058827 
## [368]    train-mlogloss:0.005824 eval-mlogloss:0.058828 
## [369]    train-mlogloss:0.005806 eval-mlogloss:0.058875 
## [370]    train-mlogloss:0.005792 eval-mlogloss:0.058941 
## [371]    train-mlogloss:0.005768 eval-mlogloss:0.058853 
## [372]    train-mlogloss:0.005753 eval-mlogloss:0.058734 
## [373]    train-mlogloss:0.005738 eval-mlogloss:0.058761 
## [374]    train-mlogloss:0.005726 eval-mlogloss:0.058723 
## [375]    train-mlogloss:0.005712 eval-mlogloss:0.058680 
## [376]    train-mlogloss:0.005701 eval-mlogloss:0.058730 
## [377]    train-mlogloss:0.005689 eval-mlogloss:0.058684 
## [378]    train-mlogloss:0.005681 eval-mlogloss:0.058673 
## [379]    train-mlogloss:0.005661 eval-mlogloss:0.058580 
## [380]    train-mlogloss:0.005644 eval-mlogloss:0.058490 
## [381]    train-mlogloss:0.005628 eval-mlogloss:0.058640 
## [382]    train-mlogloss:0.005610 eval-mlogloss:0.058617 
## [383]    train-mlogloss:0.005599 eval-mlogloss:0.058586 
## [384]    train-mlogloss:0.005588 eval-mlogloss:0.058550 
## [385]    train-mlogloss:0.005579 eval-mlogloss:0.058513 
## [386]    train-mlogloss:0.005562 eval-mlogloss:0.058442 
## [387]    train-mlogloss:0.005549 eval-mlogloss:0.058545 
## [388]    train-mlogloss:0.005540 eval-mlogloss:0.058444 
## [389]    train-mlogloss:0.005526 eval-mlogloss:0.058374 
## [390]    train-mlogloss:0.005509 eval-mlogloss:0.058316 
## [391]    train-mlogloss:0.005487 eval-mlogloss:0.058211 
## [392]    train-mlogloss:0.005480 eval-mlogloss:0.058187 
## [393]    train-mlogloss:0.005468 eval-mlogloss:0.058129 
## [394]    train-mlogloss:0.005452 eval-mlogloss:0.058043 
## [395]    train-mlogloss:0.005440 eval-mlogloss:0.057872 
## [396]    train-mlogloss:0.005432 eval-mlogloss:0.057912 
## [397]    train-mlogloss:0.005413 eval-mlogloss:0.057844 
## [398]    train-mlogloss:0.005401 eval-mlogloss:0.057854 
## [399]    train-mlogloss:0.005390 eval-mlogloss:0.057737 
## [400]    train-mlogloss:0.005376 eval-mlogloss:0.057672 
## [401]    train-mlogloss:0.005362 eval-mlogloss:0.057736 
## [402]    train-mlogloss:0.005353 eval-mlogloss:0.057716 
## [403]    train-mlogloss:0.005344 eval-mlogloss:0.057736 
## [404]    train-mlogloss:0.005332 eval-mlogloss:0.057649 
## [405]    train-mlogloss:0.005312 eval-mlogloss:0.057551 
## [406]    train-mlogloss:0.005301 eval-mlogloss:0.057554 
## [407]    train-mlogloss:0.005289 eval-mlogloss:0.057584 
## [408]    train-mlogloss:0.005281 eval-mlogloss:0.057507 
## [409]    train-mlogloss:0.005268 eval-mlogloss:0.057446 
## [410]    train-mlogloss:0.005257 eval-mlogloss:0.057471 
## [411]    train-mlogloss:0.005249 eval-mlogloss:0.057548 
## [412]    train-mlogloss:0.005236 eval-mlogloss:0.057595 
## [413]    train-mlogloss:0.005228 eval-mlogloss:0.057548 
## [414]    train-mlogloss:0.005219 eval-mlogloss:0.057572 
## [415]    train-mlogloss:0.005210 eval-mlogloss:0.057498 
## [416]    train-mlogloss:0.005199 eval-mlogloss:0.057382 
## [417]    train-mlogloss:0.005190 eval-mlogloss:0.057386 
## [418]    train-mlogloss:0.005180 eval-mlogloss:0.057341 
## [419]    train-mlogloss:0.005170 eval-mlogloss:0.057372 
## [420]    train-mlogloss:0.005161 eval-mlogloss:0.057397 
## [421]    train-mlogloss:0.005149 eval-mlogloss:0.057323 
## [422]    train-mlogloss:0.005139 eval-mlogloss:0.057275 
## [423]    train-mlogloss:0.005129 eval-mlogloss:0.057303 
## [424]    train-mlogloss:0.005121 eval-mlogloss:0.057284 
## [425]    train-mlogloss:0.005112 eval-mlogloss:0.057307 
## [426]    train-mlogloss:0.005100 eval-mlogloss:0.057318 
## [427]    train-mlogloss:0.005091 eval-mlogloss:0.057277 
## [428]    train-mlogloss:0.005083 eval-mlogloss:0.057323 
## [429]    train-mlogloss:0.005074 eval-mlogloss:0.057324 
## [430]    train-mlogloss:0.005064 eval-mlogloss:0.057362 
## [431]    train-mlogloss:0.005055 eval-mlogloss:0.057347 
## [432]    train-mlogloss:0.005044 eval-mlogloss:0.057348 
## [433]    train-mlogloss:0.005037 eval-mlogloss:0.057338 
## [434]    train-mlogloss:0.005028 eval-mlogloss:0.057381 
## [435]    train-mlogloss:0.005019 eval-mlogloss:0.057374 
## [436]    train-mlogloss:0.005012 eval-mlogloss:0.057422 
## [437]    train-mlogloss:0.005004 eval-mlogloss:0.057349 
## [438]    train-mlogloss:0.004999 eval-mlogloss:0.057402 
## [439]    train-mlogloss:0.004989 eval-mlogloss:0.057341 
## [440]    train-mlogloss:0.004981 eval-mlogloss:0.057371 
## [441]    train-mlogloss:0.004974 eval-mlogloss:0.057410 
## [442]    train-mlogloss:0.004968 eval-mlogloss:0.057419 
## [443]    train-mlogloss:0.004960 eval-mlogloss:0.057477 
## [444]    train-mlogloss:0.004952 eval-mlogloss:0.057539 
## [445]    train-mlogloss:0.004941 eval-mlogloss:0.057460 
## [446]    train-mlogloss:0.004935 eval-mlogloss:0.057396 
## [447]    train-mlogloss:0.004924 eval-mlogloss:0.057407 
## [448]    train-mlogloss:0.004917 eval-mlogloss:0.057422 
## [449]    train-mlogloss:0.004907 eval-mlogloss:0.057381 
## [450]    train-mlogloss:0.004898 eval-mlogloss:0.057422 
## [451]    train-mlogloss:0.004892 eval-mlogloss:0.057488 
## [452]    train-mlogloss:0.004884 eval-mlogloss:0.057523 
## Stopping. Best iteration:
## [422]    train-mlogloss:0.005139 eval-mlogloss:0.057275
cat("Best iter:", bst$best_iteration, "\n")
## Best iter: 422
# prediction

pred_probs <- predict(bst, X_test)
pred_mat <- matrix(pred_probs, ncol = num_class, byrow = TRUE)
pred_labels <- max.col(pred_mat) - 1

pred_factor <- factor(classes[pred_labels + 1], levels = classes)
obs_factor  <- factor(classes[y_test + 1], levels = classes)
# confusion matrix & per-class metrics

conf <- caret::confusionMatrix(pred_factor, obs_factor)
print(conf)
## Confusion Matrix and Statistics
## 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           34      2       0          0
##   Normal                  1     51       0          2
##   Obesity                 0      0     198          0
##   Overweight              0      1       1        105
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9823          
##                  95% CI : (0.9638, 0.9928)
##     No Information Rate : 0.5038          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9726          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Insufficient Class: Normal Class: Obesity
## Sensitivity                      0.97143        0.9444         0.9950
## Specificity                      0.99444        0.9912         1.0000
## Pos Pred Value                   0.94444        0.9444         1.0000
## Neg Pred Value                   0.99721        0.9912         0.9949
## Prevalence                       0.08861        0.1367         0.5038
## Detection Rate                   0.08608        0.1291         0.5013
## Detection Prevalence             0.09114        0.1367         0.5013
## Balanced Accuracy                0.98294        0.9678         0.9975
##                      Class: Overweight
## Sensitivity                     0.9813
## Specificity                     0.9931
## Pos Pred Value                  0.9813
## Neg Pred Value                  0.9931
## Prevalence                      0.2709
## Detection Rate                  0.2658
## Detection Prevalence            0.2709
## Balanced Accuracy               0.9872
# compute per-class precision, recall, F1 (and macro F1)
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 4.5.1
## 
## Attaching package: 'MLmetrics'
## The following objects are masked from 'package:caret':
## 
##     MAE, RMSE
## The following object is masked from 'package:base':
## 
##     Recall
precision_vec <- recall_vec <- f1_vec <- numeric(num_class)
for(i in seq_len(num_class)){
  cls <- classes[i]
  y_true_bin <- ifelse(obs_factor == cls, 1, 0)
  y_pred_bin <- ifelse(pred_factor == cls, 1, 0)
  precision_vec[i] <- ifelse(sum(y_pred_bin)==0, NA, Precision(y_pred_bin, y_true_bin))
  recall_vec[i]    <- Recall(y_pred_bin, y_true_bin)
  f1_vec[i]        <- ifelse(is.na(precision_vec[i]) | (precision_vec[i]+recall_vec[i])==0, NA,
                             2 * precision_vec[i] * recall_vec[i] / (precision_vec[i] + recall_vec[i]))
}
metrics_df <- data.frame(Class = classes,
                         Precision = round(precision_vec,3),
                         Recall = round(recall_vec,3),
                         F1 = round(f1_vec,3))
print(metrics_df)
##          Class Precision Recall    F1
## 1 Insufficient     0.994  0.997 0.996
## 2       Normal     0.991  0.991 0.991
## 3      Obesity     1.000  0.995 0.997
## 4   Overweight     0.993  0.993 0.993
macroF1 <- mean(metrics_df$F1, na.rm = TRUE)
cat("Macro F1:", round(macroF1,3), "\n")
## Macro F1: 0.994
# per-class ROC / AUC (one-vs-rest)
auc_list <- list()
par(mfrow = c(ceiling(num_class/2), 2))
for(i in seq_len(num_class)){
  cls <- classes[i]
  true_bin <- ifelse(obs_factor == cls, 1, 0)
  probs_i <- pred_mat[, i]
  if(length(unique(true_bin)) < 2){ next }
  roc_obj <- roc(true_bin, probs_i, quiet = TRUE)
  auc_val <- auc(roc_obj)
  auc_list[[cls]] <- as.numeric(auc_val)
  plot(roc_obj, main = paste0("ROC for: ", cls, " (AUC=", round(auc_val,3), ")"))
}

par(mfrow = c(1,1))
print(auc_list)
## $Insufficient
## [1] 0.9996825
## 
## $Normal
## [1] 0.9985337
## 
## $Obesity
## [1] 0.9999744
## 
## $Overweight
## [1] 0.998702

check the model using 5-fold validation

# test model(5-fold validation)


set.seed(1234)

# Preprocessing 

obesity_df <- obesity_clean %>% mutate_if(is.character, as.factor)

# remove columns that leak label or are redundant (adjust if not present)
drop_cols <- intersect(c("BMI","NObeyesdad","Label_simple"), names(obesity_df))
if(length(drop_cols) > 0) obesity_df <- obesity_df %>% select(-all_of(drop_cols))

# one-hot encode 
X_full <- model.matrix(~ . - BMI_Class - 1, data = obesity_df)
X_full <- Matrix(X_full, sparse = TRUE)
y_full <- as.numeric(obesity_df$BMI_Class) - 1
classes <- levels(obesity_df$BMI_Class)
num_class <- length(classes)
n <- nrow(X_full)
cat("Classes:", classes, " Num:", num_class, " Samples:", n, "\n")
## Classes: Insufficient Normal Obesity Overweight  Num: 4  Samples: 1981
# create stratified folds
folds <- createFolds(y = y_full, k = 5, list = TRUE, returnTrain = FALSE)

# placeholders to collect results
fold_results <- list()
acc_vec <- numeric(length(folds))
macroF1_vec <- numeric(length(folds))


# CV loop
for(i in seq_along(folds)){
  cat("====== Fold", i, "======\n")
  test_idx <- folds[[i]]
  train_idx <- setdiff(seq_len(n), test_idx)

  X_train <- X_full[train_idx, ]
  X_test  <- X_full[test_idx, ]
  y_train <- y_full[train_idx]
  y_test  <- y_full[test_idx]

  cat("Train:", length(y_train), " Test:", length(y_test), "\n")
  print(table(y_train))

  # compute inverse-frequency class weight on this training fold
  freq <- table(y_train)
  total <- length(y_train)
  weight_per_class <- as.numeric(total / (num_class * freq))
  names(weight_per_class) <- names(freq)
  cat("Weight per class (fold):\n"); print(weight_per_class)

  train_weights <- weight_per_class[ as.character(y_train) ]

  # build DMatrix and set weights
  dtrain <- xgb.DMatrix(data = X_train, label = y_train)
  dtest  <- xgb.DMatrix(data = X_test,  label = y_test)
  setinfo(dtrain, "weight", train_weights)

  # params 
  params <- list(
    objective = "multi:softprob",
    eval_metric = "mlogloss",
    num_class = num_class,
    eta = 0.05,
    max_depth = 6,
    subsample = 0.8,
    colsample_bytree = 0.8
  )

  watchlist <- list(train = dtrain, eval = dtest)

  bst <- xgb.train(params = params,
                   data = dtrain,
                   nrounds = 1000,
                   watchlist = watchlist,
                   early_stopping_rounds = 30,
                   verbose = 0)   # set to 1 if you want training log

  cat("Best iter (fold ", i, "):", bst$best_iteration, "\n")

  # predict on test fold
  pred_probs <- predict(bst, X_test)
  pred_mat <- matrix(pred_probs, ncol = num_class, byrow = TRUE)
  pred_labels <- max.col(pred_mat) - 1

  pred_factor <- factor(classes[pred_labels + 1], levels = classes)
  obs_factor  <- factor(classes[y_test + 1], levels = classes)

  # confusion matrix & accuracy
  conf <- caret::confusionMatrix(pred_factor, obs_factor)
  print(conf$table)
  acc <- as.numeric(conf$overall["Accuracy"])
  cat("Accuracy (fold ", i, "):", round(acc,4), "\n")

  # per-class Precision/Recall/F1 (one-vs-rest)
  precision_vec <- recall_vec <- f1_vec <- numeric(num_class)
  for(j in seq_len(num_class)){
    cls <- classes[j]
    y_true_bin <- ifelse(obs_factor == cls, 1, 0)
    y_pred_bin <- ifelse(pred_factor == cls, 1, 0)

    if(sum(y_pred_bin) == 0){
      precision_vec[j] <- NA
    } else {
      precision_vec[j] <- Precision(y_pred_bin, y_true_bin)
    }
    recall_vec[j] <- Recall(y_pred_bin, y_true_bin)
    if(is.na(precision_vec[j]) || (precision_vec[j] + recall_vec[j]) == 0){
      f1_vec[j] <- NA
    } else {
      f1_vec[j] <- 2 * precision_vec[j] * recall_vec[j] / (precision_vec[j] + recall_vec[j])
    }
  }

  metrics_df <- data.frame(Class = classes,
                           Precision = round(precision_vec,3),
                           Recall = round(recall_vec,3),
                           F1 = round(f1_vec,3))
  print(metrics_df)

  macroF1 <- mean(metrics_df$F1, na.rm = TRUE)
  cat("Macro F1 (fold ", i, "):", round(macroF1,4), "\n\n")

  # save fold results
  fold_results[[i]] <- list(conf = conf, metrics = metrics_df, accuracy = acc, macroF1 = macroF1, best_iter = bst$best_iteration)
  acc_vec[i] <- acc
  macroF1_vec[i] <- macroF1
}
## ====== Fold 1 ======
## Train: 1586  Test: 395 
## y_train
##   0   1   2   3 
## 178 205 771 432 
## Weight per class (fold):
##         0         1         2         3 
## 2.2275281 1.9341463 0.5142672 0.9178241 
## Best iter (fold  1 ): 459 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           40      0       0          0
##   Normal                  2     54       0          0
##   Obesity                 0      0     191          0
##   Overweight              0      1       0        107
## Accuracy (fold  1 ): 0.9924 
##          Class Precision Recall    F1
## 1 Insufficient     1.000  0.994 0.997
## 2       Normal     0.994  0.997 0.996
## 3      Obesity     1.000  1.000 1.000
## 4   Overweight     0.997  1.000 0.998
## Macro F1 (fold  1 ): 0.9978 
## 
## ====== Fold 2 ======
## Train: 1585  Test: 396 
## y_train
##   0   1   2   3 
## 173 212 769 431 
## Weight per class (fold):
##         0         1         2         3 
## 2.2904624 1.8691038 0.5152796 0.9193735 
## Best iter (fold  2 ): 888 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           46      0       0          0
##   Normal                  1     47       0          2
##   Obesity                 0      0     192          0
##   Overweight              0      1       1        106
## Accuracy (fold  2 ): 0.9874 
##          Class Precision Recall    F1
## 1 Insufficient     1.000  0.997 0.999
## 2       Normal     0.991  0.997 0.994
## 3      Obesity     1.000  0.995 0.998
## 4   Overweight     0.993  0.993 0.993
## Macro F1 (fold  2 ): 0.996 
## 
## ====== Fold 3 ======
## Train: 1584  Test: 397 
## y_train
##   0   1   2   3 
## 174 206 773 431 
## Weight per class (fold):
##         0         1         2         3 
## 2.2758621 1.9223301 0.5122898 0.9187935 
## Best iter (fold  3 ): 439 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           44      0       0          0
##   Normal                  2     52       0          2
##   Obesity                 0      0     189          1
##   Overweight              0      2       0        105
## Accuracy (fold  3 ): 0.9824 
##          Class Precision Recall    F1
## 1 Insufficient     1.000  0.994 0.997
## 2       Normal     0.988  0.994 0.991
## 3      Obesity     0.995  1.000 0.998
## 4   Overweight     0.993  0.990 0.991
## Macro F1 (fold  3 ): 0.9942 
## 
## ====== Fold 4 ======
## Train: 1584  Test: 397 
## y_train
##   0   1   2   3 
## 173 207 773 431 
## Weight per class (fold):
##         0         1         2         3 
## 2.2890173 1.9130435 0.5122898 0.9187935 
## Best iter (fold  4 ): 487 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           47      0       0          0
##   Normal                  0     50       0          3
##   Obesity                 0      0     189          0
##   Overweight              0      3       0        105
## Accuracy (fold  4 ): 0.9849 
##          Class Precision Recall    F1
## 1 Insufficient     1.000  1.000 1.000
## 2       Normal     0.991  0.991 0.991
## 3      Obesity     1.000  1.000 1.000
## 4   Overweight     0.990  0.990 0.990
## Macro F1 (fold  4 ): 0.9952 
## 
## ====== Fold 5 ======
## Train: 1585  Test: 396 
## y_train
##   0   1   2   3 
## 182 210 762 431 
## Weight per class (fold):
##         0         1         2         3 
## 2.1771978 1.8869048 0.5200131 0.9193735 
## Best iter (fold  5 ): 332 
##               Reference
## Prediction     Insufficient Normal Obesity Overweight
##   Insufficient           36      1       0          0
##   Normal                  2     48       0          5
##   Obesity                 0      0     197          1
##   Overweight              0      1       3        102
## Accuracy (fold  5 ): 0.9672 
##          Class Precision Recall    F1
## 1 Insufficient     0.997  0.994 0.996
## 2       Normal     0.980  0.994 0.987
## 3      Obesity     0.995  0.985 0.990
## 4   Overweight     0.986  0.979 0.983
## Macro F1 (fold  5 ): 0.989
# aggregate CV results

cat("CV Summary \n")
## CV Summary
cat("Accuracy per fold:", round(acc_vec,4), "\n")
## Accuracy per fold: 0.9924 0.9874 0.9824 0.9849 0.9672
cat("Mean Accuracy:", round(mean(acc_vec),4), " SD:", round(sd(acc_vec),4), "\n")
## Mean Accuracy: 0.9828  SD: 0.0095
cat("Macro-F1 per fold:", round(macroF1_vec,4), "\n")
## Macro-F1 per fold: 0.9978 0.996 0.9942 0.9952 0.989
cat("Mean Macro-F1:", round(mean(macroF1_vec, na.rm = TRUE),4), " SD:", round(sd(macroF1_vec, na.rm = TRUE),4), "\n")
## Mean Macro-F1: 0.9944  SD: 0.0033
# mean per-class F1 across folds
all_f1 <- sapply(fold_results, function(x) x$metrics$F1)
mean_f1_per_class <- rowMeans(all_f1, na.rm = TRUE)
f1_table <- data.frame(Class = classes, Mean_F1 = round(mean_f1_per_class,3))
print(f1_table)
##          Class Mean_F1
## 1 Insufficient   0.998
## 2       Normal   0.992
## 3      Obesity   0.997
## 4   Overweight   0.991
# SHAP

library(shapviz)
## Warning: package 'shapviz' was built under R version 4.5.2
X_train_mat <- as.matrix(X_train)

sv <- shapviz(
  bst,
  X = X_train_mat,
  X_pred = X_train_mat
)

sv[[1]]   
## 'shapviz' object representing 1585 x 24 SHAP matrix. Top lines:
## 
##      GenderFemale  GenderMale          Age     Height    Weight
## [1,]  -0.01918555 -0.04026219 -0.005398898 -1.8223714 -1.404736
## [2,]   0.03688017 -0.03063223  0.014316372  0.4167305 -4.108120
##      family_history_with_overweightyes      FAVCyes       FCVC         NCP
## [1,]                       -0.05461422 -0.002471139  0.3563047 -0.17736690
## [2,]                       -0.02334714 -0.007228931 -0.4679727 -0.09698293
##      CAECFrequently     CAECno CAECSometimes   SMOKEyes        CH2O     SCCyes
## [1,]      0.3830118 0.02066617   -0.05515741 0.03535179  0.08977139 0.05184976
## [2,]      0.3604885 0.02309405   -0.06014491 0.01061692 -0.07888518 0.04653170
##              FAF         TUE CALCFrequently     CALCno CALCSometimes MTRANSBike
## [1,] -0.38611311  0.09007351    -0.06098965 0.02174658  0.0001872934          0
## [2,]  0.05980391 -0.04828957    -0.06719342 0.02273862  0.0035742861          0
##      MTRANSMotorbike MTRANSPublic_Transportation MTRANSWalking
## [1,]               0               -0.0196206253  0.0006068132
## [2,]               0               -0.0008451333  0.0006068132
sv[[2]]   
## 'shapviz' object representing 1585 x 24 SHAP matrix. Top lines:
## 
##      GenderFemale GenderMale        Age     Height   Weight
## [1,]  -0.04835163 0.01033439 0.11994301 -0.7314767 1.350759
## [2,]  -0.25132990 0.05520798 0.02402467  2.6033335 0.142669
##      family_history_with_overweightyes     FAVCyes      FCVC        NCP
## [1,]                        0.07482897 0.001545759 0.1225174 0.04041113
## [2,]                       -0.02583305 0.001086622 0.1687990 0.06940047
##      CAECFrequently      CAECno CAECSometimes    SMOKEyes      CH2O      SCCyes
## [1,]    -0.03834416 -0.12269240    0.04401067 -0.04430963 -0.139888  0.00219492
## [2,]    -0.01745589 -0.06407286   -0.03193298 -0.06306682  0.220165 -0.01840627
##            FAF        TUE CALCFrequently      CALCno CALCSometimes MTRANSBike
## [1,] 0.5370671 -2.0849569   0.0009316059 -0.01361955   -0.01620919          0
## [2,] 0.3799591  0.4552251   0.0281654857 -0.02004077   -0.02803967          0
##      MTRANSMotorbike MTRANSPublic_Transportation MTRANSWalking
## [1,]               0                 0.001698886     0.0988561
## [2,]               0                 0.010374645     0.1458712
sv[[3]]   
## 'shapviz' object representing 1585 x 24 SHAP matrix. Top lines:
## 
##      GenderFemale  GenderMale         Age    Height   Weight
## [1,]   0.08115668 -0.01527371 -0.22100155  1.087026 -4.92558
## [2,]   0.12513314 -0.02537715 -0.04475144 -1.713925 -2.43980
##      family_history_with_overweightyes    FAVCyes         FCVC          NCP
## [1,]                        0.10047571 0.02033791  0.005842878 -0.007859098
## [2,]                        0.08154058 0.01294900 -0.029374696  0.013664409
##      CAECFrequently        CAECno CAECSometimes    SMOKEyes      CH2O SCCyes
## [1,]    -0.06137488 -0.0001300465    0.05622872 0.002189198  0.031434      0
## [2,]    -0.07206173 -0.0001129695    0.04931241 0.006883312 -0.103168      0
##             FAF        TUE CALCFrequently      CALCno CALCSometimes MTRANSBike
## [1,] -0.1387543  0.1066503   -0.025705222 -0.01678733  -0.001238827          0
## [2,] -0.1050465 -0.3212476   -0.009374741  0.03435629   0.006248672          0
##      MTRANSMotorbike MTRANSPublic_Transportation MTRANSWalking
## [1,]               0                 0.002944148             0
## [2,]               0                 0.003165144             0
sv[[4]]   
## 'shapviz' object representing 1585 x 24 SHAP matrix. Top lines:
## 
##      GenderFemale   GenderMale         Age     Height     Weight
## [1,]   0.01525060 -0.006459752 -0.09971264  0.5422618 -3.2787855
## [2,]   0.06456939 -0.009119193 -0.20831451 -0.6421134  0.7408128
##      family_history_with_overweightyes     FAVCyes        FCVC         NCP
## [1,]                       -0.03248628 -0.04487322 -0.42408159 -0.05330720
## [2,]                        0.06613126 -0.04125594  0.08723304 -0.06632033
##      CAECFrequently       CAECno CAECSometimes    SMOKEyes        CH2O
## [1,]     -0.1522136  0.004388724    0.03619101 0.025668416  0.07353806
## [2,]     -0.2052898 -0.007618206    0.06121099 0.002909356 -0.01855685
##         SCCyes        FAF        TUE CALCFrequently      CALCno CALCSometimes
## [1,] 0.2128339 -0.1253075  0.3094125     0.10577960 -0.03729240     0.0167471
## [2,] 0.1488610 -0.5001416 -0.4415909     0.06264763 -0.05436797     0.0324832
##      MTRANSBike MTRANSMotorbike MTRANSPublic_Transportation MTRANSWalking
## [1,]          0     0.013042245                0.0082967458   -0.07044958
## [2,]          0     0.002273976                0.0003405067   -0.16094941
print(classes)
## [1] "Insufficient" "Normal"       "Obesity"      "Overweight"
idx_overweight <- which(classes == "Overweight")
idx_obesity    <- which(classes == "Obesity")

sv_overweight <- sv[[ idx_overweight ]]
sv_obesity    <- sv[[ idx_obesity ]]
# features importance visualization
# Obesity
sv_importance(sv_obesity, kind = "both")

# overweight
sv_importance(sv_overweight, kind = "both")

# dependence plot
# age
sv_dependence(sv_obesity, "Age")

# family_history_with_overweight

sv_dependence(sv_obesity, "family_history_with_overweightyes")

# CAECFrequently

sv_dependence(sv_obesity, "CAECFrequently")

# TUE
sv_dependence(sv_obesity, "TUE")

sv_force(sv_obesity, row_id = 10)