Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(cluster)
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(corrplot)
## corrplot 0.95 loaded

Load Data

# Import the menu nutrition table from the working directory.
data <- read.csv(file = "menu.csv")

# Preview the first six rows to confirm the import looks right.
head(data, n = 6L)
##    Category                             Item   Serving.Size Calories
## 1 Breakfast                     Egg McMuffin 4.8 oz (136 g)      300
## 2 Breakfast                Egg White Delight 4.8 oz (135 g)      250
## 3 Breakfast                 Sausage McMuffin 3.9 oz (111 g)      370
## 4 Breakfast        Sausage McMuffin with Egg 5.7 oz (161 g)      450
## 5 Breakfast Sausage McMuffin with Egg Whites 5.7 oz (161 g)      400
## 6 Breakfast             Steak & Egg McMuffin 6.5 oz (185 g)      430
##   Calories.from.Fat Total.Fat Total.Fat....Daily.Value. Saturated.Fat
## 1               120        13                        20             5
## 2                70         8                        12             3
## 3               200        23                        35             8
## 4               250        28                        43            10
## 5               210        23                        35             8
## 6               210        23                        36             9
##   Saturated.Fat....Daily.Value. Trans.Fat Cholesterol
## 1                            25         0         260
## 2                            15         0          25
## 3                            42         0          45
## 4                            52         0         285
## 5                            42         0          50
## 6                            46         1         300
##   Cholesterol....Daily.Value. Sodium Sodium....Daily.Value. Carbohydrates
## 1                          87    750                     31            31
## 2                           8    770                     32            30
## 3                          15    780                     33            29
## 4                          95    860                     36            30
## 5                          16    880                     37            30
## 6                         100    960                     40            31
##   Carbohydrates....Daily.Value. Dietary.Fiber Dietary.Fiber....Daily.Value.
## 1                            10             4                            17
## 2                            10             4                            17
## 3                            10             4                            17
## 4                            10             4                            17
## 5                            10             4                            17
## 6                            10             4                            18
##   Sugars Protein Vitamin.A....Daily.Value. Vitamin.C....Daily.Value.
## 1      3      17                        10                         0
## 2      3      18                         6                         0
## 3      2      14                         8                         0
## 4      2      21                        15                         0
## 5      2      21                         6                         0
## 6      3      26                        15                         2
##   Calcium....Daily.Value. Iron....Daily.Value.
## 1                      25                   15
## 2                      25                    8
## 3                      25                   10
## 4                      30                   15
## 5                      25                   10
## 6                      30                   20
# Show each column's storage type together with a sample of its values.
str(data)
## 'data.frame':    260 obs. of  24 variables:
##  $ Category                     : chr  "Breakfast" "Breakfast" "Breakfast" "Breakfast" ...
##  $ Item                         : chr  "Egg McMuffin" "Egg White Delight" "Sausage McMuffin" "Sausage McMuffin with Egg" ...
##  $ Serving.Size                 : chr  "4.8 oz (136 g)" "4.8 oz (135 g)" "3.9 oz (111 g)" "5.7 oz (161 g)" ...
##  $ Calories                     : int  300 250 370 450 400 430 460 520 410 470 ...
##  $ Calories.from.Fat            : int  120 70 200 250 210 210 230 270 180 220 ...
##  $ Total.Fat                    : num  13 8 23 28 23 23 26 30 20 25 ...
##  $ Total.Fat....Daily.Value.    : int  20 12 35 43 35 36 40 47 32 38 ...
##  $ Saturated.Fat                : num  5 3 8 10 8 9 13 14 11 12 ...
##  $ Saturated.Fat....Daily.Value.: int  25 15 42 52 42 46 65 68 56 59 ...
##  $ Trans.Fat                    : num  0 0 0 0 0 1 0 0 0 0 ...
##  $ Cholesterol                  : int  260 25 45 285 50 300 250 250 35 35 ...
##  $ Cholesterol....Daily.Value.  : int  87 8 15 95 16 100 83 83 11 11 ...
##  $ Sodium                       : int  750 770 780 860 880 960 1300 1410 1300 1420 ...
##  $ Sodium....Daily.Value.       : int  31 32 33 36 37 40 54 59 54 59 ...
##  $ Carbohydrates                : int  31 30 29 30 30 31 38 43 36 42 ...
##  $ Carbohydrates....Daily.Value.: int  10 10 10 10 10 10 13 14 12 14 ...
##  $ Dietary.Fiber                : int  4 4 4 4 4 4 2 3 2 3 ...
##  $ Dietary.Fiber....Daily.Value.: int  17 17 17 17 17 18 7 12 7 12 ...
##  $ Sugars                       : int  3 3 2 2 2 3 3 4 3 4 ...
##  $ Protein                      : int  17 18 14 21 21 26 19 19 20 20 ...
##  $ Vitamin.A....Daily.Value.    : int  10 6 8 15 6 15 10 15 2 6 ...
##  $ Vitamin.C....Daily.Value.    : int  0 0 0 0 0 2 8 8 8 8 ...
##  $ Calcium....Daily.Value.      : int  25 25 25 30 25 30 15 20 15 15 ...
##  $ Iron....Daily.Value.         : int  15 8 10 15 10 20 15 20 10 15 ...
# Per-column summary: five-number summary and mean for numeric columns,
# length/class/mode for character columns.
summary(data)
##    Category             Item           Serving.Size          Calories     
##  Length:260         Length:260         Length:260         Min.   :   0.0  
##  Class :character   Class :character   Class :character   1st Qu.: 210.0  
##  Mode  :character   Mode  :character   Mode  :character   Median : 340.0  
##                                                           Mean   : 368.3  
##                                                           3rd Qu.: 500.0  
##                                                           Max.   :1880.0  
##  Calories.from.Fat   Total.Fat       Total.Fat....Daily.Value. Saturated.Fat   
##  Min.   :   0.0    Min.   :  0.000   Min.   :  0.00            Min.   : 0.000  
##  1st Qu.:  20.0    1st Qu.:  2.375   1st Qu.:  3.75            1st Qu.: 1.000  
##  Median : 100.0    Median : 11.000   Median : 17.00            Median : 5.000  
##  Mean   : 127.1    Mean   : 14.165   Mean   : 21.82            Mean   : 6.008  
##  3rd Qu.: 200.0    3rd Qu.: 22.250   3rd Qu.: 35.00            3rd Qu.:10.000  
##  Max.   :1060.0    Max.   :118.000   Max.   :182.00            Max.   :20.000  
##  Saturated.Fat....Daily.Value.   Trans.Fat       Cholesterol    
##  Min.   :  0.00                Min.   :0.0000   Min.   :  0.00  
##  1st Qu.:  4.75                1st Qu.:0.0000   1st Qu.:  5.00  
##  Median : 24.00                Median :0.0000   Median : 35.00  
##  Mean   : 29.97                Mean   :0.2038   Mean   : 54.94  
##  3rd Qu.: 48.00                3rd Qu.:0.0000   3rd Qu.: 65.00  
##  Max.   :102.00                Max.   :2.5000   Max.   :575.00  
##  Cholesterol....Daily.Value.     Sodium       Sodium....Daily.Value.
##  Min.   :  0.00              Min.   :   0.0   Min.   :  0.00        
##  1st Qu.:  2.00              1st Qu.: 107.5   1st Qu.:  4.75        
##  Median : 11.00              Median : 190.0   Median :  8.00        
##  Mean   : 18.39              Mean   : 495.8   Mean   : 20.68        
##  3rd Qu.: 21.25              3rd Qu.: 865.0   3rd Qu.: 36.25        
##  Max.   :192.00              Max.   :3600.0   Max.   :150.00        
##  Carbohydrates    Carbohydrates....Daily.Value. Dietary.Fiber  
##  Min.   :  0.00   Min.   : 0.00                 Min.   :0.000  
##  1st Qu.: 30.00   1st Qu.:10.00                 1st Qu.:0.000  
##  Median : 44.00   Median :15.00                 Median :1.000  
##  Mean   : 47.35   Mean   :15.78                 Mean   :1.631  
##  3rd Qu.: 60.00   3rd Qu.:20.00                 3rd Qu.:3.000  
##  Max.   :141.00   Max.   :47.00                 Max.   :7.000  
##  Dietary.Fiber....Daily.Value.     Sugars          Protein     
##  Min.   : 0.000                Min.   :  0.00   Min.   : 0.00  
##  1st Qu.: 0.000                1st Qu.:  5.75   1st Qu.: 4.00  
##  Median : 5.000                Median : 17.50   Median :12.00  
##  Mean   : 6.531                Mean   : 29.42   Mean   :13.34  
##  3rd Qu.:10.000                3rd Qu.: 48.00   3rd Qu.:19.00  
##  Max.   :28.000                Max.   :128.00   Max.   :87.00  
##  Vitamin.A....Daily.Value. Vitamin.C....Daily.Value. Calcium....Daily.Value.
##  Min.   :  0.00            Min.   :  0.000           Min.   : 0.00          
##  1st Qu.:  2.00            1st Qu.:  0.000           1st Qu.: 6.00          
##  Median :  8.00            Median :  0.000           Median :20.00          
##  Mean   : 13.43            Mean   :  8.535           Mean   :20.97          
##  3rd Qu.: 15.00            3rd Qu.:  4.000           3rd Qu.:30.00          
##  Max.   :170.00            Max.   :240.000           Max.   :70.00          
##  Iron....Daily.Value.
##  Min.   : 0.000      
##  1st Qu.: 0.000      
##  Median : 4.000      
##  Mean   : 7.735      
##  3rd Qu.:15.000      
##  Max.   :40.000
# Count missing values per column (is.na() gives a logical matrix; colSums()
# adds up the TRUEs in each column).
colSums(is.na(data))
##                      Category                          Item 
##                             0                             0 
##                  Serving.Size                      Calories 
##                             0                             0 
##             Calories.from.Fat                     Total.Fat 
##                             0                             0 
##     Total.Fat....Daily.Value.                 Saturated.Fat 
##                             0                             0 
## Saturated.Fat....Daily.Value.                     Trans.Fat 
##                             0                             0 
##                   Cholesterol   Cholesterol....Daily.Value. 
##                             0                             0 
##                        Sodium        Sodium....Daily.Value. 
##                             0                             0 
##                 Carbohydrates Carbohydrates....Daily.Value. 
##                             0                             0 
##                 Dietary.Fiber Dietary.Fiber....Daily.Value. 
##                             0                             0 
##                        Sugars                       Protein 
##                             0                             0 
##     Vitamin.A....Daily.Value.     Vitamin.C....Daily.Value. 
##                             0                             0 
##       Calcium....Daily.Value.          Iron....Daily.Value. 
##                             0                             0

Feature Selection

# Keep only the six nutrient columns that feed the clustering steps.
dt1 <- data[, c("Calories", "Total.Fat", "Carbohydrates",
                "Protein", "Sugars", "Sodium")]

# Quick look at the reduced table.
head(dt1, n = 6L)
##   Calories Total.Fat Carbohydrates Protein Sugars Sodium
## 1      300        13            31      17      3    750
## 2      250         8            30      18      3    770
## 3      370        23            29      14      2    780
## 4      450        28            30      21      2    860
## 5      400        23            30      21      2    880
## 6      430        23            31      26      3    960

Heatmap Korelasi

# Correlation heatmap of the selected features.
# Pairwise Pearson correlations; rows containing NA are dropped.
corr_matrix <- cor(dt1, use = "complete.obs")

# corrplot() uses zero outer margins by default (mar = c(0, 0, 0, 0)), which
# clips the title at the top of the device. Reserve two lines of top margin
# so the title is fully visible.
corrplot(corr_matrix, method = "color", type = "upper",
         addCoef.col = "black", tl.col = "black",
         title = "Heatmap Fitur Terpilih",
         mar = c(0, 0, 2, 0))

## Normalisasi Data

# Standardize every feature (center to mean 0, scale to sd 1) and convert the
# resulting matrix back into a data frame for the clustering steps.
dt_scaled <- as.data.frame(scale(dt1))

Visualisasi Awal

# First look at the raw (unscaled) data: protein against total fat.
with(
  dt1,
  plot(
    x = Protein,
    y = Total.Fat,
    pch = 19,
    main = "Visualisasi Awal Data",
    xlab = "Protein",
    ylab = "Total Fat"
  )
)

## Elbow Method

# Elbow method: total within-cluster sum of squares (WSS) for k = 1..10.
# kmeans() picks random starting centers, so seed the RNG first; otherwise
# the curve changes from run to run.
set.seed(42)

k_values <- 1:10
# Preallocate the result instead of growing an untyped vector in the loop.
wss <- numeric(length(k_values))

# k_values starts at 1 with step 1, so k doubles as the index into wss.
for (k in k_values) {
  kmeans_model <- kmeans(dt_scaled, centers = k, nstart = 25)
  wss[k] <- kmeans_model$tot.withinss
}

# Look for the "elbow" where adding clusters stops reducing WSS noticeably.
plot(k_values, wss, type = "b",
     xlab = "Jumlah Cluster",
     ylab = "Inertia (WSS)",
     main = "Elbow Method")

## K-Means Clustering

# Fit the final 3-cluster k-means model. Seeding makes the 25 random starts
# (and therefore the cluster numbering) repeatable.
set.seed(42)
kmeans_model <- kmeans(x = dt_scaled, centers = 3, nstart = 25)

# Store each row's cluster label next to the original, unscaled features.
dt1[["cluster_kmeans"]] <- kmeans_model[["cluster"]]

head(dt1, n = 6L)
##   Calories Total.Fat Carbohydrates Protein Sugars Sodium cluster_kmeans
## 1      300        13            31      17      3    750              1
## 2      250         8            30      18      3    770              1
## 3      370        23            29      14      2    780              3
## 4      450        28            30      21      2    860              3
## 5      400        23            30      21      2    880              3
## 6      430        23            31      26      3    960              3

Hierarchical Clustering

# Agglomerative clustering: Euclidean distances between the standardized
# rows, merged with Ward's criterion ("ward.D2").
dist_matrix <- dist(x = dt_scaled, method = "euclidean")
hc <- hclust(d = dist_matrix, method = "ward.D2")

# Draw the merge tree.
plot(x = hc, main = "Dendrogram")

# Cut the tree into three groups and record the label for every row.
dt1[["cluster_hier"]] <- cutree(tree = hc, k = 3)

DBSCAN

# Density-based clustering on the standardized features.
dbscan_model <- dbscan(dt_scaled, eps = 0.8, minPts = 5)

# DBSCAN assigns label 0 to noise points (rows not belonging to any cluster).
dt1$cluster_dbscan <- dbscan_model$cluster

# Explicit axis labels (as in the initial scatter plot) instead of the
# default deparsed expressions such as "dt1$Calories".
# Labels are shifted by 1 for colouring because colour index 0 is not drawn.
plot(dt1$Calories, dt1$Total.Fat,
     col = dt1$cluster_dbscan + 1,
     pch = 19,
     xlab = "Calories",
     ylab = "Total Fat",
     main = "DBSCAN Clustering")

# Legend so noise (label 0) is not mistaken for a regular cluster.
cluster_ids <- sort(unique(dt1$cluster_dbscan))
legend("topleft",
       legend = ifelse(cluster_ids == 0, "Noise",
                       paste("Cluster", cluster_ids)),
       col = cluster_ids + 1,
       pch = 19)

## Silhouette Score

# Silhouette widths of the k-means partition, measured on the same
# Euclidean distances between standardized rows.
sil <- silhouette(x = kmeans_model$cluster, dist = dist(dt_scaled))

# Average silhouette width over all rows (column 3 is "sil_width").
mean(sil[, "sil_width"])
## [1] 0.4240723

Rata-rata tiap cluster

# Nutrient columns to average. The cluster-label columns are identifiers,
# not measurements, so averaging them (which where(is.numeric) did) produces
# meaningless numbers; restrict the summary to the real features.
feature_cols <- c("Calories", "Total.Fat", "Carbohydrates",
                  "Protein", "Sugars", "Sodium")

# Mean nutrient profile of each k-means cluster.
dt1 %>%
  group_by(cluster_kmeans) %>%
  summarise(across(all_of(feature_cols), mean))
## # A tibble: 3 × 7
##   cluster_kmeans Calories Total.Fat Carbohydrates Protein Sugars Sodium
##            <int>    <dbl>     <dbl>         <dbl>   <dbl>  <dbl>  <dbl>
## 1              1     197.      5.34          31.4    6.37  23.2    169.
## 2              2     512.     14.9           82.9   12.4   73.0    205.
## 3              3     569.     29.6           49.4   26.6    7.81  1306.
# Mean nutrient profile of each hierarchical cluster.
dt1 %>%
  group_by(cluster_hier) %>%
  summarise(across(all_of(feature_cols), mean))
## # A tibble: 3 × 7
##   cluster_hier Calories Total.Fat Carbohydrates Protein Sugars Sodium
##          <int>    <dbl>     <dbl>         <dbl>   <dbl>  <dbl>  <dbl>
## 1            1     519.     26.3           46.5   24.3    7.51 1175. 
## 2            2     114.      3.79          16.5    3.80  11.0    89.6
## 3            3     385.      9.77          65.5    9.49  58.3   153. 
# Mean nutrient profile per DBSCAN label (label 0 is the noise group).
dt1 %>%
  group_by(cluster_dbscan) %>%
  summarise(across(all_of(feature_cols), mean))
## # A tibble: 2 × 7
##   cluster_dbscan Calories Total.Fat Carbohydrates Protein Sugars Sodium
##            <int>    <dbl>     <dbl>         <dbl>   <dbl>  <dbl>  <dbl>
## 1              0     780.      36.0          87      27.7   39    1135.
## 2              1     330.      12.2          43.7    12.0   28.5   437.