library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(cluster)
library(dbscan)
##
## Attaching package: 'dbscan'
##
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(corrplot)
## corrplot 0.95 loaded
# Read the menu dataset from the working directory and preview its first rows.
data <- read.csv(file = "menu.csv")
head(data, n = 6L)
## Category Item Serving.Size Calories
## 1 Breakfast Egg McMuffin 4.8 oz (136 g) 300
## 2 Breakfast Egg White Delight 4.8 oz (135 g) 250
## 3 Breakfast Sausage McMuffin 3.9 oz (111 g) 370
## 4 Breakfast Sausage McMuffin with Egg 5.7 oz (161 g) 450
## 5 Breakfast Sausage McMuffin with Egg Whites 5.7 oz (161 g) 400
## 6 Breakfast Steak & Egg McMuffin 6.5 oz (185 g) 430
## Calories.from.Fat Total.Fat Total.Fat....Daily.Value. Saturated.Fat
## 1 120 13 20 5
## 2 70 8 12 3
## 3 200 23 35 8
## 4 250 28 43 10
## 5 210 23 35 8
## 6 210 23 36 9
## Saturated.Fat....Daily.Value. Trans.Fat Cholesterol
## 1 25 0 260
## 2 15 0 25
## 3 42 0 45
## 4 52 0 285
## 5 42 0 50
## 6 46 1 300
## Cholesterol....Daily.Value. Sodium Sodium....Daily.Value. Carbohydrates
## 1 87 750 31 31
## 2 8 770 32 30
## 3 15 780 33 29
## 4 95 860 36 30
## 5 16 880 37 30
## 6 100 960 40 31
## Carbohydrates....Daily.Value. Dietary.Fiber Dietary.Fiber....Daily.Value.
## 1 10 4 17
## 2 10 4 17
## 3 10 4 17
## 4 10 4 17
## 5 10 4 17
## 6 10 4 18
## Sugars Protein Vitamin.A....Daily.Value. Vitamin.C....Daily.Value.
## 1 3 17 10 0
## 2 3 18 6 0
## 3 2 14 8 0
## 4 2 21 15 0
## 5 2 21 6 0
## 6 3 26 15 2
## Calcium....Daily.Value. Iron....Daily.Value.
## 1 25 15
## 2 25 8
## 3 25 10
## 4 30 15
## 5 25 10
## 6 30 20
# Display the compact structure of the data frame: its dimensions and the
# type and first values of every column.
str(data)
## 'data.frame': 260 obs. of 24 variables:
## $ Category : chr "Breakfast" "Breakfast" "Breakfast" "Breakfast" ...
## $ Item : chr "Egg McMuffin" "Egg White Delight" "Sausage McMuffin" "Sausage McMuffin with Egg" ...
## $ Serving.Size : chr "4.8 oz (136 g)" "4.8 oz (135 g)" "3.9 oz (111 g)" "5.7 oz (161 g)" ...
## $ Calories : int 300 250 370 450 400 430 460 520 410 470 ...
## $ Calories.from.Fat : int 120 70 200 250 210 210 230 270 180 220 ...
## $ Total.Fat : num 13 8 23 28 23 23 26 30 20 25 ...
## $ Total.Fat....Daily.Value. : int 20 12 35 43 35 36 40 47 32 38 ...
## $ Saturated.Fat : num 5 3 8 10 8 9 13 14 11 12 ...
## $ Saturated.Fat....Daily.Value.: int 25 15 42 52 42 46 65 68 56 59 ...
## $ Trans.Fat : num 0 0 0 0 0 1 0 0 0 0 ...
## $ Cholesterol : int 260 25 45 285 50 300 250 250 35 35 ...
## $ Cholesterol....Daily.Value. : int 87 8 15 95 16 100 83 83 11 11 ...
## $ Sodium : int 750 770 780 860 880 960 1300 1410 1300 1420 ...
## $ Sodium....Daily.Value. : int 31 32 33 36 37 40 54 59 54 59 ...
## $ Carbohydrates : int 31 30 29 30 30 31 38 43 36 42 ...
## $ Carbohydrates....Daily.Value.: int 10 10 10 10 10 10 13 14 12 14 ...
## $ Dietary.Fiber : int 4 4 4 4 4 4 2 3 2 3 ...
## $ Dietary.Fiber....Daily.Value.: int 17 17 17 17 17 18 7 12 7 12 ...
## $ Sugars : int 3 3 2 2 2 3 3 4 3 4 ...
## $ Protein : int 17 18 14 21 21 26 19 19 20 20 ...
## $ Vitamin.A....Daily.Value. : int 10 6 8 15 6 15 10 15 2 6 ...
## $ Vitamin.C....Daily.Value. : int 0 0 0 0 0 2 8 8 8 8 ...
## $ Calcium....Daily.Value. : int 25 25 25 30 25 30 15 20 15 15 ...
## $ Iron....Daily.Value. : int 15 8 10 15 10 20 15 20 10 15 ...
# Per-column summary: quartiles and mean for numeric columns, length and
# class for character columns.
summary(data)
## Category Item Serving.Size Calories
## Length:260 Length:260 Length:260 Min. : 0.0
## Class :character Class :character Class :character 1st Qu.: 210.0
## Mode :character Mode :character Mode :character Median : 340.0
## Mean : 368.3
## 3rd Qu.: 500.0
## Max. :1880.0
## Calories.from.Fat Total.Fat Total.Fat....Daily.Value. Saturated.Fat
## Min. : 0.0 Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 20.0 1st Qu.: 2.375 1st Qu.: 3.75 1st Qu.: 1.000
## Median : 100.0 Median : 11.000 Median : 17.00 Median : 5.000
## Mean : 127.1 Mean : 14.165 Mean : 21.82 Mean : 6.008
## 3rd Qu.: 200.0 3rd Qu.: 22.250 3rd Qu.: 35.00 3rd Qu.:10.000
## Max. :1060.0 Max. :118.000 Max. :182.00 Max. :20.000
## Saturated.Fat....Daily.Value. Trans.Fat Cholesterol
## Min. : 0.00 Min. :0.0000 Min. : 0.00
## 1st Qu.: 4.75 1st Qu.:0.0000 1st Qu.: 5.00
## Median : 24.00 Median :0.0000 Median : 35.00
## Mean : 29.97 Mean :0.2038 Mean : 54.94
## 3rd Qu.: 48.00 3rd Qu.:0.0000 3rd Qu.: 65.00
## Max. :102.00 Max. :2.5000 Max. :575.00
## Cholesterol....Daily.Value. Sodium Sodium....Daily.Value.
## Min. : 0.00 Min. : 0.0 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 107.5 1st Qu.: 4.75
## Median : 11.00 Median : 190.0 Median : 8.00
## Mean : 18.39 Mean : 495.8 Mean : 20.68
## 3rd Qu.: 21.25 3rd Qu.: 865.0 3rd Qu.: 36.25
## Max. :192.00 Max. :3600.0 Max. :150.00
## Carbohydrates Carbohydrates....Daily.Value. Dietary.Fiber
## Min. : 0.00 Min. : 0.00 Min. :0.000
## 1st Qu.: 30.00 1st Qu.:10.00 1st Qu.:0.000
## Median : 44.00 Median :15.00 Median :1.000
## Mean : 47.35 Mean :15.78 Mean :1.631
## 3rd Qu.: 60.00 3rd Qu.:20.00 3rd Qu.:3.000
## Max. :141.00 Max. :47.00 Max. :7.000
## Dietary.Fiber....Daily.Value. Sugars Protein
## Min. : 0.000 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 5.75 1st Qu.: 4.00
## Median : 5.000 Median : 17.50 Median :12.00
## Mean : 6.531 Mean : 29.42 Mean :13.34
## 3rd Qu.:10.000 3rd Qu.: 48.00 3rd Qu.:19.00
## Max. :28.000 Max. :128.00 Max. :87.00
## Vitamin.A....Daily.Value. Vitamin.C....Daily.Value. Calcium....Daily.Value.
## Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 2.00 1st Qu.: 0.000 1st Qu.: 6.00
## Median : 8.00 Median : 0.000 Median :20.00
## Mean : 13.43 Mean : 8.535 Mean :20.97
## 3rd Qu.: 15.00 3rd Qu.: 4.000 3rd Qu.:30.00
## Max. :170.00 Max. :240.000 Max. :70.00
## Iron....Daily.Value.
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 4.000
## Mean : 7.735
## 3rd Qu.:15.000
## Max. :40.000
# Count the missing values in each column.
data %>%
  is.na() %>%
  colSums()
## Category Item
## 0 0
## Serving.Size Calories
## 0 0
## Calories.from.Fat Total.Fat
## 0 0
## Total.Fat....Daily.Value. Saturated.Fat
## 0 0
## Saturated.Fat....Daily.Value. Trans.Fat
## 0 0
## Cholesterol Cholesterol....Daily.Value.
## 0 0
## Sodium Sodium....Daily.Value.
## 0 0
## Carbohydrates Carbohydrates....Daily.Value.
## 0 0
## Dietary.Fiber Dietary.Fiber....Daily.Value.
## 0 0
## Sugars Protein
## 0 0
## Vitamin.A....Daily.Value. Vitamin.C....Daily.Value.
## 0 0
## Calcium....Daily.Value. Iron....Daily.Value.
## 0 0
# Keep only the nutrition features used for clustering, then preview them.
dt1 <- select(
  data,
  Calories, Total.Fat, Carbohydrates, Protein, Sugars, Sodium
)
head(dt1, n = 6L)
## Calories Total.Fat Carbohydrates Protein Sugars Sodium
## 1 300 13 31 17 3 750
## 2 250 8 30 18 3 770
## 3 370 23 29 14 2 780
## 4 450 28 30 21 2 860
## 5 400 23 30 21 2 880
## 6 430 23 31 26 3 960
# Correlation heatmap of the selected features.
# Pairwise correlations are computed on complete rows only.
corr_matrix <- cor(dt1, use = "complete.obs")

# corrplot() draws with all-zero margins by default, which clips the title;
# `mar` reserves two lines at the top so the title is fully visible.
corrplot(
  corr_matrix,
  method = "color",
  type = "upper",
  addCoef.col = "black",
  tl.col = "black",
  title = "Heatmap Fitur Terpilih",
  mar = c(0, 0, 2, 0)
)
## Data Normalization
# Center and scale every column (scale() defaults), then convert the
# resulting matrix back to a data frame.
dt_scaled <- as.data.frame(scale(dt1))
# First look at the unscaled data: total fat plotted against protein.
plot(
  x = dt1$Protein,
  y = dt1$Total.Fat,
  main = "Visualisasi Awal Data",
  xlab = "Protein",
  ylab = "Total Fat",
  pch = 19
)
## Elbow Method
# Total within-cluster sum of squares (WSS) for k = 1..10 on the scaled data.
# kmeans() draws random starting centers, so the seed is fixed here to make
# the curve reproducible between runs.
set.seed(42)
k_values <- 1:10

# vapply() builds the result vector at its final length and guarantees one
# numeric value per k, instead of growing an empty vector inside a loop.
wss <- vapply(
  k_values,
  function(k) kmeans(dt_scaled, centers = k, nstart = 25)$tot.withinss,
  numeric(1)
)

plot(
  k_values, wss,
  type = "b",
  xlab = "Jumlah Cluster",
  ylab = "Inertia (WSS)",
  main = "Elbow Method"
)
## K-Means Clustering
# Fit k-means with 3 centers (25 random starts, fixed seed) on the scaled
# features and attach the cluster labels to the unscaled feature table.
set.seed(42)
kmeans_model <- kmeans(x = dt_scaled, centers = 3, nstart = 25)
dt1[["cluster_kmeans"]] <- kmeans_model[["cluster"]]
head(dt1, n = 6L)
## Calories Total.Fat Carbohydrates Protein Sugars Sodium cluster_kmeans
## 1 300 13 31 17 3 750 1
## 2 250 8 30 18 3 770 1
## 3 370 23 29 14 2 780 3
## 4 450 28 30 21 2 860 3
## 5 400 23 30 21 2 880 3
## 6 430 23 31 26 3 960 3
## Hierarchical Clustering
# Euclidean distances between the scaled rows, agglomerated with the
# "ward.D2" method; the tree is then cut into 3 groups and the labels are
# stored next to the features.
dist_matrix <- dist(x = dt_scaled, method = "euclidean")
hc <- hclust(d = dist_matrix, method = "ward.D2")
plot(x = hc, main = "Dendrogram")
dt1[["cluster_hier"]] <- cutree(tree = hc, k = 3)
## DBSCAN
# Density-based clustering on the scaled features. In the dbscan package a
# label of 0 marks noise points, not a cluster.
dbscan_model <- dbscan(dt_scaled, eps = 0.8, minPts = 5)
dt1$cluster_dbscan <- dbscan_model$cluster

# Labels are shifted by one for colouring: colour index 0 is the background
# colour in base graphics, so noise points would otherwise be invisible.
# Explicit axis labels replace the default raw expressions
# ("dt1$Calories", "dt1$Total.Fat"), matching the earlier scatter plot.
plot(
  dt1$Calories, dt1$Total.Fat,
  col = dt1$cluster_dbscan + 1,
  pch = 19,
  xlab = "Calories",
  ylab = "Total Fat",
  main = "DBSCAN Clustering"
)
## Silhouette Score
# Average silhouette width of the k-means partition, measured on the
# Euclidean distances between the scaled rows.
sil <- silhouette(x = kmeans_model$cluster, dist = dist(dt_scaled))
mean(sil[, "sil_width"])
## [1] 0.4240723
# Mean of each nutrition feature per k-means cluster.
# The label columns written by the other clustering methods are excluded:
# they are category codes, so averaging them produces meaningless numbers.
dt1 %>%
  group_by(cluster_kmeans) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean))
## # A tibble: 3 × 7
## cluster_kmeans Calories Total.Fat Carbohydrates Protein Sugars Sodium
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 197. 5.34 31.4 6.37 23.2 169.
## 2 2 512. 14.9 82.9 12.4 73.0 205.
## 3 3 569. 29.6 49.4 26.6 7.81 1306.
# Mean of each nutrition feature per hierarchical cluster.
# The label columns written by the other clustering methods are excluded:
# they are category codes, so averaging them produces meaningless numbers.
dt1 %>%
  group_by(cluster_hier) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean))
## # A tibble: 3 × 7
## cluster_hier Calories Total.Fat Carbohydrates Protein Sugars Sodium
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 519. 26.3 46.5 24.3 7.51 1175.
## 2 2 114. 3.79 16.5 3.80 11.0 89.6
## 3 3 385. 9.77 65.5 9.49 58.3 153.
# Mean of each nutrition feature per DBSCAN label.
# Label 0 is the noise group in the dbscan package, so its row describes the
# unclustered points rather than a cluster.
# The label columns written by the other clustering methods are excluded:
# they are category codes, so averaging them produces meaningless numbers.
dt1 %>%
  group_by(cluster_dbscan) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean))
## # A tibble: 2 × 7
## cluster_dbscan Calories Total.Fat Carbohydrates Protein Sugars Sodium
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0 780. 36.0 87 27.7 39 1135.
## 2 1 330. 12.2 43.7 12.0 28.5 437.