Setup
Load Data
# Read the customer segmentation CSV into a data frame and preview the
# first six rows.
data <- read.csv(file = "customer_segmentation.csv")
head(data, n = 6)
## ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1 1 2 2 2 2 2 2
## 2 2 1 2 1 1 1 1
## 3 3 2 1 1 1 1 2
## 4 4 3 3 2 4 1 2
## 5 5 2 1 3 5 2 1
## 6 6 1 1 3 2 1 1
## Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1 2 3 4 1 2 1 2 2
## 2 2 3 3 1 2 1 2 2
## 3 3 3 2 1 3 1 2 2
## 4 3 3 2 2 2 1 3 5
## 5 2 3 1 2 3 2 4 2
## 6 1 2 1 1 4 1 2 5
Data Preparation
# Centre and scale every column (mean 0, standard deviation 1) before the
# matrix is handed to kmeans(), then print per-column summaries as a check.
# NOTE(review): `data` still contains the `ID` column at this point, so the
# identifier is standardised and later clustered on like any other variable.
# Confirm whether it should be dropped before scaling.
data_scaled <- scale(data, center = TRUE, scale = TRUE)
summary(data_scaled)
## ID CS_helpful Recommend Come_again
## Min. :-1.6170 Min. :-0.8049 Min. :-0.4923 Min. :-0.6155
## 1st Qu.:-0.8085 1st Qu.:-0.8049 1st Qu.:-0.4923 1st Qu.:-0.6155
## Median : 0.0000 Median :-0.8049 Median :-0.4923 Median :-0.6155
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.8085 3rd Qu.: 0.5572 3rd Qu.:-0.4923 3rd Qu.: 0.7385
## Max. : 1.6170 Max. : 1.9194 Max. : 2.6021 Max. : 2.0926
## All_Products Profesionalism Limitation Online_grocery
## Min. :-1.02434 Min. :-0.693 Min. :-0.6236 Min. :-1.6587
## 1st Qu.:-0.78960 1st Qu.:-0.693 1st Qu.:-0.6236 1st Qu.:-0.3554
## Median :-0.08536 Median :-0.693 Median :-0.6236 Median :-0.3554
## Mean : 0.00000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.:-0.08536 3rd Qu.: 1.001 3rd Qu.: 0.6236 3rd Qu.: 0.9478
## Max. : 2.73157 Max. : 2.695 Max. : 3.1180 Max. : 0.9478
## delivery Pick_up Find_items other_shops
## Min. :-1.9194 Min. :-1.3763 Min. :-0.6774 Min. :-1.1342
## 1st Qu.:-0.5572 1st Qu.:-0.4301 1st Qu.:-0.6774 1st Qu.:-0.9560
## Median : 0.8049 Median :-0.4301 Median :-0.6774 Median :-0.4213
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.8049 3rd Qu.: 0.5161 3rd Qu.: 0.8129 3rd Qu.: 0.8263
## Max. : 0.8049 Max. : 2.4086 Max. : 2.3033 Max. : 1.7175
## Gender Age Education
## Min. :-0.5983 Min. :-0.6155 Min. :-1.3448
## 1st Qu.:-0.5983 1st Qu.:-0.6155 1st Qu.:-0.7285
## Median :-0.5983 Median :-0.6155 Median :-0.4203
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 1.0470 3rd Qu.: 0.7385 3rd Qu.: 1.1207
## Max. : 1.5954 Max. : 2.0926 Max. : 1.1207
Determine Optimal Number of Clusters
# Diagnostic plots for choosing the number of clusters.
# kmeans() chooses its starting centres at random, so the seed is fixed
# before each call to make the plot reproducible on its own. nstart = 25 is
# forwarded to kmeans() so each candidate k is fitted with the same number of
# random restarts as the final model below.

# Elbow plot: total within-cluster sum of squares for each candidate k.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "wss", nstart = 25)

# Average silhouette width for each candidate k.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "silhouette", nstart = 25)

K-Means Clustering
# Fit the final k-means model on the scaled data.
# The seed is fixed first because kmeans() picks its starting centres at
# random; with nstart = 25 the best of 25 random starts is kept.
set.seed(123)
k <- 3
km <- kmeans(x = data_scaled, centers = k, nstart = 25)
print(km)
## K-means clustering with 3 clusters of sizes 12, 6, 4
##
## Cluster means:
## ID CS_helpful Recommend Come_again All_Products Profesionalism
## 1 -0.02566635 -0.01031923 -0.1054899 -0.50262359 -0.39835424 0.01283318
## 2 0.00000000 -0.80490011 -0.4922862 0.06154575 0.07113469 -0.69299145
## 3 0.07699905 1.23830786 1.0548991 1.41555215 1.08836068 1.00098765
## Limitation Online_grocery delivery Pick_up Find_items other_shops
## 1 1.850372e-17 0.40480555 0.2373423 0.5949772 -0.1806489 -0.4212692
## 2 -4.157397e-01 -0.78986449 -1.0112848 -0.4301040 -0.1806489 0.7669260
## 3 6.236096e-01 -0.02961992 0.8049001 -1.1397755 0.8129201 0.1134186
## Gender Age Education
## 1 -0.2326695 -0.3897897 -0.3688989
## 2 -0.2326695 0.5128812 0.8125115
## 3 1.0470128 0.4000473 -0.1120706
##
## Clustering vector:
## [1] 1 1 1 3 3 2 1 2 1 1 2 1 2 1 2 2 1 1 3 3 1 1
##
## Within cluster sum of squares by cluster:
## [1] 110.31871 48.18146 63.52384
## (between_SS / total_SS = 29.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
Cluster Visualization
# Plot the fitted clusters, using the same scaled matrix the model was
# fitted on.
fviz_cluster(object = km, data = data_scaled)

Append Cluster Assignments
# Attach each row's cluster label to the original (unscaled) data frame and
# preview the result.
data[["cluster"]] <- km[["cluster"]]
head(data, n = 6)
## ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1 1 2 2 2 2 2 2
## 2 2 1 2 1 1 1 1
## 3 3 2 1 1 1 1 2
## 4 4 3 3 2 4 1 2
## 5 5 2 1 3 5 2 1
## 6 6 1 1 3 2 1 1
## Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1 2 3 4 1 2 1 2 2
## 2 2 3 3 1 2 1 2 2
## 3 3 3 2 1 3 1 2 2
## 4 3 3 2 2 2 1 3 5
## 5 2 3 1 2 3 2 4 2
## 6 1 2 1 1 4 1 2 5
## cluster
## 1 1
## 2 1
## 3 1
## 4 3
## 5 3
## 6 2
Cluster Summaries
# Mean of every column within each cluster, on the original (unscaled) scale.
# NOTE(review): everything() also averages `ID`, which appears to be a row
# identifier rather than a measurement. Confirm whether it belongs here.
data %>%
  group_by(cluster) %>%
  summarise(across(.cols = everything(), .fns = mean))
## # A tibble: 3 × 16
## cluster ID CS_helpful Recommend Come_again All_Products Profesionalism
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 11.3 1.58 1.25 1.08 1.67 1.42
## 2 2 11.5 1 1 1.5 2.17 1
## 3 3 12 2.5 2 2.5 3.25 2
## # ℹ 9 more variables: Limitation <dbl>, Online_grocery <dbl>, delivery <dbl>,
## # Pick_up <dbl>, Find_items <dbl>, other_shops <dbl>, Gender <dbl>,
## # Age <dbl>, Education <dbl>