Setup

Load Data

data <- read.csv("customer_segmentation.csv")
head(data)
##   ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1  1          2         2          2            2              2          2
## 2  2          1         2          1            1              1          1
## 3  3          2         1          1            1              1          2
## 4  4          3         3          2            4              1          2
## 5  5          2         1          3            5              2          1
## 6  6          1         1          3            2              1          1
##   Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1              2        3       4          1           2      1   2         2
## 2              2        3       3          1           2      1   2         2
## 3              3        3       2          1           3      1   2         2
## 4              3        3       2          2           2      1   3         5
## 5              2        3       1          2           3      2   4         2
## 6              1        2       1          1           4      1   2         5

Data Preparation

data_scaled <- scale(data)
summary(data_scaled)
##        ID            CS_helpful        Recommend         Come_again     
##  Min.   :-1.6170   Min.   :-0.8049   Min.   :-0.4923   Min.   :-0.6155  
##  1st Qu.:-0.8085   1st Qu.:-0.8049   1st Qu.:-0.4923   1st Qu.:-0.6155  
##  Median : 0.0000   Median :-0.8049   Median :-0.4923   Median :-0.6155  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.8085   3rd Qu.: 0.5572   3rd Qu.:-0.4923   3rd Qu.: 0.7385  
##  Max.   : 1.6170   Max.   : 1.9194   Max.   : 2.6021   Max.   : 2.0926  
##   All_Products      Profesionalism     Limitation      Online_grocery   
##  Min.   :-1.02434   Min.   :-0.693   Min.   :-0.6236   Min.   :-1.6587  
##  1st Qu.:-0.78960   1st Qu.:-0.693   1st Qu.:-0.6236   1st Qu.:-0.3554  
##  Median :-0.08536   Median :-0.693   Median :-0.6236   Median :-0.3554  
##  Mean   : 0.00000   Mean   : 0.000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.08536   3rd Qu.: 1.001   3rd Qu.: 0.6236   3rd Qu.: 0.9478  
##  Max.   : 2.73157   Max.   : 2.695   Max.   : 3.1180   Max.   : 0.9478  
##     delivery          Pick_up          Find_items       other_shops     
##  Min.   :-1.9194   Min.   :-1.3763   Min.   :-0.6774   Min.   :-1.1342  
##  1st Qu.:-0.5572   1st Qu.:-0.4301   1st Qu.:-0.6774   1st Qu.:-0.9560  
##  Median : 0.8049   Median :-0.4301   Median :-0.6774   Median :-0.4213  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.8049   3rd Qu.: 0.5161   3rd Qu.: 0.8129   3rd Qu.: 0.8263  
##  Max.   : 0.8049   Max.   : 2.4086   Max.   : 2.3033   Max.   : 1.7175  
##      Gender             Age            Education      
##  Min.   :-0.5983   Min.   :-0.6155   Min.   :-1.3448  
##  1st Qu.:-0.5983   1st Qu.:-0.6155   1st Qu.:-0.7285  
##  Median :-0.5983   Median :-0.6155   Median :-0.4203  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 1.0470   3rd Qu.: 0.7385   3rd Qu.: 1.1207  
##  Max.   : 1.5954   Max.   : 2.0926   Max.   : 1.1207

Determine Optimal Number of Clusters

fviz_nbclust(data_scaled, kmeans, method = "wss")

fviz_nbclust(data_scaled, kmeans, method = "silhouette")

K-Means Clustering

set.seed(123)
k <- 3
km <- kmeans(data_scaled, centers = k, nstart = 25)
km
## K-means clustering with 3 clusters of sizes 12, 6, 4
## 
## Cluster means:
##            ID  CS_helpful  Recommend  Come_again All_Products Profesionalism
## 1 -0.02566635 -0.01031923 -0.1054899 -0.50262359  -0.39835424     0.01283318
## 2  0.00000000 -0.80490011 -0.4922862  0.06154575   0.07113469    -0.69299145
## 3  0.07699905  1.23830786  1.0548991  1.41555215   1.08836068     1.00098765
##      Limitation Online_grocery   delivery    Pick_up Find_items other_shops
## 1  1.850372e-17     0.40480555  0.2373423  0.5949772 -0.1806489  -0.4212692
## 2 -4.157397e-01    -0.78986449 -1.0112848 -0.4301040 -0.1806489   0.7669260
## 3  6.236096e-01    -0.02961992  0.8049001 -1.1397755  0.8129201   0.1134186
##       Gender        Age  Education
## 1 -0.2326695 -0.3897897 -0.3688989
## 2 -0.2326695  0.5128812  0.8125115
## 3  1.0470128  0.4000473 -0.1120706
## 
## Clustering vector:
##  [1] 1 1 1 3 3 2 1 2 1 1 2 1 2 1 2 2 1 1 3 3 1 1
## 
## Within cluster sum of squares by cluster:
## [1] 110.31871  48.18146  63.52384
##  (between_SS / total_SS =  29.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Cluster Visualization

fviz_cluster(km, data_scaled)

Append Cluster Assignments

data$cluster <- km$cluster
head(data)
##   ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1  1          2         2          2            2              2          2
## 2  2          1         2          1            1              1          1
## 3  3          2         1          1            1              1          2
## 4  4          3         3          2            4              1          2
## 5  5          2         1          3            5              2          1
## 6  6          1         1          3            2              1          1
##   Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1              2        3       4          1           2      1   2         2
## 2              2        3       3          1           2      1   2         2
## 3              3        3       2          1           3      1   2         2
## 4              3        3       2          2           2      1   3         5
## 5              2        3       1          2           3      2   4         2
## 6              1        2       1          1           4      1   2         5
##   cluster
## 1       1
## 2       1
## 3       1
## 4       3
## 5       3
## 6       2

Cluster Summaries

data %>% 
  group_by(cluster) %>% 
  summarise(across(everything(), mean))
## # A tibble: 3 × 16
##   cluster    ID CS_helpful Recommend Come_again All_Products Profesionalism
##     <int> <dbl>      <dbl>     <dbl>      <dbl>        <dbl>          <dbl>
## 1       1  11.3       1.58      1.25       1.08         1.67           1.42
## 2       2  11.5       1         1          1.5          2.17           1   
## 3       3  12         2.5       2          2.5          3.25           2   
## # ℹ 9 more variables: Limitation <dbl>, Online_grocery <dbl>, delivery <dbl>,
## #   Pick_up <dbl>, Find_items <dbl>, other_shops <dbl>, Gender <dbl>,
## #   Age <dbl>, Education <dbl>