# K-Means Cluster Analysis - Customer Segmentation

# Load libraries
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(cluster)
library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Read the customer segmentation survey from CSV into a tibble
data <- readr::read_csv(file = "customer_segmentation.csv")

## Rows: 22 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): ID, CS_helpful, Recommend, Come_again, All_Products, Profesionalis...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Remove the ID column so it is not scaled and clustered along with the
# other columns, then preview the first rows
data <- select(data, -ID)
head(data, n = 6)

## # A tibble: 6 × 14
##   CS_helpful Recommend Come_again All_Products Profesionalism Limitation
##        <dbl>     <dbl>      <dbl>        <dbl>          <dbl>      <dbl>
## 1          2         2          2            2              2          2
## 2          1         2          1            1              1          1
## 3          2         1          1            1              1          2
## 4          3         3          2            4              1          2
## 5          2         1          3            5              2          1
## 6          1         1          3            2              1          1
## # ℹ 8 more variables: Online_grocery <dbl>, delivery <dbl>, Pick_up <dbl>,
## #   Find_items <dbl>, other_shops <dbl>, Gender <dbl>, Age <dbl>,
## #   Education <dbl>

# Standardize every column (subtract the column mean, divide by the column
# standard deviation); the result is a numeric matrix
data_scaled <- scale(data, center = TRUE, scale = TRUE)

# Elbow method to choose optimal k: plot total within-cluster sum of squares
# (WSS) against the number of clusters.
# kmeans() starts from randomly chosen centers, so seed the RNG and forward
# the same nstart that the final model uses; otherwise the WSS curve changes
# from run to run and may reflect a poor single random start.
set.seed(42)
fviz_nbclust(data_scaled, kmeans, method = "wss", nstart = 25) +
  labs(title = "Elbow Method for Optimal k")

# Fit the 3-cluster K-Means model on the scaled matrix. The seed makes the
# random initial centers reproducible; nstart = 25 keeps the best of 25 starts.
set.seed(42)
kmeans_result <- kmeans(x = data_scaled, centers = 3, nstart = 25)

# Store each row's cluster assignment on the unscaled data as a factor
data[["Cluster"]] <- as.factor(kmeans_result[["cluster"]])
head(data, n = 6)

## # A tibble: 6 × 15
##   CS_helpful Recommend Come_again All_Products Profesionalism Limitation
##        <dbl>     <dbl>      <dbl>        <dbl>          <dbl>      <dbl>
## 1          2         2          2            2              2          2
## 2          1         2          1            1              1          1
## 3          2         1          1            1              1          2
## 4          3         3          2            4              1          2
## 5          2         1          3            5              2          1
## 6          1         1          3            2              1          1
## # ℹ 9 more variables: Online_grocery <dbl>, delivery <dbl>, Pick_up <dbl>,
## #   Find_items <dbl>, other_shops <dbl>, Gender <dbl>, Age <dbl>,
## #   Education <dbl>, Cluster <fct>

# Profile the clusters: mean of every column within each cluster,
# reported as avg_<column>
data %>%
  group_by(Cluster) %>%
  summarise(
    across(
      .cols = everything(),
      .fns = function(values) mean(values),
      .names = "avg_{.col}"
    )
  )

## # A tibble: 3 × 15
##   Cluster avg_CS_helpful avg_Recommend avg_Come_again avg_All_Products
##   <fct>            <dbl>         <dbl>          <dbl>            <dbl>
## 1 1                 1             1              1.5              2.17
## 2 2                 2.5           2              2.5              3.25
## 3 3                 1.58          1.25           1.08             1.67
## # ℹ 10 more variables: avg_Profesionalism <dbl>, avg_Limitation <dbl>,
## #   avg_Online_grocery <dbl>, avg_delivery <dbl>, avg_Pick_up <dbl>,
## #   avg_Find_items <dbl>, avg_other_shops <dbl>, avg_Gender <dbl>,
## #   avg_Age <dbl>, avg_Education <dbl>

# Plot the cluster assignments as points with a normal-distribution ellipse
# per cluster, using the "jco" palette and a minimal ggplot2 theme
fviz_cluster(
  object = kmeans_result,
  data = data_scaled,
  geom = "point",
  ellipse.type = "norm",
  palette = "jco",
  ggtheme = theme_minimal()
)