Load Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(summarytools)
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## system might not have X11 capabilities; in case of errors when using dfSummary(), set st_options(use.x11 = FALSE)
## 
## Attaching package: 'summarytools'
## 
## The following object is masked from 'package:tibble':
## 
##     view
library(ggplot2)

Load Data

data <- read.csv("customer_segmentation.csv")

# Clean column names
colnames(data) <- trimws(colnames(data))

# View structure
str(data)
## 'data.frame':    22 obs. of  15 variables:
##  $ ID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful    : int  2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend     : int  2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_again    : int  2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Products  : int  2 1 1 4 5 2 2 2 2 1 ...
##  $ Profesionalism: int  2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation    : int  2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_grocery: int  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : int  3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up       : int  4 3 2 2 1 1 2 2 3 2 ...
##  $ Find_items    : int  1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : int  2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender        : int  1 1 1 1 2 1 1 1 2 2 ...
##  $ Age           : int  2 2 2 3 4 2 2 2 2 2 ...
##  $ Education     : int  2 2 2 5 2 5 3 2 1 2 ...

Data Overview

summary(data)
##        ID          CS_helpful      Recommend       Come_again   
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##   All_Products   Profesionalism    Limitation  Online_grocery     delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     Pick_up        Find_items     other_shops        Gender     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.455   Mean   :1.455   Mean   :2.591   Mean   :1.273  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.750  
##  Max.   :5.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       Age          Education    
##  Min.   :2.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.500  
##  Mean   :2.455   Mean   :3.182  
##  3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :4.000   Max.   :5.000
dfSummary(data)
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Warning in png(png_loc <- tempfile(fileext = ".png"), width = 150 *
## graph.magnif, : unable to open connection to X11 display ''
## Data Frame Summary  
## data  
## Dimensions: 22 x 15  
## Duplicates: 0  
## 
## ------------------------------------------------------------------------------------------------------------
## No   Variable         Stats / Values           Freqs (% of Valid)   Graph               Valid      Missing  
## ---- ---------------- ------------------------ -------------------- ------------------- ---------- ---------
## 1    ID               Mean (sd) : 11.5 (6.5)   22 distinct values   : : : :             22         0        
##      [integer]        min < med < max:         (Integer sequence)   : : : :             (100.0%)   (0.0%)   
##                       1 < 11.5 < 22                                 : : : :                                 
##                       IQR (CV) : 10.5 (0.6)                         : : : : :                               
##                                                                     : : : : :                               
## 
## 2    CS_helpful       Mean (sd) : 1.6 (0.7)    1 : 12 (54.5%)       IIIIIIIIII          22         0        
##      [integer]        min < med < max:         2 :  7 (31.8%)       IIIIII              (100.0%)   (0.0%)   
##                       1 < 1 < 3                3 :  3 (13.6%)       II                                      
##                       IQR (CV) : 1 (0.5)                                                                    
## 
## 3    Recommend        Mean (sd) : 1.3 (0.6)    1 : 17 (77.3%)       IIIIIIIIIIIIIII     22         0        
##      [integer]        min < med < max:         2 :  3 (13.6%)       II                  (100.0%)   (0.0%)   
##                       1 < 1 < 3                3 :  2 ( 9.1%)       I                                       
##                       IQR (CV) : 0 (0.5)                                                                    
## 
## 4    Come_again       Mean (sd) : 1.5 (0.7)    1 : 15 (68.2%)       IIIIIIIIIIIII       22         0        
##      [integer]        min < med < max:         2 :  4 (18.2%)       III                 (100.0%)   (0.0%)   
##                       1 < 1 < 3                3 :  3 (13.6%)       II                                      
##                       IQR (CV) : 1 (0.5)                                                                    
## 
## 5    All_Products     Mean (sd) : 2.1 (1.1)    1 :  6 (27.3%)       IIIII               22         0        
##      [integer]        min < med < max:         2 : 12 (54.5%)       IIIIIIIIII          (100.0%)   (0.0%)   
##                       1 < 2 < 5                3 :  1 ( 4.5%)                                               
##                       IQR (CV) : 0.8 (0.5)     4 :  2 ( 9.1%)       I                                       
##                                                5 :  1 ( 4.5%)                                               
## 
## 6    Profesionalism   Mean (sd) : 1.4 (0.6)    1 : 14 (63.6%)       IIIIIIIIIIII        22         0        
##      [integer]        min < med < max:         2 :  7 (31.8%)       IIIIII              (100.0%)   (0.0%)   
##                       1 < 1 < 3                3 :  1 ( 4.5%)                                               
##                       IQR (CV) : 1 (0.4)                                                                    
## 
## 7    Limitation       Mean (sd) : 1.5 (0.8)    1 : 14 (63.6%)       IIIIIIIIIIII        22         0        
##      [integer]        min < med < max:         2 :  6 (27.3%)       IIIII               (100.0%)   (0.0%)   
##                       1 < 1 < 4                3 :  1 ( 4.5%)                                               
##                       IQR (CV) : 1 (0.5)       4 :  1 ( 4.5%)                                               
## 
## 8    Online_grocery   Mean (sd) : 2.3 (0.8)    1 :  4 (18.2%)       III                 22         0        
##      [integer]        min < med < max:         2 :  8 (36.4%)       IIIIIII             (100.0%)   (0.0%)   
##                       1 < 2 < 3                3 : 10 (45.5%)       IIIIIIIII                               
##                       IQR (CV) : 1 (0.3)                                                                    
## 
## 9    delivery         Mean (sd) : 2.4 (0.7)    1 :  3 (13.6%)       II                  22         0        
##      [integer]        min < med < max:         2 :  7 (31.8%)       IIIIII              (100.0%)   (0.0%)   
##                       1 < 3 < 3                3 : 12 (54.5%)       IIIIIIIIII                              
##                       IQR (CV) : 1 (0.3)                                                                    
## 
## 10   Pick_up          Mean (sd) : 2.5 (1.1)    1 : 4 (18.2%)        III                 22         0        
##      [integer]        min < med < max:         2 : 8 (36.4%)        IIIIIII             (100.0%)   (0.0%)   
##                       1 < 2 < 5                3 : 7 (31.8%)        IIIIII                                  
##                       IQR (CV) : 1 (0.4)       4 : 2 ( 9.1%)        I                                       
##                                                5 : 1 ( 4.5%)                                                
## 
## 11   Find_items       Mean (sd) : 1.5 (0.7)    1 : 14 (63.6%)       IIIIIIIIIIII        22         0        
##      [integer]        min < med < max:         2 :  6 (27.3%)       IIIII               (100.0%)   (0.0%)   
##                       1 < 1 < 3                3 :  2 ( 9.1%)       I                                       
##                       IQR (CV) : 1 (0.5)                                                                    
## 
## 12   other_shops      Mean (sd) : 2.6 (1.4)    1 : 6 (27.3%)        IIIII               22         0        
##      [integer]        min < med < max:         2 : 6 (27.3%)        IIIII               (100.0%)   (0.0%)   
##                       1 < 2 < 5                3 : 4 (18.2%)        III                                     
##                       IQR (CV) : 2.5 (0.5)     4 : 3 (13.6%)        II                                      
##                                                5 : 3 (13.6%)        II                                      
## 
## 13   Gender           Min  : 1                 1 : 16 (72.7%)       IIIIIIIIIIIIII      22         0        
##      [integer]        Mean : 1.3               2 :  6 (27.3%)       IIIII               (100.0%)   (0.0%)   
##                       Max  : 2                                                                              
## 
## 14   Age              Mean (sd) : 2.5 (0.7)    2 : 15 (68.2%)       IIIIIIIIIIIII       22         0        
##      [integer]        min < med < max:         3 :  4 (18.2%)       III                 (100.0%)   (0.0%)   
##                       2 < 2 < 4                4 :  3 (13.6%)       II                                      
##                       IQR (CV) : 1 (0.3)                                                                    
## 
## 15   Education        Mean (sd) : 3.2 (1.6)    1 : 3 (13.6%)        II                  22         0        
##      [integer]        min < med < max:         2 : 8 (36.4%)        IIIIIII             (100.0%)   (0.0%)   
##                       1 < 2.5 < 5              3 : 2 ( 9.1%)        I                                       
##                       IQR (CV) : 3 (0.5)       5 : 9 (40.9%)        IIIIIIII                                
## ------------------------------------------------------------------------------------------------------------

Demographics Analysis

# Gender distribution
ggplot(data, aes(x = factor(Gender))) +
  geom_bar() +
  labs(title = "Gender Distribution", x = "Gender", y = "Count")

# Age distribution
ggplot(data, aes(x = factor(Age))) +
  geom_bar() +
  labs(title = "Age Distribution", x = "Age Group", y = "Count")

# Education distribution
ggplot(data, aes(x = factor(Education))) +
  geom_bar() +
  labs(title = "Education Levels", x = "Education", y = "Count")

Customer Behavior Analysis

behavior_vars <- data %>%
  select(CS_helpful, Recommend, Come_again, All_Products)

# Correlation matrix
cor(behavior_vars, use = "complete.obs")
##              CS_helpful  Recommend Come_again All_Products
## CS_helpful    1.0000000 0.48809623  0.2714620   0.29345435
## Recommend     0.4880962 1.00000000  0.3808907   0.02515624
## Come_again    0.2714620 0.38089069  1.0000000   0.36875582
## All_Products  0.2934543 0.02515624  0.3687558   1.00000000
# Scatterplot matrix
pairs(behavior_vars, main = "Customer Behavior Relationships")

Service Usage Analysis

services <- data %>%
  select(Online_grocery, delivery, Pick_up, Find_items)

# Average usage
colMeans(services, na.rm = TRUE)
## Online_grocery       delivery        Pick_up     Find_items 
##       2.272727       2.409091       2.454545       1.454545
# Boxplot
services_long <- pivot_longer(services, cols = everything())

ggplot(services_long, aes(x = name, y = value)) +
  geom_boxplot() +
  labs(title = "Service Usage Distribution", x = "Service", y = "Score")

Customer Satisfaction Insights

satisfaction <- data %>%
  select(CS_helpful, Profesionalism, Limitation)

# Means
colMeans(satisfaction, na.rm = TRUE)
##     CS_helpful Profesionalism     Limitation 
##       1.590909       1.409091       1.500000
# Boxplot
satisfaction_long <- pivot_longer(satisfaction, cols = everything())

ggplot(satisfaction_long, aes(x = name, y = value)) +
  geom_boxplot() +
  labs(title = "Customer Satisfaction Metrics", x = "Metric", y = "Score")

Satisfaction vs Loyalty

ggplot(data, aes(x = CS_helpful, y = Recommend)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Helpfulness vs Recommendation")
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data, aes(x = CS_helpful, y = Come_again)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(title = "Helpfulness vs Return Intent")
## `geom_smooth()` using formula = 'y ~ x'

Customer Segmentation (K-Means)

seg_data <- data %>%
  select(CS_helpful, Recommend, Come_again, All_Products, delivery, Pick_up)

# Scale data
seg_scaled <- scale(seg_data)

# K-means clustering
set.seed(123)
clusters <- kmeans(seg_scaled, centers = 3)

# Add cluster to dataset
data$Cluster <- as.factor(clusters$cluster)

# Visualize clusters
ggplot(data, aes(x = CS_helpful, y = Recommend, color = Cluster)) +
  geom_point(size = 3) +
  labs(title = "Customer Segments")

Cluster Insights

aggregate(seg_data, by = list(Cluster = data$Cluster), mean)
##   Cluster CS_helpful Recommend Come_again All_Products delivery Pick_up
## 1       1       1.10     1.000      1.300        2.000      1.7   2.200
## 2       2       2.50     2.000      2.500        3.250      3.0   1.250
## 3       3       1.75     1.375      1.125        1.625      3.0   3.375