library(tidyverse)
library(psych)
library(corrplot)

Load Data

# Import the raw survey responses. The path is relative, so the CSV must be
# in the current working directory.
data <- read.csv(file = "customer_segmentation.csv")

# Print the column types and leading values of the imported data frame.
utils::str(data)
## 'data.frame':    22 obs. of  15 variables:
##  $ ID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful    : int  2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend     : int  2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_again    : int  2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Products  : int  2 1 1 4 5 2 2 2 2 1 ...
##  $ Profesionalism: int  2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation    : int  2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_grocery: int  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : int  3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up       : int  4 3 2 2 1 1 2 2 3 2 ...
##  $ Find_items    : int  1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : int  2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender        : int  1 1 1 1 2 1 1 1 2 2 ...
##  $ Age           : int  2 2 2 3 4 2 2 2 2 2 ...
##  $ Education     : int  2 2 2 5 2 5 3 2 1 2 ...
# Per-column quartiles, mean and range of the imported columns.
summary(object = data)
##        ID          CS_helpful      Recommend       Come_again   
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##   All_Products   Profesionalism    Limitation  Online_grocery     delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     Pick_up        Find_items     other_shops        Gender     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.455   Mean   :1.455   Mean   :2.591   Mean   :1.273  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.750  
##  Max.   :5.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       Age          Education    
##  Min.   :2.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.500  
##  Mean   :2.455   Mean   :3.182  
##  3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :4.000   Max.   :5.000
# Preview the first six rows (the default row count, spelled out).
head(x = data, n = 6L)
##   ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1  1          2         2          2            2              2          2
## 2  2          1         2          1            1              1          1
## 3  3          2         1          1            1              1          2
## 4  4          3         3          2            4              1          2
## 5  5          2         1          3            5              2          1
## 6  6          1         1          3            2              1          1
##   Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1              2        3       4          1           2      1   2         2
## 2              2        3       3          1           2      1   2         2
## 3              3        3       2          1           3      1   2         2
## 4              3        3       2          2           2      1   3         5
## 5              2        3       1          2           3      2   4         2
## 6              1        2       1          1           4      1   2         5

Clean Column Names

# Strip literal space characters from every column name.
# NOTE(review): read.csv() was called with its default check.names = TRUE,
# which normally replaces spaces in headers with dots before this line runs,
# so this substitution may never match anything -- confirm whether dots
# should be removed instead.
colnames(data) <- gsub(" ", "", colnames(data), fixed = TRUE)
colnames(data)
##  [1] "ID"             "CS_helpful"     "Recommend"      "Come_again"    
##  [5] "All_Products"   "Profesionalism" "Limitation"     "Online_grocery"
##  [9] "delivery"       "Pick_up"        "Find_items"     "other_shops"   
## [13] "Gender"         "Age"            "Education"

Convert Variables

# Recode the three demographic columns from integer codes to factors.
# Each statement touches a different column, so their order is irrelevant.

# Age: ordered factor; levels are taken from the codes present in the data
# and keep their numeric labels.
data[["Age"]] <- factor(data[["Age"]], ordered = TRUE)

# Education: codes 1-5 mapped to named attainment levels (unordered).
data[["Education"]] <- factor(
  data[["Education"]],
  levels = 1:5,
  labels = c("Primary", "Secondary", "HighSchool", "Bachelor", "Postgrad")
)

# Gender: code 1 is labelled "Male", code 2 "Female".
data[["Gender"]] <- factor(
  data[["Gender"]],
  levels = 1:2,
  labels = c("Male", "Female")
)

# Re-inspect the structure to confirm the new column types.
utils::str(data)
## 'data.frame':    22 obs. of  15 variables:
##  $ ID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful    : int  2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend     : int  2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_again    : int  2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Products  : int  2 1 1 4 5 2 2 2 2 1 ...
##  $ Profesionalism: int  2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation    : int  2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_grocery: int  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : int  3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up       : int  4 3 2 2 1 1 2 2 3 2 ...
##  $ Find_items    : int  1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : int  2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender        : Factor w/ 2 levels "Male","Female": 1 1 1 1 2 1 1 1 2 2 ...
##  $ Age           : Ord.factor w/ 3 levels "2"<"3"<"4": 1 1 1 2 3 1 1 1 1 1 ...
##  $ Education     : Factor w/ 5 levels "Primary","Secondary",..: 2 2 2 5 2 5 3 2 1 2 ...

Descriptive Statistics

# Names of the eleven survey-item columns that are analysed numerically.
items <- c(
  "CS_helpful",
  "Recommend",
  "Come_again",
  "All_Products",
  "Profesionalism",
  "Limitation",
  "Online_grocery",
  "delivery",
  "Pick_up",
  "Find_items",
  "other_shops"
)

# Descriptive statistics for each item; the result is kept in `descr`
# and printed.
descr <- psych::describe(data[items])
descr
##                vars  n mean   sd median trimmed  mad min max range  skew
## CS_helpful        1 22 1.59 0.73      1    1.50 0.00   1   3     2  0.73
## Recommend         2 22 1.32 0.65      1    1.17 0.00   1   3     2  1.67
## Come_again        3 22 1.45 0.74      1    1.33 0.00   1   3     2  1.16
## All_Products      4 22 2.09 1.06      2    1.94 0.00   1   5     4  1.18
## Profesionalism    5 22 1.41 0.59      1    1.33 0.00   1   3     2  1.00
## Limitation        6 22 1.50 0.80      1    1.33 0.00   1   4     3  1.59
## Online_grocery    7 22 2.27 0.77      2    2.33 1.48   1   3     2 -0.46
## delivery          8 22 2.41 0.73      3    2.50 0.00   1   3     2 -0.73
## Pick_up           9 22 2.45 1.06      2    2.39 1.48   1   5     4  0.46
## Find_items       10 22 1.45 0.67      1    1.33 0.00   1   3     2  1.06
## other_shops      11 22 2.59 1.40      2    2.50 1.48   1   5     4  0.42
##                kurtosis   se
## CS_helpful        -0.89 0.16
## Recommend          1.38 0.14
## Come_again        -0.23 0.16
## All_Products       0.79 0.23
## Profesionalism    -0.14 0.13
## Limitation         1.99 0.17
## Online_grocery    -1.25 0.16
## delivery          -0.89 0.16
## Pick_up           -0.37 0.23
## Find_items        -0.19 0.14
## other_shops       -1.21 0.30

Frequency Tables

# Gender: counts, then shares of the sample.
table(data[["Gender"]])
## 
##   Male Female 
##     16      6
prop.table(table(data[["Gender"]]))
## 
##      Male    Female 
## 0.7272727 0.2727273
# Education: counts, then shares (levels with no respondents still appear).
table(data[["Education"]])
## 
##    Primary  Secondary HighSchool   Bachelor   Postgrad 
##          3          8          2          0          9
prop.table(table(data[["Education"]]))
## 
##    Primary  Secondary HighSchool   Bachelor   Postgrad 
## 0.13636364 0.36363636 0.09090909 0.00000000 0.40909091
# Age: counts, then shares, per age code.
table(data[["Age"]])
## 
##  2  3  4 
## 15  4  3
prop.table(table(data[["Age"]]))
## 
##         2         3         4 
## 0.6818182 0.1818182 0.1363636

Graph 1: Customer Service Helpful

# Bar chart: number of respondents per CS_helpful response code.
ggplot(data = data, mapping = aes(x = CS_helpful)) +
  labs(
    x = "Response",
    y = "Count",
    title = "Distribution of Customer Service Helpful"
  ) +
  geom_bar(fill = "steelblue")

Graph 2: Gender Distribution

# Bar chart: number of respondents in each Gender category.
ggplot(data = data, mapping = aes(x = Gender)) +
  labs(
    x = "Gender",
    y = "Count",
    title = "Gender Distribution"
  ) +
  geom_bar(fill = "darkgreen")

Correlation Matrix

# Pairwise Pearson correlations between the survey items. Each pair uses
# all rows where both items are observed.
cor_mat <- cor(
  x = data[items],
  use = "pairwise.complete.obs",
  method = "pearson"
)

# Print the matrix rounded to two decimals for readability.
round(cor_mat, digits = 2)
##                CS_helpful Recommend Come_again All_Products Profesionalism
## CS_helpful           1.00      0.49       0.27         0.29           0.51
## Recommend            0.49      1.00       0.38         0.03           0.39
## Come_again           0.27      0.38       1.00         0.37           0.43
## All_Products         0.29      0.03       0.37         1.00           0.09
## Profesionalism       0.51      0.39       0.43         0.09           1.00
## Limitation           0.61      0.05       0.00         0.06           0.05
## Online_grocery       0.21      0.30      -0.15        -0.15           0.06
## delivery             0.59      0.42       0.17         0.07           0.25
## Pick_up             -0.18     -0.08      -0.52        -0.25          -0.16
## Find_items           0.30     -0.02       0.04         0.54          -0.01
## other_shops         -0.31     -0.06       0.33         0.22          -0.19
##                Limitation Online_grocery delivery Pick_up Find_items
## CS_helpful           0.61           0.21     0.59   -0.18       0.30
## Recommend            0.05           0.30     0.42   -0.08      -0.02
## Come_again           0.00          -0.15     0.17   -0.52       0.04
## All_Products         0.06          -0.15     0.07   -0.25       0.54
## Profesionalism       0.05           0.06     0.25   -0.16      -0.01
## Limitation           1.00          -0.15     0.36    0.00       0.44
## Online_grocery      -0.15           1.00     0.30    0.31      -0.16
## delivery             0.36           0.30     1.00    0.12       0.28
## Pick_up              0.00           0.31     0.12    1.00      -0.10
## Find_items           0.44          -0.16     0.28   -0.10       1.00
## other_shops         -0.06          -0.11    -0.20   -0.03       0.00
##                other_shops
## CS_helpful           -0.31
## Recommend            -0.06
## Come_again            0.33
## All_Products          0.22
## Profesionalism       -0.19
## Limitation           -0.06
## Online_grocery       -0.11
## delivery             -0.20
## Pick_up              -0.03
## Find_items            0.00
## other_shops           1.00

Correlation Plot

# Colour-coded heat map of the upper triangle of the correlation matrix,
# with each coefficient printed in its cell.
corrplot::corrplot(
  corr = cor_mat,
  type = "upper",
  method = "color",
  addCoef.col = "black",
  number.cex = 0.6,
  tl.col = "black"
)

Reliability Analysis

# Columns treated as a single scale for the reliability analysis.
scale_items <- c(
  "CS_helpful",
  "Recommend",
  "Come_again",
  "All_Products",
  "Profesionalism",
  "Limitation",
  "Online_grocery",
  "delivery",
  "Pick_up",
  "Find_items",
  "other_shops"
)

# Cronbach's alpha over all scale items, using psych's default item keying.
alpha_result <- psych::alpha(x = data[, scale_items])
## Some items ( Pick_up other_shops ) were negatively correlated with the first principal component and 
## probably should be reversed.  
## To do this, run the function again with the 'check.keys=TRUE' option
# NOTE(review): the message above flags Pick_up and other_shops as possibly
# reverse-worded -- confirm against the questionnaire before trusting alpha.
alpha_result
## 
## Reliability analysis   
## Call: psych::alpha(x = data[, scale_items])
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd median_r
##       0.47      0.59    0.82      0.12 1.5 0.17  1.9 0.35    0.057
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.07  0.47  0.75
## Duhachek  0.14  0.47  0.81
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r  S/N alpha se var.r med.r
## CS_helpful          0.35      0.47    0.67     0.081 0.88     0.21 0.053 0.046
## Recommend           0.41      0.53    0.79     0.101 1.12     0.19 0.068 0.057
## Come_again          0.42      0.56    0.77     0.113 1.28     0.19 0.062 0.056
## All_Products        0.41      0.56    0.76     0.114 1.29     0.19 0.068 0.050
## Profesionalism      0.44      0.56    0.79     0.111 1.25     0.18 0.067 0.056
## Limitation          0.43      0.56    0.74     0.113 1.27     0.19 0.067 0.090
## Online_grocery      0.48      0.60    0.81     0.133 1.53     0.17 0.069 0.072
## delivery            0.37      0.50    0.77     0.090 0.99     0.20 0.066 0.044
## Pick_up             0.58      0.66    0.83     0.163 1.94     0.14 0.058 0.168
## Find_items          0.42      0.56    0.78     0.113 1.28     0.19 0.068 0.072
## other_shops         0.58      0.64    0.83     0.152 1.79     0.14 0.065 0.117
## 
##  Item statistics 
##                 n raw.r std.r  r.cor r.drop mean   sd
## CS_helpful     22 0.662 0.775  0.832  0.531  1.6 0.73
## Recommend      22 0.506 0.590  0.538  0.363  1.3 0.65
## Come_again     22 0.466 0.475  0.448  0.294  1.5 0.74
## All_Products   22 0.527 0.464  0.442  0.279  2.1 1.06
## Profesionalism 22 0.383 0.496  0.447  0.239  1.4 0.59
## Limitation     22 0.452 0.481  0.485  0.261  1.5 0.80
## Online_grocery 22 0.272 0.297  0.209  0.074  2.3 0.77
## delivery       22 0.614 0.689  0.654  0.471  2.4 0.73
## Pick_up        22 0.089 0.021 -0.092 -0.185  2.5 1.06
## Find_items     22 0.475 0.474  0.432  0.322  1.5 0.67
## other_shops    22 0.311 0.120  0.014 -0.060  2.6 1.40
## 
## Non missing response frequency for each item
##                   1    2    3    4    5 miss
## CS_helpful     0.55 0.32 0.14 0.00 0.00    0
## Recommend      0.77 0.14 0.09 0.00 0.00    0
## Come_again     0.68 0.18 0.14 0.00 0.00    0
## All_Products   0.27 0.55 0.05 0.09 0.05    0
## Profesionalism 0.64 0.32 0.05 0.00 0.00    0
## Limitation     0.64 0.27 0.05 0.05 0.00    0
## Online_grocery 0.18 0.36 0.45 0.00 0.00    0
## delivery       0.14 0.32 0.55 0.00 0.00    0
## Pick_up        0.18 0.36 0.32 0.09 0.05    0
## Find_items     0.64 0.27 0.09 0.00 0.00    0
## other_shops    0.27 0.27 0.18 0.14 0.14    0

Satisfaction Score

# Composite score per respondent: the mean (not the sum, despite the column
# name) of the scale items, skipping missing answers.
data$Satisfaction_Total <- rowMeans(data[scale_items], na.rm = TRUE)

# Distribution of the composite score across respondents.
summary(data[["Satisfaction_Total"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.182   1.636   1.864   1.868   2.182   2.455

Satisfaction by Gender

# Group size, mean and standard deviation of the composite score per gender.
summarise(
  group_by(data, Gender),
  n = n(),
  mean_satisfaction = mean(Satisfaction_Total, na.rm = TRUE),
  sd_satisfaction = sd(Satisfaction_Total, na.rm = TRUE)
)
## # A tibble: 2 × 4
##   Gender     n mean_satisfaction sd_satisfaction
##   <fct>  <int>             <dbl>           <dbl>
## 1 Male      16              1.87           0.317
## 2 Female     6              1.86           0.452

Satisfaction by Education

# Group size, mean and standard deviation of the composite score per
# education level.
summarise(
  group_by(data, Education),
  n = n(),
  mean_satisfaction = mean(Satisfaction_Total, na.rm = TRUE),
  sd_satisfaction = sd(Satisfaction_Total, na.rm = TRUE)
)
## # A tibble: 4 × 4
##   Education      n mean_satisfaction sd_satisfaction
##   <fct>      <int>             <dbl>           <dbl>
## 1 Primary        3              1.82           0.417
## 2 Secondary      8              1.90           0.338
## 3 HighSchool     2              1.91           0.514
## 4 Postgrad       9              1.85           0.369

Graph 3: Satisfaction by Gender

# Box plot of the composite score for each gender. The fill colour repeats
# the x axis, so the legend is hidden.
ggplot(
  data = data,
  mapping = aes(x = Gender, y = Satisfaction_Total, fill = Gender)
) +
  geom_boxplot() +
  theme(legend.position = "none") +
  labs(
    x = "Gender",
    y = "Satisfaction (mean score)",
    title = "Satisfaction by Gender"
  )

Save Processed File

# Export the processed data frame (recoded factors plus the composite score)
# to the working directory, without a row-name column.
write.csv(
  x = data,
  file = "customer_segmentation_processed.csv",
  row.names = FALSE
)