Looking at the grocery shopping dataset helped me better understand how customers feel about their shopping experience. The data showed that people rated customer service, product options, and delivery and pickup differently. Some customers had more positive experiences, while others gave lower ratings, which shows that not everyone is equally satisfied.

The dataset also included information like age, gender, and education, which helped show that different groups of people shop differently. This means businesses can’t treat all customers the same and should instead focus on different groups when making decisions. Overall, this analysis shows how customer feedback and basic demographic information can help businesses improve their services and better meet customer needs.

# Attach every required package, installing it first when it is missing
packages <- c("tidyverse")
for (p in packages) {
  # require() returns TRUE once the package is attached; nothing else to do
  if (require(p, character.only = TRUE)) {
    next
  }
  # Package could not be attached: install it, then attach (errors if that fails)
  install.packages(p, dependencies = TRUE)
  library(p, character.only = TRUE)
}
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the CSV file into a data frame and preview its first six rows
data <- read.csv(file = "customer_segmentation.csv")
head(data, n = 6L)
##   ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## 1  1          2         2          2            2              2          2
## 2  2          1         2          1            1              1          1
## 3  3          2         1          1            1              1          2
## 4  4          3         3          2            4              1          2
## 5  5          2         1          3            5              2          1
## 6  6          1         1          3            2              1          1
##   Online_grocery delivery Pick_up Find_items other_shops Gender Age Education
## 1              2        3       4          1           2      1   2         2
## 2              2        3       3          1           2      1   2         2
## 3              3        3       2          1           3      1   2         2
## 4              3        3       2          2           2      1   3         5
## 5              2        3       1          2           3      2   4         2
## 6              1        2       1          1           4      1   2         5
# Basic summary
# Per-column descriptive statistics: min, quartiles, median, mean and max
summary(data)
##        ID          CS_helpful      Recommend       Come_again   
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##   All_Products   Profesionalism    Limitation  Online_grocery     delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     Pick_up        Find_items     other_shops        Gender     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.455   Mean   :1.455   Mean   :2.591   Mean   :1.273  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.750  
##  Max.   :5.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       Age          Education    
##  Min.   :2.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.500  
##  Mean   :2.455   Mean   :3.182  
##  3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :4.000   Max.   :5.000
# Compact structure of the data frame: its dimensions and each column's type
str(data)
## 'data.frame':    22 obs. of  15 variables:
##  $ ID            : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful    : int  2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend     : int  2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_again    : int  2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Products  : int  2 1 1 4 5 2 2 2 2 1 ...
##  $ Profesionalism: int  2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation    : int  2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_grocery: int  2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : int  3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up       : int  4 3 2 2 1 1 2 2 3 2 ...
##  $ Find_items    : int  1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : int  2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender        : int  1 1 1 1 2 1 1 1 2 2 ...
##  $ Age           : int  2 2 2 3 4 2 2 2 2 2 ...
##  $ Education     : int  2 2 2 5 2 5 3 2 1 2 ...
# Count the missing (NA) values in every column
colSums(is.na(data))
##             ID     CS_helpful      Recommend     Come_again   All_Products 
##              0              0              0              0              0 
## Profesionalism     Limitation Online_grocery       delivery        Pick_up 
##              0              0              0              0              0 
##     Find_items    other_shops         Gender            Age      Education 
##              0              0              0              0              0
# Customer Service Helpfulness
# Bar chart counting the rows at each CS_helpful value
ggplot(data = data, mapping = aes(x = CS_helpful)) +
  geom_bar(fill = "blue") +
  ggtitle("Customer Service Helpfulness Ratings") +
  theme_minimal()

# Likelihood to Recommend
# Bar chart counting the rows at each Recommend value
ggplot(data = data, mapping = aes(x = Recommend)) +
  geom_bar(fill = "green") +
  ggtitle("Likelihood to Recommend") +
  theme_minimal()

# Likelihood to Return
# Bar chart counting the rows at each Come_again value
ggplot(data = data, mapping = aes(x = Come_again)) +
  geom_bar(fill = "purple") +
  ggtitle("Likelihood to Return") +
  theme_minimal()

# Online Grocery vs Delivery
# Both columns hold small integer codes, so many rows share the same (x, y)
# position and plain points would be drawn on top of one another.
# geom_count() sizes each point by the number of rows it represents, which
# keeps the frequency of every combination visible.
ggplot(data, aes(x = Online_grocery, y = delivery)) +
  geom_count(color = "red") +
  theme_minimal() +
  labs(title = "Online Grocery vs Delivery Ratings", size = "Count")

# Age Distribution
# Bar chart counting the rows at each Age value
ggplot(data = data, mapping = aes(x = Age)) +
  geom_bar(fill = "orange") +
  ggtitle("Age Distribution") +
  theme_minimal()

# Customer Segmentation
# k-means (k = 3) on the standardised numeric columns; the cluster label is
# stored back on `data` as the factor column `Cluster`.
set.seed(123)
# vapply() always returns a logical vector, unlike sapply()
is_num <- vapply(data, is.numeric, logical(1))
# ID only numbers the rows; as a feature it would pull rows with nearby IDs
# into the same cluster, so it is left out of the distance computation.
feature_cols <- setdiff(names(data)[is_num], "ID")
data_scaled <- scale(data[, feature_cols, drop = FALSE])
# Several random starts: kmeans() keeps the best solution, so the segments
# do not hinge on one unlucky initialisation.
kmeans_result <- kmeans(data_scaled, centers = 3, nstart = 25)
data$Cluster <- as.factor(kmeans_result$cluster)

# Scatter plot of the segments on Recommend vs Come_again.
# Both axes are small integer codes, so rows pile up on the same coordinates
# and points from different clusters hide each other. A small seeded jitter
# plus transparency separates them while keeping the figure reproducible.
ggplot(data, aes(x = Recommend, y = Come_again, color = Cluster)) +
  geom_point(
    size = 3,
    alpha = 0.7,
    position = position_jitter(width = 0.15, height = 0.15, seed = 123)
  ) +
  theme_minimal() +
  labs(title = "Customer Segments")