Introduction

This report analyzes customer survey data to understand satisfaction, behavior, and demographic patterns. The goal is to identify trends and relationships between variables.

Load Libraries

# Install only the packages that are not yet available, so that re-running
# the report does not reinstall every package on each run.
required_packages <- c("tidyverse", "ggplot2", "dplyr", "reshape2")
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(reshape2)
## 
## Attaching package: 'reshape2'
## 
## The following object is masked from 'package:tidyr':
## 
##     smiths

Load Dataset

# Read the survey responses; readr prints the column types it guessed.
data <- readr::read_csv(file = "/cloud/project/customer_segmentation.csv")
## Rows: 22 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): ID, CS_helpful, Recommend, Come_again, All_Products, Profesionalis...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows
head(data, n = 6L)
## # A tibble: 6 × 15
##      ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
##   <dbl>      <dbl>     <dbl>      <dbl>        <dbl>          <dbl>      <dbl>
## 1     1          2         2          2            2              2          2
## 2     2          1         2          1            1              1          1
## 3     3          2         1          1            1              1          2
## 4     4          3         3          2            4              1          2
## 5     5          2         1          3            5              2          1
## 6     6          1         1          3            2              1          1
## # ℹ 8 more variables: Online_grocery <dbl>, delivery <dbl>, Pick_up <dbl>,
## #   Find_items <dbl>, other_shops <dbl>, Gender <dbl>, Age <dbl>,
## #   Education <dbl>

Data Structure

# Show each column's type together with its first few values
str(object = data)
## spc_tbl_ [22 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ ID            : num [1:22] 1 2 3 4 5 6 7 8 9 10 ...
##  $ CS_helpful    : num [1:22] 2 1 2 3 2 1 2 1 1 1 ...
##  $ Recommend     : num [1:22] 2 2 1 3 1 1 1 1 1 1 ...
##  $ Come_again    : num [1:22] 2 1 1 2 3 3 1 1 1 1 ...
##  $ All_Products  : num [1:22] 2 1 1 4 5 2 2 2 2 1 ...
##  $ Profesionalism: num [1:22] 2 1 1 1 2 1 2 1 2 1 ...
##  $ Limitation    : num [1:22] 2 1 2 2 1 1 1 2 1 1 ...
##  $ Online_grocery: num [1:22] 2 2 3 3 2 1 2 1 2 3 ...
##  $ delivery      : num [1:22] 3 3 3 3 3 2 2 1 1 2 ...
##  $ Pick_up       : num [1:22] 4 3 2 2 1 1 2 2 3 2 ...
##  $ Find_items    : num [1:22] 1 1 1 2 2 1 1 2 1 1 ...
##  $ other_shops   : num [1:22] 2 2 3 2 3 4 1 4 1 1 ...
##  $ Gender        : num [1:22] 1 1 1 1 2 1 1 1 2 2 ...
##  $ Age           : num [1:22] 2 2 2 3 4 2 2 2 2 2 ...
##  $ Education     : num [1:22] 2 2 2 5 2 5 3 2 1 2 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   ID = col_double(),
##   ..   CS_helpful = col_double(),
##   ..   Recommend = col_double(),
##   ..   Come_again = col_double(),
##   ..   All_Products = col_double(),
##   ..   Profesionalism = col_double(),
##   ..   Limitation = col_double(),
##   ..   Online_grocery = col_double(),
##   ..   delivery = col_double(),
##   ..   Pick_up = col_double(),
##   ..   Find_items = col_double(),
##   ..   other_shops = col_double(),
##   ..   Gender = col_double(),
##   ..   Age = col_double(),
##   ..   Education = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Min, quartiles, median, mean and max for every column
summary(object = data)
##        ID          CS_helpful      Recommend       Come_again   
##  Min.   : 1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 6.25   1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :11.50   Median :1.000   Median :1.000   Median :1.000  
##  Mean   :11.50   Mean   :1.591   Mean   :1.318   Mean   :1.455  
##  3rd Qu.:16.75   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :22.00   Max.   :3.000   Max.   :3.000   Max.   :3.000  
##   All_Products   Profesionalism    Limitation  Online_grocery     delivery    
##  Min.   :1.000   Min.   :1.000   Min.   :1.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.250   1st Qu.:1.000   1st Qu.:1.0   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :1.0   Median :2.000   Median :3.000  
##  Mean   :2.091   Mean   :1.409   Mean   :1.5   Mean   :2.273   Mean   :2.409  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:2.0   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :5.000   Max.   :3.000   Max.   :4.0   Max.   :3.000   Max.   :3.000  
##     Pick_up        Find_items     other_shops        Gender     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.250   1st Qu.:1.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :1.000  
##  Mean   :2.455   Mean   :1.455   Mean   :2.591   Mean   :1.273  
##  3rd Qu.:3.000   3rd Qu.:2.000   3rd Qu.:3.750   3rd Qu.:1.750  
##  Max.   :5.000   Max.   :3.000   Max.   :5.000   Max.   :2.000  
##       Age          Education    
##  Min.   :2.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :2.500  
##  Mean   :2.455   Mean   :3.182  
##  3rd Qu.:3.000   3rd Qu.:5.000  
##  Max.   :4.000   Max.   :5.000

Data Cleaning

# Fix the misspelled column name. The loaded data names the column
# "Profesionalism" (no trailing dot), so comparing only against
# "Profesionalism." never renamed anything. Accept both spellings; if
# neither is present the column names are left unchanged.
typo_names <- c("Profesionalism", "Profesionalism.")
colnames(data)[colnames(data) %in% typo_names] <- "Professionalism"

# Check missing values
colSums(is.na(data))
##             ID     CS_helpful      Recommend     Come_again   All_Products 
##              0              0              0              0              0 
## Profesionalism     Limitation Online_grocery       delivery        Pick_up 
##              0              0              0              0              0 
##     Find_items    other_shops         Gender            Age      Education 
##              0              0              0              0              0
# Drop every row that has an NA in any column
clean_data <- data |> na.omit()

# Per-column NA counts after the drop
clean_data |>
  is.na() |>
  colSums()
##             ID     CS_helpful      Recommend     Come_again   All_Products 
##              0              0              0              0              0 
## Profesionalism     Limitation Online_grocery       delivery        Pick_up 
##              0              0              0              0              0 
##     Find_items    other_shops         Gender            Age      Education 
##              0              0              0              0              0

Exploratory Data Analysis

Age Distribution

# Histogram of the Age column, one bin per unit
clean_data |>
  ggplot(aes(Age)) +
  geom_histogram(binwidth = 1) +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count")

Gender Distribution

# Bar chart of respondent counts per Gender code (treated as categorical)
ggplot(data = clean_data, mapping = aes(x = factor(Gender))) +
  geom_bar() +
  labs(x = "Gender", y = "Count", title = "Gender Distribution")

Customer Satisfaction Analysis

Recommend vs Come Again

# Both ratings take only a few integer values, so plain points would be
# drawn on top of each other. geom_count() sizes each point by the number
# of respondents sharing that (Recommend, Come_again) combination.
ggplot(clean_data, aes(x = Recommend, y = Come_again)) +
  geom_count() +
  labs(title = "Recommendation vs Returning Customers",
       x = "Recommend",
       y = "Come Again",
       size = "Respondents")

Customer Service Helpfulness

# Number of responses for each CS_helpful rating
clean_data |>
  ggplot(aes(CS_helpful)) +
  geom_bar() +
  labs(
    x = "Rating",
    y = "Count",
    title = "Customer Service Helpfulness Ratings"
  )

Shopping Behavior

Online Grocery Usage

# Number of responses for each Online_grocery value
ggplot(data = clean_data, mapping = aes(x = Online_grocery)) +
  geom_bar() +
  ggtitle("Online Grocery Usage") +
  xlab("Usage") +
  ylab("Count")

Delivery Preference

# Number of responses for each delivery value
clean_data |>
  ggplot(mapping = aes(x = delivery)) +
  geom_bar() +
  ggtitle("Delivery Preference") +
  labs(x = "Delivery Type", y = "Count")

Correlation Analysis

# ID runs from 1 to 22 and appears to be a sequential respondent identifier,
# so its correlations with the survey answers carry no meaning. Leave it out
# of the correlation matrix (any_of() tolerates the column being absent).
numeric_data <- clean_data %>%
  select(where(is.numeric), -any_of("ID"))
cor_matrix <- cor(numeric_data)
cor_matrix
##                         ID  CS_helpful   Recommend  Come_again All_Products
## ID              1.00000000  0.15482785 -0.08509414 -0.12908035  -0.11705779
## CS_helpful      0.15482785  1.00000000  0.48809623  0.27146195   0.29345435
## Recommend      -0.08509414  0.48809623  1.00000000  0.38089069   0.02515624
## Come_again     -0.12908035  0.27146195  0.38089069  1.00000000   0.36875582
## All_Products   -0.11705779  0.29345435  0.02515624  0.36875582   1.00000000
## Profesionalism  0.25465839  0.51442802  0.39143306  0.42695809   0.08951478
## Limitation      0.19664246  0.60674478  0.04594474  0.00000000   0.05576720
## Online_grocery  0.23893106  0.20749595  0.29678764 -0.14514393  -0.14833305
## delivery        0.09489449  0.59036145  0.41510987  0.16766768   0.07197937
## Pick_up         0.15959528 -0.17854819 -0.08238912 -0.52135402  -0.25000740
## Find_items      0.24044075  0.29879792 -0.01996410  0.04367853   0.53916624
## other_shops     0.09671790 -0.30898381 -0.05968695  0.32594355   0.21734201
## Gender          0.08043618  0.06467921  0.01469318  0.32146531   0.14267528
## Age            -0.10922184 -0.16766768 -0.11789474  0.12698413   0.30821382
## Education       0.21244579  0.06542384  0.12385279  0.08671100   0.07266003
##                Profesionalism  Limitation Online_grocery    delivery
## ID                 0.25465839  0.19664246     0.23893106  0.09489449
## CS_helpful         0.51442802  0.60674478     0.20749595  0.59036145
## Recommend          0.39143306  0.04594474     0.29678764  0.41510987
## Come_again         0.42695809  0.00000000    -0.14514393  0.16766768
## All_Products       0.08951478  0.05576720    -0.14833305  0.07197937
## Profesionalism     1.00000000  0.05030388     0.05734345  0.25471679
## Limitation         0.05030388  1.00000000    -0.15480679  0.36404687
## Online_grocery     0.05734345 -0.15480679     1.00000000  0.29971638
## delivery           0.25471679  0.36404687     0.29971638  1.00000000
## Pick_up           -0.15959528  0.00000000     0.30963403  0.11717225
## Find_items        -0.01092912  0.44257084    -0.15975979  0.28122157
## other_shops       -0.19082180 -0.06351171    -0.11262158 -0.19968341
## Gender             0.45044262  0.00000000    -0.08663791 -0.06467921
## Age               -0.22837293 -0.32166527    -0.06111323 -0.09581010
## Education         -0.28024764 -0.07321628     0.07302945 -0.02544260
##                    Pick_up   Find_items  other_shops      Gender         Age
## ID              0.15959528  0.240440748  0.096717897  0.08043618 -0.10922184
## CS_helpful     -0.17854819  0.298797921 -0.308983807  0.06467921 -0.16766768
## Recommend      -0.08238912 -0.019964097 -0.059686954  0.01469318 -0.11789474
## Come_again     -0.52135402  0.043678535  0.325943546  0.32146531  0.12698413
## All_Products   -0.25000740  0.539166240  0.217342007  0.14267528  0.30821382
## Profesionalism -0.15959528 -0.010929125 -0.190821797  0.45044262 -0.22837293
## Limitation      0.00000000  0.442570837 -0.063511705  0.00000000 -0.32166527
## Online_grocery  0.30963403 -0.159759789 -0.112621585 -0.08663791 -0.06111323
## delivery        0.11717225  0.281221573 -0.199683413 -0.06467921 -0.09581010
## Pick_up         1.00000000 -0.103782087 -0.029202713 -0.46727535 -0.21630646
## Find_items     -0.10378209  1.000000000  0.004599561  0.04246039  0.04367853
## other_shops    -0.02920271  0.004599561  1.000000000 -0.11509630 -0.04178763
## Gender         -0.46727535  0.042460389 -0.115096299  1.00000000  0.18002057
## Age            -0.21630646  0.043678535 -0.041787634  0.18002057  1.00000000
## Education      -0.24491202  0.095442714  0.013316169 -0.26341476  0.32516624
##                  Education
## ID              0.21244579
## CS_helpful      0.06542384
## Recommend       0.12385279
## Come_again      0.08671100
## All_Products    0.07266003
## Profesionalism -0.28024764
## Limitation     -0.07321628
## Online_grocery  0.07302945
## delivery       -0.02544260
## Pick_up        -0.24491202
## Find_items      0.09544271
## other_shops     0.01331617
## Gender         -0.26341476
## Age             0.32516624
## Education       1.00000000

Correlation Heatmap

# Reshape the correlation matrix to long format: one row per variable pair
melted_cor <- melt(cor_matrix)

ggplot(melted_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  # Diverging scale fixed to [-1, 1] so the sign and strength of each
  # correlation can be read directly from the colour
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    midpoint = 0, limits = c(-1, 1), name = "Correlation"
  ) +
  labs(title = "Correlation Heatmap", x = NULL, y = NULL) +
  # Rotate the variable names so they do not overlap on the x axis
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Regression Analysis

# Linear model of Recommend on three other survey ratings
model <- lm(
  formula = Recommend ~ CS_helpful + Come_again + delivery,
  data = clean_data
)
summary(object = model)
## 
## Call:
## lm(formula = Recommend ~ CS_helpful + Come_again + delivery, 
##     data = clean_data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92048 -0.37864  0.01432  0.18322  1.11248 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.1483     0.4636   0.320    0.753
## CS_helpful    0.2663     0.2158   1.234    0.233
## Come_again    0.2333     0.1756   1.329    0.201
## delivery      0.1689     0.2106   0.802    0.433
## 
## Residual standard error: 0.5719 on 18 degrees of freedom
## Multiple R-squared:  0.3288, Adjusted R-squared:  0.217 
## F-statistic: 2.939 on 3 and 18 DF,  p-value: 0.06113

Key Insights

# Average Recommend rating across all respondents
mean(clean_data[["Recommend"]])
## [1] 1.318182
# Average Come_again rating across all respondents
mean(clean_data[["Come_again"]])
## [1] 1.454545

Conclusion

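This analysis explored customer satisfaction, demographics, and shopping behavior. Customer service helpfulness and delivery ratings are both positively correlated with recommendation ratings (r ≈ 0.49 and r ≈ 0.42, respectively), which suggests that improving customer service and delivery options may increase customer loyalty and recommendations. However, with only 22 respondents, none of the individual regression coefficients is statistically significant at the 5% level and the overall model is only marginal (p ≈ 0.061), so these results should be treated as tentative.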

Further analysis could include clustering techniques to better segment customers.