This report analyzes customer survey data to understand satisfaction, behavior, and demographic patterns. The goal is to identify trends and relationships between variables.
# Install only the required packages that are not already available.
# Calling install.packages() unconditionally re-downloads and re-installs
# every package each time the report is run.
required_pkgs <- c("tidyverse", "ggplot2", "dplyr", "reshape2")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
# Import the survey responses; no column types are given, so readr infers them.
data <- read_csv(
  file = "/cloud/project/customer_segmentation.csv"
)
## Rows: 22 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (15): ID, CS_helpful, Recommend, Come_again, All_Products, Profesionalis...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows to confirm the file was parsed as expected.
data %>%
  head()
## # A tibble: 6 × 15
## ID CS_helpful Recommend Come_again All_Products Profesionalism Limitation
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 2 2 2 2 2 2
## 2 2 1 2 1 1 1 1
## 3 3 2 1 1 1 1 2
## 4 4 3 3 2 4 1 2
## 5 5 2 1 3 5 2 1
## 6 6 1 1 3 2 1 1
## # ℹ 8 more variables: Online_grocery <dbl>, delivery <dbl>, Pick_up <dbl>,
## # Find_items <dbl>, other_shops <dbl>, Gender <dbl>, Age <dbl>,
## # Education <dbl>
# Show the storage type and leading values of every column.
str(data)
## spc_tbl_ [22 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ID : num [1:22] 1 2 3 4 5 6 7 8 9 10 ...
## $ CS_helpful : num [1:22] 2 1 2 3 2 1 2 1 1 1 ...
## $ Recommend : num [1:22] 2 2 1 3 1 1 1 1 1 1 ...
## $ Come_again : num [1:22] 2 1 1 2 3 3 1 1 1 1 ...
## $ All_Products : num [1:22] 2 1 1 4 5 2 2 2 2 1 ...
## $ Profesionalism: num [1:22] 2 1 1 1 2 1 2 1 2 1 ...
## $ Limitation : num [1:22] 2 1 2 2 1 1 1 2 1 1 ...
## $ Online_grocery: num [1:22] 2 2 3 3 2 1 2 1 2 3 ...
## $ delivery : num [1:22] 3 3 3 3 3 2 2 1 1 2 ...
## $ Pick_up : num [1:22] 4 3 2 2 1 1 2 2 3 2 ...
## $ Find_items : num [1:22] 1 1 1 2 2 1 1 2 1 1 ...
## $ other_shops : num [1:22] 2 2 3 2 3 4 1 4 1 1 ...
## $ Gender : num [1:22] 1 1 1 1 2 1 1 1 2 2 ...
## $ Age : num [1:22] 2 2 2 3 4 2 2 2 2 2 ...
## $ Education : num [1:22] 2 2 2 5 2 5 3 2 1 2 ...
## - attr(*, "spec")=
## .. cols(
## .. ID = col_double(),
## .. CS_helpful = col_double(),
## .. Recommend = col_double(),
## .. Come_again = col_double(),
## .. All_Products = col_double(),
## .. Profesionalism = col_double(),
## .. Limitation = col_double(),
## .. Online_grocery = col_double(),
## .. delivery = col_double(),
## .. Pick_up = col_double(),
## .. Find_items = col_double(),
## .. other_shops = col_double(),
## .. Gender = col_double(),
## .. Age = col_double(),
## .. Education = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary statistics (minimum, quartiles, mean, maximum).
data %>%
  summary()
## ID CS_helpful Recommend Come_again
## Min. : 1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.: 6.25 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:1.000
## Median :11.50 Median :1.000 Median :1.000 Median :1.000
## Mean :11.50 Mean :1.591 Mean :1.318 Mean :1.455
## 3rd Qu.:16.75 3rd Qu.:2.000 3rd Qu.:1.000 3rd Qu.:2.000
## Max. :22.00 Max. :3.000 Max. :3.000 Max. :3.000
## All_Products Profesionalism Limitation Online_grocery delivery
## Min. :1.000 Min. :1.000 Min. :1.0 Min. :1.000 Min. :1.000
## 1st Qu.:1.250 1st Qu.:1.000 1st Qu.:1.0 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :1.000 Median :1.0 Median :2.000 Median :3.000
## Mean :2.091 Mean :1.409 Mean :1.5 Mean :2.273 Mean :2.409
## 3rd Qu.:2.000 3rd Qu.:2.000 3rd Qu.:2.0 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :5.000 Max. :3.000 Max. :4.0 Max. :3.000 Max. :3.000
## Pick_up Find_items other_shops Gender
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.250 1st Qu.:1.000
## Median :2.000 Median :1.000 Median :2.000 Median :1.000
## Mean :2.455 Mean :1.455 Mean :2.591 Mean :1.273
## 3rd Qu.:3.000 3rd Qu.:2.000 3rd Qu.:3.750 3rd Qu.:1.750
## Max. :5.000 Max. :3.000 Max. :5.000 Max. :2.000
## Age Education
## Min. :2.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.500
## Mean :2.455 Mean :3.182
## 3rd Qu.:3.000 3rd Qu.:5.000
## Max. :4.000 Max. :5.000
# Correct the misspelled "Profesionalism" column name. The imported column has
# no trailing dot, so matching only "Profesionalism." never renamed anything;
# both spellings are accepted here.
misspelled <- c("Profesionalism", "Profesionalism.")
colnames(data)[colnames(data) %in% misspelled] <- "Professionalism"
# Count the NA entries in each column of the raw import.
data %>%
  is.na() %>%
  colSums()
## ID CS_helpful Recommend Come_again All_Products
## 0 0 0 0 0
## Profesionalism Limitation Online_grocery delivery Pick_up
## 0 0 0 0 0
## Find_items other_shops Gender Age Education
## 0 0 0 0 0
# Keep only the rows that are complete in every column.
clean_data <- data %>%
  na.omit()
# Verify that the cleaned data has zero NA entries per column.
clean_data %>%
  is.na() %>%
  colSums()
## ID CS_helpful Recommend Come_again All_Products
## 0 0 0 0 0
## Profesionalism Limitation Online_grocery delivery Pick_up
## 0 0 0 0 0
## Find_items other_shops Gender Age Education
## 0 0 0 0 0
# Histogram of the Age variable with one bin per unit.
ggplot(data = clean_data, mapping = aes(x = Age)) +
  geom_histogram(binwidth = 1) +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count")
# Bar chart of respondent counts per Gender code, treated as a category.
ggplot(data = clean_data) +
  geom_bar(mapping = aes(x = factor(Gender))) +
  labs(
    x = "Gender",
    y = "Count",
    title = "Gender Distribution"
  )
# Recommend and Come_again hold integer codes, so many respondents share the
# same (x, y) position and plain points are drawn on top of each other.
# geom_count() sizes each point by the number of respondents at that position.
ggplot(clean_data, aes(x = Recommend, y = Come_again)) +
  geom_count() +
  labs(
    title = "Recommendation vs Returning Customers",
    x = "Recommend",
    y = "Come Again",
    size = "Respondents"
  )
# CS_helpful holds discrete integer codes. Converting it to a factor draws one
# labelled bar per code instead of placing the bars on a continuous axis
# (which can show fractional tick marks), matching the Gender bar chart.
ggplot(clean_data, aes(x = factor(CS_helpful))) +
  geom_bar() +
  labs(
    title = "Customer Service Helpfulness Ratings",
    x = "Rating",
    y = "Count"
  )
# Online_grocery holds discrete integer codes. Converting it to a factor draws
# one labelled bar per code instead of placing the bars on a continuous axis
# (which can show fractional tick marks), matching the Gender bar chart.
ggplot(clean_data, aes(x = factor(Online_grocery))) +
  geom_bar() +
  labs(
    title = "Online Grocery Usage",
    x = "Usage",
    y = "Count"
  )
# delivery holds discrete integer codes. Converting it to a factor draws one
# labelled bar per code instead of placing the bars on a continuous axis
# (which can show fractional tick marks), matching the Gender bar chart.
ggplot(clean_data, aes(x = factor(delivery))) +
  geom_bar() +
  labs(
    title = "Delivery Preference",
    x = "Delivery Type",
    y = "Count"
  )
# Correlate the survey variables only. ID runs 1..22 and appears to be a row
# identifier (confirm against the survey codebook), so its correlations with
# the answers carry no meaning and it is left out of the matrix.
numeric_data <- clean_data %>%
  select(where(is.numeric), -ID)
# Pairwise correlations between the remaining numeric columns (cor() default
# method).
cor_matrix <- cor(numeric_data)
cor_matrix
## ID CS_helpful Recommend Come_again All_Products
## ID 1.00000000 0.15482785 -0.08509414 -0.12908035 -0.11705779
## CS_helpful 0.15482785 1.00000000 0.48809623 0.27146195 0.29345435
## Recommend -0.08509414 0.48809623 1.00000000 0.38089069 0.02515624
## Come_again -0.12908035 0.27146195 0.38089069 1.00000000 0.36875582
## All_Products -0.11705779 0.29345435 0.02515624 0.36875582 1.00000000
## Profesionalism 0.25465839 0.51442802 0.39143306 0.42695809 0.08951478
## Limitation 0.19664246 0.60674478 0.04594474 0.00000000 0.05576720
## Online_grocery 0.23893106 0.20749595 0.29678764 -0.14514393 -0.14833305
## delivery 0.09489449 0.59036145 0.41510987 0.16766768 0.07197937
## Pick_up 0.15959528 -0.17854819 -0.08238912 -0.52135402 -0.25000740
## Find_items 0.24044075 0.29879792 -0.01996410 0.04367853 0.53916624
## other_shops 0.09671790 -0.30898381 -0.05968695 0.32594355 0.21734201
## Gender 0.08043618 0.06467921 0.01469318 0.32146531 0.14267528
## Age -0.10922184 -0.16766768 -0.11789474 0.12698413 0.30821382
## Education 0.21244579 0.06542384 0.12385279 0.08671100 0.07266003
## Profesionalism Limitation Online_grocery delivery
## ID 0.25465839 0.19664246 0.23893106 0.09489449
## CS_helpful 0.51442802 0.60674478 0.20749595 0.59036145
## Recommend 0.39143306 0.04594474 0.29678764 0.41510987
## Come_again 0.42695809 0.00000000 -0.14514393 0.16766768
## All_Products 0.08951478 0.05576720 -0.14833305 0.07197937
## Profesionalism 1.00000000 0.05030388 0.05734345 0.25471679
## Limitation 0.05030388 1.00000000 -0.15480679 0.36404687
## Online_grocery 0.05734345 -0.15480679 1.00000000 0.29971638
## delivery 0.25471679 0.36404687 0.29971638 1.00000000
## Pick_up -0.15959528 0.00000000 0.30963403 0.11717225
## Find_items -0.01092912 0.44257084 -0.15975979 0.28122157
## other_shops -0.19082180 -0.06351171 -0.11262158 -0.19968341
## Gender 0.45044262 0.00000000 -0.08663791 -0.06467921
## Age -0.22837293 -0.32166527 -0.06111323 -0.09581010
## Education -0.28024764 -0.07321628 0.07302945 -0.02544260
## Pick_up Find_items other_shops Gender Age
## ID 0.15959528 0.240440748 0.096717897 0.08043618 -0.10922184
## CS_helpful -0.17854819 0.298797921 -0.308983807 0.06467921 -0.16766768
## Recommend -0.08238912 -0.019964097 -0.059686954 0.01469318 -0.11789474
## Come_again -0.52135402 0.043678535 0.325943546 0.32146531 0.12698413
## All_Products -0.25000740 0.539166240 0.217342007 0.14267528 0.30821382
## Profesionalism -0.15959528 -0.010929125 -0.190821797 0.45044262 -0.22837293
## Limitation 0.00000000 0.442570837 -0.063511705 0.00000000 -0.32166527
## Online_grocery 0.30963403 -0.159759789 -0.112621585 -0.08663791 -0.06111323
## delivery 0.11717225 0.281221573 -0.199683413 -0.06467921 -0.09581010
## Pick_up 1.00000000 -0.103782087 -0.029202713 -0.46727535 -0.21630646
## Find_items -0.10378209 1.000000000 0.004599561 0.04246039 0.04367853
## other_shops -0.02920271 0.004599561 1.000000000 -0.11509630 -0.04178763
## Gender -0.46727535 0.042460389 -0.115096299 1.00000000 0.18002057
## Age -0.21630646 0.043678535 -0.041787634 0.18002057 1.00000000
## Education -0.24491202 0.095442714 0.013316169 -0.26341476 0.32516624
## Education
## ID 0.21244579
## CS_helpful 0.06542384
## Recommend 0.12385279
## Come_again 0.08671100
## All_Products 0.07266003
## Profesionalism -0.28024764
## Limitation -0.07321628
## Online_grocery 0.07302945
## delivery -0.02544260
## Pick_up -0.24491202
## Find_items 0.09544271
## other_shops 0.01331617
## Gender -0.26341476
## Age 0.32516624
## Education 1.00000000
# Reshape the correlation matrix to long form (Var1, Var2, value) for plotting.
melted_cor <- melt(cor_matrix)
# Correlations lie in [-1, 1], so use a diverging fill centred on 0 with fixed
# limits; the default sequential gradient does not distinguish positive from
# negative values.
ggplot(melted_cor, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "blue",
    mid = "white",
    high = "red",
    midpoint = 0,
    limits = c(-1, 1),
    name = "Correlation"
  ) +
  labs(title = "Correlation Heatmap")
# Fit a linear model of Recommend on CS_helpful, Come_again and delivery,
# using the cleaned survey data.
model <- lm(Recommend ~ CS_helpful + Come_again + delivery, data = clean_data)
# Print the coefficient table (estimates, standard errors, t and p values)
# together with the residual summary, R-squared and overall F-test.
summary(model)
##
## Call:
## lm(formula = Recommend ~ CS_helpful + Come_again + delivery,
## data = clean_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.92048 -0.37864 0.01432 0.18322 1.11248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1483 0.4636 0.320 0.753
## CS_helpful 0.2663 0.2158 1.234 0.233
## Come_again 0.2333 0.1756 1.329 0.201
## delivery 0.1689 0.2106 0.802 0.433
##
## Residual standard error: 0.5719 on 18 degrees of freedom
## Multiple R-squared: 0.3288, Adjusted R-squared: 0.217
## F-statistic: 2.939 on 3 and 18 DF, p-value: 0.06113
# Average Recommend code across all respondents.
mean(clean_data[["Recommend"]])
## [1] 1.318182
# Average Come_again code across all respondents.
mean(clean_data[["Come_again"]])
## [1] 1.454545
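This analysis explored customer satisfaction, demographics, and shopping behavior in a small survey sample of 22 respondents. The customer service and delivery variables are both positively correlated with the recommendation score (r ≈ 0.49 and r ≈ 0.42), which suggests that improving customer service and delivery options may increase customer loyalty and recommendations. However, none of the regression coefficients is statistically significant at the 5% level (overall model p ≈ 0.06), so this should be treated as a tentative finding to be confirmed with a larger sample.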
Further analysis could include clustering techniques to better segment customers.