library(pacman)
p_load(tidyverse,here, factoextra, cluster, dendextend, broom, gt)K-means Cluster Analysis
This analysis uses the Amazon Consumer Behavior Dataset, sourced from Kaggle (https://www.kaggle.com/datasets/swathiunnikrishnan/amazon-consumer-behaviour-dataset), to conduct a clustering analysis.
The dataset is specifically curated to explore and understand Amazon consumer behavior, encompassing a wide array of customer interactions and browsing patterns within the Amazon ecosystem.
The clustering analysis aims to segment the dataset based on various variables, primarily focusing on purchase frequency. Subsequently, the clusters will be analyzed descriptively to unveil insights into their behaviors.
The objective of this analysis is to propose marketing and product development strategies aimed at optimizing purchase frequency for Amazon. This proposal is based on the analysis of k-means clusters categorized by purchase frequency, with the goal of enhancing customer engagement while maximizing purchase frequency for each cluster.
'data.frame': 602 obs. of 23 variables:
$ Timestamp : chr "2023/06/04 1:28:19 PM GMT+5:30" "2023/06/04 2:30:44 PM GMT+5:30" "2023/06/04 5:04:56 PM GMT+5:30" "2023/06/04 5:13:00 PM GMT+5:30" ...
$ age : int 23 23 24 24 22 21 22 21 20 23 ...
$ Gender : chr "Female" "Female" "Prefer not to say" "Female" ...
$ Purchase_Frequency : chr "Few times a month" "Once a month" "Few times a month" "Once a month" ...
$ Purchase_Categories : chr "Beauty and Personal Care" "Clothing and Fashion" "Groceries and Gourmet Food;Clothing and Fashion" "Beauty and Personal Care;Clothing and Fashion;others" ...
$ Personalized_Recommendation_Frequency : chr "Yes" "Yes" "No" "Sometimes" ...
$ Browsing_Frequency : chr "Few times a week" "Few times a month" "Few times a month" "Few times a month" ...
$ Product_Search_Method : chr "Keyword" "Keyword" "Keyword" "Keyword" ...
$ Search_Result_Exploration : chr "Multiple pages" "Multiple pages" "Multiple pages" "First page" ...
$ Customer_Reviews_Importance : int 1 1 2 5 1 1 1 1 1 1 ...
$ Add_to_Cart_Browsing : chr "Yes" "Yes" "Yes" "Maybe" ...
$ Cart_Completion_Frequency : chr "Sometimes" "Often" "Sometimes" "Sometimes" ...
$ Cart_Abandonment_Factors : chr "Found a better price elsewhere" "High shipping costs" "Found a better price elsewhere" "Found a better price elsewhere" ...
$ Saveforlater_Frequency : chr "Sometimes" "Rarely" "Rarely" "Sometimes" ...
$ Review_Left : chr "Yes" "No" "No" "Yes" ...
$ Review_Reliability : chr "Occasionally" "Heavily" "Occasionally" "Heavily" ...
$ Review_Helpfulness : chr "Yes" "Yes" "No" "Yes" ...
$ Personalized_Recommendation_Frequency.1: int 2 2 4 3 4 5 3 3 2 3 ...
$ Recommendation_Helpfulness : chr "Yes" "Sometimes" "No" "Sometimes" ...
$ Rating_Accuracy : int 1 3 3 3 2 5 3 1 2 3 ...
$ Shopping_Satisfaction : int 1 2 3 4 2 2 3 3 2 4 ...
$ Service_Appreciation : chr "Competitive prices" "Wide product selection" "Competitive prices" "Competitive prices" ...
$ Improvement_Areas : chr "Reducing packaging waste" "Reducing packaging waste" "Product quality and accuracy" "Product quality and accuracy" ...
This analysis focuses only on customers' behaviors, so the following variables are removed:
Timestamp, age, Gender, Purchase_Categories, Product_Search_Method, Search_Result_Exploration, Cart_Abandonment_Factors, Service_Appreciation, Improvement_Areas
# Recode survey answers to numeric codes ----
#
# Every recoded column of `purchase_data` is listed once in `answer_codes`
# below, together with its answer labels and the code that replaces each
# label. The codes are written as character strings because gsub() returns
# character vectors; this block does not convert the columns to numbers.

# Answer scales shared by several columns.
yes_sometimes_no <- c("Yes" = "2", "Sometimes" = "1", "No" = "0")
never_to_always <- c(
  "Never" = "0", "Rarely" = "1", "Sometimes" = "2", "Often" = "3",
  "Always" = "4"
)

# Column name -> (answer label -> code). Labels are substituted in the order
# they are listed here.
answer_codes <- list(
  Purchase_Frequency = c(
    "Less than once a month" = "1",
    "Once a month" = "2",
    "Few times a month" = "3",
    "Once a week" = "4",
    "Multiple times a week" = "5"
  ),
  Personalized_Recommendation_Frequency = yes_sometimes_no,
  Browsing_Frequency = c(
    "Rarely" = "1",
    "Few times a month" = "2",
    "Few times a week" = "3",
    "Multiple times a day" = "4"
  ),
  Add_to_Cart_Browsing = c("Yes" = "2", "Maybe" = "1", "No" = "0"),
  Cart_Completion_Frequency = never_to_always,
  Saveforlater_Frequency = never_to_always,
  Review_Reliability = c(
    "Never" = "0", "Rarely" = "1", "Occasionally" = "2", "Moderately" = "3",
    "Heavily" = "4"
  ),
  Review_Helpfulness = yes_sometimes_no,
  Recommendation_Helpfulness = yes_sometimes_no
)

# Replace every occurrence of each label in `x` with its code, one label at
# a time in the order given by `codes`.
#
# x:     vector of survey answers.
# codes: named character vector; names are the labels, values the codes.
# Returns a character vector the same length as `x`.
recode_answers <- function(x, codes) {
  for (label in names(codes)) {
    # fixed = TRUE: the labels are literal text, not regular expressions.
    x <- gsub(label, codes[[label]], x, fixed = TRUE)
  }
  x
}

for (column in names(answer_codes)) {
  purchase_data[[column]] <- recode_answers(
    purchase_data[[column]], answer_codes[[column]]
  )
}

# Review left: 1 when the answer is "Yes", 0 for any other non-missing answer.
purchase_data$Review_Left <- ifelse(purchase_data$Review_Left == "Yes", 1, 0)
'data.frame': 602 obs. of 14 variables:
$ Purchase_Frequency : int 3 2 3 2 1 1 1 3 1 1 ...
$ Personalized_Recommendation_Frequency : int 2 2 0 1 2 0 0 0 1 0 ...
$ Browsing_Frequency : int 3 2 2 2 2 1 1 1 2 1 ...
$ Customer_Reviews_Importance : int 1 1 2 5 1 1 1 1 1 1 ...
$ Add_to_Cart_Browsing : int 2 2 2 1 2 2 2 2 2 2 ...
$ Cart_Completion_Frequency : int 2 3 2 2 2 1 3 3 2 1 ...
$ Saveforlater_Frequency : int 2 1 1 2 1 0 1 1 3 1 ...
$ Review_Left : num 1 0 0 1 0 0 0 0 0 0 ...
$ Review_Reliability : int 2 4 2 4 4 4 4 4 3 4 ...
$ Review_Helpfulness : int 2 2 0 2 2 2 2 1 2 2 ...
$ Personalized_Recommendation_Frequency.1: int 2 2 4 3 4 5 3 3 2 3 ...
$ Recommendation_Helpfulness : int 2 1 0 1 2 0 1 1 2 2 ...
$ Rating_Accuracy : int 1 3 3 3 2 5 3 1 2 3 ...
$ Shopping_Satisfaction : int 1 2 3 4 2 2 3 3 2 4 ...
Purchase_Frequency: How frequently do customers make purchases on Amazon?
Browsing_Frequency: How often do customers browse Amazon’s website or app?
Saveforlater_Frequency: Do you use Amazon’s “Save for Later” feature, and if so, how often?
Review_Helpfulness: Do you find helpful information from other customers’ reviews?
# Keep the rows of `correlation_df` whose absolute correlation exceeds 0.2,
# order them from strongest to weakest, and render the result as a gt table.
correlation <- correlation_df |>
  filter(abs_correlations > 0.2) |>
  arrange(desc(abs_correlations))
gt(correlation)
| Variable | abs_correlations | p_value |
|---|---|---|
| Browsing_Frequency | 0.3686821 | 8.068092e-21 |
| Saveforlater_Frequency | 0.2953307 | 1.396140e-13 |
| Review_Helpfulness | 0.2177724 | 6.776562e-08 |
| cluster | n |
|---|---|
| 1 | 338 |
| 2 | 264 |
Cluster 1:
The average “save for later frequency” of users in cluster 1 is 2.48, “browsing frequency” is 2.84, and “purchase frequency” is 3.64.
All the values are higher than those of cluster 2. This indicates that in this cluster, users tend to save the products for later purchase, and the browsing frequency is also relatively high. After that, users are also more likely to buy the products.
This could mean that these users are more loyal customers who frequently use Amazon to make purchases.
Cluster 2:
Users in Cluster 2 have an average “save for later frequency” of 1.625, a “browsing frequency” of 2.14, and a “purchase frequency” of 1.68.
Users in this cluster save and browse relatively less frequently than those in Cluster 1, and they also purchase less frequently.
This may indicate that these users are more cautious in their purchasing decisions.
# Browsing frequency vs. purchase frequency ----
# Bar chart of purchase-frequency counts, filled by cluster, with one panel
# per browsing-frequency code. Panel titles spell out what each code means.
ggplot(
  data = clusters_complete,
  mapping = aes(x = Purchase_Frequency, fill = factor(cluster))
) +
  geom_bar() +
  facet_wrap(
    vars(Browsing_Frequency),
    labeller = labeller(
      Browsing_Frequency = c(
        "1" = "1 = Rarely",
        "2" = "2 = Few times a month",
        "3" = "3 = Few times a week",
        "4" = "4 = Multiple times a day"
      )
    )
  ) +
  labs(
    title = "Purchase Frequency by Browsing Frequency",
    x = "Purchase Frequency",
    y = "Count",
    fill = "Cluster"
  )
Cluster 1: The browsing frequency for this cluster of users is distributed between 2 and 4, and the purchasing frequency is distributed between 3 and 5.
Cluster 2: Relatively, the browsing frequency for cluster 2 of customers is distributed between 1 and 3, and the purchasing frequency is distributed between 1 and 3.
# Save-for-later frequency vs. purchase frequency ----
# Bar chart of purchase-frequency counts, filled by cluster, with one panel
# per save-for-later code. Panel titles spell out what each code means.
ggplot(
  data = clusters_complete,
  mapping = aes(x = Purchase_Frequency, fill = factor(cluster))
) +
  geom_bar() +
  facet_wrap(
    vars(Saveforlater_Frequency),
    labeller = labeller(
      Saveforlater_Frequency = c(
        "0" = "0 = Never",
        "1" = "1 = Rarely",
        "2" = "2 = Sometimes",
        "3" = "3 = Often",
        "4" = "4 = Always"
      )
    )
  ) +
  labs(
    title = "Purchase Frequency by Save for Later Frequency",
    x = "Purchase Frequency",
    y = "Count",
    fill = "Cluster"
  )
Cluster 1: The save for later frequency for this cluster of users is distributed between 2 and 4, and the purchasing frequency is distributed between 3 and 5.
Cluster 2: The save for later frequency for this cluster of users is distributed between 0 and 3, and the purchasing frequency is distributed between 1 and 3.
# Review helpfulness vs. purchase frequency ----
# Bar chart of purchase-frequency counts, filled by cluster, with one panel
# per review-helpfulness code. Panel titles spell out what each code means.
ggplot(
  data = clusters_complete,
  mapping = aes(x = Purchase_Frequency, fill = factor(cluster))
) +
  geom_bar() +
  facet_wrap(
    vars(Review_Helpfulness),
    labeller = labeller(
      Review_Helpfulness = c(
        "0" = "0 = No",
        "1" = "1 = Sometimes",
        "2" = "2 = Yes"
      )
    )
  ) +
  labs(
    title = "Purchase Frequency by Review Helpfulness",
    x = "Purchase Frequency",
    y = "Count",
    fill = "Cluster"
  )
Cluster 1: The review helpfulness of this cluster is distributed across all frequencies, with less than cluster 2 at review helpfulness 2, and the purchasing frequency is distributed between 3 and 5.
Cluster 2: The review helpfulness of this cluster is distributed across all frequencies, with less than cluster 1 at review helpfulness 0, and the purchasing frequency is distributed between 1 and 3.
Based on our analysis, we can summarise the features of these two clusters and provide some suggestions for Amazon.
For Cluster 1, they are active shoppers, always on the lookout for new products and offers, and often use the “save for later” feature to save items, and value other factors such as marketing or promotions more than reviews.
For Cluster 2, they may only shop when they need to and use the “save for later” feature infrequently. They are cautious consumers who tend to spend more time evaluating products and making multiple comparisons based on reviews.
From a marketing perspective:
For Cluster 1, the marketing team can focus on launching frequent promotions to highlight new products. They can also prioritise marketing campaigns that emphasise offers and discounts.
For Cluster 2, marketing teams can take a more informative approach, providing detailed product information and using customer reviews to build trust.
From a product development perspective:
For Cluster 1, product development teams can focus on introducing new features related to the “Save for Later” feature or enhancing the user experience for active shoppers.
For Cluster 2, they can prioritise features to improve the review and comparison process. They could improve the usability of the review section, provide comparison tools, and provide detailed product descriptions.