Introduction

Welcome to my visualization lab. We will explore the ‘completejourney’ dataset, an insight into customer interactions and experiences. In this report, we will employ the power of exploratory data visualization to uncover hidden patterns and trends that illuminate the dynamics of consumer behavior and satisfaction. Through a series of crafted plots, we delve into demographics, satisfaction levels, and journey duration of customers, seeking actionable insights that inform strategic decision-making. Join us as we decipher the narrative woven within the dataset, exploring the complexities of customer journeys and paving the way for enhanced engagement and organizational success.

‘Completejourney’ dataset exploration

Plot 1

library(ggplot2)
library(completejourney)
library(dplyr)
library(lubridate)

# Plot 1: total sales value of the ten best-selling product categories,
# split by household income bracket.

# Attach product attributes and household demographics to every transaction.
# left_join() keeps transactions whose household has no demographics record,
# so `income` is NA on those rows.
data_merged <- transactions_sample %>%
  left_join(products, by = "product_id") %>%
  left_join(demographics, by = "household_id")

# Sales value for every (product category, income bracket) pair.
category_sales <- data_merged %>%
  group_by(product_category, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# The ten categories with the highest sales summed over all income brackets.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties.
top_categories <- category_sales %>%
  group_by(product_category) %>%
  summarise(total_sales_value = sum(total_sales_value, na.rm = TRUE)) %>%
  slice_max(total_sales_value, n = 10) %>%
  pull(product_category)

filtered_data <- category_sales %>%
  filter(product_category %in% top_categories)

# Dodged bars: one cluster per category (ordered by descending sales),
# one bar per income bracket.
ggplot(
  filtered_data,
  aes(
    x = reorder(product_category, -total_sales_value),
    y = total_sales_value,
    fill = income
  )
) +
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Top 10 Product Categories by Household Income Levels",
    subtitle = "Total sales value for top product categories across income levels",
    x = "Product Category",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # Anchor the axis at 0 but let ggplot2 pick the upper bound from the data.
  # A fixed upper limit makes ggplot2 discard every bar taller than the limit.
  scale_y_continuous(limits = c(0, NA)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # `income` is an ordered factor with 12 brackets; the "Set2" brewer palette
  # only provides 8 colours, so the highest brackets were left without a
  # colour. The discrete viridis scale supports any number of levels.
  scale_fill_viridis_d()

Plot 2

# Plot 2: total sales value of the five best-selling product categories,
# split by household size.

# Attach product attributes and household demographics to every transaction.
# left_join() keeps transactions whose household has no demographics record,
# so `household_size` is NA on those rows.
data_merged <- transactions_sample %>%
  left_join(products, by = "product_id") %>%
  left_join(demographics, by = "household_id")

# Sales value for every (product category, household size) pair.
# `.groups = "drop"` returns an ungrouped tibble without the regrouping message.
family_category_sales <- data_merged %>%
  group_by(product_category, household_size) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# The five categories with the highest sales overall. Note that this ranking
# is computed before rows with an unknown household size are removed, so it
# also counts households that have no demographics record.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties.
top_5_categories <- family_category_sales %>%
  group_by(product_category) %>%
  summarise(total_sales_value = sum(total_sales_value, na.rm = TRUE)) %>%
  slice_max(total_sales_value, n = 5) %>%
  pull(product_category)

# Keep the top categories, drop unknown household sizes, and relabel the
# household-size codes with readable axis labels.
filtered_family_data <- family_category_sales %>%
  filter(
    product_category %in% top_5_categories,
    !is.na(household_size)
  ) %>%
  mutate(
    household_size = factor(
      household_size,
      levels = c("1", "2", "3", "4", "5+"),
      labels = c("1 person", "2 people", "3 people", "4 people", "5+ people")
    )
  )

# Dodged bars: one cluster per household size, one bar per product category.
ggplot(
  filtered_family_data,
  aes(x = household_size, y = total_sales_value, fill = product_category)
) +
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Category Spending by Household Size",
    subtitle = "Total sales value for top product categories across different household sizes",
    x = "Household Size",
    y = "Total Sales Value",
    fill = "Product Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Plot 3

# Sanity checks on the inputs used for Plot 3.

# Time span covered by the sampled transactions.
summary(transactions_sample$transaction_timestamp)
##                       Min.                    1st Qu. 
## "2017-01-01 07:30:27.0000" "2017-04-01 13:23:49.0000" 
##                     Median                       Mean 
## "2017-07-02 11:43:04.0000" "2017-07-02 09:39:57.5612" 
##                    3rd Qu.                       Max. 
## "2017-10-02 16:28:50.2500" "2017-12-31 22:47:38.0000"

# Number of transactions per weekday. The weekday is derived from the
# timestamp here because `transactions_sample` has no `day_of_week` column at
# this point, so summarising that column only reported an empty NULL.
# (Recorded output omitted; re-knit the report to regenerate it.)
summary(wday(transactions_sample$transaction_timestamp, label = TRUE))

# Number of households in each income bracket.
summary(demographics$income)
## Under 15K    15-24K    25-34K    35-49K    50-74K    75-99K  100-124K  125-149K 
##        61        74        77       172       192        96        34        38 
##  150-174K  175-199K  200-249K     250K+ 
##        30        11         5        11
# Plot 3: total sales value per day of the week, split by household income
# bracket.

# Label every transaction with the weekday of its timestamp.
transactions_sample <- transactions_sample %>%
  mutate(day_of_week = wday(transaction_timestamp, label = TRUE))

# Sales value for every (weekday, income bracket) pair. left_join() keeps
# transactions whose household has no demographics record (income is NA).
# `.groups = "drop"` returns an ungrouped tibble instead of one that is still
# grouped by day_of_week.
weekly_sales <- transactions_sample %>%
  left_join(demographics, by = "household_id") %>%
  group_by(day_of_week, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

head(weekly_sales)
## # A tibble: 6 × 3
##   day_of_week income    total_sales_value
##   <ord>       <ord>                 <dbl>
## 1 Sun         Under 15K             1569.
## 2 Sun         15-24K                1639.
## 3 Sun         25-34K                2004.
## 4 Sun         35-49K                4295.
## 5 Sun         50-74K                6151.
## 6 Sun         75-99K                3340.

# Tallest bar in the chart; used as the upper y-axis limit below.
max_sales_value <- max(weekly_sales$total_sales_value, na.rm = TRUE)

# Dodged bars: one cluster per weekday, one bar per income bracket.
ggplot(
  weekly_sales,
  aes(x = day_of_week, y = total_sales_value, fill = income)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Weekly Sales Trend by Household Income Levels",
    subtitle = "Total sales value by day of the week segmented by income",
    x = "Day of the Week",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # Derive the upper limit from the data: with a hard-coded limit, ggplot2
  # discards every bar taller than that limit.
  scale_y_continuous(limits = c(0, max_sales_value)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))