Welcome to my visualization lab. We will explore the ‘completejourney’ dataset, an insight into customer interactions and experiences. In this report, we will employ the power of exploratory data visualization to uncover hidden patterns and trends that illuminate the dynamics of consumer behavior and satisfaction. Through a series of crafted plots, we delve into demographics, satisfaction levels, and journey duration of customers, seeking actionable insights that inform strategic decision-making. Join us as we decipher the narrative woven within the dataset, exploring the complexities of customer journeys and paving the way for enhanced engagement and organizational success.
library(ggplot2)
library(completejourney)
library(dplyr)
library(lubridate)
# Top product categories by household income ----

# Attach product attributes and household demographics to every sampled
# transaction. Both joins are left joins, so transactions whose household has
# no demographics row keep NA in the demographic columns (including `income`).
data_merged <- transactions_sample %>%
  left_join(products, by = "product_id") %>%
  left_join(demographics, by = "household_id")

# Total sales value for each product category / income bracket pair.
category_sales <- data_merged %>%
  group_by(product_category, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# The ten categories with the largest overall sales. slice_max() replaces the
# superseded top_n(); like top_n() it keeps ties by default.
top_categories <- category_sales %>%
  group_by(product_category) %>%
  summarise(total_sales_value = sum(total_sales_value, na.rm = TRUE)) %>%
  slice_max(total_sales_value, n = 10) %>%
  pull(product_category)

# Keep only the per-income rows that belong to those top categories.
filtered_data <- category_sales %>%
  filter(product_category %in% top_categories)

ggplot(
  filtered_data,
  aes(
    x = reorder(product_category, -total_sales_value),
    y = total_sales_value,
    fill = income
  )
) +
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Top 10 Product Categories by Household Income Levels",
    subtitle = "Total sales value for top product categories across income levels",
    x = "Product Category",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # Anchor the axis at zero but leave the upper limit to the data: with a fixed
  # upper limit, ggplot2 removes every bar whose value exceeds it.
  scale_y_continuous(limits = c(0, NA)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # `income` has 12 brackets; "Set2" provides only 8 colours, which leaves the
  # remaining brackets on the NA fill. "Set3" provides 12.
  scale_fill_brewer(palette = "Set3")
# Category spending by household size ----

# Sampled transactions joined to product attributes and household
# demographics (left joins: unmatched rows keep NA in the joined columns).
data_merged <- transactions_sample %>%
  left_join(products, by = "product_id") %>%
  left_join(demographics, by = "household_id")

# Total sales value for each product category / household size pair.
# `.groups = "drop"` returns an ungrouped table directly, matching the
# income chart's summarise() and avoiding the regrouping message.
family_category_sales <- data_merged %>%
  group_by(product_category, household_size) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE),
    .groups = "drop"
  )

# The five categories with the largest overall sales. slice_max() replaces the
# superseded top_n(); like top_n() it keeps ties by default.
top_5_categories <- family_category_sales %>%
  group_by(product_category) %>%
  summarise(total_sales_value = sum(total_sales_value, na.rm = TRUE)) %>%
  slice_max(total_sales_value, n = 5) %>%
  pull(product_category)

# Restrict to the top categories and drop rows with no household size.
filtered_family_data <- family_category_sales %>%
  filter(product_category %in% top_5_categories) %>%
  filter(!is.na(household_size))

# Relabel the household-size levels for display on the x axis.
filtered_family_data$household_size <- factor(
  filtered_family_data$household_size,
  levels = c("1", "2", "3", "4", "5+"),
  labels = c("1 person", "2 people", "3 people", "4 people", "5+ people")
)

ggplot(
  filtered_family_data,
  aes(x = household_size, y = total_sales_value, fill = product_category)
) +
  # geom_col() is the direct form of geom_bar(stat = "identity") and matches
  # the other charts in this report.
  geom_col(position = position_dodge(width = 0.8)) +
  labs(
    title = "Category Spending by Household Size",
    subtitle = "Total sales value for top product categories across different household sizes",
    x = "Household Size",
    y = "Total Sales Value",
    fill = "Product Category"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Quick sanity checks on the columns used by the weekly sales chart.
# Lines starting with `##` are the recorded console output of each call.

# Range and quartiles of the sampled transaction timestamps.
summary(transactions_sample$transaction_timestamp)
## Min. 1st Qu.
## "2017-01-01 07:30:27.0000" "2017-04-01 13:23:49.0000"
## Median Mean
## "2017-07-02 11:43:04.0000" "2017-07-02 09:39:57.5612"
## 3rd Qu. Max.
## "2017-10-02 16:28:50.2500" "2017-12-31 22:47:38.0000"
# NOTE(review): the recorded result is NULL (length 0) because `day_of_week`
# is not a column of transactions_sample yet at this point; the mutate() that
# creates it only runs later in the script. Move this check after that
# mutate() if a real weekday summary is wanted.
summary(transactions_sample$day_of_week)
## Length Class Mode
## 0 NULL NULL
# Row counts per income bracket in the demographics table (12 brackets).
summary(demographics$income)
## Under 15K 15-24K 25-34K 35-49K 50-74K 75-99K 100-124K 125-149K
## 61 74 77 172 192 96 34 38
## 150-174K 175-199K 200-249K 250K+
## 30 11 5 11
# Weekly sales by income bracket ----

# Add a labelled weekday column derived from each transaction's timestamp.
transactions_sample <- mutate(
  transactions_sample,
  day_of_week = wday(transaction_timestamp, label = TRUE)
)

# Sum sales value for every weekday / income bracket combination.
# summarise() is left on its default grouping behaviour, so the result stays
# grouped by `day_of_week`.
weekly_sales <- transactions_sample %>%
  left_join(demographics, by = "household_id") %>%
  group_by(day_of_week, income) %>%
  summarise(
    total_sales_value = sum(sales_value, na.rm = TRUE)
  )

# Preview the first six rows.
head(weekly_sales, n = 6)
## # A tibble: 6 × 3
## # Groups: day_of_week [1]
## day_of_week income total_sales_value
## <ord> <ord> <dbl>
## 1 Sun Under 15K 1569.
## 2 Sun 15-24K 1639.
## 3 Sun 25-34K 2004.
## 4 Sun 35-49K 4295.
## 5 Sun 50-74K 6151.
## 6 Sun 75-99K 3340.
# Largest weekday / income total; used below as the upper y-axis limit.
max_sales_value <- max(weekly_sales$total_sales_value, na.rm = TRUE)

ggplot(
  weekly_sales,
  aes(x = day_of_week, y = total_sales_value, fill = income)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Weekly Sales Trend by Household Income Levels",
    subtitle = "Total sales value by day of the week segmented by income",
    x = "Day of the Week",
    y = "Total Sales Value",
    fill = "Income Level"
  ) +
  # Derive the upper limit from the data instead of a fixed 8000: with a fixed
  # cap, ggplot2 removes every bar whose value exceeds it.
  scale_y_continuous(limits = c(0, max_sales_value)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))