Introduction

Identifying Areas of Growth

The problem we are attempting to solve is how can increase sales value for the lowest earning month?

How We Solve The Problem

We will solve this problem by first identifying which month generated the lowest earning by sales value. Once we have identified the month, this opens up the opportunity to explore income ranges and ages of customers,stores that are under performing, and do a further break down of which week in the month is the lowest in sales value.

How This Will Drive Value

This detailed analysis of Regork’s customers that are not driving sales value during this month will allow them to create more targeted advertisements and campaigns tailored directly to them in the hopes of increasing sales value.

Importing Packages

library(completejourney)
library(tidyverse)
library(dplyr)
library(ggplot2)
library(lubridate)
transactions <- get_transactions()
transactions
demographics
products
coupons
coupon_redemptions

Identifying Lowest Month by Sales-Value

lowest_month <- transactions %>%
  mutate(month = month(transaction_timestamp, label = TRUE, abbr = TRUE)) %>%
  group_by(month) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  arrange(total_sales)

ggplot(lowest_month, aes(x = month, y = total_sales)) +
  geom_col(fill = "steelblue") +
  geom_col(
    data = lowest_month, 
    aes(x = month, y = total_sales), 
    fill = "red"
  ) +
  labs(
    title = "Monthly Sales Revenue",
    x = "Month",
    y = "Total Sales ($)"
  ) +
  theme_minimal()

lowest_month %>%
  arrange(total_sales) %>%
  slice(1:5)

Demographic Details

Now that we know February has the lowest total sales revenue, lets explore customer demographics during this month

feb_sales <- transactions %>%
  filter(month(transaction_timestamp) == 2) %>%
  inner_join(demographics, by = "household_id")

feb_summary <- feb_sales %>%
  group_by(age, income) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_sales))

ggplot(feb_summary, aes(x = income, y = total_sales, fill = age)) +
  geom_col(position = "dodge") +
  labs(
    title = "February Sales Revenue by Age & Income",
    x = "Income Level",
    y = "Total Sales ($)",
    fill = "Age Group"
  ) +
  theme_minimal()

We can see that the 50-74k income group tends to spend the most during the month of February, specifically the 45-54 years olds within this group.

feb_income_products <- transactions %>%
  filter(month(transaction_timestamp) == 2) %>%
  inner_join(demographics, by = "household_id") %>%
  filter(income == "50-74K") %>%
  inner_join(products, by = "product_id") %>%
  group_by(age,week, product_id, product_category) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

top_products_weekly <- feb_income_products %>%
  group_by(age, week) %>%
  slice_max(order_by = total_sales, n = 1) %>%
  ungroup()

ggplot(top_products_weekly, aes(x = factor(week), y = total_sales, fill = product_category)) +
  geom_col() +
  facet_wrap(~ age) +
  labs(
    title = "Top Product Each Week in February (Income 50-74K)",
    x = "Week",
    y = "Total Sales ($)",
    fill = "Product"
  ) +
  theme_minimal()

feb_coupon_misc <- transactions %>%
  filter(month(transaction_timestamp) == 2) %>%
  inner_join(products, by = "product_id") %>%
  filter(product_category == "COUPON/MISC ITEMS")

weekly_store_sales <- feb_coupon_misc %>%
  group_by(week, store_id) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

top_store_each_week <- weekly_store_sales %>%
  group_by(week) %>%
  slice_max(order_by = total_sales, n = 1) %>%
  ungroup()

ggplot(top_store_each_week, aes(x = factor(week), y = total_sales, fill = store_id)) +
  geom_col() +
  labs(
    title = "Top Store Each Week in February for Coupon/Misc Items",
    x = "Week",
    y = "Total Sales ($)",
    fill = "Store ID"
  ) +
  theme_minimal()