### Peak Shopping Times for Each Age Range
# Name the data sets
# (the self-assignment of `demographics` is kept from the original; it only
# rebinds the name in the current environment)
transactions <- transactions_sample
demographics <- demographics
# Join the data using household id. A left join keeps every transaction, so
# any household missing from `demographics` gets NA in the joined columns.
joined_data <- transactions %>%
  left_join(demographics, by = "household_id")
# Isolate "hour" from transaction time stamp
joined_data <- joined_data %>%
  mutate(
    t_time = ymd_hms(transaction_timestamp),
    t_hour = hour(t_time)
  )
# Create 3-hour blocks from t_hour.
# right = FALSE makes every block closed on the left: [0, 3), [3, 6), ...
# so that t_hour == 3 (3:00-3:59) is counted in "3 AM-6 AM", matching the
# labels. With the default right = TRUE each boundary hour (3, 6, 9, ...) was
# assigned to the block *before* it. include.lowest = TRUE closes the final
# block at 24.
joined_data <- joined_data %>%
  mutate(
    hour_block = cut(
      t_hour,
      breaks = seq(0, 24, by = 3),
      labels = c(
        "Midnight-3 AM", "3 AM-6 AM", "6 AM-9 AM", "9 AM-Noon",
        "Noon- 3 PM", "3 PM-6 PM", "6 PM-9 PM", "9 PM-Midnight"
      ),
      right = FALSE,
      include.lowest = TRUE
    )
  )
# Count how many transactions fall in each 3-hour block, per age range
blocked_data <- joined_data %>%
  group_by(age, hour_block) %>%
  summarise(t_per_block = n(), .groups = "drop")
# Create a dodged bar chart: one bar per age range within each time block.
# NOTE(review): ylim() removes bars whose count is above 5000 rather than
# clipping them - confirm no age/block combination exceeds that limit.
ggplot(blocked_data, aes(x = hour_block, y = t_per_block, fill = age)) +
  geom_col(position = "dodge") +
  ylim(0, 5000) +
  labs(
    title = "Transactions by 3-Hour Time Blocks for Different Age Ranges",
    subtitle = "Insights Into the Shopping Habits of Each Age Range",
    x = "Time Block",
    y = "Total Transactions",
    fill = "Age Range",
    caption = "Source: Complete Journey Dataset (Demographics and Transactions Data)"
  ) +
  theme_minimal() +
  # Tilt the block labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Name the data sets
products <- products
transactions <- transactions_sample
# Attach product attributes to every transaction. The inner join keeps only
# rows whose product_id is present in both tables.
sales_category <- inner_join(transactions, products, by = "product_id")
# Box plot of sales value, one box per department
ggplot(
  data = sales_category,
  mapping = aes(x = department, y = sales_value)
) +
  geom_boxplot(
    fill = "purple",
    outlier.color = "green",
    outlier.size = 1.5
  ) +
  labs(
    title = "Distribution of Sales Value by Product Category",
    subtitle = "Visualizing price variance within product categories",
    x = "Product Category",
    y = "Sales Value ($)",
    caption = "Source: Complete Journey Datasets (Products and Transactions Data)"
  ) +
  theme_minimal() +
  # Tilt the department names so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Name the data sets
products <- products
transactions <- transactions_sample
# Merge data and calculate the mean discount for each department.
# The per-row discount is the magnitude of retail_disc. The earlier formula,
# abs(retail_disc - sales_value), subtracted the amount paid from the
# discount, so the plotted value was not a discount. abs() keeps the result
# non-negative whichever sign convention retail_disc uses.
# NOTE(review): only retail_disc is counted; if the data also carries
# separate coupon discount columns, confirm whether they should be added.
average_discount <- transactions %>%
  inner_join(products, by = "product_id") %>%
  mutate(discount = abs(retail_disc)) %>%
  group_by(department) %>%
  summarise(average_discount = round(mean(discount, na.rm = TRUE), 2))
# Data Visualization: departments ordered from smallest to largest average
# discount; the fill legend is hidden because the x axis already names them.
ggplot(
  average_discount,
  aes(
    x = reorder(department, average_discount),
    y = average_discount,
    fill = department
  )
) +
  geom_col(show.legend = FALSE) +
  labs(
    title = "Average Discount for Each Product Category",
    subtitle = "Comparing average discounts across various product categories",
    x = "Product Category",
    y = "Average Discount ($)",
    caption = "Source: Complete Journey Data Set (Products and Transactions Data)"
  ) +
  theme_minimal() +
  # Tilt the department names so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
