# Build the analysis table: every transaction row is enriched with its product
# attributes and then with the demographics of the purchasing household.
# Inner joins keep only rows that have a match on each key.
transactions_products <- inner_join(transactions, products, by = "product_id")
complete_data <- inner_join(
  transactions_products,
  demographics,
  by = "household_id"
)

# Keep only the rows whose household has kids_count above zero.
households_with_children <- filter(complete_data, kids_count > 0)

# Sum sales per product type (scaled to thousands), rank from largest to
# smallest and keep the first ten rows.
top_product_types_sales <- households_with_children %>%
  group_by(product_type) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE) / 1000) %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 10)

# Bar chart of the ten product types, ordered by descending sales.
# geom_col() draws bars whose heights are the y values as given.
ggplot(
  top_product_types_sales,
  aes(x = reorder(product_type, -total_sales), y = total_sales)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Product Types by Total Sales (in Thousands) for Households with Children",
    x = "Product Type",
    y = "Total Sales (in Thousands)"
  ) +
  theme_minimal() +
  # Tilt the x labels so long product type names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Join transactions with products and demographics
complete_data <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id")

# Identify the top product category for each age group.
# Sales are first totalled per (age, product_category) pair. Ranking the raw
# transaction rows instead would pick the category of the single largest
# transaction line and report that one line's value as "Total Sales".
top_products_by_age <- complete_data %>%
  group_by(age, product_category) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  group_by(age) %>%
  # with_ties = FALSE guarantees exactly one row per age group.
  slice_max(total_sales, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  rename(top_product = product_category) %>%
  arrange(age)

# One bar per age group: height is the total sales of that group's best-selling
# category, fill colour identifies the category. Bars are ordered by sales.
ggplot(
  top_products_by_age,
  aes(x = reorder(age, total_sales), y = total_sales, fill = top_product)
) +
  geom_col() +
  labs(
    title = "Top Product by Age",
    x = "Age",
    y = "Total Sales",
    fill = "Top Product"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Join transactions with products and demographics
transactions_products <- transactions %>%
  inner_join(products, by = "product_id")
complete_data <- transactions_products %>%
  inner_join(demographics, by = "household_id")

# Summarize data by age group and brand.
# `.groups = "drop"` returns an ungrouped result and silences the
# "summarise() has grouped output" message.
age_brand_data <- complete_data %>%
  group_by(age, brand) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  # percentage = each brand's share of the sales within its own age group
  group_by(age) %>%
  mutate(percentage = total_sales / sum(total_sales) * 100) %>%
  # Drop the grouping so later code does not inherit it unexpectedly.
  ungroup() %>%
  arrange(desc(total_sales))

# Bars are dodged rather than stacked: `percentage` is a within-age share, so
# stacking the age groups on top of each other would add up percentages taken
# from different denominators and produce bar heights above 100.
ggplot(
  age_brand_data,
  aes(x = reorder(brand, -total_sales), y = percentage, fill = age)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Brand Usage by Age Group",
    x = "Brand",
    y = "Percentage of Total Sales"
  ) +
  scale_fill_viridis_d() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
