# ---- Top product types for households with children ----

# Attach product attributes to every transaction, then the household
# demographics. Inner joins drop rows without a match on the join key.
transactions_products <- inner_join(transactions, products, by = "product_id")
complete_data <- inner_join(
  transactions_products,
  demographics,
  by = "household_id"
)

# Keep only rows belonging to households with children
households_with_children <- filter(complete_data, kids_count > 0)

# Sales per product type, scaled to thousands; keep the ten largest
top_product_types_sales <- households_with_children %>%
  group_by(product_type) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE) / 1000) %>%
  arrange(desc(total_sales)) %>%
  slice_head(n = 10)

# Bar chart of those ten product types, tallest bar first
ggplot(
  top_product_types_sales,
  aes(x = reorder(product_type, -total_sales), y = total_sales)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Product Types by Total Sales (in Thousands) for Households with Children",
    x = "Product Type",
    y = "Total Sales (in Thousands)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Join transactions with products and demographics
complete_data <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id")

# Identify the top product category for each age group.
# Sales are first summed per (age, product_category); the category with the
# largest total is then kept for each age group. Picking
# `product_category[which.max(sales_value)]` instead would only report the
# category of the single largest transaction row, not the best-selling category.
top_products_by_age <- complete_data %>%
  group_by(age, product_category) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  group_by(age) %>%
  # with_ties = FALSE guarantees exactly one row per age group
  slice_max(total_sales, n = 1, with_ties = FALSE) %>%
  ungroup() %>%
  rename(top_product = product_category) %>%
  arrange(age)

# One bar per age group: height is the total sales of that group's top
# category, fill colour identifies the category.
ggplot(
  top_products_by_age,
  aes(x = reorder(age, total_sales), y = total_sales, fill = top_product)
) +
  geom_col() +
  labs(
    title = "Top Product by Age",
    x = "Age",
    y = "Total Sales",
    fill = "Top Product"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Join transactions with products and demographics
transactions_products <- transactions %>%
  inner_join(products, by = "product_id")

complete_data <- transactions_products %>%
  inner_join(demographics, by = "household_id")

# Summarize data by age group and brand.
# total_sales: summed sales for each (age, brand) pair.
# percentage:  that pair's share of all sales within the same age group.
age_brand_data <- complete_data %>%
  group_by(age, brand) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "summarise() has grouped output" message
  summarise(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop") %>%
  group_by(age) %>%
  mutate(percentage = total_sales / sum(total_sales) * 100) %>%
  # Drop the grouping so later steps do not inherit it by accident
  ungroup() %>%
  arrange(desc(total_sales))

# Stacked bars: one bar per brand, one segment per age group.
# NOTE(review): each segment is a share of its own age group's sales, so the
# stacked height of a bar adds percentages with different denominators --
# consider position = "dodge" if bars should be compared across age groups.
ggplot(
  age_brand_data,
  aes(x = reorder(brand, -total_sales), y = percentage, fill = age)
) +
  geom_col(position = "stack") +
  labs(
    title = "Brand Usage by Age Group",
    x = "Brand",
    y = "Percentage of Total Sales"
  ) +
  scale_fill_viridis_d() +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))