library(tidyr)
library(stringr) 
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(completejourney)
## Welcome to the completejourney package! Learn more about these data
## sets at http://bit.ly/completejourney.
# Fetch the transactions table through the completejourney helper; each of
# the three analyses below starts its pipeline from this object.
transactions <- completejourney::get_transactions()
# Income shopping: the five best-selling product categories (by total sales
# value) within each income group.
income_spending <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  group_by(income, product_category) %>%
  # "drop_last" states the grouping explicitly: the result stays grouped by
  # income (which slice_max() needs) and summarise() no longer prints its
  # "grouped output" message.
  summarize(
    total_sales = sum(sales_value, na.rm = TRUE),
    .groups = "drop_last"
  ) %>%
  # Top five per income group; with the default tie handling a group can
  # return more than five rows if values tie at the cut-off.
  slice_max(total_sales, n = 5) %>%
  # Return an ungrouped table so later verbs do not silently run per group.
  ungroup() %>%
  # Sort only the few remaining rows instead of the whole summary table.
  arrange(income, desc(total_sales))
# Stacked bar chart: total sales of the top categories per income group
ggplot(
  data = income_spending,
  mapping = aes(x = income, y = total_sales, fill = product_category)
) +
  geom_col() + # bar heights come straight from total_sales, stacked by fill
  theme_minimal() +
  # Rotate the income labels on the x axis by 45 degrees
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Top Product Choices by Income Group",
    x = "Income Group",
    y = "Total Sales",
    fill = "Product Category"
  )

# Brand preference by household size
household_spending <- transactions %>%
  left_join(demographics, by = "household_id") %>%
  left_join(products, by = "product_id") %>%
  # Label every row "National Brand" or "Private Brand" from the brand column
  mutate(
    brand_type = ifelse(
      brand == "National",
      "National Brand",
      "Private Brand"
    )
  ) %>%
  # Keep only rows where both household size and brand label are known
  # (the left joins leave NA wherever a lookup table had no match)
  filter(!(is.na(household_size) | is.na(brand_type)))

# Violin plot: distribution of sales value per household size, split by
# brand type
ggplot(
  data = household_spending,
  mapping = aes(
    x = factor(household_size),
    y = sales_value,
    fill = brand_type
  )
) +
  geom_violin(alpha = 0.6) +
  theme_minimal() +
  labs(
    title = "Brand Preference by Household Size",
    x = "Household Size",
    y = "Sales Value",
    fill = "Brand Type"
  )

# Purchase frequency by age: number of transaction rows per age group, plus
# each group's share of all counted rows as a percentage
purchase_by_age <- transactions %>%
  left_join(demographics, by = "household_id") %>%
  filter(!is.na(age)) %>%
  # One row per age group with its row count; the result is ungrouped
  count(age, name = "total_purchases") %>%
  mutate(percentage = total_purchases / sum(total_purchases) * 100)

# Pie chart of purchases by age group, each slice labelled with its
# percentage (the labelling approach was worked out with help from ChatGPT)
ggplot(purchase_by_age, aes(x = "", y = total_purchases, fill = age)) +
  geom_col(width = 1) +
  coord_polar("y", start = 0) + # wrap the single stacked bar into a pie
  geom_text(
    aes(label = paste0(round(percentage, 1), "%")),
    position = position_stack(vjust = 0.5), # centre each label in its slice
    size = 3,
    color = "white"
  ) +
  labs(
    title = "Purchasing Frequency by Age Group",
    fill = "Age Group"
  ) +
  theme_void()