library(tidyverse)
library(completejourney)
transactions <- get_transactions()

products
## # A tibble: 92,331 × 7
##    product_id manufacturer_id department    brand  product_category product_type
##    <chr>      <chr>           <chr>         <fct>  <chr>            <chr>       
##  1 25671      2               GROCERY       Natio… FRZN ICE         ICE - CRUSH…
##  2 26081      2               MISCELLANEOUS Natio… <NA>             <NA>        
##  3 26093      69              PASTRY        Priva… BREAD            BREAD:ITALI…
##  4 26190      69              GROCERY       Priva… FRUIT - SHELF S… APPLE SAUCE 
##  5 26355      69              GROCERY       Priva… COOKIES/CONES    SPECIALTY C…
##  6 26426      69              GROCERY       Priva… SPICES & EXTRAC… SPICES & SE…
##  7 26540      69              GROCERY       Priva… COOKIES/CONES    TRAY PACK/C…
##  8 26601      69              DRUG GM       Priva… VITAMINS         VITAMIN - M…
##  9 26636      69              PASTRY        Priva… BREAKFAST SWEETS SW GDS: SW …
## 10 26691      16              GROCERY       Priva… PNT BTR/JELLY/J… HONEY       
## # ℹ 92,321 more rows
## # ℹ 1 more variable: package_size <chr>

demographics
## # A tibble: 801 × 8
##    household_id age   income    home_ownership marital_status household_size
##    <chr>        <ord> <ord>     <ord>          <ord>          <ord>         
##  1 1            65+   35-49K    Homeowner      Married        2             
##  2 1001         45-54 50-74K    Homeowner      Unmarried      1             
##  3 1003         35-44 25-34K    <NA>           Unmarried      1             
##  4 1004         25-34 15-24K    <NA>           Unmarried      1             
##  5 101          45-54 Under 15K Homeowner      Married        4             
##  6 1012         35-44 35-49K    <NA>           Married        5+            
##  7 1014         45-54 15-24K    <NA>           Married        4             
##  8 1015         45-54 50-74K    Homeowner      Unmarried      1             
##  9 1018         45-54 35-49K    Homeowner      Married        5+            
## 10 1020         45-54 25-34K    Homeowner      Married        2             
## # ℹ 791 more rows
## # ℹ 2 more variables: household_comp <ord>, kids_count <ord>

transactions
## # A tibble: 1,469,307 × 11
##    household_id store_id basket_id   product_id quantity sales_value retail_disc
##    <chr>        <chr>    <chr>       <chr>         <dbl>       <dbl>       <dbl>
##  1 900          330      31198570044 1095275           1        0.5         0   
##  2 900          330      31198570047 9878513           1        0.99        0.1 
##  3 1228         406      31198655051 1041453           1        1.43        0.15
##  4 906          319      31198705046 1020156           1        1.5         0.29
##  5 906          319      31198705046 1053875           2        2.78        0.8 
##  6 906          319      31198705046 1060312           1        5.49        0.5 
##  7 906          319      31198705046 1075313           1        1.5         0.29
##  8 1058         381      31198676055 985893            1        1.88        0.21
##  9 1058         381      31198676055 988791            1        1.5         1.29
## 10 1058         381      31198676055 9297106           1        2.69        0   
## # ℹ 1,469,297 more rows
## # ℹ 4 more variables: coupon_disc <dbl>, coupon_match_disc <dbl>, week <int>,
## #   transaction_timestamp <dttm>

Plot # 1

transactions %>%
  inner_join(demographics, by = "household_id") %>%  
  group_by(age, income) %>%  
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>% 
  ggplot(aes(x = age, y = total_sales)) + 
  geom_col(fill = "blue", alpha = 0.5) +  
  scale_y_continuous(name = "Total Sales per Age Group", labels = scales:: dollar)+
  scale_x_discrete(name = "Age") +
  labs(
    title = "Total Sales for Each Age Group",
    subtitle = paste("Analyzing the total sales for each age group based on income
level. It appears that sales peak at the income range of 
50-74k with an age of 45-54. The sales drop off at an income
range of 200-249K")
  )+
  facet_wrap(~income) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Plot # 2


## Filtering to find the most popular 16 OZ product 

top_16oz_product_type <- transactions %>%
  inner_join(products, by = "product_id") %>%
  filter(package_size == "16 OZ") %>%  # Filter only 16oz products
  group_by(product_type) %>%  # Group by product type
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%  # Sum sales
  arrange(desc(total_sales)) %>%  # Sort in descending order
  slice(1)


transactions %>%
  inner_join(products, by = "product_id") %>%
  filter(package_size %in% c("16 OZ", "20 OZ", "24 OZ", "32 OZ")) %>%
  ggplot(aes(x = package_size)) +
  geom_bar(fill = "orange") + 
  scale_y_continuous(name = "Count") +
  scale_x_discrete(name = "Package Size") +
  labs(title = "Count of Transactions by Package Size",
       subtitle = "After anaylysing the count of package size, it is easy to see
that the bigger the size the less that are sold. Diving deeper into the most popular
category which is 16 OZ. It was found that Strawberries were the most bought item from
the 16 OZ category. Bringing in 13563.77 dollars in total sales") 

Plot # 3

products %>%
  inner_join(transactions, by = "product_id") %>%
  filter(tolower(product_category) %in% tolower(c("COOKIES/CONES", "ICE CREAM/MILK/SHERBTS", "DRY MIX DESSERTS", "CAKES"))) %>%
  group_by(package_size, product_category, brand) %>%  
  summarise(total_sales = sum(sales_value), .groups = "drop") %>%
  ggplot(aes(x = total_sales, y = product_category)) +
  geom_point(color = "red", alpha = .4) +
  scale_x_log10(name = "Total Sales", labels = scales:: dollar)+
  scale_y_discrete(name = "Desserts") +
  facet_wrap(~brand) +
  labs(
    title = "Total Sales for Dessert Products",
    subtitle = "Sales Data based on Kroger Desserts and their Total Sales per each 
brand. What sticks out in this plot is that in the cookies/cones row the 
National sales are spread much further in price range in comparision to 
cookie/cones in Private sales"
  )