income_products <- transactions %>%
  inner_join(products, by = "product_id") %>%
  inner_join(demographics, by = "household_id") %>%
  mutate(
    income_group = case_when(
      income %in% c("Under 15K", "15-24K", "25-34K") ~ "34k or less",
      income %in% c("35-49K", "50-74K") ~ "35-74K",
      income %in% c("75-99K", "100-124K", "125-149K") ~ "75-149K",
      income %in% c("150-174K", "175-199K", "200-249K", "250K+") ~ "150K or more",
    )) %>% 
  group_by(income_group, product_category) %>% 
  summarize(total_spent = sum(quantity, na.rm = TRUE), .groups = "drop") %>%
  arrange(income_group, desc(total_spent)) %>%  
  group_by(income_group) %>%
  filter(product_category != "COUPON/MISC ITEMS") %>%
  slice_max(total_spent, n = 5) 

ggplot(income_products, aes(x = reorder(product_category, total_spent), y = total_spent, fill = income_group)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  facet_wrap(~income_group, scales = "free_y") +  # Facet per ogni fascia di reddito
  labs(
    title = "Top 5 most mold products by income level",
    x = "Product",
    y = "Total Quantity Sold",
    caption = "The product category 'COUPON/MISC ITEMS' has the highest selling in each income group,\nhence it has not been included in the comparison"
  ) +
  theme_minimal() + 
   theme(
    plot.title = element_text(size = 12, face = "bold", margin = margin(b = 20)),
    strip.text = element_text(size = 9, face = "bold"),
    plot.caption = element_text(size = 7, hjust = 0, margin = margin(t = 30)),
    axis.title.x = element_text(margin = margin(t = 15)),  
    axis.title.y = element_text(margin = margin(r = 10))  
  )

discount_effectiveness <- transactions %>%
  inner_join(products, by = "product_id") %>%
  mutate(discounted = ifelse(coupon_disc > 0 | retail_disc > 0, "Discounted", "Full Price")) %>%
  group_by(product_category, discounted) %>%
  summarize(count = n(), .groups = "drop") %>%
  mutate(pct = count / sum(count) * 100) %>%
  filter(discounted == "Discounted") %>%
  arrange(desc(pct)) %>%
  slice_max(pct, n = 10) 

# Plot
ggplot(discount_effectiveness, aes(x = reorder(product_category, pct), y = pct, fill = pct)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Discount usage by product category",
    x = "Product Category",
    y = "Percentage of Transactions Using a Discount (%)",
    caption = "The study has been limited to the first 10 categories with the highest %discount usage",
    fill = "Discount Usage (%)"
  ) +
  theme_minimal() + 
  theme(
    plot.title = element_text(size = 12, face = "bold", margin = margin(b = 20)),
    strip.text = element_text(size = 9, face = "bold"),
    plot.caption = element_text(size = 7, hjust = 0, margin = margin(t = 30)),
    axis.title.x = element_text(margin = margin(t = 15)),  
    axis.title.y = element_text(margin = margin(r = 10))  
  )

soft_drinks_trend <- transactions %>%
  left_join(products, by = "product_id") %>%
  filter(product_category == "SOFT DRINKS") %>%
  mutate(month = floor_date(transaction_timestamp, "month")) %>%
  group_by(month) %>%
  summarize(total_sales = sum(sales_value, na.rm = TRUE), .groups = "drop")

# Creazione del grafico a linee
ggplot(soft_drinks_trend, aes(x = month, y = total_sales)) +
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red", size = 2) + 
  labs(
    title = "Monthly sales of soft srinks",
    subtitle = "Aggregated sales per month to identify seasonality",
    x = "Month",
    y = "Total Sales ($)",
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 12, hjust = 0.5, face = "bold"),
    plot.subtitle = element_text(size = 10, hjust = 0.5, margin = margin(b = 15)),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),
  )

LS0tCnRpdGxlOiAiTGFiIDUgLSBHaWFjb21vIFJ1YmVydGkiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OiBodG1sX25vdGVib29rCi0tLQoKYGBge3IsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoY29tcGxldGVqb3VybmV5KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoZ2dwbG90MikKdHJhbnNhY3Rpb25zIDwtIGdldF90cmFuc2FjdGlvbnMoKQpgYGAKCgpgYGB7cn0KaW5jb21lX3Byb2R1Y3RzIDwtIHRyYW5zYWN0aW9ucyAlPiUKICBpbm5lcl9qb2luKHByb2R1Y3RzLCBieSA9ICJwcm9kdWN0X2lkIikgJT4lCiAgaW5uZXJfam9pbihkZW1vZ3JhcGhpY3MsIGJ5ID0gImhvdXNlaG9sZF9pZCIpICU+JQogIG11dGF0ZSgKICAgIGluY29tZV9ncm91cCA9IGNhc2Vfd2hlbigKICAgICAgaW5jb21lICVpbiUgYygiVW5kZXIgMTVLIiwgIjE1LTI0SyIsICIyNS0zNEsiKSB+ICIzNGsgb3IgbGVzcyIsCiAgICAgIGluY29tZSAlaW4lIGMoIjM1LTQ5SyIsICI1MC03NEsiKSB+ICIzNS03NEsiLAogICAgICBpbmNvbWUgJWluJSBjKCI3NS05OUsiLCAiMTAwLTEyNEsiLCAiMTI1LTE0OUsiKSB+ICI3NS0xNDlLIiwKICAgICAgaW5jb21lICVpbiUgYygiMTUwLTE3NEsiLCAiMTc1LTE5OUsiLCAiMjAwLTI0OUsiLCAiMjUwSysiKSB+ICIxNTBLIG9yIG1vcmUiLAogICAgKSkgJT4lIAogIGdyb3VwX2J5KGluY29tZV9ncm91cCwgcHJvZHVjdF9jYXRlZ29yeSkgJT4lIAogIHN1bW1hcml6ZSh0b3RhbF9zcGVudCA9IHN1bShxdWFudGl0eSwgbmEucm0gPSBUUlVFKSwgLmdyb3VwcyA9ICJkcm9wIikgJT4lCiAgYXJyYW5nZShpbmNvbWVfZ3JvdXAsIGRlc2ModG90YWxfc3BlbnQpKSAlPiUgIAogIGdyb3VwX2J5KGluY29tZV9ncm91cCkgJT4lCiAgZmlsdGVyKHByb2R1Y3RfY2F0ZWdvcnkgIT0gIkNPVVBPTi9NSVNDIElURU1TIikgJT4lCiAgc2xpY2VfbWF4KHRvdGFsX3NwZW50LCBuID0gNSkgCgpnZ3Bsb3QoaW5jb21lX3Byb2R1Y3RzLCBhZXMoeCA9IHJlb3JkZXIocHJvZHVjdF9jYXRlZ29yeSwgdG90YWxfc3BlbnQpLCB5ID0gdG90YWxfc3BlbnQsIGZpbGwgPSBpbmNvbWVfZ3JvdXApKSArCiAgZ2VvbV9jb2woc2hvdy5sZWdlbmQgPSBGQUxTRSkgKwogIGNvb3JkX2ZsaXAoKSArCiAgZmFjZXRfd3JhcCh+aW5jb21lX2dyb3VwLCBzY2FsZXMgPSAiZnJlZV95IikgKyAgIyBGYWNldCBwZXIgb2duaSBmYXNjaWEgZGkgcmVkZGl0bwogIGxhYnMoCiAgICB0aXRsZSA9ICJUb3AgNSBtb3N0IG1vbGQgcHJvZHVjdHMgYnkgaW5jb21lIGxldmVsIiwKICAgIHggPSAiUHJvZHVjdCIsCiAgICB5ID0gIlRvdGFsIFF1YW50aXR5IFNvbGQiLAogICAgY2FwdGlvbiA9ICJUaGUgcHJvZHVjdCBjYXRlZ29yeSAnQ09VUE9OL01JU0MgSVRFTVMnIGhhcyB0aGUgaGlnaGVzdCBzZWxsaW5nIGluIGVhY2ggaW5jb21lIGdyb3VwLFxuaGVuY2UgaXQgaGFzIG5vdCBiZWVuIGluY2x1ZGVkIGluIHRoZSBjb21wYXJpc29uIgogICkgKwogIHRoZW1lX21pbmltYWwoKSArIAogICB0aGVtZSgKICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoc2l6ZSA9IDEyLCBmYWNlID0gImJvbGQiLCBtYXJnaW4gPSBtYXJnaW4oYiA9IDIwKSksCiAgICBzdHJpcC50ZXh0ID0gZWxlbWVudF90ZXh0KHNpemUgPSA5LCBmYWNlID0gImJvbGQiKSwKICAgIHBsb3QuY2FwdGlvbiA9IGVsZW1lbnRfdGV4dChzaXplID0gNywgaGp1c3QgPSAwLCBtYXJnaW4gPSBtYXJnaW4odCA9IDMwKSksCiAgICBheGlzLnRpdGxlLnggPSBlbGVtZW50X3RleHQobWFyZ2luID0gbWFyZ2luKHQgPSAxNSkpLCAgCiAgICBheGlzLnRpdGxlLnkgPSBlbGVtZW50X3RleHQobWFyZ2luID0gbWFyZ2luKHIgPSAxMCkpICAKICApCgpgYGAKCgoKYGBge3J9CmRpc2NvdW50X2VmZmVjdGl2ZW5lc3MgPC0gdHJhbnNhY3Rpb25zICU+JQogIGlubmVyX2pvaW4ocHJvZHVjdHMsIGJ5ID0gInByb2R1Y3RfaWQiKSAlPiUKICBtdXRhdGUoZGlzY291bnRlZCA9IGlmZWxzZShjb3Vwb25fZGlzYyA+IDAgfCByZXRhaWxfZGlzYyA+IDAsICJEaXNjb3VudGVkIiwgIkZ1bGwgUHJpY2UiKSkgJT4lCiAgZ3JvdXBfYnkocHJvZHVjdF9jYXRlZ29yeSwgZGlzY291bnRlZCkgJT4lCiAgc3VtbWFyaXplKGNvdW50ID0gbigpLCAuZ3JvdXBzID0gImRyb3AiKSAlPiUKICBtdXRhdGUocGN0ID0gY291bnQgLyBzdW0oY291bnQpICogMTAwKSAlPiUKICBmaWx0ZXIoZGlzY291bnRlZCA9PSAiRGlzY291bnRlZCIpICU+JQogIGFycmFuZ2UoZGVzYyhwY3QpKSAlPiUKICBzbGljZV9tYXgocGN0LCBuID0gMTApIAoKIyBQbG90CmdncGxvdChkaXNjb3VudF9lZmZlY3RpdmVuZXNzLCBhZXMoeCA9IHJlb3JkZXIocHJvZHVjdF9jYXRlZ29yeSwgcGN0KSwgeSA9IHBjdCwgZmlsbCA9IHBjdCkpICsKICBnZW9tX2NvbCgpICsKICBjb29yZF9mbGlwKCkgKwogIGxhYnMoCiAgICB0aXRsZSA9ICJEaXNjb3VudCB1c2FnZSBieSBwcm9kdWN0IGNhdGVnb3J5IiwKICAgIHggPSAiUHJvZHVjdCBDYXRlZ29yeSIsCiAgICB5ID0gIlBlcmNlbnRhZ2Ugb2YgVHJhbnNhY3Rpb25zIFVzaW5nIGEgRGlzY291bnQgKCUpIiwKICAgIGNhcHRpb24gPSAiVGhlIHN0dWR5IGhhcyBiZWVuIGxpbWl0ZWQgdG8gdGhlIGZpcnN0IDEwIGNhdGVnb3JpZXMgd2l0aCB0aGUgaGlnaGVzdCAlZGlzY291bnQgdXNhZ2UiLAogICAgZmlsbCA9ICJEaXNjb3VudCBVc2FnZSAoJSkiCiAgKSArCiAgdGhlbWVfbWluaW1hbCgpICsgCiAgdGhlbWUoCiAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAxMiwgZmFjZSA9ICJib2xkIiwgbWFyZ2luID0gbWFyZ2luKGIgPSAyMCkpLAogICAgc3RyaXAudGV4dCA9IGVsZW1lbnRfdGV4dChzaXplID0gOSwgZmFjZSA9ICJib2xkIiksCiAgICBwbG90LmNhcHRpb24gPSBlbGVtZW50X3RleHQoc2l6ZSA9IDcsIGhqdXN0ID0gMCwgbWFyZ2luID0gbWFyZ2luKHQgPSAzMCkpLAogICAgYXhpcy50aXRsZS54ID0gZWxlbWVudF90ZXh0KG1hcmdpbiA9IG1hcmdpbih0ID0gMTUpKSwgIAogICAgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KG1hcmdpbiA9IG1hcmdpbihyID0gMTApKSAgCiAgKQpgYGAKCmBgYHtyfQpzb2Z0X2RyaW5rc190cmVuZCA8LSB0cmFuc2FjdGlvbnMgJT4lCiAgbGVmdF9qb2luKHByb2R1Y3RzLCBieSA9ICJwcm9kdWN0X2lkIikgJT4lCiAgZmlsdGVyKHByb2R1Y3RfY2F0ZWdvcnkgPT0gIlNPRlQgRFJJTktTIikgJT4lCiAgbXV0YXRlKG1vbnRoID0gZmxvb3JfZGF0ZSh0cmFuc2FjdGlvbl90aW1lc3RhbXAsICJtb250aCIpKSAlPiUKICBncm91cF9ieShtb250aCkgJT4lCiAgc3VtbWFyaXplKHRvdGFsX3NhbGVzID0gc3VtKHNhbGVzX3ZhbHVlLCBuYS5ybSA9IFRSVUUpLCAuZ3JvdXBzID0gImRyb3AiKQoKIyBDcmVhemlvbmUgZGVsIGdyYWZpY28gYSBsaW5lZQpnZ3Bsb3Qoc29mdF9kcmlua3NfdHJlbmQsIGFlcyh4ID0gbW9udGgsIHkgPSB0b3RhbF9zYWxlcykpICsKICBnZW9tX2xpbmUoY29sb3IgPSAiYmx1ZSIsIGxpbmV3aWR0aCA9IDEpICsKICBnZW9tX3BvaW50KGNvbG9yID0gInJlZCIsIHNpemUgPSAyKSArIAogIGxhYnMoCiAgICB0aXRsZSA9ICJNb250aGx5IHNhbGVzIG9mIHNvZnQgc3JpbmtzIiwKICAgIHN1YnRpdGxlID0gIkFnZ3JlZ2F0ZWQgc2FsZXMgcGVyIG1vbnRoIHRvIGlkZW50aWZ5IHNlYXNvbmFsaXR5IiwKICAgIHggPSAiTW9udGgiLAogICAgeSA9ICJUb3RhbCBTYWxlcyAoJCkiLAogICkgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUoCiAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KHNpemUgPSAxMiwgaGp1c3QgPSAwLjUsIGZhY2UgPSAiYm9sZCIpLAogICAgcGxvdC5zdWJ0aXRsZSA9IGVsZW1lbnRfdGV4dChzaXplID0gMTAsIGhqdXN0ID0gMC41LCBtYXJnaW4gPSBtYXJnaW4oYiA9IDE1KSksCiAgICBheGlzLnRpdGxlLnggPSBlbGVtZW50X3RleHQobWFyZ2luID0gbWFyZ2luKHQgPSAxMCkpLAogICAgYXhpcy50aXRsZS55ID0gZWxlbWVudF90ZXh0KG1hcmdpbiA9IG1hcmdpbihyID0gMTApKSwKICApCgpgYGAKCg==