library(completejourney)
library(dplyr)
library(ggplot2)
library(lubridate)

transactions <- get_data("transactions")
Loading completejourney data sets from GitHub
Download complete. Learn more about these data sets at http://bit.ly/completejourney
sales_demo <- transactions %>%
  inner_join(demographics, by = "household_id")

# Manual axes
age_order <- c("19-24", "25-34", "35-44", "45-54", "55-64", "65+")
income_order <- c("Under 15K", "15-24K", "25-34K", "35-49K", "50-74K", "75-99K",
                  "100-124K", "125-149K", "150-174K", "175-199K", "200-249K", "250K+")

sales_demo <- sales_demo %>%
  mutate(age = factor(age, levels = age_order, ordered = TRUE),
         income = factor(income, levels = income_order, ordered = TRUE),
         year = year(transaction_timestamp))

# average sales for each income and age
avg_sales_per_year <- sales_demo %>%
  group_by(income, age, year) %>%
  summarise(sales_value = sum(sales_value, na.rm = TRUE), .groups = "drop")

# average sales for each pairing of income and age
avg_sales <- avg_sales_per_year %>%
  group_by(income, age) %>%
  summarise(sales_value = mean(sales_value, na.rm = TRUE), .groups = "drop")

# scale bubble size
max_avg_sales <- max(avg_sales$sales_value, na.rm = TRUE)
avg_sales <- avg_sales %>%
  mutate(bubble_size = (sales_value / max_avg_sales) * 500)


ggplot(avg_sales, aes(x = income, y = age, size = bubble_size, color = sales_value)) +
  geom_point(alpha = 0.8, stroke = 1) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen") +
  labs(title = "Average Total Sales Per Year by Income and Age Range",
       x = "Income",
       y = "Age Range",
       color = "Total Sales") +
  guides(size = "none") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) 

library(tidyverse)


merged_data <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  inner_join(products, by = "product_id")

# separate kids vs no kids
merged_data <- merged_data %>%
  mutate(has_kids = ifelse(kids_count > 0, "Has Kids", "No Kids"))

# remove Gasoline purchases
filtered_data <- merged_data %>%
  filter(product_type != "GASOLINE-REG UNLEADED")

# group by products and kids
top_products <- filtered_data %>%
  group_by(has_kids, product_type) %>%
  summarise(total_purchases = sum(quantity, na.rm = TRUE)) %>%
  arrange(has_kids, desc(total_purchases)) %>%
  group_by(has_kids) %>%
  slice_head(n = 10)  
`summarise()` has grouped output by 'has_kids'. You can override using the `.groups` argument.
ggplot(top_products, aes(x = reorder(product_type, -total_purchases), y = total_purchases, fill = has_kids)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Top 10 Product Types Purchased by Households (Excluding Gasoline)",
       x = "Product Type",
       y = "Total Purchases") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

NA
NA
library(patchwork)

merged_data <- transactions %>%
  inner_join(demographics, by = "household_id") %>%
  filter(!is.na(retail_disc), retail_disc > 0, sales_value > 0)  

# Calculate discount percent
merged_data <- merged_data %>%
  mutate(discount_percent = (retail_disc / (sales_value + retail_disc)) * 100)  
        

# total sales per store
store_sales <- merged_data %>%
  group_by(store_id) %>%
  summarise(total_sales = sum(sales_value, na.rm = TRUE)) %>%
  arrange(desc(total_sales))  

# top 3 and bottom 3 stores
top_3_stores <- head(store_sales, 3)$store_id
bottom_3_stores <- tail(store_sales, 3)$store_id

# filter for top and bottom stores
filtered_data <- merged_data %>%
  filter(store_id %in% c(top_3_stores, bottom_3_stores)) %>%
  filter(discount_percent <= 50) %>%
  mutate(Total_Sales_Value_Performance = ifelse(store_id %in% top_3_stores, "Top 3 Stores", "Bottom 3 Stores"))


box_plot <- ggplot(filtered_data, aes(x = Total_Sales_Value_Performance, y = discount_percent, fill = Total_Sales_Value_Performance)) +
  geom_boxplot(outlier.color = "red", outlier.shape = 16) +
  scale_fill_manual(values = c("Bottom 3 Stores" = "orange", "Top 3 Stores" = "navyblue")) + 
  labs(title = "Distribution of Discount % of Stores with Highest and Lowest Total Sales Value",
       x = "Stores",
       y = "Discount Percentage") +
  theme_minimal()
box_plot

LS0tDQp0aXRsZTogIldlZWtfNV9MYWJfQWJrdXJ0emVyIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQpgYGB7cn0NCmxpYnJhcnkoY29tcGxldGVqb3VybmV5KQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkobHVicmlkYXRlKQ0KDQp0cmFuc2FjdGlvbnMgPC0gZ2V0X2RhdGEoInRyYW5zYWN0aW9ucyIpDQoNCnNhbGVzX2RlbW8gPC0gdHJhbnNhY3Rpb25zICU+JQ0KICBpbm5lcl9qb2luKGRlbW9ncmFwaGljcywgYnkgPSAiaG91c2Vob2xkX2lkIikNCg0KIyBNYW51YWwgYXhlcw0KYWdlX29yZGVyIDwtIGMoIjE5LTI0IiwgIjI1LTM0IiwgIjM1LTQ0IiwgIjQ1LTU0IiwgIjU1LTY0IiwgIjY1KyIpDQppbmNvbWVfb3JkZXIgPC0gYygiVW5kZXIgMTVLIiwgIjE1LTI0SyIsICIyNS0zNEsiLCAiMzUtNDlLIiwgIjUwLTc0SyIsICI3NS05OUsiLA0KICAgICAgICAgICAgICAgICAgIjEwMC0xMjRLIiwgIjEyNS0xNDlLIiwgIjE1MC0xNzRLIiwgIjE3NS0xOTlLIiwgIjIwMC0yNDlLIiwgIjI1MEsrIikNCg0Kc2FsZXNfZGVtbyA8LSBzYWxlc19kZW1vICU+JQ0KICBtdXRhdGUoYWdlID0gZmFjdG9yKGFnZSwgbGV2ZWxzID0gYWdlX29yZGVyLCBvcmRlcmVkID0gVFJVRSksDQogICAgICAgICBpbmNvbWUgPSBmYWN0b3IoaW5jb21lLCBsZXZlbHMgPSBpbmNvbWVfb3JkZXIsIG9yZGVyZWQgPSBUUlVFKSwNCiAgICAgICAgIHllYXIgPSB5ZWFyKHRyYW5zYWN0aW9uX3RpbWVzdGFtcCkpDQoNCiMgYXZlcmFnZSBzYWxlcyBmb3IgZWFjaCBpbmNvbWUgYW5kIGFnZQ0KYXZnX3NhbGVzX3Blcl95ZWFyIDwtIHNhbGVzX2RlbW8gJT4lDQogIGdyb3VwX2J5KGluY29tZSwgYWdlLCB5ZWFyKSAlPiUNCiAgc3VtbWFyaXNlKHNhbGVzX3ZhbHVlID0gc3VtKHNhbGVzX3ZhbHVlLCBuYS5ybSA9IFRSVUUpLCAuZ3JvdXBzID0gImRyb3AiKQ0KDQojIGF2ZXJhZ2Ugc2FsZXMgZm9yIGVhY2ggcGFpcmluZyBvZiBpbmNvbWUgYW5kIGFnZQ0KYXZnX3NhbGVzIDwtIGF2Z19zYWxlc19wZXJfeWVhciAlPiUNCiAgZ3JvdXBfYnkoaW5jb21lLCBhZ2UpICU+JQ0KICBzdW1tYXJpc2Uoc2FsZXNfdmFsdWUgPSBtZWFuKHNhbGVzX3ZhbHVlLCBuYS5ybSA9IFRSVUUpLCAuZ3JvdXBzID0gImRyb3AiKQ0KDQojIHNjYWxlIGJ1YmJsZSBzaXplDQptYXhfYXZnX3NhbGVzIDwtIG1heChhdmdfc2FsZXMkc2FsZXNfdmFsdWUsIG5hLnJtID0gVFJVRSkNCmF2Z19zYWxlcyA8LSBhdmdfc2FsZXMgJT4lDQogIG11dGF0ZShidWJibGVfc2l6ZSA9IChzYWxlc192YWx1ZSAvIG1heF9hdmdfc2FsZXMpICogNTAwKQ0KDQoNCmdncGxvdChhdmdfc2FsZXMsIGFlcyh4ID0gaW5jb21lLCB5ID0gYWdlLCBzaXplID0gYnViYmxlX3NpemUsIGNvbG9yID0gc2FsZXNfdmFsdWUpKSArDQogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjgsIHN0cm9rZSA9IDEpICsNCiAgc2NhbGVfY29sb3JfZ3JhZGllbnQobG93ID0gImxpZ2h0Z3JlZW4iLCBoaWdoID0gImRhcmtncmVlbiIpICsNCiAgbGFicyh0aXRsZSA9ICJBdmVyYWdlIFRvdGFsIFNhbGVzIFBlciBZZWFyIGJ5IEluY29tZSBhbmQgQWdlIFJhbmdlIiwNCiAgICAgICB4ID0gIkluY29tZSIsDQogICAgICAgeSA9ICJBZ2UgUmFuZ2UiLA0KICAgICAgIGNvbG9yID0gIlRvdGFsIFNhbGVzIikgKw0KICBndWlkZXMoc2l6ZSA9ICJub25lIikgKw0KICB0aGVtZV9taW5pbWFsKCkgKw0KICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDQ1LCBoanVzdCA9IDEpKSANCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQoNCm1lcmdlZF9kYXRhIDwtIHRyYW5zYWN0aW9ucyAlPiUNCiAgaW5uZXJfam9pbihkZW1vZ3JhcGhpY3MsIGJ5ID0gImhvdXNlaG9sZF9pZCIpICU+JQ0KICBpbm5lcl9qb2luKHByb2R1Y3RzLCBieSA9ICJwcm9kdWN0X2lkIikNCg0KIyBzZXBhcmF0ZSBraWRzIHZzIG5vIGtpZHMNCm1lcmdlZF9kYXRhIDwtIG1lcmdlZF9kYXRhICU+JQ0KICBtdXRhdGUoaGFzX2tpZHMgPSBpZmVsc2Uoa2lkc19jb3VudCA+IDAsICJIYXMgS2lkcyIsICJObyBLaWRzIikpDQoNCiMgcmVtb3ZlIEdhc29saW5lIHB1cmNoYXNlcw0KZmlsdGVyZWRfZGF0YSA8LSBtZXJnZWRfZGF0YSAlPiUNCiAgZmlsdGVyKHByb2R1Y3RfdHlwZSAhPSAiR0FTT0xJTkUtUkVHIFVOTEVBREVEIikNCg0KIyBncm91cCBieSBwcm9kdWN0cyBhbmQga2lkcw0KdG9wX3Byb2R1Y3RzIDwtIGZpbHRlcmVkX2RhdGEgJT4lDQogIGdyb3VwX2J5KGhhc19raWRzLCBwcm9kdWN0X3R5cGUpICU+JQ0KICBzdW1tYXJpc2UodG90YWxfcHVyY2hhc2VzID0gc3VtKHF1YW50aXR5LCBuYS5ybSA9IFRSVUUpKSAlPiUNCiAgYXJyYW5nZShoYXNfa2lkcywgZGVzYyh0b3RhbF9wdXJjaGFzZXMpKSAlPiUNCiAgZ3JvdXBfYnkoaGFzX2tpZHMpICU+JQ0KICBzbGljZV9oZWFkKG4gPSAxMCkgIA0KDQpnZ3Bsb3QodG9wX3Byb2R1Y3RzLCBhZXMoeCA9IHJlb3JkZXIocHJvZHVjdF90eXBlLCAtdG90YWxfcHVyY2hhc2VzKSwgeSA9IHRvdGFsX3B1cmNoYXNlcywgZmlsbCA9IGhhc19raWRzKSkgKw0KICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IiwgcG9zaXRpb24gPSAiZG9kZ2UiKSArDQogIGxhYnModGl0bGUgPSAiVG9wIDEwIFByb2R1Y3QgVHlwZXMgUHVyY2hhc2VkIGJ5IEhvdXNlaG9sZHMgKEV4Y2x1ZGluZyBHYXNvbGluZSkiLA0KICAgICAgIHggPSAiUHJvZHVjdCBUeXBlIiwNCiAgICAgICB5ID0gIlRvdGFsIFB1cmNoYXNlcyIpICsNCiAgdGhlbWVfbWluaW1hbCgpICsNCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkNCg0KDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkocGF0Y2h3b3JrKQ0KDQptZXJnZWRfZGF0YSA8LSB0cmFuc2FjdGlvbnMgJT4lDQogIGlubmVyX2pvaW4oZGVtb2dyYXBoaWNzLCBieSA9ICJob3VzZWhvbGRfaWQiKSAlPiUNCiAgZmlsdGVyKCFpcy5uYShyZXRhaWxfZGlzYyksIHJldGFpbF9kaXNjID4gMCwgc2FsZXNfdmFsdWUgPiAwKSAgDQoNCiMgQ2FsY3VsYXRlIGRpc2NvdW50IHBlcmNlbnQNCm1lcmdlZF9kYXRhIDwtIG1lcmdlZF9kYXRhICU+JQ0KICBtdXRhdGUoZGlzY291bnRfcGVyY2VudCA9IChyZXRhaWxfZGlzYyAvIChzYWxlc192YWx1ZSArIHJldGFpbF9kaXNjKSkgKiAxMDApICANCiAgICAgICAgDQoNCiMgdG90YWwgc2FsZXMgcGVyIHN0b3JlDQpzdG9yZV9zYWxlcyA8LSBtZXJnZWRfZGF0YSAlPiUNCiAgZ3JvdXBfYnkoc3RvcmVfaWQpICU+JQ0KICBzdW1tYXJpc2UodG90YWxfc2FsZXMgPSBzdW0oc2FsZXNfdmFsdWUsIG5hLnJtID0gVFJVRSkpICU+JQ0KICBhcnJhbmdlKGRlc2ModG90YWxfc2FsZXMpKSAgDQoNCiMgdG9wIDMgYW5kIGJvdHRvbSAzIHN0b3Jlcw0KdG9wXzNfc3RvcmVzIDwtIGhlYWQoc3RvcmVfc2FsZXMsIDMpJHN0b3JlX2lkDQpib3R0b21fM19zdG9yZXMgPC0gdGFpbChzdG9yZV9zYWxlcywgMykkc3RvcmVfaWQNCg0KIyBmaWx0ZXIgZm9yIHRvcCBhbmQgYm90dG9tIHN0b3Jlcw0KZmlsdGVyZWRfZGF0YSA8LSBtZXJnZWRfZGF0YSAlPiUNCiAgZmlsdGVyKHN0b3JlX2lkICVpbiUgYyh0b3BfM19zdG9yZXMsIGJvdHRvbV8zX3N0b3JlcykpICU+JQ0KICBmaWx0ZXIoZGlzY291bnRfcGVyY2VudCA8PSA1MCkgJT4lDQogIG11dGF0ZShUb3RhbF9TYWxlc19WYWx1ZV9QZXJmb3JtYW5jZSA9IGlmZWxzZShzdG9yZV9pZCAlaW4lIHRvcF8zX3N0b3JlcywgIlRvcCAzIFN0b3JlcyIsICJCb3R0b20gMyBTdG9yZXMiKSkNCg0KDQpib3hfcGxvdCA8LSBnZ3Bsb3QoZmlsdGVyZWRfZGF0YSwgYWVzKHggPSBUb3RhbF9TYWxlc19WYWx1ZV9QZXJmb3JtYW5jZSwgeSA9IGRpc2NvdW50X3BlcmNlbnQsIGZpbGwgPSBUb3RhbF9TYWxlc19WYWx1ZV9QZXJmb3JtYW5jZSkpICsNCiAgZ2VvbV9ib3hwbG90KG91dGxpZXIuY29sb3IgPSAicmVkIiwgb3V0bGllci5zaGFwZSA9IDE2KSArDQogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGMoIkJvdHRvbSAzIFN0b3JlcyIgPSAib3JhbmdlIiwgIlRvcCAzIFN0b3JlcyIgPSAibmF2eWJsdWUiKSkgKyANCiAgbGFicyh0aXRsZSA9ICJEaXN0cmlidXRpb24gb2YgRGlzY291bnQgJSBvZiBTdG9yZXMgd2l0aCBIaWdoZXN0IGFuZCBMb3dlc3QgVG90YWwgU2FsZXMgVmFsdWUiLA0KICAgICAgIHggPSAiU3RvcmVzIiwNCiAgICAgICB5ID0gIkRpc2NvdW50IFBlcmNlbnRhZ2UiKSArDQogIHRoZW1lX21pbmltYWwoKQ0KYm94X3Bsb3QNCg0KYGBgDQoNCg0K