filtered_trans <- demographics %>%
  left_join(transactions) %>%
  mutate(transaction_quarter = quarter(transaction_timestamp, type = "year.quarter"))
## Joining, by = "household_id"
full_view <- filtered_trans %>%
  inner_join(products, key = "product_id") %>%
  inner_join(coupon_redemptions) %>%
  inner_join(coupons) %>%
  inner_join(campaign_descriptions) %>%
  inner_join(campaigns)
## Joining, by = "product_id"
## Joining, by = "household_id"
## Joining, by = c("product_id", "coupon_upc", "campaign_id")
## Joining, by = "campaign_id"
## Joining, by = c("household_id", "campaign_id")
savings_view <- full_view %>%
  dplyr::mutate(coupon_savings = (coupon_disc + coupon_match_disc)) %>%
  dplyr::mutate(customer_amount = (sales_value - (coupon_savings))) %>%
#  dplyr::mutate(campaign_id = as.integer(campaign_id)) %>%
  group_by(transaction_quarter, week, household_id, household_size, campaign_id, campaign_type, age, income, marital_status, quantity, sales_value) %>%
  summarise(total_saving = sum(coupon_savings))
## `summarise()` has grouped output by 'transaction_quarter', 'week',
## 'household_id', 'household_size', 'campaign_id', 'campaign_type', 'age',
## 'income', 'marital_status', 'quantity'. You can override using the `.groups`
## argument.
test1 <- full_view %>% 
  count(campaign_type, household_size) %>%
  group_by(household_size) %>%
  mutate(pct = n/ sum(n))


test2 <- savings_view %>% 
  count(campaign_type, household_size) %>%
  group_by(household_size) %>%
  mutate(pct = n/ sum(n))


test3 <- full_view %>% #table by age
  count(campaign_type, age) %>%
  group_by(age) %>%
  mutate(pct = n/ sum(n))


test1 %>% 
  ggplot(aes(household_size, fill = campaign_type)) +
  geom_col(aes(household_size, pct)) +
  labs(title = "Campaigns per household (percentage)", subtitle = "subtitle") +
  theme_wsj() +
  theme(text = element_text(size = 7), axis.text = element_text(size = 7), 
        plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
  scale_y_continuous()

test2 %>% 
  ggplot(aes(household_size, fill = campaign_type)) +
  geom_col(aes(household_size, pct)) +
  labs(title = "Campaigns per household (percentage)", subtitle = "subtitle") +
  theme_wsj() +
  theme(text = element_text(size = 7), axis.text = element_text(size = 7), 
        plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
  scale_y_continuous()

test1
## # A tibble: 15 × 4
## # Groups:   household_size [5]
##    campaign_type household_size     n     pct
##    <ord>         <ord>          <int>   <dbl>
##  1 Type A        1              17214 0.980  
##  2 Type A        2              24300 0.976  
##  3 Type A        3               9392 0.965  
##  4 Type A        4               6105 0.939  
##  5 Type A        5+              5893 0.973  
##  6 Type B        1                286 0.0163 
##  7 Type B        2                489 0.0196 
##  8 Type B        3                277 0.0285 
##  9 Type B        4                357 0.0549 
## 10 Type B        5+               145 0.0240 
## 11 Type C        1                 61 0.00347
## 12 Type C        2                102 0.00410
## 13 Type C        3                 60 0.00617
## 14 Type C        4                 37 0.00569
## 15 Type C        5+                16 0.00264
test3
## # A tibble: 18 × 4
## # Groups:   age [6]
##    campaign_type age       n     pct
##    <ord>         <ord> <int>   <dbl>
##  1 Type A        19-24  1198 0.958  
##  2 Type A        25-34 10598 0.954  
##  3 Type A        35-44 16282 0.967  
##  4 Type A        45-54 27033 0.977  
##  5 Type A        55-64  4012 0.989  
##  6 Type A        65+    3781 0.991  
##  7 Type B        19-24    41 0.0328 
##  8 Type B        25-34   456 0.0410 
##  9 Type B        35-44   509 0.0302 
## 10 Type B        45-54   484 0.0175 
## 11 Type B        55-64    39 0.00962
## 12 Type B        65+      25 0.00655
## 13 Type C        19-24    11 0.0088 
## 14 Type C        25-34    58 0.00522
## 15 Type C        35-44    47 0.00279
## 16 Type C        45-54   145 0.00524
## 17 Type C        55-64     5 0.00123
## 18 Type C        65+      10 0.00262
test3 %>% 
  ggplot(aes(age, fill = campaign_type)) +
  geom_col(aes(age, pct)) +
  labs(title = "Campaigns per age group (percentage)", subtitle = "subtitle") +
  theme_wsj() +
  theme(text = element_text(size = 7), axis.text = element_text(size = 7), 
        plot.title = element_text(size = 14), plot.subtitle = element_text(size = 9)) +
  scale_y_continuous()

full_view %>% 
  count(campaign_type, household_size) %>%
  mutate(pct = n / sum(n)) %>%  # convert to proportions
  ggplot(aes(reorder(campaign_type, pct), pct)) +
  geom_col() +
  coord_flip() +  # now x becomes y
  labs(x = "campaign type", y = "Relative frequency") +
  scale_y_continuous(labels = scales::percent) +
  ggtitle("Percent of whole")

filtered_trans %>% #plot of all transactions
  select(sales_value, quantity, household_size) %>%
  filter(quantity < 40000, sales_value < 400) %>%
  ggplot(aes(x = quantity, y = sales_value, color = household_size)) +
  geom_point() + 
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by household size", subtitle = "OMG")

filtered_trans %>% #plot of all transactions
  select(sales_value, quantity, age) %>%
  filter(quantity < 40000, sales_value < 400) %>%
  ggplot(aes(x = quantity, y = sales_value, color = age)) +
  geom_point() + 
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by age", subtitle = "OMG")

savings_view %>%
  ggplot(aes(x = quantity, y = sales_value, color = household_size)) +
  geom_point() + 
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by household size", subtitle = "OMG")

savings_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  group_by(household_size, total_saving, transaction_quarter) %>%
  ggplot(aes(y = total_saving, x = transaction_quarter, fill = household_size)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "savings by household size", subtitle = "OMG")

savings_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_saving, x = transaction_quarter, fill = age)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "savings by age", subtitle = "OMG")

savings_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_saving, x = transaction_quarter, fill = marital_status)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "savings by marital status", subtitle = "OMG")

savings_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_saving, x = transaction_quarter, fill = income)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "savings by income", subtitle = "OMG")

savings_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_saving, x = week, color = campaign_type)) +
  geom_point(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "savings by campaign", subtitle = "OMG")
## Warning: Width not defined. Set with `position_dodge(width = ?)`

#spend

spend_view <- full_view %>%
  dplyr::mutate(coupon_savings = (coupon_disc + coupon_match_disc)) %>%
  dplyr::mutate(customer_amount = (sales_value - (coupon_savings))) %>%
#  dplyr::mutate(campaign_id = as.integer(campaign_id)) %>%
  group_by(transaction_quarter, week, household_id, household_size, campaign_id, campaign_type, age, income, marital_status) %>%
  summarise(total_spend = sum(customer_amount))
## `summarise()` has grouped output by 'transaction_quarter', 'week',
## 'household_id', 'household_size', 'campaign_id', 'campaign_type', 'age',
## 'income'. You can override using the `.groups` argument.
spend_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = transaction_quarter, fill = household_size)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by household size", subtitle = "OMG")

spend_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = transaction_quarter, fill = age)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by age", subtitle = "OMG")

spend_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = transaction_quarter, fill = marital_status)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by marital status", subtitle = "OMG")

spend_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = transaction_quarter, fill = income)) +
  geom_bar(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by income", subtitle = "OMG")

spend_view %>% 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = week, color = campaign_type)) +
  geom_point(position = 'dodge', stat = 'identity') +
  scale_y_continuous(labels = scales::dollar) +
  labs(title = "spend by campaign", subtitle = "OMG")
## Warning: Width not defined. Set with `position_dodge(width = ?)`

spend_view %>%  #line view
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = week)) +
  geom_line(data = spend_view, aes(week, total_spend, color = income)) +
  scale_y_continuous(labels = scales::dollar) +
  theme(legend.justification = 'centre',
                legend.position = 'bottom',
                legend.direction = "horizontal",
                legend.key.height = unit(0.5, "cm"),
                legend.key.width = unit(0.5,"cm")) +
  labs(title = "spend by income", subtitle = "OMG")

spend_view %>%  #line view
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = total_spend, x = week)) +
  geom_line(data = spend_view, aes(week, total_spend, color = household_size)) +
  scale_y_continuous(labels = scales::dollar) +
  theme(legend.justification = 'centre',
                legend.position = 'bottom',
                legend.direction = "horizontal",
                legend.key.height = unit(0.5, "cm"),
                legend.key.width = unit(0.5,"cm")) +
  labs(title = "spend by household size", subtitle = "OMG")

spend_view %>%  #point 
  filter(household_id != 1228) %>%
  arrange(campaign_id) %>%
  ggplot(aes(y = household_size, x = age)) +
  geom_point(data = spend_view, aes(age, household_size)) +
  labs(title = "age vs household size", subtitle = "OMG")